In addition to start/end, track sets of nodes belonging to tokens

This commit is contained in:
Vidar Holen 2022-07-22 10:29:19 -07:00
parent 3ee4419ef4
commit e7f05d662a
3 changed files with 50 additions and 30 deletions

View File

@ -168,8 +168,10 @@ data CFGParameters = CFGParameters {
data CFGResult = CFGResult { data CFGResult = CFGResult {
-- The graph itself -- The graph itself
cfGraph :: CFGraph, cfGraph :: CFGraph,
-- Map from Id to start/end node -- Map from Id to nominal start&end node (i.e. assuming normal execution without exits)
cfIdToNode :: M.Map Id (Node, Node) cfIdToRange :: M.Map Id (Node, Node),
-- A set of all nodes belonging to an Id, recursively
cfIdToNodes :: M.Map Id (S.Set Node)
} }
deriving (Show) deriving (Show)
@ -177,21 +179,24 @@ buildGraph :: CFGParameters -> Token -> CFGResult
buildGraph params root = buildGraph params root =
let let
(nextNode, base) = execRWS (buildRoot root) (newCFContext params) 0 (nextNode, base) = execRWS (buildRoot root) (newCFContext params) 0
(nodes, edges, mapping) = (nodes, edges, mapping, association) =
-- renumberTopologically $ -- renumberTopologically $
removeUnnecessaryStructuralNodes removeUnnecessaryStructuralNodes
base base
in in
CFGResult { CFGResult {
cfGraph = mkGraph nodes edges, cfGraph = mkGraph nodes edges,
cfIdToNode = M.fromList mapping cfIdToRange = M.fromList mapping,
cfIdToNodes = M.fromListWith S.union $ map (\(id, n) -> (id, S.singleton n)) association
} }
remapGraph remap (nodes, edges, mapping) = remapGraph :: M.Map Node Node -> CFW -> CFW
remapGraph remap (nodes, edges, mapping, assoc) =
( (
map (remapNode remap) nodes, map (remapNode remap) nodes,
map (remapEdge remap) edges, map (remapEdge remap) edges,
map (\(id, (a,b)) -> (id, (remapHelper remap a, remapHelper remap b))) mapping map (\(id, (a,b)) -> (id, (remapHelper remap a, remapHelper remap b))) mapping,
map (\(id, n) -> (id, remapHelper remap n)) assoc
) )
prop_testRenumbering = prop_testRenumbering =
@ -200,17 +205,20 @@ prop_testRenumbering =
before = ( before = (
[(1,s), (3,s), (4, s), (8,s)], [(1,s), (3,s), (4, s), (8,s)],
[(1,3,CFEFlow), (3,4, CFEFlow), (4,8,CFEFlow)], [(1,3,CFEFlow), (3,4, CFEFlow), (4,8,CFEFlow)],
[(Id 0, (3,4))] [(Id 0, (3,4))],
[(Id 1, 3), (Id 2, 4)]
) )
after = ( after = (
[(0,s), (1,s), (2,s), (3,s)], [(0,s), (1,s), (2,s), (3,s)],
[(0,1,CFEFlow), (1,2, CFEFlow), (2,3,CFEFlow)], [(0,1,CFEFlow), (1,2, CFEFlow), (2,3,CFEFlow)],
[(Id 0, (1,2))] [(Id 0, (1,2))],
[(Id 1, 1), (Id 2, 2)]
) )
in after == renumberGraph before in after == renumberGraph before
-- Renumber the graph for prettiness, so there are no gaps in node numbers -- Renumber the graph for prettiness, so there are no gaps in node numbers
renumberGraph g@(nodes, edges, mapping) = renumberGraph :: CFW -> CFW
renumberGraph g@(nodes, edges, mapping, assoc) =
let renumbering = M.fromList (flip zip [0..] $ sort $ map fst nodes) let renumbering = M.fromList (flip zip [0..] $ sort $ map fst nodes)
in remapGraph renumbering g in remapGraph renumbering g
@ -220,17 +228,19 @@ prop_testRenumberTopologically =
before = ( before = (
[(4,s), (2,s), (3, s)], [(4,s), (2,s), (3, s)],
[(4,2,CFEFlow), (2,3, CFEFlow)], [(4,2,CFEFlow), (2,3, CFEFlow)],
[(Id 0, (4,2))] [(Id 0, (4,2))],
[]
) )
after = ( after = (
[(0,s), (1,s), (2,s)], [(0,s), (1,s), (2,s)],
[(0,1,CFEFlow), (1,2, CFEFlow)], [(0,1,CFEFlow), (1,2, CFEFlow)],
[(Id 0, (0,1))] [(Id 0, (0,1))],
[]
) )
in after == renumberTopologically before in after == renumberTopologically before
-- Renumber the graph in topological order -- Renumber the graph in topological order
renumberTopologically g@(nodes, edges, mapping) = renumberTopologically g@(nodes, edges, mapping, assoc) =
let renumbering = M.fromList (flip zip [0..] $ topsort (mkGraph nodes edges :: CFGraph)) let renumbering = M.fromList (flip zip [0..] $ topsort (mkGraph nodes edges :: CFGraph))
in remapGraph renumbering g in remapGraph renumbering g
@ -240,12 +250,14 @@ prop_testRemoveStructural =
before = ( before = (
[(1,s), (2,s), (3, s), (4,s)], [(1,s), (2,s), (3, s), (4,s)],
[(1,2,CFEFlow), (2,3, CFEFlow), (3,4,CFEFlow)], [(1,2,CFEFlow), (2,3, CFEFlow), (3,4,CFEFlow)],
[(Id 0, (2,3))] [(Id 0, (2,3))],
[(Id 0, 3)]
) )
after = ( after = (
[(1,s), (2,s), (4,s)], [(1,s), (2,s), (4,s)],
[(1,2,CFEFlow), (2,4,CFEFlow)], [(1,2,CFEFlow), (2,4,CFEFlow)],
[(Id 0, (2,2))] [(Id 0, (2,2))],
[(Id 0, 2)]
) )
in after == removeUnnecessaryStructuralNodes before in after == removeUnnecessaryStructuralNodes before
@ -255,12 +267,13 @@ prop_testRemoveStructural =
-- Note in particular that we can't remove a structural node x in -- Note in particular that we can't remove a structural node x in
-- foo -> x -> bar , because then the pre/post-condition for tokens -- foo -> x -> bar , because then the pre/post-condition for tokens
-- previously pointing to x would be wrong. -- previously pointing to x would be wrong.
removeUnnecessaryStructuralNodes (nodes, edges, mapping) = removeUnnecessaryStructuralNodes (nodes, edges, mapping, association) =
remapGraph recursiveRemapping remapGraph recursiveRemapping
( (
filter (\(n, _) -> n `M.notMember` recursiveRemapping) nodes, filter (\(n, _) -> n `M.notMember` recursiveRemapping) nodes,
filter (`S.notMember` edgesToCollapse) edges, filter (`S.notMember` edgesToCollapse) edges,
mapping mapping,
association
) )
where where
regularEdges = filter isRegularEdge edges regularEdges = filter isRegularEdge edges
@ -305,8 +318,6 @@ remapNode m (node, label) =
newLabel = case label of newLabel = case label of
CFApplyEffects effects -> CFApplyEffects (map (remapEffect m) effects) CFApplyEffects effects -> CFApplyEffects (map (remapEffect m) effects)
CFExecuteSubshell s a b -> CFExecuteSubshell s (remapHelper m a) (remapHelper m b) CFExecuteSubshell s a b -> CFExecuteSubshell s (remapHelper m a) (remapHelper m b)
-- CFSubShellStart reason node -> CFSubShellStart reason (remapHelper m node)
_ -> label _ -> label
remapEffect map old@(IdTagged id effect) = remapEffect map old@(IdTagged id effect) =
@ -325,6 +336,7 @@ data CFContext = CFContext {
cfIsCondition :: Bool, cfIsCondition :: Bool,
cfIsFunction :: Bool, cfIsFunction :: Bool,
cfLoopStack :: [(Node, Node)], cfLoopStack :: [(Node, Node)],
cfTokenStack :: [Id],
cfExitTarget :: Maybe Node, cfExitTarget :: Maybe Node,
cfReturnTarget :: Maybe Node, cfReturnTarget :: Maybe Node,
cfParameters :: CFGParameters cfParameters :: CFGParameters
@ -333,19 +345,22 @@ newCFContext params = CFContext {
cfIsCondition = False, cfIsCondition = False,
cfIsFunction = False, cfIsFunction = False,
cfLoopStack = [], cfLoopStack = [],
cfTokenStack = [],
cfExitTarget = Nothing, cfExitTarget = Nothing,
cfReturnTarget = Nothing, cfReturnTarget = Nothing,
cfParameters = params cfParameters = params
} }
-- The monad we generate a graph in -- The monad we generate a graph in
type CFM a = RWS CFContext ([LNode CFNode], [LEdge CFEdge], [(Id, (Node, Node))]) Int a type CFM a = RWS CFContext CFW Int a
type CFW = ([LNode CFNode], [LEdge CFEdge], [(Id, (Node, Node))], [(Id, Node)])
newNode :: CFNode -> CFM Node newNode :: CFNode -> CFM Node
newNode label = do newNode label = do
n <- get n <- get
stack <- asks cfTokenStack
put (n+1) put (n+1)
tell ([(n, label)], [], []) tell ([(n, label)], [], [], map (\c -> (c, n)) stack)
return n return n
newNodeRange :: CFNode -> CFM Range newNodeRange :: CFNode -> CFM Range
@ -367,16 +382,19 @@ withFunctionScope p = do
body <- local (\c -> c { cfReturnTarget = Just end, cfIsFunction = True }) p body <- local (\c -> c { cfReturnTarget = Just end, cfIsFunction = True }) p
linkRanges [body, nodeToRange end] linkRanges [body, nodeToRange end]
-- Anything that happens recursively in f will be attributed to this id
under :: Id -> CFM a -> CFM a
under id f = local (\c -> c { cfTokenStack = id:(cfTokenStack c) }) f
nodeToRange :: Node -> Range nodeToRange :: Node -> Range
nodeToRange n = Range n n nodeToRange n = Range n n
link :: Node -> Node -> CFEdge -> CFM () link :: Node -> Node -> CFEdge -> CFM ()
link from to label = do link from to label = do
tell ([], [(from, to, label)], []) tell ([], [(from, to, label)], [], [])
registerNode :: Id -> Range -> CFM () registerNode :: Id -> Range -> CFM ()
registerNode id (Range start end) = tell ([], [], [(id, (start, end))]) registerNode id (Range start end) = tell ([], [], [(id, (start, end))], [])
linkRange :: Range -> Range -> CFM Range linkRange :: Range -> Range -> CFM Range
linkRange = linkRangeAs CFEFlow linkRange = linkRangeAs CFEFlow
@ -412,7 +430,7 @@ asCondition = withContext (\c -> c { cfIsCondition = True })
newStructuralNode = newNodeRange CFStructuralNode newStructuralNode = newNodeRange CFStructuralNode
buildRoot :: Token -> CFM Range buildRoot :: Token -> CFM Range
buildRoot t = do buildRoot t = under (getId t) $ do
entry <- newNodeRange $ CFEntryPoint "MAIN" entry <- newNodeRange $ CFEntryPoint "MAIN"
impliedExit <- newNode CFImpliedExit impliedExit <- newNode CFImpliedExit
end <- newNode CFStructuralNode end <- newNode CFStructuralNode
@ -426,7 +444,7 @@ applySingle e = CFApplyEffects [e]
-- Build the CFG. -- Build the CFG.
build :: Token -> CFM Range build :: Token -> CFM Range
build t = do build t = do
range <- build' t range <- under (getId t) $ build' t
registerNode (getId t) range registerNode (getId t) range
return range return range
where where

View File

@ -97,7 +97,8 @@ logInfo log = do
-- The result of the data flow analysis -- The result of the data flow analysis
data CFGAnalysis = CFGAnalysis { data CFGAnalysis = CFGAnalysis {
graph :: CFGraph, graph :: CFGraph,
tokenToNode :: M.Map Id (Node, Node), tokenToRange :: M.Map Id (Node, Node),
tokenToNodes :: M.Map Id (S.Set Node),
nodeToData :: M.Map Node (ProgramState, ProgramState) nodeToData :: M.Map Node (ProgramState, ProgramState)
} deriving (Show, Generic, NFData) } deriving (Show, Generic, NFData)
@ -111,13 +112,13 @@ data ProgramState = ProgramState {
-- Conveniently get the state before a token id -- Conveniently get the state before a token id
getIncomingState :: CFGAnalysis -> Id -> Maybe ProgramState getIncomingState :: CFGAnalysis -> Id -> Maybe ProgramState
getIncomingState analysis id = do getIncomingState analysis id = do
(start,end) <- M.lookup id $ tokenToNode analysis (start,end) <- M.lookup id $ tokenToRange analysis
fst <$> M.lookup start (nodeToData analysis) fst <$> M.lookup start (nodeToData analysis)
-- Conveniently get the state after a token id -- Conveniently get the state after a token id
getOutgoingState :: CFGAnalysis -> Id -> Maybe ProgramState getOutgoingState :: CFGAnalysis -> Id -> Maybe ProgramState
getOutgoingState analysis id = do getOutgoingState analysis id = do
(start,end) <- M.lookup id $ tokenToNode analysis (start,end) <- M.lookup id $ tokenToRange analysis
snd <$> M.lookup end (nodeToData analysis) snd <$> M.lookup end (nodeToData analysis)
getDataForNode analysis node = M.lookup node $ nodeToData analysis getDataForNode analysis node = M.lookup node $ nodeToData analysis
@ -1216,7 +1217,7 @@ analyzeControlFlow :: CFGParameters -> Token -> CFGAnalysis
analyzeControlFlow params t = analyzeControlFlow params t =
let let
cfg = buildGraph params t cfg = buildGraph params t
(entry, exit) = M.findWithDefault (error $ pleaseReport "Missing root") (getId t) (cfIdToNode cfg) (entry, exit) = M.findWithDefault (error $ pleaseReport "Missing root") (getId t) (cfIdToRange cfg)
in in
runST $ f cfg entry exit runST $ f cfg entry exit
where where
@ -1250,7 +1251,8 @@ analyzeControlFlow params t =
return $ nodeToData `deepseq` CFGAnalysis { return $ nodeToData `deepseq` CFGAnalysis {
graph = cfGraph cfg, graph = cfGraph cfg,
tokenToNode = cfIdToNode cfg, tokenToRange = cfIdToRange cfg,
tokenToNodes = cfIdToNodes cfg,
nodeToData = nodeToData nodeToData = nodeToData
} }

View File

@ -202,7 +202,7 @@ stringToDetailedCfgViz scriptString = cfgToGraphVizWith nodeLabel graph
idToToken = M.fromList $ execWriter $ doAnalysis (\c -> tell [(getId c, c)]) ast idToToken = M.fromList $ execWriter $ doAnalysis (\c -> tell [(getId c, c)]) ast
idToNode :: M.Map Id (Node, Node) idToNode :: M.Map Id (Node, Node)
idToNode = cfIdToNode cfgResult idToNode = cfIdToRange cfgResult
nodeToStartIds :: M.Map Node (S.Set Id) nodeToStartIds :: M.Map Node (S.Set Id)
nodeToStartIds = nodeToStartIds =