{- Copyright 2022 Vidar Holen This file is part of ShellCheck. https://www.shellcheck.net ShellCheck is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. ShellCheck is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE DeriveAnyClass, DeriveGeneric #-} -- Constructs a Control Flow Graph from an AST module ShellCheck.CFG ( CFNode (..), CFEdge (..), CFEffect (..), CFStringPart (..), CFVariableProp (..), CFGResult (..), CFValue (..), CFGraph, CFGParameters (..), IdTagged (..), Scope (..), buildGraph , ShellCheck.CFG.runTests -- STRIP ) where import GHC.Generics (Generic) import ShellCheck.AST import ShellCheck.ASTLib import ShellCheck.Interface import ShellCheck.Prelude import ShellCheck.Regex import Control.DeepSeq import Control.Monad import Control.Monad.Identity import Data.List hiding (map) import Data.Maybe import qualified Data.Map as M import qualified Data.Set as S import Control.Monad.RWS.Lazy import Data.Graph.Inductive.Graph import Data.Graph.Inductive.Query.DFS import Data.Graph.Inductive.Basic import Data.Graph.Inductive.Query.Dominators import Data.Graph.Inductive.PatriciaTree as G import Debug.Trace -- STRIP import Test.QuickCheck.All (forAllProperties) import Test.QuickCheck.Test (quickCheckWithResult, stdArgs, maxSuccess) -- Our basic Graph type type CFGraph = G.Gr CFNode CFEdge -- Node labels in a Control Flow Graph data CFNode = -- A no-op node for structural purposes CFStructuralNode -- A no-op for graph inspection purposes | CFEntryPoint String -- Drop current prefix assignments | CFDropPrefixAssignments -- A node with a certain effect on program state | CFApplyEffects [IdTagged CFEffect] -- The execution of a command or function by literal string if possible | CFExecuteCommand (Maybe String) -- Execute a subshell. These are represented by disjoint graphs just like -- functions, but they don't require any form of name resolution | CFExecuteSubshell String Node Node -- Assignment of $? | CFSetExitCode Id -- The virtual 'exit' at the natural end of a subshell | CFImpliedExit -- An exit statement resolvable at CFG build time | CFResolvedExit -- An exit statement only resolvable at DFA time | CFUnresolvedExit -- An unreachable node, serving as the unconnected end point of a range | CFUnreachable -- Assignment of $! | CFSetBackgroundPid Id deriving (Eq, Ord, Show, Generic, NFData) -- Edge labels in a Control Flow Graph data CFEdge = CFEErrExit -- Regular control flow edge | CFEFlow -- An edge that a human might think exists (e.g. from a backgrounded process to its parent) | CFEFalseFlow -- An edge followed on exit | CFEExit deriving (Eq, Ord, Show, Generic, NFData) -- Actions we track data CFEffect = CFSetProps Scope String (S.Set CFVariableProp) | CFUnsetProps Scope String (S.Set CFVariableProp) | CFReadVariable String | CFWriteVariable String CFValue | CFWriteGlobal String CFValue | CFWriteLocal String CFValue | CFWritePrefix String CFValue | CFDefineFunction String Id Node Node | CFUndefine String | CFUndefineVariable String | CFUndefineFunction String | CFUndefineNameref String -- Usage implies that this is an array (e.g. it's expanded with index) | CFHintArray String -- Operation implies that the variable will be defined (e.g. [ -z "$var" ]) | CFHintDefined String deriving (Eq, Ord, Show, Generic, NFData) data IdTagged a = IdTagged Id a deriving (Eq, Ord, Show, Generic, NFData) -- Where a variable's value comes from data CFValue = -- The special 'uninitialized' value CFValueUninitialized -- An arbitrary array value | CFValueArray -- An arbitrary string value | CFValueString -- An arbitrary integer | CFValueInteger -- Token 'Id' concatenates and assigns the given parts | CFValueComputed Id [CFStringPart] deriving (Eq, Ord, Show, Generic, NFData) -- Simplified computed strings data CFStringPart = -- A known literal string value, like 'foo' CFStringLiteral String -- The contents of a variable, like $foo (may not be a string) | CFStringVariable String -- An value that is unknown but an integer | CFStringInteger -- An unknown string value, for things we can't handle | CFStringUnknown deriving (Eq, Ord, Show, Generic, NFData) -- The properties of a variable data CFVariableProp = CFVPExport | CFVPArray | CFVPAssociative | CFVPInteger deriving (Eq, Ord, Show, Generic, NFData) -- Options when generating CFG data CFGParameters = CFGParameters { -- Whether the last element in a pipeline runs in the current shell cfLastpipe :: Bool, -- Whether all elements in a pipeline count towards the exit status cfPipefail :: Bool } data CFGResult = CFGResult { -- The graph itself cfGraph :: CFGraph, -- Map from Id to nominal start&end node (i.e. assuming normal execution without exits) cfIdToRange :: M.Map Id (Node, Node), -- A set of all nodes belonging to an Id, recursively cfIdToNodes :: M.Map Id (S.Set Node), -- A map to nodes that the given node postdominates cfPostDominators :: M.Map Node (S.Set Node) } deriving (Show, Generic, NFData) buildGraph :: CFGParameters -> Token -> CFGResult buildGraph params root = let (nextNode, base) = execRWS (buildRoot root) (newCFContext params) 0 (nodes, edges, mapping, association) = -- renumberTopologically $ removeUnnecessaryStructuralNodes base idToRange = M.fromList mapping isRealEdge (from, to, edge) = case edge of CFEFlow -> True; _ -> False onlyRealEdges = filter isRealEdge edges (_, mainExit) = fromJust $ M.lookup (getId root) idToRange result = CFGResult { cfGraph = mkGraph nodes edges, cfIdToRange = idToRange, cfIdToNodes = M.fromListWith S.union $ map (\(id, n) -> (id, S.singleton n)) association, cfPostDominators = findPostDominators mainExit $ mkGraph nodes onlyRealEdges } in deepseq result result remapGraph :: M.Map Node Node -> CFW -> CFW remapGraph remap (nodes, edges, mapping, assoc) = ( map (remapNode remap) nodes, map (remapEdge remap) edges, map (\(id, (a,b)) -> (id, (remapHelper remap a, remapHelper remap b))) mapping, map (\(id, n) -> (id, remapHelper remap n)) assoc ) prop_testRenumbering = let s = CFStructuralNode before = ( [(1,s), (3,s), (4, s), (8,s)], [(1,3,CFEFlow), (3,4, CFEFlow), (4,8,CFEFlow)], [(Id 0, (3,4))], [(Id 1, 3), (Id 2, 4)] ) after = ( [(0,s), (1,s), (2,s), (3,s)], [(0,1,CFEFlow), (1,2, CFEFlow), (2,3,CFEFlow)], [(Id 0, (1,2))], [(Id 1, 1), (Id 2, 2)] ) in after == renumberGraph before -- Renumber the graph for prettiness, so there are no gaps in node numbers renumberGraph :: CFW -> CFW renumberGraph g@(nodes, edges, mapping, assoc) = let renumbering = M.fromList (flip zip [0..] $ sort $ map fst nodes) in remapGraph renumbering g prop_testRenumberTopologically = let s = CFStructuralNode before = ( [(4,s), (2,s), (3, s)], [(4,2,CFEFlow), (2,3, CFEFlow)], [(Id 0, (4,2))], [] ) after = ( [(0,s), (1,s), (2,s)], [(0,1,CFEFlow), (1,2, CFEFlow)], [(Id 0, (0,1))], [] ) in after == renumberTopologically before -- Renumber the graph in topological order renumberTopologically g@(nodes, edges, mapping, assoc) = let renumbering = M.fromList (flip zip [0..] $ topsort (mkGraph nodes edges :: CFGraph)) in remapGraph renumbering g prop_testRemoveStructural = let s = CFStructuralNode before = ( [(1,s), (2,s), (3, s), (4,s)], [(1,2,CFEFlow), (2,3, CFEFlow), (3,4,CFEFlow)], [(Id 0, (2,3))], [(Id 0, 3)] ) after = ( [(1,s), (2,s), (4,s)], [(1,2,CFEFlow), (2,4,CFEFlow)], [(Id 0, (2,2))], [(Id 0, 2)] ) in after == removeUnnecessaryStructuralNodes before -- Collapse structural nodes that just form long chains like x->x->x. -- This way we can generate them with abandon, without making DFA slower. -- -- Note in particular that we can't remove a structural node x in -- foo -> x -> bar , because then the pre/post-condition for tokens -- previously pointing to x would be wrong. removeUnnecessaryStructuralNodes (nodes, edges, mapping, association) = remapGraph recursiveRemapping ( filter (\(n, _) -> n `M.notMember` recursiveRemapping) nodes, filter (`S.notMember` edgesToCollapse) edges, mapping, association ) where regularEdges = filter isRegularEdge edges inDegree = counter $ map (\(from,to,_) -> from) regularEdges outDegree = counter $ map (\(from,to,_) -> to) regularEdges structuralNodes = S.fromList $ map fst $ filter isStructural nodes candidateNodes = S.filter isLinear structuralNodes edgesToCollapse = S.fromList $ filter filterEdges regularEdges remapping :: M.Map Node Node remapping = foldl' (\m (new, old) -> M.insert old new m) M.empty $ map orderEdge $ S.toList edgesToCollapse recursiveRemapping = M.fromList $ map (\c -> (c, recursiveLookup remapping c)) $ M.keys remapping filterEdges (a,b,_) = a `S.member` candidateNodes && b `S.member` candidateNodes orderEdge (a,b,_) = if a < b then (a,b) else (b,a) counter = foldl' (\map key -> M.insertWith (+) key 1 map) M.empty isRegularEdge (_, _, CFEFlow) = True isRegularEdge _ = False recursiveLookup :: M.Map Node Node -> Node -> Node recursiveLookup map node = case M.lookup node map of Nothing -> node Just x -> recursiveLookup map x isStructural (node, label) = case label of CFStructuralNode -> True _ -> False isLinear node = M.findWithDefault 0 node inDegree == 1 && M.findWithDefault 0 node outDegree == 1 remapNode :: M.Map Node Node -> LNode CFNode -> LNode CFNode remapNode m (node, label) = (remapHelper m node, newLabel) where newLabel = case label of CFApplyEffects effects -> CFApplyEffects (map (remapEffect m) effects) CFExecuteSubshell s a b -> CFExecuteSubshell s (remapHelper m a) (remapHelper m b) _ -> label remapEffect map old@(IdTagged id effect) = case effect of CFDefineFunction name id start end -> IdTagged id $ CFDefineFunction name id (remapHelper map start) (remapHelper map end) _ -> old remapEdge :: M.Map Node Node -> LEdge CFEdge -> LEdge CFEdge remapEdge map (from, to, label) = (remapHelper map from, remapHelper map to, label) remapHelper map n = M.findWithDefault n n map data Range = Range Node Node deriving (Eq, Show) data CFContext = CFContext { cfIsCondition :: Bool, cfIsFunction :: Bool, cfLoopStack :: [(Node, Node)], cfTokenStack :: [Id], cfExitTarget :: Maybe Node, cfReturnTarget :: Maybe Node, cfParameters :: CFGParameters } newCFContext params = CFContext { cfIsCondition = False, cfIsFunction = False, cfLoopStack = [], cfTokenStack = [], cfExitTarget = Nothing, cfReturnTarget = Nothing, cfParameters = params } -- The monad we generate a graph in type CFM a = RWS CFContext CFW Int a type CFW = ([LNode CFNode], [LEdge CFEdge], [(Id, (Node, Node))], [(Id, Node)]) newNode :: CFNode -> CFM Node newNode label = do n <- get stack <- asks cfTokenStack put (n+1) tell ([(n, label)], [], [], map (\c -> (c, n)) stack) return n newNodeRange :: CFNode -> CFM Range -- newNodeRange label = nodeToRange <$> newNode label newNodeRange label = nodeToRange <$> newNode label -- Build a disjoint piece of the graph and return a CFExecuteSubshell. The Id is used purely for debug naming. subshell :: Id -> String -> CFM Range -> CFM Range subshell id reason p = do start <- newNode $ CFEntryPoint $ "Subshell " ++ show id ++ ": " ++ reason end <- newNode CFStructuralNode middle <- local (\c -> c { cfExitTarget = Just end, cfReturnTarget = Just end}) p linkRanges [nodeToRange start, middle, nodeToRange end] newNodeRange $ CFExecuteSubshell reason start end withFunctionScope p = do end <- newNode CFStructuralNode body <- local (\c -> c { cfReturnTarget = Just end, cfIsFunction = True }) p linkRanges [body, nodeToRange end] -- Anything that happens recursively in f will be attributed to this id under :: Id -> CFM a -> CFM a under id f = local (\c -> c { cfTokenStack = id:(cfTokenStack c) }) f nodeToRange :: Node -> Range nodeToRange n = Range n n link :: Node -> Node -> CFEdge -> CFM () link from to label = do tell ([], [(from, to, label)], [], []) registerNode :: Id -> Range -> CFM () registerNode id (Range start end) = tell ([], [], [(id, (start, end))], []) linkRange :: Range -> Range -> CFM Range linkRange = linkRangeAs CFEFlow linkRangeAs :: CFEdge -> Range -> Range -> CFM Range linkRangeAs label (Range start mid1) (Range mid2 end) = do link mid1 mid2 label return (Range start end) -- Like linkRange but without actually linking spanRange :: Range -> Range -> Range spanRange (Range start mid1) (Range mid2 end) = Range start end linkRanges :: [Range] -> CFM Range linkRanges [] = error "Empty range" linkRanges (first:rest) = foldM linkRange first rest sequentially :: [Token] -> CFM Range sequentially list = do first <- newStructuralNode rest <- mapM build list linkRanges (first:rest) withContext :: (CFContext -> CFContext) -> CFM a -> CFM a withContext = local withReturn :: Range -> CFM a -> CFM a withReturn _ p = p asCondition :: CFM Range -> CFM Range asCondition = withContext (\c -> c { cfIsCondition = True }) newStructuralNode = newNodeRange CFStructuralNode buildRoot :: Token -> CFM Range buildRoot t = under (getId t) $ do entry <- newNodeRange $ CFEntryPoint "MAIN" impliedExit <- newNode CFImpliedExit end <- newNode CFStructuralNode start <- local (\c -> c { cfExitTarget = Just end, cfReturnTarget = Just impliedExit}) $ build t range <- linkRanges [entry, start, nodeToRange impliedExit, nodeToRange end] registerNode (getId t) range return range applySingle e = CFApplyEffects [e] -- Build the CFG. build :: Token -> CFM Range build t = do range <- under (getId t) $ build' t registerNode (getId t) range return range where build' t = case t of T_Annotation _ _ list -> build list T_Script _ _ list -> do sequentially list TA_Assignment id op var@(TA_Variable _ name indices) rhs -> do -- value first: (( var[x=1] = (x=2) )) runs x=1 last value <- build rhs subscript <- sequentially indices read <- if op == "=" then none -- This is += or something else newNodeRange $ applySingle $ IdTagged id $ CFReadVariable name write <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $ if null indices then CFValueInteger else CFValueArray linkRanges [value, subscript, read, write] TA_Assignment id op lhs rhs -> do -- This is likely an invalid assignment like (( 1 = 2 )), but it -- could be e.g. x=y; (( $x = 3 )); echo $y, so expand both sides -- without updating anything sequentially [lhs, rhs] TA_Binary _ _ a b -> sequentially [a,b] TA_Expansion _ list -> sequentially list TA_Sequence _ list -> sequentially list TA_Parentesis _ t -> build t TA_Trinary _ cond a b -> do condition <- build cond ifthen <- build a elsethen <- build b end <- newStructuralNode linkRanges [condition, ifthen, end] linkRanges [condition, elsethen, end] TA_Variable id name indices -> do subscript <- sequentially indices hint <- if null indices then none else nodeToRange <$> newNode (applySingle $ IdTagged id $ CFHintArray name) read <- nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable name) linkRanges [subscript, hint, read] TA_Unary id op (TA_Variable _ name indices) | "--" `isInfixOf` op || "++" `isInfixOf` op -> do subscript <- sequentially indices read <- newNodeRange $ applySingle $ IdTagged id $ CFReadVariable name write <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $ if null indices then CFValueInteger else CFValueArray linkRanges [subscript, read, write] TA_Unary _ _ arg -> build arg TC_And _ SingleBracket _ lhs rhs -> do sequentially [lhs, rhs] TC_And _ DoubleBracket _ lhs rhs -> do left <- build lhs right <- build rhs end <- newStructuralNode -- complete linkRanges [left, right, end] -- short circuit linkRange left end -- TODO: Handle integer ops TC_Binary _ mode str lhs rhs -> do left <- build lhs right <- build rhs linkRange left right TC_Empty {} -> newStructuralNode TC_Group _ _ t -> build t -- TODO: Mark as checked TC_Nullary _ _ arg -> build arg TC_Or _ SingleBracket _ lhs rhs -> sequentially [lhs, rhs] TC_Or _ DoubleBracket _ lhs rhs -> do left <- build lhs right <- build rhs end <- newStructuralNode -- complete linkRanges [left, right, end] -- short circuit linkRange left end -- TODO: Handle -v, -z, -n TC_Unary _ _ op arg -> do build arg T_Arithmetic id root -> do exe <- build root status <- newNodeRange (CFSetExitCode id) linkRange exe status T_AndIf _ lhs rhs -> do left <- build lhs right <- build rhs end <- newStructuralNode linkRange left right linkRange right end linkRange left end T_Array _ list -> sequentially list T_Assignment {} -> buildAssignment DefaultScope t T_Backgrounded id body -> do start <- newStructuralNode fork <- subshell id "backgrounding '&'" $ build body pid <- newNodeRange $ CFSetBackgroundPid id status <- newNodeRange $ CFSetExitCode id linkRange start fork -- Add a join from the fork to warn about variable changes linkRangeAs CFEFalseFlow fork pid linkRanges [start, pid, status] T_Backticked id body -> subshell id "`..` expansion" $ sequentially body T_Banged id cmd -> do main <- build cmd status <- newNodeRange (CFSetExitCode id) linkRange main status T_BatsTest id _ body -> do -- These are technically set by the 'run' command, but we'll just define them -- up front to avoid figuring out which commands named "run" belong to Bats. status <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable "status" CFValueInteger output <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable "output" CFValueString main <- build body linkRanges [status, output, main] T_BraceExpansion _ list -> sequentially list T_BraceGroup id body -> sequentially body T_CaseExpression id t [] -> build t T_CaseExpression id t list -> do start <- newStructuralNode token <- build t branches <- mapM buildBranch list end <- newStructuralNode let neighbors = zip branches $ tail branches let (_, firstCond, _) = head branches let (_, lastCond, lastBody) = last branches linkRange start token linkRange token firstCond mapM_ (uncurry $ linkBranch end) neighbors linkRange lastBody end unless (any hasCatchAll list) $ -- There's no *) branch, so assume we can fall through void $ linkRange token end return $ spanRange start end where -- for a | b | c, evaluate each in turn and allow short circuiting buildCond list = do start <- newStructuralNode conds <- mapM build list end <- newStructuralNode linkRanges (start:conds) mapM_ (`linkRange` end) conds return $ spanRange start end buildBranch (typ, cond, body) = do c <- buildCond cond b <- sequentially body linkRange c b return (typ, c, b) linkBranch end (typ, cond, body) (_, nextCond, nextBody) = do -- Failure case linkRange cond nextCond -- After body case typ of CaseBreak -> linkRange body end CaseFallThrough -> linkRange body nextBody CaseContinue -> linkRange body nextCond -- Find a *) if any hasCatchAll (_,cond,_) = any isCatchAll cond isCatchAll c = fromMaybe False $ do pg <- wordToExactPseudoGlob c return $ pg `pseudoGlobIsSuperSetof` [PGMany] T_Condition id _ op -> do cond <- build op status <- newNodeRange $ CFSetExitCode id linkRange cond status T_CoProc id maybeName t -> do let name = fromMaybe "COPROC" maybeName start <- newStructuralNode parent <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name CFValueArray child <- subshell id "coproc" $ build t end <- newNodeRange $ CFSetExitCode id linkRange start parent linkRange start child linkRange parent end linkRangeAs CFEFalseFlow child end return $ spanRange start end T_CoProcBody _ t -> build t T_DollarArithmetic _ arith -> build arith T_DollarDoubleQuoted _ list -> sequentially list T_DollarSingleQuoted _ _ -> none T_DollarBracket _ t -> build t T_DollarBraced id _ t -> do let str = concat $ oversimplify t let modifier = getBracedModifier str let reference = getBracedReference str let indices = getIndexReferences str let offsets = getOffsetReferences str vals <- build t others <- mapM (\x -> nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable x)) (indices ++ offsets) deps <- linkRanges (vals:others) read <- nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable reference) totalRead <- linkRange deps read if any (`isPrefixOf` modifier) ["=", ":="] then do optionalAssign <- newNodeRange (applySingle $ IdTagged id $ CFWriteVariable reference CFValueString) result <- newStructuralNode linkRange optionalAssign result linkRange totalRead result else return totalRead T_DoubleQuoted _ list -> sequentially list T_DollarExpansion id body -> subshell id "$(..) expansion" $ sequentially body T_Extglob _ _ list -> sequentially list T_FdRedirect id ('{':identifier) op -> do let name = takeWhile (/= '}') identifier expression <- build op rw <- newNodeRange $ if isClosingFileOp op then applySingle $ IdTagged id $ CFReadVariable name else applySingle $ IdTagged id $ CFWriteVariable name CFValueInteger linkRange expression rw T_FdRedirect _ name t -> do build t T_ForArithmetic _ initT condT incT bodyT -> do init <- build initT cond <- build condT body <- sequentially bodyT inc <- build incT end <- newStructuralNode -- Forward edges linkRanges [init, cond, body, inc] linkRange cond end -- Backward edge linkRange inc cond return $ spanRange init end T_ForIn id name words body -> forInHelper id name words body -- For functions we generate an unlinked subgraph, and mention that in its definition node T_Function id _ _ name body -> do range <- local (\c -> c { cfExitTarget = Nothing }) $ do entry <- newNodeRange $ CFEntryPoint $ "function " ++ name f <- withFunctionScope $ build body linkRange entry f let (Range entry exit) = range definition <- newNodeRange (applySingle $ IdTagged id $ CFDefineFunction name id entry exit) exe <- newNodeRange (CFSetExitCode id) linkRange definition exe T_Glob {} -> none T_HereString _ t -> build t T_HereDoc _ _ _ _ list -> sequentially list T_IfExpression id ifs elses -> do start <- newStructuralNode branches <- doBranches start ifs elses [] end <- newStructuralNode mapM_ (`linkRange` end) branches return $ spanRange start end where doBranches start ((conds, thens):rest) elses result = do cond <- asCondition $ sequentially conds action <- sequentially thens linkRange start cond linkRange cond action doBranches cond rest elses (action:result) doBranches start [] elses result = do rest <- if null elses then newNodeRange (CFSetExitCode id) else sequentially elses linkRange start rest return (rest:result) T_Include _ t -> build t T_IndexedElement _ indicesT valueT -> do indices <- sequentially indicesT value <- build valueT linkRange indices value T_IoDuplicate _ op _ -> build op T_IoFile _ op t -> do exp <- build t doesntDoMuch <- build op linkRange exp doesntDoMuch T_Literal {} -> none T_NormalWord _ list -> sequentially list T_OrIf _ lhs rhs -> do left <- build lhs right <- build rhs end <- newStructuralNode linkRange left right linkRange right end linkRange left end T_Pipeline _ _ [cmd] -> build cmd T_Pipeline id _ cmds -> do start <- newStructuralNode hasLastpipe <- reader $ cfLastpipe . cfParameters (leading, last) <- buildPipe hasLastpipe cmds -- Ideally we'd let this exit code be that of the last command in the pipeline but ok end <- newNodeRange $ CFSetExitCode id mapM_ (linkRange start) leading mapM_ (\c -> linkRangeAs CFEFalseFlow c end) leading linkRanges $ [start] ++ last ++ [end] where buildPipe True [x] = do last <- build x return ([], [last]) buildPipe lp (first:rest) = do this <- subshell id "pipeline" $ build first (leading, last) <- buildPipe lp rest return (this:leading, last) buildPipe _ [] = return ([], []) T_ProcSub id op cmds -> do start <- newStructuralNode body <- subshell id (op ++ "() process substitution") $ sequentially cmds end <- newStructuralNode linkRange start body linkRangeAs CFEFalseFlow body end linkRange start end T_Redirecting _ redirs cmd -> do -- For simple commands, this is the other way around in bash -- We do it in this order for comound commands like { x=name; } > "$x" redir <- sequentially redirs body <- build cmd linkRange redir body T_SelectIn id name words body -> forInHelper id name words body T_SimpleCommand id vars [] -> do -- Vars can also be empty, as in the command "> foo" assignments <- sequentially vars status <- newNodeRange (CFSetExitCode id) linkRange assignments status T_SimpleCommand id vars list@(cmd:_) -> handleCommand t vars list $ getUnquotedLiteral cmd T_SingleQuoted _ _ -> none T_SourceCommand _ originalCommand inlinedSource -> do cmd <- build originalCommand end <- newStructuralNode inline <- withReturn end $ build inlinedSource linkRange cmd inline linkRange inline end return $ spanRange cmd inline T_Subshell id body -> do main <- subshell id "explicit (..) subshell" $ sequentially body status <- newNodeRange (CFSetExitCode id) linkRange main status T_UntilExpression id cond body -> whileHelper id cond body T_WhileExpression id cond body -> whileHelper id cond body T_CLOBBER _ -> none T_GREATAND _ -> none T_LESSAND _ -> none T_LESSGREAT _ -> none T_DGREAT _ -> none T_Greater _ -> none T_Less _ -> none T_ParamSubSpecialChar _ _ -> none x -> error ("Unimplemented: " ++ show x) -- Still in `where` clause forInHelper id name words body = do entry <- newStructuralNode expansion <- sequentially words assignmentChoice <- newStructuralNode assignments <- if null words || any willSplit words then (:[]) <$> (newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name CFValueString) else mapM (\t -> newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $ CFValueComputed (getId t) $ tokenToParts t) words body <- sequentially body exit <- newStructuralNode -- Forward edges linkRanges [entry, expansion, assignmentChoice] mapM_ (\t -> linkRanges [assignmentChoice, t, body]) assignments linkRange body exit linkRange expansion exit -- Backward edge linkRange body assignmentChoice return $ spanRange entry exit whileHelper id cond body = do condRange <- asCondition $ sequentially cond bodyRange <- sequentially body end <- newNodeRange (CFSetExitCode id) linkRange condRange bodyRange linkRange bodyRange condRange linkRange condRange end handleCommand cmd vars args literalCmd = do -- TODO: Handle assignments in declaring commands case literalCmd of Just "exit" -> regularExpansion vars args $ handleExit Just "return" -> regularExpansion vars args $ handleReturn Just "unset" -> regularExpansionWithStatus vars args $ handleUnset args Just "declare" -> handleDeclare args Just "local" -> handleDeclare args Just "typeset" -> handleDeclare args Just "printf" -> regularExpansionWithStatus vars args $ handlePrintf args Just "wait" -> regularExpansionWithStatus vars args $ handleWait args Just "mapfile" -> regularExpansionWithStatus vars args $ handleMapfile args Just "readarray" -> regularExpansionWithStatus vars args $ handleMapfile args Just "DEFINE_boolean" -> regularExpansionWithStatus vars args $ handleDEFINE args Just "DEFINE_float" -> regularExpansionWithStatus vars args $ handleDEFINE args Just "DEFINE_integer" -> regularExpansionWithStatus vars args $ handleDEFINE args Just "DEFINE_string" -> regularExpansionWithStatus vars args $ handleDEFINE args -- This will mostly behave like 'command' but ok Just "builtin" -> case args of [_] -> regular (_:newargs@(newcmd:_)) -> handleCommand newcmd vars newargs $ getLiteralString newcmd Just "command" -> case args of [_] -> regular (_:newargs@(newcmd:_)) -> handleOthers (getId newcmd) vars newargs $ getLiteralString newcmd _ -> regular where regular = handleOthers (getId cmd) vars args literalCmd handleExit = do exitNode <- reader cfExitTarget case exitNode of Just target -> do exit <- newNode CFResolvedExit link exit target CFEExit unreachable <- newNode CFUnreachable return $ Range exit unreachable Nothing -> do exit <- newNode CFUnresolvedExit unreachable <- newNode CFUnreachable return $ Range exit unreachable handleReturn = do returnTarget <- reader cfReturnTarget case returnTarget of Nothing -> error $ pleaseReport "missing return target" Just target -> do ret <- newNode CFStructuralNode link ret target CFEFlow unreachable <- newNode CFUnreachable return $ Range ret unreachable handleUnset (cmd:args) = do case () of _ | "n" `elem` flagNames -> unsetWith CFUndefineNameref _ | "v" `elem` flagNames -> unsetWith CFUndefineVariable _ | "f" `elem` flagNames -> unsetWith CFUndefineFunction _ -> unsetWith CFUndefine where pairs :: [(String, Token)] -- [(Flag string, token)] e.g. [("-f", t), ("", myfunc)] pairs = map (\(str, (flag, val)) -> (str, flag)) $ fromMaybe (map (\c -> ("", (c,c))) args) $ getGnuOpts "vfn" args (names, flags) = partition (null . fst) pairs flagNames = map fst flags literalNames :: [(Token, String)] -- Literal names to unset, e.g. [(myfuncToken, "myfunc")] literalNames = mapMaybe (\(_, t) -> getLiteralString t >>= (return . (,) t)) names -- Apply a constructor like CFUndefineVariable to each literalName, and tag with its id unsetWith c = newNodeRange $ CFApplyEffects $ map (\(token, name) -> IdTagged (getId token) $ c name) literalNames variableAssignRegex = mkRegex "^([_a-zA-Z][_a-zA-Z0-9]*)=" handleDeclare (cmd:args) = do isFunc <- asks cfIsFunction -- This is a bit of a kludge: we don't have great support for things like -- 'declare -i x=$x' so do one round with declare x=$x, followed by declare -i x let (evaluated, assignments, added, removed) = mconcat $ map (toEffects isFunc) args before <- sequentially $ evaluated assignments <- newNodeRange $ CFApplyEffects assignments addedProps <- if null added then newStructuralNode else newNodeRange $ CFApplyEffects added removedProps <- if null removed then newStructuralNode else newNodeRange $ CFApplyEffects removed result <- newNodeRange $ CFSetExitCode (getId cmd) linkRanges [before, assignments, addedProps, removedProps, result] where opts = map fst $ getGenericOpts args array = "a" `elem` opts || associative associative = "A" `elem` opts integer = "i" `elem` opts func = "f" `elem` opts || "F" `elem` opts global = "g" `elem` opts export = "x" `elem` opts writer isFunc = case () of _ | global -> CFWriteGlobal _ | isFunc -> CFWriteLocal _ -> CFWriteVariable scope isFunc = case () of _ | global -> GlobalScope _ | isFunc -> LocalScope _ -> DefaultScope addedProps = S.fromList $ concat $ [ [ CFVPArray | array ], [ CFVPInteger | integer ], [ CFVPExport | export ], [ CFVPAssociative | associative ] ] removedProps = S.fromList $ concat $ [ -- Array property can't be unset [ CFVPInteger | 'i' `elem` unsetOptions ], [ CFVPExport | 'e' `elem` unsetOptions ] ] toEffects isFunc (T_Assignment id mode var idx t) = let pre = idx ++ [t] val = [ IdTagged id $ (writer isFunc) var $ CFValueComputed (getId t) $ [ CFStringVariable var | mode == Append ] ++ tokenToParts t ] added = [ IdTagged id $ CFSetProps (scope isFunc) var addedProps | not $ S.null addedProps ] removed = [ IdTagged id $ CFUnsetProps (scope isFunc) var addedProps | not $ S.null removedProps ] in (pre, val, added, removed) toEffects isFunc t = let id = getId t pre = [t] literal = fromJust $ getLiteralStringExt (const $ Just "\0") t isKnown = '\0' `notElem` literal match = fmap head $ variableAssignRegex `matchRegex` literal name = fromMaybe literal match asLiteral = IdTagged id $ (writer isFunc) name $ CFValueComputed (getId t) [ CFStringLiteral $ drop 1 $ dropWhile (/= '=') $ literal ] asUnknown = IdTagged id $ (writer isFunc) name $ CFValueString added = [ IdTagged id $ CFSetProps (scope isFunc) name addedProps ] removed = [ IdTagged id $ CFUnsetProps (scope isFunc) name removedProps ] in case () of _ | not (isVariableName name) -> (pre, [], [], []) _ | isJust match && isKnown -> (pre, [asLiteral], added, removed) _ | isJust match -> (pre, [asUnknown], added, removed) -- e.g. declare -i x _ -> (pre, [], added, removed) -- find "ia" from `define +i +a` unsetOptions :: String unsetOptions = let strings = mapMaybe getLiteralString args plusses = filter ("+" `isPrefixOf`) strings in concatMap (drop 1) plusses handlePrintf (cmd:args) = newNodeRange $ CFApplyEffects $ maybeToList findVar where findVar = do flags <- getBsdOpts "v:" args (flag, arg) <- lookup "v" flags name <- getLiteralString arg return $ IdTagged (getId arg) $ CFWriteVariable name CFValueString handleWait (cmd:args) = newNodeRange $ CFApplyEffects $ maybeToList findVar where findVar = do let flags = getGenericOpts args (flag, arg) <- lookup "p" flags name <- getLiteralString arg return $ IdTagged (getId arg) $ CFWriteVariable name CFValueInteger handleMapfile (cmd:args) = newNodeRange $ CFApplyEffects [findVar] where findVar = let (id, name) = fromMaybe (getId cmd, "MAPFILE") $ getFromArg `mplus` getFromFallback in IdTagged id $ CFWriteVariable name CFValueArray getFromArg = do flags <- getGnuOpts "d:n:O:s:u:C:c:t" args (_, arg) <- lookup "" flags name <- getLiteralString arg return (getId arg, name) getFromFallback = listToMaybe $ mapMaybe getIfVar $ reverse args getIfVar c = do name <- getLiteralString c guard $ isVariableName name return (getId c, name) handleDEFINE (cmd:args) = newNodeRange $ CFApplyEffects $ maybeToList findVar where findVar = do name <- listToMaybe $ drop 1 args str <- getLiteralString name guard $ isVariableName str return $ IdTagged (getId name) $ CFWriteVariable str CFValueString handleOthers id vars args cmd = regularExpansion vars args $ do exe <- newNodeRange $ CFExecuteCommand cmd status <- newNodeRange $ CFSetExitCode id linkRange exe status regularExpansion vars args p = do args <- sequentially args assignments <- mapM (buildAssignment PrefixScope) vars exe <- p dropAssignments <- if null vars then return [] else do drop <- newNodeRange CFDropPrefixAssignments return [drop] linkRanges $ [args] ++ assignments ++ [exe] ++ dropAssignments regularExpansionWithStatus vars args@(cmd:_) p = do initial <- regularExpansion vars args p status <- newNodeRange $ CFSetExitCode (getId cmd) linkRange initial status none = newStructuralNode data Scope = DefaultScope | GlobalScope | LocalScope | PrefixScope deriving (Eq, Ord, Show, Generic, NFData) buildAssignment scope t = do op <- case t of T_Assignment id mode var indices value -> do expand <- build value index <- sequentially indices read <- case mode of Append -> newNodeRange (applySingle $ IdTagged id $ CFReadVariable var) Assign -> none let valueType = if null indices then f id value else CFValueArray let scoper = case scope of PrefixScope -> CFWritePrefix LocalScope -> CFWriteLocal GlobalScope -> CFWriteGlobal DefaultScope -> CFWriteVariable write <- newNodeRange $ applySingle $ IdTagged id $ scoper var valueType linkRanges [expand, index, read, write] where f :: Id -> Token -> CFValue f id t@T_NormalWord {} = CFValueComputed id $ [CFStringVariable var | mode == Append] ++ tokenToParts t f id t@(T_Literal _ str) = CFValueComputed id $ [CFStringVariable var | mode == Append] ++ tokenToParts t f _ T_Array {} = CFValueArray registerNode (getId t) op return op tokenToParts t = case t of T_NormalWord _ list -> concatMap tokenToParts list T_DoubleQuoted _ list -> concatMap tokenToParts list T_SingleQuoted _ str -> [ CFStringLiteral str ] T_Literal _ str -> [ CFStringLiteral str ] T_DollarArithmetic {} -> [ CFStringInteger ] T_DollarBracket {} -> [ CFStringInteger ] T_DollarBraced _ _ list | isUnmodifiedParameterExpansion t -> [ CFStringVariable (getBracedReference $ concat $ oversimplify list) ] -- Check if getLiteralString can handle it, if not it's unknown _ -> [maybe CFStringUnknown CFStringLiteral $ getLiteralString t] -- Change all subshell invocations to instead link directly to their contents. -- This is used for producing dominator trees. inlineSubshells :: CFGraph -> CFGraph inlineSubshells graph = relinkedGraph where subshells = ufold find [] graph find (incoming, node, label, outgoing) acc = case label of CFExecuteSubshell _ start end -> (node, label, start, end, incoming, outgoing):acc _ -> acc relinkedGraph = foldl' relink graph subshells relink graph (node, label, start, end, incoming, outgoing) = let -- Link CFExecuteSubshell to the CFEntryPoint subshellToStart = (incoming, node, label, [(CFEFlow, start)]) -- Link the subshell exit to the endToNexts = (endIncoming, endNode, endLabel, outgoing) (endIncoming, endNode, endLabel, _) = context graph end in subshellToStart & (endToNexts & graph) findEntryNodes :: CFGraph -> [Node] findEntryNodes graph = ufold find [] graph where find (incoming, node, label, _) list = case label of CFEntryPoint {} | null incoming -> node:list _ -> list findDominators main graph = asSetMap where inlined = inlineSubshells graph entryNodes = main : findEntryNodes graph asLists = concatMap (dom inlined) entryNodes asSetMap = M.fromList $ map (\(node, list) -> (node, S.fromList list)) asLists findTerminalNodes :: CFGraph -> [Node] findTerminalNodes graph = ufold find [] graph where find (_, node, label, _) list = case label of CFUnresolvedExit -> node:list CFApplyEffects effects -> f effects list _ -> list f [] list = list f (IdTagged _ (CFDefineFunction _ id start end):rest) list = f rest (end:list) f (_:rest) list = f rest list findPostDominators :: Node -> CFGraph -> M.Map Node (S.Set Node) findPostDominators mainexit graph = asSetMap where inlined = inlineSubshells graph terminals = findTerminalNodes inlined (incoming, _, label, outgoing) = context graph mainexit withExitEdges = (incoming ++ map (\c -> (CFEFlow, c)) terminals, mainexit, label, outgoing) & inlined reversed = grev withExitEdges postDoms = dom reversed mainexit asSetMap = M.fromList $ map (\(node, list) -> (node, S.fromList list)) postDoms return [] runTests = $( [| $(forAllProperties) (quickCheckWithResult (stdArgs { maxSuccess = 1 }) ) |])