shellcheck/src/ShellCheck/CFG.hs

1269 lines
46 KiB
Haskell

{-
Copyright 2022 Vidar Holen
This file is part of ShellCheck.
https://www.shellcheck.net
ShellCheck is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
ShellCheck is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DeriveAnyClass, DeriveGeneric #-}
-- Constructs a Control Flow Graph from an AST
module ShellCheck.CFG (
CFNode (..),
CFEdge (..),
CFEffect (..),
CFStringPart (..),
CFVariableProp (..),
CFGResult (..),
CFValue (..),
CFGraph,
CFGParameters (..),
IdTagged (..),
Scope (..),
buildGraph
, ShellCheck.CFG.runTests -- STRIP
)
where
import GHC.Generics (Generic)
import ShellCheck.AST
import ShellCheck.ASTLib
import ShellCheck.Interface
import ShellCheck.Prelude
import ShellCheck.Regex
import Control.DeepSeq
import Control.Monad
import Control.Monad.Identity
import Data.List hiding (map)
import Data.Maybe
import qualified Data.Map as M
import qualified Data.Set as S
import Control.Monad.RWS.Lazy
import Data.Graph.Inductive.Graph
import Data.Graph.Inductive.Query.DFS
import Data.Graph.Inductive.Basic
import Data.Graph.Inductive.Query.Dominators
import Data.Graph.Inductive.PatriciaTree as G
import Debug.Trace -- STRIP
import Test.QuickCheck.All (forAllProperties)
import Test.QuickCheck.Test (quickCheckWithResult, stdArgs, maxSuccess)
-- Our basic Graph type
type CFGraph = G.Gr CFNode CFEdge
-- Node labels in a Control Flow Graph
data CFNode =
-- A no-op node for structural purposes
CFStructuralNode
-- A no-op for graph inspection purposes
| CFEntryPoint String
-- Drop current prefix assignments
| CFDropPrefixAssignments
-- A node with a certain effect on program state
| CFApplyEffects [IdTagged CFEffect]
-- The execution of a command or function by literal string if possible
| CFExecuteCommand (Maybe String)
-- Execute a subshell. These are represented by disjoint graphs just like
-- functions, but they don't require any form of name resolution
| CFExecuteSubshell String Node Node
-- Assignment of $?
| CFSetExitCode Id
-- The virtual 'exit' at the natural end of a subshell
| CFImpliedExit
-- An exit statement resolvable at CFG build time
| CFResolvedExit
-- An exit statement only resolvable at DFA time
| CFUnresolvedExit
-- An unreachable node, serving as the unconnected end point of a range
| CFUnreachable
-- Assignment of $!
| CFSetBackgroundPid Id
deriving (Eq, Ord, Show, Generic, NFData)
-- Edge labels in a Control Flow Graph
data CFEdge =
CFEErrExit
-- Regular control flow edge
| CFEFlow
-- An edge that a human might think exists (e.g. from a backgrounded process to its parent)
| CFEFalseFlow
-- An edge followed on exit
| CFEExit
deriving (Eq, Ord, Show, Generic, NFData)
-- Actions we track
data CFEffect =
CFSetProps Scope String (S.Set CFVariableProp)
| CFUnsetProps Scope String (S.Set CFVariableProp)
| CFReadVariable String
| CFWriteVariable String CFValue
| CFWriteGlobal String CFValue
| CFWriteLocal String CFValue
| CFWritePrefix String CFValue
| CFDefineFunction String Id Node Node
| CFUndefine String
| CFUndefineVariable String
| CFUndefineFunction String
| CFUndefineNameref String
-- Usage implies that this is an array (e.g. it's expanded with index)
| CFHintArray String
-- Operation implies that the variable will be defined (e.g. [ -z "$var" ])
| CFHintDefined String
deriving (Eq, Ord, Show, Generic, NFData)
data IdTagged a = IdTagged Id a
deriving (Eq, Ord, Show, Generic, NFData)
-- Where a variable's value comes from
data CFValue =
-- The special 'uninitialized' value
CFValueUninitialized
-- An arbitrary array value
| CFValueArray
-- An arbitrary string value
| CFValueString
-- An arbitrary integer
| CFValueInteger
-- Token 'Id' concatenates and assigns the given parts
| CFValueComputed Id [CFStringPart]
deriving (Eq, Ord, Show, Generic, NFData)
-- Simplified computed strings
data CFStringPart =
-- A known literal string value, like 'foo'
CFStringLiteral String
-- The contents of a variable, like $foo (may not be a string)
| CFStringVariable String
-- An value that is unknown but an integer
| CFStringInteger
-- An unknown string value, for things we can't handle
| CFStringUnknown
deriving (Eq, Ord, Show, Generic, NFData)
-- The properties of a variable
data CFVariableProp = CFVPExport | CFVPArray | CFVPAssociative | CFVPInteger
deriving (Eq, Ord, Show, Generic, NFData)
-- Options when generating CFG
data CFGParameters = CFGParameters {
-- Whether the last element in a pipeline runs in the current shell
cfLastpipe :: Bool,
-- Whether all elements in a pipeline count towards the exit status
cfPipefail :: Bool
}
data CFGResult = CFGResult {
-- The graph itself
cfGraph :: CFGraph,
-- Map from Id to nominal start&end node (i.e. assuming normal execution without exits)
cfIdToRange :: M.Map Id (Node, Node),
-- A set of all nodes belonging to an Id, recursively
cfIdToNodes :: M.Map Id (S.Set Node),
-- A map to nodes that the given node postdominates
cfPostDominators :: M.Map Node (S.Set Node)
}
deriving (Show, Generic, NFData)
buildGraph :: CFGParameters -> Token -> CFGResult
buildGraph params root =
let
(nextNode, base) = execRWS (buildRoot root) (newCFContext params) 0
(nodes, edges, mapping, association) =
-- renumberTopologically $
removeUnnecessaryStructuralNodes
base
idToRange = M.fromList mapping
isRealEdge (from, to, edge) = case edge of CFEFlow -> True; _ -> False
onlyRealEdges = filter isRealEdge edges
(_, mainExit) = fromJust $ M.lookup (getId root) idToRange
result = CFGResult {
cfGraph = mkGraph nodes edges,
cfIdToRange = idToRange,
cfIdToNodes = M.fromListWith S.union $ map (\(id, n) -> (id, S.singleton n)) association,
cfPostDominators = findPostDominators mainExit $ mkGraph nodes onlyRealEdges
}
in
deepseq result result
remapGraph :: M.Map Node Node -> CFW -> CFW
remapGraph remap (nodes, edges, mapping, assoc) =
(
map (remapNode remap) nodes,
map (remapEdge remap) edges,
map (\(id, (a,b)) -> (id, (remapHelper remap a, remapHelper remap b))) mapping,
map (\(id, n) -> (id, remapHelper remap n)) assoc
)
prop_testRenumbering =
let
s = CFStructuralNode
before = (
[(1,s), (3,s), (4, s), (8,s)],
[(1,3,CFEFlow), (3,4, CFEFlow), (4,8,CFEFlow)],
[(Id 0, (3,4))],
[(Id 1, 3), (Id 2, 4)]
)
after = (
[(0,s), (1,s), (2,s), (3,s)],
[(0,1,CFEFlow), (1,2, CFEFlow), (2,3,CFEFlow)],
[(Id 0, (1,2))],
[(Id 1, 1), (Id 2, 2)]
)
in after == renumberGraph before
-- Renumber the graph for prettiness, so there are no gaps in node numbers
renumberGraph :: CFW -> CFW
renumberGraph g@(nodes, edges, mapping, assoc) =
let renumbering = M.fromList (flip zip [0..] $ sort $ map fst nodes)
in remapGraph renumbering g
prop_testRenumberTopologically =
let
s = CFStructuralNode
before = (
[(4,s), (2,s), (3, s)],
[(4,2,CFEFlow), (2,3, CFEFlow)],
[(Id 0, (4,2))],
[]
)
after = (
[(0,s), (1,s), (2,s)],
[(0,1,CFEFlow), (1,2, CFEFlow)],
[(Id 0, (0,1))],
[]
)
in after == renumberTopologically before
-- Renumber the graph in topological order
renumberTopologically g@(nodes, edges, mapping, assoc) =
let renumbering = M.fromList (flip zip [0..] $ topsort (mkGraph nodes edges :: CFGraph))
in remapGraph renumbering g
prop_testRemoveStructural =
let
s = CFStructuralNode
before = (
[(1,s), (2,s), (3, s), (4,s)],
[(1,2,CFEFlow), (2,3, CFEFlow), (3,4,CFEFlow)],
[(Id 0, (2,3))],
[(Id 0, 3)]
)
after = (
[(1,s), (2,s), (4,s)],
[(1,2,CFEFlow), (2,4,CFEFlow)],
[(Id 0, (2,2))],
[(Id 0, 2)]
)
in after == removeUnnecessaryStructuralNodes before
-- Collapse structural nodes that just form long chains like x->x->x.
-- This way we can generate them with abandon, without making DFA slower.
--
-- Note in particular that we can't remove a structural node x in
-- foo -> x -> bar , because then the pre/post-condition for tokens
-- previously pointing to x would be wrong.
removeUnnecessaryStructuralNodes (nodes, edges, mapping, association) =
remapGraph recursiveRemapping
(
filter (\(n, _) -> n `M.notMember` recursiveRemapping) nodes,
filter (`S.notMember` edgesToCollapse) edges,
mapping,
association
)
where
regularEdges = filter isRegularEdge edges
inDegree = counter $ map (\(from,to,_) -> from) regularEdges
outDegree = counter $ map (\(from,to,_) -> to) regularEdges
structuralNodes = S.fromList $ map fst $ filter isStructural nodes
candidateNodes = S.filter isLinear structuralNodes
edgesToCollapse = S.fromList $ filter filterEdges regularEdges
remapping :: M.Map Node Node
remapping = foldl' (\m (new, old) -> M.insert old new m) M.empty $ map orderEdge $ S.toList edgesToCollapse
recursiveRemapping = M.fromList $ map (\c -> (c, recursiveLookup remapping c)) $ M.keys remapping
filterEdges (a,b,_) =
a `S.member` candidateNodes && b `S.member` candidateNodes
orderEdge (a,b,_) = if a < b then (a,b) else (b,a)
counter = foldl' (\map key -> M.insertWith (+) key 1 map) M.empty
isRegularEdge (_, _, CFEFlow) = True
isRegularEdge _ = False
recursiveLookup :: M.Map Node Node -> Node -> Node
recursiveLookup map node =
case M.lookup node map of
Nothing -> node
Just x -> recursiveLookup map x
isStructural (node, label) =
case label of
CFStructuralNode -> True
_ -> False
isLinear node =
M.findWithDefault 0 node inDegree == 1
&& M.findWithDefault 0 node outDegree == 1
remapNode :: M.Map Node Node -> LNode CFNode -> LNode CFNode
remapNode m (node, label) =
(remapHelper m node, newLabel)
where
newLabel = case label of
CFApplyEffects effects -> CFApplyEffects (map (remapEffect m) effects)
CFExecuteSubshell s a b -> CFExecuteSubshell s (remapHelper m a) (remapHelper m b)
_ -> label
remapEffect map old@(IdTagged id effect) =
case effect of
CFDefineFunction name id start end -> IdTagged id $ CFDefineFunction name id (remapHelper map start) (remapHelper map end)
_ -> old
remapEdge :: M.Map Node Node -> LEdge CFEdge -> LEdge CFEdge
remapEdge map (from, to, label) = (remapHelper map from, remapHelper map to, label)
remapHelper map n = M.findWithDefault n n map
data Range = Range Node Node
deriving (Eq, Show)
data CFContext = CFContext {
cfIsCondition :: Bool,
cfIsFunction :: Bool,
cfLoopStack :: [(Node, Node)],
cfTokenStack :: [Id],
cfExitTarget :: Maybe Node,
cfReturnTarget :: Maybe Node,
cfParameters :: CFGParameters
}
newCFContext params = CFContext {
cfIsCondition = False,
cfIsFunction = False,
cfLoopStack = [],
cfTokenStack = [],
cfExitTarget = Nothing,
cfReturnTarget = Nothing,
cfParameters = params
}
-- The monad we generate a graph in
type CFM a = RWS CFContext CFW Int a
type CFW = ([LNode CFNode], [LEdge CFEdge], [(Id, (Node, Node))], [(Id, Node)])
newNode :: CFNode -> CFM Node
newNode label = do
n <- get
stack <- asks cfTokenStack
put (n+1)
tell ([(n, label)], [], [], map (\c -> (c, n)) stack)
return n
newNodeRange :: CFNode -> CFM Range
-- newNodeRange label = nodeToRange <$> newNode label
newNodeRange label = nodeToRange <$> newNode label
-- Build a disjoint piece of the graph and return a CFExecuteSubshell. The Id is used purely for debug naming.
subshell :: Id -> String -> CFM Range -> CFM Range
subshell id reason p = do
start <- newNode $ CFEntryPoint $ "Subshell " ++ show id ++ ": " ++ reason
end <- newNode CFStructuralNode
middle <- local (\c -> c { cfExitTarget = Just end, cfReturnTarget = Just end}) p
linkRanges [nodeToRange start, middle, nodeToRange end]
newNodeRange $ CFExecuteSubshell reason start end
withFunctionScope p = do
end <- newNode CFStructuralNode
body <- local (\c -> c { cfReturnTarget = Just end, cfIsFunction = True }) p
linkRanges [body, nodeToRange end]
-- Anything that happens recursively in f will be attributed to this id
under :: Id -> CFM a -> CFM a
under id f = local (\c -> c { cfTokenStack = id:(cfTokenStack c) }) f
nodeToRange :: Node -> Range
nodeToRange n = Range n n
link :: Node -> Node -> CFEdge -> CFM ()
link from to label = do
tell ([], [(from, to, label)], [], [])
registerNode :: Id -> Range -> CFM ()
registerNode id (Range start end) = tell ([], [], [(id, (start, end))], [])
linkRange :: Range -> Range -> CFM Range
linkRange = linkRangeAs CFEFlow
linkRangeAs :: CFEdge -> Range -> Range -> CFM Range
linkRangeAs label (Range start mid1) (Range mid2 end) = do
link mid1 mid2 label
return (Range start end)
-- Like linkRange but without actually linking
spanRange :: Range -> Range -> Range
spanRange (Range start mid1) (Range mid2 end) = Range start end
linkRanges :: [Range] -> CFM Range
linkRanges [] = error "Empty range"
linkRanges (first:rest) = foldM linkRange first rest
sequentially :: [Token] -> CFM Range
sequentially list = do
first <- newStructuralNode
rest <- mapM build list
linkRanges (first:rest)
withContext :: (CFContext -> CFContext) -> CFM a -> CFM a
withContext = local
withReturn :: Range -> CFM a -> CFM a
withReturn _ p = p
asCondition :: CFM Range -> CFM Range
asCondition = withContext (\c -> c { cfIsCondition = True })
newStructuralNode = newNodeRange CFStructuralNode
buildRoot :: Token -> CFM Range
buildRoot t = under (getId t) $ do
entry <- newNodeRange $ CFEntryPoint "MAIN"
impliedExit <- newNode CFImpliedExit
end <- newNode CFStructuralNode
start <- local (\c -> c { cfExitTarget = Just end, cfReturnTarget = Just impliedExit}) $ build t
range <- linkRanges [entry, start, nodeToRange impliedExit, nodeToRange end]
registerNode (getId t) range
return range
applySingle e = CFApplyEffects [e]
-- Build the CFG.
build :: Token -> CFM Range
build t = do
range <- under (getId t) $ build' t
registerNode (getId t) range
return range
where
build' t = case t of
T_Annotation _ _ list -> build list
T_Script _ _ list -> do
sequentially list
TA_Assignment id op var@(TA_Variable _ name indices) rhs -> do
-- value first: (( var[x=1] = (x=2) )) runs x=1 last
value <- build rhs
subscript <- sequentially indices
read <-
if op == "="
then none
-- This is += or something
else newNodeRange $ applySingle $ IdTagged id $ CFReadVariable name
write <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $
if null indices
then CFValueInteger
else CFValueArray
linkRanges [value, subscript, read, write]
TA_Assignment id op lhs rhs -> do
-- This is likely an invalid assignment like (( 1 = 2 )), but it
-- could be e.g. x=y; (( $x = 3 )); echo $y, so expand both sides
-- without updating anything
sequentially [lhs, rhs]
TA_Binary _ _ a b -> sequentially [a,b]
TA_Expansion _ list -> sequentially list
TA_Sequence _ list -> sequentially list
TA_Parentesis _ t -> build t
TA_Trinary _ cond a b -> do
condition <- build cond
ifthen <- build a
elsethen <- build b
end <- newStructuralNode
linkRanges [condition, ifthen, end]
linkRanges [condition, elsethen, end]
TA_Variable id name indices -> do
subscript <- sequentially indices
hint <-
if null indices
then none
else nodeToRange <$> newNode (applySingle $ IdTagged id $ CFHintArray name)
read <- nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable name)
linkRanges [subscript, hint, read]
TA_Unary id op (TA_Variable _ name indices) | "--" `isInfixOf` op || "++" `isInfixOf` op -> do
subscript <- sequentially indices
read <- newNodeRange $ applySingle $ IdTagged id $ CFReadVariable name
write <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $
if null indices
then CFValueInteger
else CFValueArray
linkRanges [subscript, read, write]
TA_Unary _ _ arg -> build arg
TC_And _ SingleBracket _ lhs rhs -> do
sequentially [lhs, rhs]
TC_And _ DoubleBracket _ lhs rhs -> do
left <- build lhs
right <- build rhs
end <- newStructuralNode
-- complete
linkRanges [left, right, end]
-- short circuit
linkRange left end
-- TODO: Handle integer ops
TC_Binary _ mode str lhs rhs -> do
left <- build lhs
right <- build rhs
linkRange left right
TC_Empty {} -> newStructuralNode
TC_Group _ _ t -> build t
-- TODO: Mark as checked
TC_Nullary _ _ arg -> build arg
TC_Or _ SingleBracket _ lhs rhs -> sequentially [lhs, rhs]
TC_Or _ DoubleBracket _ lhs rhs -> do
left <- build lhs
right <- build rhs
end <- newStructuralNode
-- complete
linkRanges [left, right, end]
-- short circuit
linkRange left end
-- TODO: Handle -v, -z, -n
TC_Unary _ _ op arg -> do
build arg
T_Arithmetic id root -> do
exe <- build root
status <- newNodeRange (CFSetExitCode id)
linkRange exe status
T_AndIf _ lhs rhs -> do
left <- build lhs
right <- build rhs
end <- newStructuralNode
linkRange left right
linkRange right end
linkRange left end
T_Array _ list -> sequentially list
T_Assignment {} -> buildAssignment DefaultScope t
T_Backgrounded id body -> do
start <- newStructuralNode
fork <- subshell id "backgrounding '&'" $ build body
pid <- newNodeRange $ CFSetBackgroundPid id
status <- newNodeRange $ CFSetExitCode id
linkRange start fork
-- Add a join from the fork to warn about variable changes
linkRangeAs CFEFalseFlow fork pid
linkRanges [start, pid, status]
T_Backticked id body ->
subshell id "`..` expansion" $ sequentially body
T_Banged id cmd -> do
main <- build cmd
status <- newNodeRange (CFSetExitCode id)
linkRange main status
T_BatsTest id _ body -> do
-- These are technically set by the 'run' command, but we'll just define them
-- up front to avoid figuring out which commands named "run" belong to Bats.
status <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable "status" CFValueInteger
output <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable "output" CFValueString
main <- build body
linkRanges [status, output, main]
T_BraceExpansion _ list -> sequentially list
T_BraceGroup id body ->
sequentially body
T_CaseExpression id t [] -> build t
T_CaseExpression id t list -> do
start <- newStructuralNode
token <- build t
branches <- mapM buildBranch list
end <- newStructuralNode
let neighbors = zip branches $ tail branches
let (_, firstCond, _) = head branches
let (_, lastCond, lastBody) = last branches
linkRange start token
linkRange token firstCond
mapM_ (uncurry $ linkBranch end) neighbors
linkRange lastBody end
unless (any hasCatchAll list) $
-- There's no *) branch, so assume we can fall through
void $ linkRange token end
return $ spanRange start end
where
-- for a | b | c, evaluate each in turn and allow short circuiting
buildCond list = do
start <- newStructuralNode
conds <- mapM build list
end <- newStructuralNode
linkRanges (start:conds)
mapM_ (`linkRange` end) conds
return $ spanRange start end
buildBranch (typ, cond, body) = do
c <- buildCond cond
b <- sequentially body
linkRange c b
return (typ, c, b)
linkBranch end (typ, cond, body) (_, nextCond, nextBody) = do
-- Failure case
linkRange cond nextCond
-- After body
case typ of
CaseBreak -> linkRange body end
CaseFallThrough -> linkRange body nextBody
CaseContinue -> linkRange body nextCond
-- Find a *) if any
hasCatchAll (_,cond,_) = any isCatchAll cond
isCatchAll c = fromMaybe False $ do
pg <- wordToExactPseudoGlob c
return $ pg `pseudoGlobIsSuperSetof` [PGMany]
T_Condition id _ op -> do
cond <- build op
status <- newNodeRange $ CFSetExitCode id
linkRange cond status
T_CoProc id maybeName t -> do
let name = fromMaybe "COPROC" maybeName
start <- newStructuralNode
parent <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name CFValueArray
child <- subshell id "coproc" $ build t
end <- newNodeRange $ CFSetExitCode id
linkRange start parent
linkRange start child
linkRange parent end
linkRangeAs CFEFalseFlow child end
return $ spanRange start end
T_CoProcBody _ t -> build t
T_DollarArithmetic _ arith -> build arith
T_DollarDoubleQuoted _ list -> sequentially list
T_DollarSingleQuoted _ _ -> none
T_DollarBracket _ t -> build t
T_DollarBraced id _ t -> do
let str = concat $ oversimplify t
let modifier = getBracedModifier str
let reference = getBracedReference str
let indices = getIndexReferences str
let offsets = getOffsetReferences str
vals <- build t
others <- mapM (\x -> nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable x)) (indices ++ offsets)
deps <- linkRanges (vals:others)
read <- nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable reference)
totalRead <- linkRange deps read
if any (`isPrefixOf` modifier) ["=", ":="]
then do
optionalAssign <- newNodeRange (applySingle $ IdTagged id $ CFWriteVariable reference CFValueString)
result <- newStructuralNode
linkRange optionalAssign result
linkRange totalRead result
else return totalRead
T_DoubleQuoted _ list -> sequentially list
T_DollarExpansion id body ->
subshell id "$(..) expansion" $ sequentially body
T_Extglob _ _ list -> sequentially list
T_FdRedirect id ('{':identifier) op -> do
let name = takeWhile (/= '}') identifier
expression <- build op
rw <- newNodeRange $
if isClosingFileOp op
then applySingle $ IdTagged id $ CFReadVariable name
else applySingle $ IdTagged id $ CFWriteVariable name CFValueInteger
linkRange expression rw
T_FdRedirect _ name t -> do
build t
T_ForArithmetic _ initT condT incT bodyT -> do
init <- build initT
cond <- build condT
body <- sequentially bodyT
inc <- build incT
end <- newStructuralNode
-- Forward edges
linkRanges [init, cond, body, inc]
linkRange cond end
-- Backward edge
linkRange inc cond
return $ spanRange init end
T_ForIn id name words body -> forInHelper id name words body
-- For functions we generate an unlinked subgraph, and mention that in its definition node
T_Function id _ _ name body -> do
range <- local (\c -> c { cfExitTarget = Nothing }) $ do
entry <- newNodeRange $ CFEntryPoint $ "function " ++ name
f <- withFunctionScope $ build body
linkRange entry f
let (Range entry exit) = range
definition <- newNodeRange (applySingle $ IdTagged id $ CFDefineFunction name id entry exit)
exe <- newNodeRange (CFSetExitCode id)
linkRange definition exe
T_Glob {} -> none
T_HereString _ t -> build t
T_HereDoc _ _ _ _ list -> sequentially list
T_IfExpression id ifs elses -> do
start <- newStructuralNode
branches <- doBranches start ifs elses []
end <- newStructuralNode
mapM_ (`linkRange` end) branches
return $ spanRange start end
where
doBranches start ((conds, thens):rest) elses result = do
cond <- asCondition $ sequentially conds
action <- sequentially thens
linkRange start cond
linkRange cond action
doBranches cond rest elses (action:result)
doBranches start [] elses result = do
rest <-
if null elses
then newNodeRange (CFSetExitCode id)
else sequentially elses
linkRange start rest
return (rest:result)
T_Include _ t -> build t
T_IndexedElement _ indicesT valueT -> do
indices <- sequentially indicesT
value <- build valueT
linkRange indices value
T_IoDuplicate _ op _ -> build op
T_IoFile _ op t -> do
exp <- build t
doesntDoMuch <- build op
linkRange exp doesntDoMuch
T_Literal {} -> none
T_NormalWord _ list -> sequentially list
T_OrIf _ lhs rhs -> do
left <- build lhs
right <- build rhs
end <- newStructuralNode
linkRange left right
linkRange right end
linkRange left end
T_Pipeline _ _ [cmd] -> build cmd
T_Pipeline id _ cmds -> do
start <- newStructuralNode
hasLastpipe <- reader $ cfLastpipe . cfParameters
(leading, last) <- buildPipe hasLastpipe cmds
-- Ideally we'd let this exit code be that of the last command in the pipeline but ok
end <- newNodeRange $ CFSetExitCode id
mapM_ (linkRange start) leading
mapM_ (\c -> linkRangeAs CFEFalseFlow c end) leading
linkRanges $ [start] ++ last ++ [end]
where
buildPipe True [x] = do
last <- build x
return ([], [last])
buildPipe lp (first:rest) = do
this <- subshell id "pipeline" $ build first
(leading, last) <- buildPipe lp rest
return (this:leading, last)
buildPipe _ [] = return ([], [])
T_ProcSub id op cmds -> do
start <- newStructuralNode
body <- subshell id (op ++ "() process substitution") $ sequentially cmds
end <- newStructuralNode
linkRange start body
linkRangeAs CFEFalseFlow body end
linkRange start end
T_Redirecting _ redirs cmd -> do
-- For simple commands, this is the other way around in bash
-- We do it in this order for comound commands like { x=name; } > "$x"
redir <- sequentially redirs
body <- build cmd
linkRange redir body
T_SelectIn id name words body -> forInHelper id name words body
T_SimpleCommand id vars [] -> do
-- Vars can also be empty, as in the command "> foo"
assignments <- sequentially vars
status <- newNodeRange (CFSetExitCode id)
linkRange assignments status
T_SimpleCommand id vars list@(cmd:_) ->
handleCommand t vars list $ getUnquotedLiteral cmd
T_SingleQuoted _ _ -> none
T_SourceCommand _ originalCommand inlinedSource -> do
cmd <- build originalCommand
end <- newStructuralNode
inline <- withReturn end $ build inlinedSource
linkRange cmd inline
linkRange inline end
return $ spanRange cmd inline
T_Subshell id body -> do
main <- subshell id "explicit (..) subshell" $ sequentially body
status <- newNodeRange (CFSetExitCode id)
linkRange main status
T_UntilExpression id cond body -> whileHelper id cond body
T_WhileExpression id cond body -> whileHelper id cond body
T_CLOBBER _ -> none
T_GREATAND _ -> none
T_LESSAND _ -> none
T_LESSGREAT _ -> none
T_DGREAT _ -> none
T_Greater _ -> none
T_Less _ -> none
T_ParamSubSpecialChar _ _ -> none
x -> error ("Unimplemented: " ++ show x)
-- Still in `where` clause
forInHelper id name words body = do
entry <- newStructuralNode
expansion <- sequentially words
assignmentChoice <- newStructuralNode
assignments <-
if null words || any willSplit words
then (:[]) <$> (newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name CFValueString)
else mapM (\t -> newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $ CFValueComputed (getId t) $ tokenToParts t) words
body <- sequentially body
exit <- newStructuralNode
-- Forward edges
linkRanges [entry, expansion, assignmentChoice]
mapM_ (\t -> linkRanges [assignmentChoice, t, body]) assignments
linkRange body exit
linkRange expansion exit
-- Backward edge
linkRange body assignmentChoice
return $ spanRange entry exit
whileHelper id cond body = do
condRange <- asCondition $ sequentially cond
bodyRange <- sequentially body
end <- newNodeRange (CFSetExitCode id)
linkRange condRange bodyRange
linkRange bodyRange condRange
linkRange condRange end
handleCommand cmd vars args literalCmd = do
-- TODO: Handle assignments in declaring commands
case literalCmd of
Just "exit" -> regularExpansion vars args $ handleExit
Just "return" -> regularExpansion vars args $ handleReturn
Just "unset" -> regularExpansionWithStatus vars args $ handleUnset args
Just "declare" -> handleDeclare args
Just "local" -> handleDeclare args
Just "typeset" -> handleDeclare args
Just "printf" -> regularExpansionWithStatus vars args $ handlePrintf args
Just "wait" -> regularExpansionWithStatus vars args $ handleWait args
Just "mapfile" -> regularExpansionWithStatus vars args $ handleMapfile args
Just "readarray" -> regularExpansionWithStatus vars args $ handleMapfile args
Just "DEFINE_boolean" -> regularExpansionWithStatus vars args $ handleDEFINE args
Just "DEFINE_float" -> regularExpansionWithStatus vars args $ handleDEFINE args
Just "DEFINE_integer" -> regularExpansionWithStatus vars args $ handleDEFINE args
Just "DEFINE_string" -> regularExpansionWithStatus vars args $ handleDEFINE args
-- This will mostly behave like 'command' but ok
Just "builtin" ->
case args of
[_] -> regular
(_:newargs@(newcmd:_)) ->
handleCommand newcmd vars newargs $ getLiteralString newcmd
Just "command" ->
case args of
[_] -> regular
(_:newargs@(newcmd:_)) ->
handleOthers (getId newcmd) vars newargs $ getLiteralString newcmd
_ -> regular
where
regular = handleOthers (getId cmd) vars args literalCmd
handleExit = do
exitNode <- reader cfExitTarget
case exitNode of
Just target -> do
exit <- newNode CFResolvedExit
link exit target CFEExit
unreachable <- newNode CFUnreachable
return $ Range exit unreachable
Nothing -> do
exit <- newNode CFUnresolvedExit
unreachable <- newNode CFUnreachable
return $ Range exit unreachable
handleReturn = do
returnTarget <- reader cfReturnTarget
case returnTarget of
Nothing -> error $ pleaseReport "missing return target"
Just target -> do
ret <- newNode CFStructuralNode
link ret target CFEFlow
unreachable <- newNode CFUnreachable
return $ Range ret unreachable
handleUnset (cmd:args) = do
case () of
_ | "n" `elem` flagNames -> unsetWith CFUndefineNameref
_ | "v" `elem` flagNames -> unsetWith CFUndefineVariable
_ | "f" `elem` flagNames -> unsetWith CFUndefineFunction
_ -> unsetWith CFUndefine
where
pairs :: [(String, Token)] -- [(Flag string, token)] e.g. [("-f", t), ("", myfunc)]
pairs = map (\(str, (flag, val)) -> (str, flag)) $ fromMaybe (map (\c -> ("", (c,c))) args) $ getGnuOpts "vfn" args
(names, flags) = partition (null . fst) pairs
flagNames = map fst flags
literalNames :: [(Token, String)] -- Literal names to unset, e.g. [(myfuncToken, "myfunc")]
literalNames = mapMaybe (\(_, t) -> getLiteralString t >>= (return . (,) t)) names
-- Apply a constructor like CFUndefineVariable to each literalName, and tag with its id
unsetWith c = newNodeRange $ CFApplyEffects $ map (\(token, name) -> IdTagged (getId token) $ c name) literalNames
variableAssignRegex = mkRegex "^([_a-zA-Z][_a-zA-Z0-9]*)="
handleDeclare (cmd:args) = do
isFunc <- asks cfIsFunction
-- This is a bit of a kludge: we don't have great support for things like
-- 'declare -i x=$x' so do one round with declare x=$x, followed by declare -i x
let (evaluated, assignments, added, removed) = mconcat $ map (toEffects isFunc) args
before <- sequentially $ evaluated
assignments <- newNodeRange $ CFApplyEffects assignments
addedProps <- if null added then newStructuralNode else newNodeRange $ CFApplyEffects added
removedProps <- if null removed then newStructuralNode else newNodeRange $ CFApplyEffects removed
result <- newNodeRange $ CFSetExitCode (getId cmd)
linkRanges [before, assignments, addedProps, removedProps, result]
where
opts = map fst $ getGenericOpts args
array = "a" `elem` opts || associative
associative = "A" `elem` opts
integer = "i" `elem` opts
func = "f" `elem` opts || "F" `elem` opts
global = "g" `elem` opts
export = "x" `elem` opts
writer isFunc =
case () of
_ | global -> CFWriteGlobal
_ | isFunc -> CFWriteLocal
_ -> CFWriteVariable
scope isFunc =
case () of
_ | global -> GlobalScope
_ | isFunc -> LocalScope
_ -> DefaultScope
addedProps = S.fromList $ concat $ [
[ CFVPArray | array ],
[ CFVPInteger | integer ],
[ CFVPExport | export ],
[ CFVPAssociative | associative ]
]
removedProps = S.fromList $ concat $ [
-- Array property can't be unset
[ CFVPInteger | 'i' `elem` unsetOptions ],
[ CFVPExport | 'e' `elem` unsetOptions ]
]
toEffects isFunc (T_Assignment id mode var idx t) =
let
pre = idx ++ [t]
val = [ IdTagged id $ (writer isFunc) var $ CFValueComputed (getId t) $ [ CFStringVariable var | mode == Append ] ++ tokenToParts t ]
added = [ IdTagged id $ CFSetProps (scope isFunc) var addedProps | not $ S.null addedProps ]
removed = [ IdTagged id $ CFUnsetProps (scope isFunc) var addedProps | not $ S.null removedProps ]
in
(pre, val, added, removed)
toEffects isFunc t =
let
id = getId t
pre = [t]
literal = fromJust $ getLiteralStringExt (const $ Just "\0") t
isKnown = '\0' `notElem` literal
match = fmap head $ variableAssignRegex `matchRegex` literal
name = fromMaybe literal match
asLiteral =
IdTagged id $ (writer isFunc) name $
CFValueComputed (getId t) [ CFStringLiteral $ drop 1 $ dropWhile (/= '=') $ literal ]
asUnknown =
IdTagged id $ (writer isFunc) name $
CFValueString
added = [ IdTagged id $ CFSetProps (scope isFunc) name addedProps ]
removed = [ IdTagged id $ CFUnsetProps (scope isFunc) name removedProps ]
in
case () of
_ | not (isVariableName name) -> (pre, [], [], [])
_ | isJust match && isKnown -> (pre, [asLiteral], added, removed)
_ | isJust match -> (pre, [asUnknown], added, removed)
-- e.g. declare -i x
_ -> (pre, [], added, removed)
-- find "ia" from `define +i +a`
unsetOptions :: String
unsetOptions =
let
strings = mapMaybe getLiteralString args
plusses = filter ("+" `isPrefixOf`) strings
in
concatMap (drop 1) plusses
handlePrintf (cmd:args) =
newNodeRange $ CFApplyEffects $ maybeToList findVar
where
findVar = do
flags <- getBsdOpts "v:" args
(flag, arg) <- lookup "v" flags
name <- getLiteralString arg
return $ IdTagged (getId arg) $ CFWriteVariable name CFValueString
handleWait (cmd:args) =
newNodeRange $ CFApplyEffects $ maybeToList findVar
where
findVar = do
let flags = getGenericOpts args
(flag, arg) <- lookup "p" flags
name <- getLiteralString arg
return $ IdTagged (getId arg) $ CFWriteVariable name CFValueInteger
handleMapfile (cmd:args) =
newNodeRange $ CFApplyEffects [findVar]
where
findVar =
let (id, name) = fromMaybe (getId cmd, "MAPFILE") $ getFromArg `mplus` getFromFallback
in IdTagged id $ CFWriteVariable name CFValueArray
getFromArg = do
flags <- getGnuOpts "d:n:O:s:u:C:c:t" args
(_, arg) <- lookup "" flags
name <- getLiteralString arg
return (getId arg, name)
getFromFallback =
listToMaybe $ mapMaybe getIfVar $ reverse args
getIfVar c = do
name <- getLiteralString c
guard $ isVariableName name
return (getId c, name)
handleDEFINE (cmd:args) =
newNodeRange $ CFApplyEffects $ maybeToList findVar
where
findVar = do
name <- listToMaybe $ drop 1 args
str <- getLiteralString name
guard $ isVariableName str
return $ IdTagged (getId name) $ CFWriteVariable str CFValueString
handleOthers id vars args cmd =
regularExpansion vars args $ do
exe <- newNodeRange $ CFExecuteCommand cmd
status <- newNodeRange $ CFSetExitCode id
linkRange exe status
regularExpansion vars args p = do
args <- sequentially args
assignments <- mapM (buildAssignment PrefixScope) vars
exe <- p
dropAssignments <-
if null vars
then
return []
else do
drop <- newNodeRange CFDropPrefixAssignments
return [drop]
linkRanges $ [args] ++ assignments ++ [exe] ++ dropAssignments
regularExpansionWithStatus vars args@(cmd:_) p = do
initial <- regularExpansion vars args p
status <- newNodeRange $ CFSetExitCode (getId cmd)
linkRange initial status
none = newStructuralNode
data Scope = DefaultScope | GlobalScope | LocalScope | PrefixScope
deriving (Eq, Ord, Show, Generic, NFData)
buildAssignment scope t = do
op <- case t of
T_Assignment id mode var indices value -> do
expand <- build value
index <- sequentially indices
read <- case mode of
Append -> newNodeRange (applySingle $ IdTagged id $ CFReadVariable var)
Assign -> none
let valueType = if null indices then f id value else CFValueArray
let scoper =
case scope of
PrefixScope -> CFWritePrefix
LocalScope -> CFWriteLocal
GlobalScope -> CFWriteGlobal
DefaultScope -> CFWriteVariable
write <- newNodeRange $ applySingle $ IdTagged id $ scoper var valueType
linkRanges [expand, index, read, write]
where
f :: Id -> Token -> CFValue
f id t@T_NormalWord {} = CFValueComputed id $ [CFStringVariable var | mode == Append] ++ tokenToParts t
f id t@(T_Literal _ str) = CFValueComputed id $ [CFStringVariable var | mode == Append] ++ tokenToParts t
f _ T_Array {} = CFValueArray
registerNode (getId t) op
return op
tokenToParts t =
case t of
T_NormalWord _ list -> concatMap tokenToParts list
T_DoubleQuoted _ list -> concatMap tokenToParts list
T_SingleQuoted _ str -> [ CFStringLiteral str ]
T_Literal _ str -> [ CFStringLiteral str ]
T_DollarArithmetic {} -> [ CFStringInteger ]
T_DollarBracket {} -> [ CFStringInteger ]
T_DollarBraced _ _ list | isUnmodifiedParameterExpansion t -> [ CFStringVariable (getBracedReference $ concat $ oversimplify list) ]
-- Check if getLiteralString can handle it, if not it's unknown
_ -> [maybe CFStringUnknown CFStringLiteral $ getLiteralString t]
-- Change all subshell invocations to instead link directly to their contents.
-- This is used for producing dominator trees.
inlineSubshells :: CFGraph -> CFGraph
inlineSubshells graph = relinkedGraph
where
subshells = ufold find [] graph
find (incoming, node, label, outgoing) acc =
case label of
CFExecuteSubshell _ start end -> (node, label, start, end, incoming, outgoing):acc
_ -> acc
relinkedGraph = foldl' relink graph subshells
relink graph (node, label, start, end, incoming, outgoing) =
let
-- Link CFExecuteSubshell to the CFEntryPoint
subshellToStart = (incoming, node, label, [(CFEFlow, start)])
-- Link the subshell exit to the
endToNexts = (endIncoming, endNode, endLabel, outgoing)
(endIncoming, endNode, endLabel, _) = context graph end
in
subshellToStart & (endToNexts & graph)
findEntryNodes :: CFGraph -> [Node]
findEntryNodes graph = ufold find [] graph
where
find (incoming, node, label, _) list =
case label of
CFEntryPoint {} | null incoming -> node:list
_ -> list
findDominators main graph = asSetMap
where
inlined = inlineSubshells graph
entryNodes = main : findEntryNodes graph
asLists = concatMap (dom inlined) entryNodes
asSetMap = M.fromList $ map (\(node, list) -> (node, S.fromList list)) asLists
findTerminalNodes :: CFGraph -> [Node]
findTerminalNodes graph = ufold find [] graph
where
find (_, node, label, _) list =
case label of
CFUnresolvedExit -> node:list
CFApplyEffects effects -> f effects list
_ -> list
f [] list = list
f (IdTagged _ (CFDefineFunction _ id start end):rest) list = f rest (end:list)
f (_:rest) list = f rest list
findPostDominators :: Node -> CFGraph -> M.Map Node (S.Set Node)
findPostDominators mainexit graph = asSetMap
where
inlined = inlineSubshells graph
terminals = findTerminalNodes inlined
(incoming, _, label, outgoing) = context graph mainexit
withExitEdges = (incoming ++ map (\c -> (CFEFlow, c)) terminals, mainexit, label, outgoing) & inlined
reversed = grev withExitEdges
postDoms = dom reversed mainexit
asSetMap = M.fromList $ map (\(node, list) -> (node, S.fromList list)) postDoms
return []
runTests = $( [| $(forAllProperties) (quickCheckWithResult (stdArgs { maxSuccess = 1 }) ) |])