diff --git a/CHANGELOG.md b/CHANGELOG.md
index fb733ce..4763ddd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,10 @@
### Fixed
### Changed
+- ShellCheck now has a Data Flow Analysis engine to make smarter decisions
+ based on control flow rather than just syntax. Existing checks will
+ gradually start using it, which may cause them to trigger differently
+ (but more accurately).
## v0.8.0 - 2021-11-06
diff --git a/ShellCheck.cabal b/ShellCheck.cabal
index 1167c82..b22b5c8 100644
--- a/ShellCheck.cabal
+++ b/ShellCheck.cabal
@@ -53,6 +53,7 @@ library
deepseq >= 1.4.0.0,
Diff >= 0.2.0,
directory >= 1.2.3.0,
+ fgl,
mtl >= 2.2.1,
filepath,
parsec,
@@ -66,11 +67,15 @@ library
ShellCheck.Analytics
ShellCheck.Analyzer
ShellCheck.AnalyzerLib
+ ShellCheck.CFG
+ ShellCheck.CFGAnalysis
ShellCheck.Checker
ShellCheck.Checks.Commands
+ ShellCheck.Checks.ControlFlow
ShellCheck.Checks.Custom
ShellCheck.Checks.ShellSupport
ShellCheck.Data
+ ShellCheck.Debug
ShellCheck.Fixer
ShellCheck.Formatter.Format
ShellCheck.Formatter.CheckStyle
@@ -82,6 +87,7 @@ library
ShellCheck.Formatter.Quiet
ShellCheck.Interface
ShellCheck.Parser
+ ShellCheck.Prelude
ShellCheck.Regex
other-modules:
Paths_ShellCheck
@@ -100,6 +106,7 @@ executable shellcheck
deepseq >= 1.4.0.0,
Diff >= 0.2.0,
directory >= 1.2.3.0,
+ fgl,
mtl >= 2.2.1,
filepath,
parsec >= 3.0,
@@ -120,6 +127,7 @@ test-suite test-shellcheck
deepseq >= 1.4.0.0,
Diff >= 0.2.0,
directory >= 1.2.3.0,
+ fgl,
mtl >= 2.2.1,
filepath,
parsec,
diff --git a/src/ShellCheck/ASTLib.hs b/src/ShellCheck/ASTLib.hs
index 7c88432..7cc5af2 100644
--- a/src/ShellCheck/ASTLib.hs
+++ b/src/ShellCheck/ASTLib.hs
@@ -21,6 +21,7 @@
module ShellCheck.ASTLib where
import ShellCheck.AST
+import ShellCheck.Prelude
import ShellCheck.Regex
import Control.Monad.Writer
@@ -138,7 +139,7 @@ getFlagsUntil stopCondition (T_SimpleCommand _ _ (_:args)) =
flag (x, '-':'-':arg) = [ (x, takeWhile (/= '=') arg) ]
flag (x, '-':args) = map (\v -> (x, [v])) args
flag (x, _) = [ (x, "") ]
-getFlagsUntil _ _ = error "Internal shellcheck error, please report! (getFlags on non-command)"
+getFlagsUntil _ _ = error $ pleaseReport "getFlags on non-command"
-- Get all flags in a GNU way, up until --
getAllFlags :: Token -> [(Token, String)]
@@ -785,5 +786,118 @@ executableFromShebang = shellFor
basename s = reverse . takeWhile (/= '/') . reverse $ s
skipFlags = dropWhile ("-" `isPrefixOf`)
+
+-- Determining if a name is a variable
+isVariableStartChar x = x == '_' || isAsciiLower x || isAsciiUpper x
+isVariableChar x = isVariableStartChar x || isDigit x
+isSpecialVariableChar = (`elem` "*@#?-$!")
+variableNameRegex = mkRegex "[_a-zA-Z][_a-zA-Z0-9]*"
+
+prop_isVariableName1 = isVariableName "_fo123"
+prop_isVariableName2 = not $ isVariableName "4"
+prop_isVariableName3 = not $ isVariableName "test: "
+isVariableName (x:r) = isVariableStartChar x && all isVariableChar r
+isVariableName _ = False
+
+
+-- Get the variable name from an expansion like ${var:-foo}
+prop_getBracedReference1 = getBracedReference "foo" == "foo"
+prop_getBracedReference2 = getBracedReference "#foo" == "foo"
+prop_getBracedReference3 = getBracedReference "#" == "#"
+prop_getBracedReference4 = getBracedReference "##" == "#"
+prop_getBracedReference5 = getBracedReference "#!" == "!"
+prop_getBracedReference6 = getBracedReference "!#" == "#"
+prop_getBracedReference7 = getBracedReference "!foo#?" == "foo"
+prop_getBracedReference8 = getBracedReference "foo-bar" == "foo"
+prop_getBracedReference9 = getBracedReference "foo:-bar" == "foo"
+prop_getBracedReference10= getBracedReference "foo: -1" == "foo"
+prop_getBracedReference11= getBracedReference "!os*" == ""
+prop_getBracedReference11b= getBracedReference "!os@" == ""
+prop_getBracedReference12= getBracedReference "!os?bar**" == ""
+prop_getBracedReference13= getBracedReference "foo[bar]" == "foo"
+getBracedReference s = fromMaybe s $
+ nameExpansion s `mplus` takeName noPrefix `mplus` getSpecial noPrefix `mplus` getSpecial s
+ where
+ noPrefix = dropPrefix s
+ dropPrefix (c:rest) | c `elem` "!#" = rest
+ dropPrefix cs = cs
+ takeName s = do
+ let name = takeWhile isVariableChar s
+ guard . not $ null name
+ return name
+ getSpecial (c:_) | isSpecialVariableChar c = return [c]
+ getSpecial _ = fail "empty or not special"
+
+ nameExpansion ('!':next:rest) = do -- e.g. ${!foo*bar*}
+ guard $ isVariableChar next -- e.g. ${!@}
+ first <- find (not . isVariableChar) rest
+ guard $ first `elem` "*?@"
+ return ""
+ nameExpansion _ = Nothing
+
+-- Get the variable modifier like /a/b in ${var/a/b}
+prop_getBracedModifier1 = getBracedModifier "foo:bar:baz" == ":bar:baz"
+prop_getBracedModifier2 = getBracedModifier "!var:-foo" == ":-foo"
+prop_getBracedModifier3 = getBracedModifier "foo[bar]" == "[bar]"
+prop_getBracedModifier4 = getBracedModifier "foo[@]@Q" == "[@]@Q"
+prop_getBracedModifier5 = getBracedModifier "@@Q" == "@Q"
+getBracedModifier s = headOrDefault "" $ do
+ let var = getBracedReference s
+ a <- dropModifier s
+ dropPrefix var a
+ where
+ dropPrefix [] t = return t
+ dropPrefix (a:b) (c:d) | a == c = dropPrefix b d
+ dropPrefix _ _ = []
+
+ dropModifier (c:rest) | c `elem` "#!" = [rest, c:rest]
+ dropModifier x = [x]
+
+-- Get the variables from indices like ["x", "y"] in ${var[x+y+1]}
+prop_getIndexReferences1 = getIndexReferences "var[x+y+1]" == ["x", "y"]
+getIndexReferences s = fromMaybe [] $ do
+ match <- matchRegex re s
+ index <- match !!! 0
+ return $ matchAllStrings variableNameRegex index
+ where
+ re = mkRegex "(\\[.*\\])"
+
+prop_getOffsetReferences1 = getOffsetReferences ":bar" == ["bar"]
+prop_getOffsetReferences2 = getOffsetReferences ":bar:baz" == ["bar", "baz"]
+prop_getOffsetReferences3 = getOffsetReferences "[foo]:bar" == ["bar"]
+prop_getOffsetReferences4 = getOffsetReferences "[foo]:bar:baz" == ["bar", "baz"]
+getOffsetReferences mods = fromMaybe [] $ do
+-- if mods start with [, then drop until ]
+ match <- matchRegex re mods
+ offsets <- match !!! 1
+ return $ matchAllStrings variableNameRegex offsets
+ where
+ re = mkRegex "^(\\[.+\\])? *:([^-=?+].*)"
+
+
+-- Returns whether a token is a parameter expansion without any modifiers.
+-- True for $var ${var} $1 $#
+-- False for ${#var} ${var[x]} ${var:-0}
+isUnmodifiedParameterExpansion t =
+ case t of
+ T_DollarBraced _ False _ -> True
+ T_DollarBraced _ _ list ->
+ let str = concat $ oversimplify list
+ in getBracedReference str == str
+ _ -> False
+
+--- A list of the element and all its parents up to the root node.
+getPath tree t = t :
+ case Map.lookup (getId t) tree of
+ Nothing -> []
+ Just parent -> getPath tree parent
+
+isClosingFileOp op =
+ case op of
+ T_IoDuplicate _ (T_GREATAND _) "-" -> True
+ T_IoDuplicate _ (T_LESSAND _) "-" -> True
+ _ -> False
+
+
return []
runTests = $quickCheckAll
diff --git a/src/ShellCheck/Analytics.hs b/src/ShellCheck/Analytics.hs
index f5ff4df..bf5d179 100644
--- a/src/ShellCheck/Analytics.hs
+++ b/src/ShellCheck/Analytics.hs
@@ -24,8 +24,10 @@ module ShellCheck.Analytics (runAnalytics, optionalChecks, ShellCheck.Analytics.
import ShellCheck.AST
import ShellCheck.ASTLib
import ShellCheck.AnalyzerLib hiding (producesComments)
+import qualified ShellCheck.CFGAnalysis as CF
import ShellCheck.Data
import ShellCheck.Parser
+import ShellCheck.Prelude
import ShellCheck.Interface
import ShellCheck.Regex
diff --git a/src/ShellCheck/Analyzer.hs b/src/ShellCheck/Analyzer.hs
index eb231c2..ff2e457 100644
--- a/src/ShellCheck/Analyzer.hs
+++ b/src/ShellCheck/Analyzer.hs
@@ -25,6 +25,7 @@ import ShellCheck.Interface
import Data.List
import Data.Monoid
import qualified ShellCheck.Checks.Commands
+import qualified ShellCheck.Checks.ControlFlow
import qualified ShellCheck.Checks.Custom
import qualified ShellCheck.Checks.ShellSupport
@@ -42,11 +43,13 @@ analyzeScript spec = newAnalysisResult {
checkers spec params = mconcat $ map ($ params) [
ShellCheck.Checks.Commands.checker spec,
+ ShellCheck.Checks.ControlFlow.checker spec,
ShellCheck.Checks.Custom.checker,
ShellCheck.Checks.ShellSupport.checker
]
optionalChecks = mconcat $ [
ShellCheck.Analytics.optionalChecks,
- ShellCheck.Checks.Commands.optionalChecks
+ ShellCheck.Checks.Commands.optionalChecks,
+ ShellCheck.Checks.ControlFlow.optionalChecks
]
diff --git a/src/ShellCheck/AnalyzerLib.hs b/src/ShellCheck/AnalyzerLib.hs
index 67c35b4..e998f2c 100644
--- a/src/ShellCheck/AnalyzerLib.hs
+++ b/src/ShellCheck/AnalyzerLib.hs
@@ -23,9 +23,11 @@ module ShellCheck.AnalyzerLib where
import ShellCheck.AST
import ShellCheck.ASTLib
+import qualified ShellCheck.CFGAnalysis as CF
import ShellCheck.Data
import ShellCheck.Interface
import ShellCheck.Parser
+import ShellCheck.Prelude
import ShellCheck.Regex
import Control.Arrow (first)
@@ -96,7 +98,9 @@ data Parameters = Parameters {
-- The root node of the AST
rootNode :: Token,
-- map from token id to start and end position
- tokenPositions :: Map.Map Id (Position, Position)
+ tokenPositions :: Map.Map Id (Position, Position),
+ -- Result from Control Flow Graph analysis (including data flow analysis)
+ cfgAnalysis :: CF.CFGAnalysis
} deriving (Show)
-- TODO: Cache results of common AST ops here
@@ -189,8 +193,9 @@ makeCommentWithFix severity id code str fix =
}
in force withFix
-makeParameters spec =
- let params = Parameters {
+makeParameters spec = params
+ where
+ params = Parameters {
rootNode = root,
shellType = fromMaybe (determineShell (asFallbackShell spec) root) $ asShellType spec,
hasSetE = containsSetE root,
@@ -215,9 +220,14 @@ makeParameters spec =
shellTypeSpecified = isJust (asShellType spec) || isJust (asFallbackShell spec),
parentMap = getParentTree root,
variableFlow = getVariableFlow params root,
- tokenPositions = asTokenPositions spec
- } in params
- where root = asScript spec
+ tokenPositions = asTokenPositions spec,
+ cfgAnalysis = CF.analyzeControlFlow cfParams root
+ }
+ cfParams = CF.CFGParameters {
+ CF.cfLastpipe = hasLastpipe params,
+ CF.cfPipefail = hasPipefail params
+ }
+ root = asScript spec
-- Does this script mention 'set -e' anywhere?
@@ -408,12 +418,6 @@ usedAsCommandName tree token = go (getId token) (tail $ getPath tree token)
getId word == currentId || getId (getCommandTokenOrThis t) == currentId
go _ _ = False
--- A list of the element and all its parents up to the root node.
-getPath tree t = t :
- case Map.lookup (getId t) tree of
- Nothing -> []
- Just parent -> getPath tree parent
-
-- Version of the above taking the map from the current context
-- Todo: give this the name "getPath"
getPathM t = do
@@ -559,12 +563,6 @@ getModifiedVariables t =
return (place, t, str, DataString SourceChecked)
_ -> Nothing
-isClosingFileOp op =
- case op of
- T_IoDuplicate _ (T_GREATAND _) "-" -> True
- T_IoDuplicate _ (T_LESSAND _) "-" -> True
- _ -> False
-
-- Consider 'export/declare -x' a reference, since it makes the var available
getReferencedVariableCommand base@(T_SimpleCommand _ _ (T_NormalWord _ (T_Literal _ x:_):rest)) =
@@ -746,13 +744,6 @@ getModifiedVariableCommand base@(T_SimpleCommand id cmdPrefix (T_NormalWord _ (T
getModifiedVariableCommand _ = []
-getIndexReferences s = fromMaybe [] $ do
- match <- matchRegex re s
- index <- match !!! 0
- return $ matchAllStrings variableNameRegex index
- where
- re = mkRegex "(\\[.*\\])"
-
-- Given a NormalWord like foo or foo[$bar], get foo.
-- Primarily used to get references for [[ -v foo[bar] ]]
getVariableForTestDashV :: Token -> Maybe String
@@ -767,18 +758,6 @@ getVariableForTestDashV t = do
-- in a non-constant expression (while filtering out foo$x[$y])
toStr _ = return "\0"
-prop_getOffsetReferences1 = getOffsetReferences ":bar" == ["bar"]
-prop_getOffsetReferences2 = getOffsetReferences ":bar:baz" == ["bar", "baz"]
-prop_getOffsetReferences3 = getOffsetReferences "[foo]:bar" == ["bar"]
-prop_getOffsetReferences4 = getOffsetReferences "[foo]:bar:baz" == ["bar", "baz"]
-getOffsetReferences mods = fromMaybe [] $ do
--- if mods start with [, then drop until ]
- match <- matchRegex re mods
- offsets <- match !!! 1
- return $ matchAllStrings variableNameRegex offsets
- where
- re = mkRegex "^(\\[.+\\])? *:([^-=?+].*)"
-
getReferencedVariables parents t =
case t of
T_DollarBraced id _ l -> let str = concat $ oversimplify l in
@@ -857,17 +836,6 @@ isConfusedGlobRegex ('*':_) = True
isConfusedGlobRegex [x,'*'] | x `notElem` "\\." = True
isConfusedGlobRegex _ = False
-isVariableStartChar x = x == '_' || isAsciiLower x || isAsciiUpper x
-isVariableChar x = isVariableStartChar x || isDigit x
-isSpecialVariableChar = (`elem` "*@#?-$!")
-variableNameRegex = mkRegex "[_a-zA-Z][_a-zA-Z0-9]*"
-
-prop_isVariableName1 = isVariableName "_fo123"
-prop_isVariableName2 = not $ isVariableName "4"
-prop_isVariableName3 = not $ isVariableName "test: "
-isVariableName (x:r) = isVariableStartChar x && all isVariableChar r
-isVariableName _ = False
-
getVariablesFromLiteralToken token =
getVariablesFromLiteral (getLiteralStringDef " " token)
@@ -880,73 +848,6 @@ getVariablesFromLiteral string =
where
variableRegex = mkRegex "\\$\\{?([A-Za-z0-9_]+)"
--- Get the variable name from an expansion like ${var:-foo}
-prop_getBracedReference1 = getBracedReference "foo" == "foo"
-prop_getBracedReference2 = getBracedReference "#foo" == "foo"
-prop_getBracedReference3 = getBracedReference "#" == "#"
-prop_getBracedReference4 = getBracedReference "##" == "#"
-prop_getBracedReference5 = getBracedReference "#!" == "!"
-prop_getBracedReference6 = getBracedReference "!#" == "#"
-prop_getBracedReference7 = getBracedReference "!foo#?" == "foo"
-prop_getBracedReference8 = getBracedReference "foo-bar" == "foo"
-prop_getBracedReference9 = getBracedReference "foo:-bar" == "foo"
-prop_getBracedReference10= getBracedReference "foo: -1" == "foo"
-prop_getBracedReference11= getBracedReference "!os*" == ""
-prop_getBracedReference11b= getBracedReference "!os@" == ""
-prop_getBracedReference12= getBracedReference "!os?bar**" == ""
-prop_getBracedReference13= getBracedReference "foo[bar]" == "foo"
-getBracedReference s = fromMaybe s $
- nameExpansion s `mplus` takeName noPrefix `mplus` getSpecial noPrefix `mplus` getSpecial s
- where
- noPrefix = dropPrefix s
- dropPrefix (c:rest) | c `elem` "!#" = rest
- dropPrefix cs = cs
- takeName s = do
- let name = takeWhile isVariableChar s
- guard . not $ null name
- return name
- getSpecial (c:_) | isSpecialVariableChar c = return [c]
- getSpecial _ = fail "empty or not special"
-
- nameExpansion ('!':next:rest) = do -- e.g. ${!foo*bar*}
- guard $ isVariableChar next -- e.g. ${!@}
- first <- find (not . isVariableChar) rest
- guard $ first `elem` "*?@"
- return ""
- nameExpansion _ = Nothing
-
-prop_getBracedModifier1 = getBracedModifier "foo:bar:baz" == ":bar:baz"
-prop_getBracedModifier2 = getBracedModifier "!var:-foo" == ":-foo"
-prop_getBracedModifier3 = getBracedModifier "foo[bar]" == "[bar]"
-prop_getBracedModifier4 = getBracedModifier "foo[@]@Q" == "[@]@Q"
-prop_getBracedModifier5 = getBracedModifier "@@Q" == "@Q"
-getBracedModifier s = headOrDefault "" $ do
- let var = getBracedReference s
- a <- dropModifier s
- dropPrefix var a
- where
- dropPrefix [] t = return t
- dropPrefix (a:b) (c:d) | a == c = dropPrefix b d
- dropPrefix _ _ = []
-
- dropModifier (c:rest) | c `elem` "#!" = [rest, c:rest]
- dropModifier x = [x]
-
--- Useful generic functions.
-
--- Get element 0 or a default. Like `head` but safe.
-headOrDefault _ (a:_) = a
-headOrDefault def _ = def
-
--- Get the last element or a default. Like `last` but safe.
-lastOrDefault def [] = def
-lastOrDefault _ list = last list
-
---- Get element n of a list, or Nothing. Like `!!` but safe.
-(!!!) list i =
- case drop i list of
- [] -> Nothing
- (r:_) -> Just r
-- Run a command if the shell is in the given list
whenShell l c = do
@@ -999,17 +900,6 @@ isBashLike params =
Dash -> False
Sh -> False
--- Returns whether a token is a parameter expansion without any modifiers.
--- True for $var ${var} $1 $#
--- False for ${#var} ${var[x]} ${var:-0}
-isUnmodifiedParameterExpansion t =
- case t of
- T_DollarBraced _ False _ -> True
- T_DollarBraced _ _ list ->
- let str = concat $ oversimplify list
- in getBracedReference str == str
- _ -> False
-
isTrueAssignmentSource c =
case c of
DataString SourceChecked -> False
diff --git a/src/ShellCheck/CFG.hs b/src/ShellCheck/CFG.hs
new file mode 100644
index 0000000..101a0d7
--- /dev/null
+++ b/src/ShellCheck/CFG.hs
@@ -0,0 +1,1147 @@
+{-
+ Copyright 2022 Vidar Holen
+
+ This file is part of ShellCheck.
+ https://www.shellcheck.net
+
+ ShellCheck is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ShellCheck is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+-}
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE DeriveAnyClass, DeriveGeneric #-}
+
+-- Constructs a Control Flow Graph from an AST
+module ShellCheck.CFG (
+ CFNode (..),
+ CFEdge (..),
+ CFEffect (..),
+ CFStringPart (..),
+ CFVariableProp (..),
+ CFGResult (..),
+ CFValue (..),
+ CFGraph,
+ CFGParameters (..),
+ IdTagged (..),
+ buildGraph
+ , ShellCheck.CFG.runTests -- STRIP
+ )
+ where
+
+import GHC.Generics (Generic)
+import ShellCheck.AST
+import ShellCheck.ASTLib
+import ShellCheck.Interface
+import ShellCheck.Prelude
+import ShellCheck.Regex
+import Control.DeepSeq
+import Control.Monad
+import Control.Monad.Identity
+import Data.List hiding (map)
+import Data.Maybe
+import qualified Data.Map as M
+import qualified Data.Set as S
+import Control.Monad.RWS.Lazy
+import Data.Graph.Inductive.Graph
+import Data.Graph.Inductive.Query.DFS
+import Data.Graph.Inductive.PatriciaTree as G
+import Debug.Trace -- STRIP
+
+import Test.QuickCheck.All (forAllProperties)
+import Test.QuickCheck.Test (quickCheckWithResult, stdArgs, maxSuccess)
+
+
+-- Our basic Graph type
+type CFGraph = G.Gr CFNode CFEdge
+
+-- Node labels in a Control Flow Graph
+data CFNode =
+ -- A no-op node for structural purposes
+ CFStructuralNode
+ -- A no-op for graph inspection purposes
+ | CFEntryPoint String
+ -- Drop current prefix assignments
+ | CFDropPrefixAssignments
+ -- A node with a certain effect on program state
+ | CFApplyEffects [IdTagged CFEffect]
+ -- The execution of a command or function by literal string if possible
+ | CFExecuteCommand (Maybe String)
+ -- Execute a subshell. These are represented by disjoint graphs just like
+ -- functions, but they don't require any form of name resolution
+ | CFExecuteSubshell String Node Node
+ -- Assignment of $?
+ | CFSetExitCode Id
+ -- The virtual 'exit' at the natural end of a subshell
+ | CFImpliedExit
+ -- An exit statement resolvable at CFG build time
+ | CFResolvedExit
+ -- An exit statement only resolvable at DFA time
+ | CFUnresolvedExit
+ -- An unreachable node, serving as the unconnected end point of a range
+ | CFUnreachable
+ -- Assignment of $!
+ | CFSetBackgroundPid Id
+ deriving (Eq, Ord, Show, Generic, NFData)
+
+-- Edge labels in a Control Flow Graph
+data CFEdge =
+ CFEErrExit
+ -- Regular control flow edge
+ | CFEFlow
+ -- An edge that a human might think exists (e.g. from a backgrounded process to its parent)
+ | CFEFalseFlow
+ -- An edge followed on exit
+ | CFEExit
+ deriving (Eq, Ord, Show, Generic, NFData)
+
+-- Actions we track
+data CFEffect =
+ CFModifyProps String [CFVariableProp]
+ | CFReadVariable String
+ | CFWriteVariable String CFValue
+ | CFWriteGlobal String CFValue
+ | CFWriteLocal String CFValue
+ | CFWritePrefix String CFValue
+ | CFDefineFunction String Id Node Node
+ | CFUndefine String
+ | CFUndefineVariable String
+ | CFUndefineFunction String
+ | CFUndefineNameref String
+ -- Usage implies that this is an array (e.g. it's expanded with index)
+ | CFHintArray String
+ -- Operation implies that the variable will be defined (e.g. [ -z "$var" ])
+ | CFHintDefined String
+ deriving (Eq, Ord, Show, Generic, NFData)
+
+data IdTagged a = IdTagged Id a
+ deriving (Eq, Ord, Show, Generic, NFData)
+
+-- Where a variable's value comes from
+data CFValue =
+ -- The special 'uninitialized' value
+ CFValueUninitialized
+ -- An arbitrary array value
+ | CFValueArray
+ -- An arbitrary string value
+ | CFValueString
+ -- An arbitrary integer
+ | CFValueInteger
+ -- Token 'Id' concatenates and assigns the given parts
+ | CFValueComputed Id [CFStringPart]
+ deriving (Eq, Ord, Show, Generic, NFData)
+
+-- Simplified computed strings
+data CFStringPart =
+ -- A known literal string value, like 'foo'
+ CFStringLiteral String
+ -- The contents of a variable, like $foo
+ | CFStringVariable String
+ -- An value that is unknown but an integer
+ | CFStringInteger
+ -- An unknown string value, for things we can't handle
+ | CFStringUnknown
+ deriving (Eq, Ord, Show, Generic, NFData)
+
+-- The properties of a variable
+data CFVariableProp = CFVPExport | CFVPArray
+ deriving (Eq, Ord, Show, Generic, NFData)
+
+-- Options when generating CFG
+data CFGParameters = CFGParameters {
+ -- Whether the last element in a pipeline runs in the current shell
+ cfLastpipe :: Bool,
+ -- Whether all elements in a pipeline count towards the exit status
+ cfPipefail :: Bool
+}
+
+data CFGResult = CFGResult {
+ -- The graph itself
+ cfGraph :: CFGraph,
+ -- Map from Id to start/end node
+ cfIdToNode :: M.Map Id (Node, Node)
+}
+ deriving (Show)
+
+buildGraph :: CFGParameters -> Token -> CFGResult
+buildGraph params root =
+ let
+ (nextNode, base) = execRWS (buildRoot root) (newCFContext params) 0
+ (nodes, edges, mapping) =
+-- renumberTopologically $
+ removeUnnecessaryStructuralNodes
+ base
+ in
+ CFGResult {
+ cfGraph = mkGraph nodes edges,
+ cfIdToNode = M.fromList mapping
+ }
+
+remapGraph remap (nodes, edges, mapping) =
+ (
+ map (remapNode remap) nodes,
+ map (remapEdge remap) edges,
+ map (\(id, (a,b)) -> (id, (remapHelper remap a, remapHelper remap b))) mapping
+ )
+
+prop_testRenumbering =
+ let
+ s = CFStructuralNode
+ before = (
+ [(1,s), (3,s), (4, s), (8,s)],
+ [(1,3,CFEFlow), (3,4, CFEFlow), (4,8,CFEFlow)],
+ [(Id 0, (3,4))]
+ )
+ after = (
+ [(0,s), (1,s), (2,s), (3,s)],
+ [(0,1,CFEFlow), (1,2, CFEFlow), (2,3,CFEFlow)],
+ [(Id 0, (1,2))]
+ )
+ in after == renumberGraph before
+
+-- Renumber the graph for prettiness, so there are no gaps in node numbers
+renumberGraph g@(nodes, edges, mapping) =
+ let renumbering = M.fromList (flip zip [0..] $ sort $ map fst nodes)
+ in remapGraph renumbering g
+
+prop_testRenumberTopologically =
+ let
+ s = CFStructuralNode
+ before = (
+ [(4,s), (2,s), (3, s)],
+ [(4,2,CFEFlow), (2,3, CFEFlow)],
+ [(Id 0, (4,2))]
+ )
+ after = (
+ [(0,s), (1,s), (2,s)],
+ [(0,1,CFEFlow), (1,2, CFEFlow)],
+ [(Id 0, (0,1))]
+ )
+ in after == renumberTopologically before
+
+-- Renumber the graph in topological order
+renumberTopologically g@(nodes, edges, mapping) =
+ let renumbering = M.fromList (flip zip [0..] $ topsort (mkGraph nodes edges :: CFGraph))
+ in remapGraph renumbering g
+
+prop_testRemoveStructural =
+ let
+ s = CFStructuralNode
+ before = (
+ [(1,s), (2,s), (3, s), (4,s)],
+ [(1,2,CFEFlow), (2,3, CFEFlow), (3,4,CFEFlow)],
+ [(Id 0, (2,3))]
+ )
+ after = (
+ [(1,s), (2,s), (4,s)],
+ [(1,2,CFEFlow), (2,4,CFEFlow)],
+ [(Id 0, (2,2))]
+ )
+ in after == removeUnnecessaryStructuralNodes before
+
+-- Collapse structural nodes that just form long chains like x->x->x.
+-- This way we can generate them with abandon, without making DFA slower.
+--
+-- Note in particular that we can't remove a structural node x in
+-- foo -> x -> bar , because then the pre/post-condition for tokens
+-- previously pointing to x would be wrong.
+removeUnnecessaryStructuralNodes (nodes, edges, mapping) =
+ remapGraph recursiveRemapping
+ (
+ filter (\(n, _) -> n `M.notMember` recursiveRemapping) nodes,
+ filter (`S.notMember` edgesToCollapse) edges,
+ mapping
+ )
+ where
+ regularEdges = filter isRegularEdge edges
+ inDegree = counter $ map (\(from,to,_) -> from) regularEdges
+ outDegree = counter $ map (\(from,to,_) -> to) regularEdges
+ structuralNodes = S.fromList $ map fst $ filter isStructural nodes
+ candidateNodes = S.filter isLinear structuralNodes
+ edgesToCollapse = S.fromList $ filter filterEdges regularEdges
+
+ remapping :: M.Map Node Node
+ remapping = foldl' (\m (new, old) -> M.insert old new m) M.empty $ map orderEdge $ S.toList edgesToCollapse
+ recursiveRemapping = M.fromList $ map (\c -> (c, recursiveLookup remapping c)) $ M.keys remapping
+
+ filterEdges (a,b,_) =
+ a `S.member` candidateNodes && b `S.member` candidateNodes
+
+ orderEdge (a,b,_) = if a < b then (a,b) else (b,a)
+ counter = foldl' (\map key -> M.insertWith (+) key 1 map) M.empty
+ isRegularEdge (_, _, CFEFlow) = True
+ isRegularEdge _ = False
+
+ recursiveLookup :: M.Map Node Node -> Node -> Node
+ recursiveLookup map node =
+ case M.lookup node map of
+ Nothing -> node
+ Just x -> recursiveLookup map x
+
+ isStructural (node, label) =
+ case label of
+ CFStructuralNode -> True
+ _ -> False
+
+ isLinear node =
+ M.findWithDefault 0 node inDegree == 1
+ && M.findWithDefault 0 node outDegree == 1
+
+
+remapNode :: M.Map Node Node -> LNode CFNode -> LNode CFNode
+remapNode m (node, label) =
+ (remapHelper m node, newLabel)
+ where
+ newLabel = case label of
+ CFApplyEffects effects -> CFApplyEffects (map (remapEffect m) effects)
+ CFExecuteSubshell s a b -> CFExecuteSubshell s (remapHelper m a) (remapHelper m b)
+-- CFSubShellStart reason node -> CFSubShellStart reason (remapHelper m node)
+
+ _ -> label
+
+remapEffect map old@(IdTagged id effect) =
+ case effect of
+ CFDefineFunction name id start end -> IdTagged id $ CFDefineFunction name id (remapHelper map start) (remapHelper map end)
+ _ -> old
+
+remapEdge :: M.Map Node Node -> LEdge CFEdge -> LEdge CFEdge
+remapEdge map (from, to, label) = (remapHelper map from, remapHelper map to, label)
+remapHelper map n = M.findWithDefault n n map
+
+data Range = Range Node Node
+ deriving (Eq, Show)
+
+data CFContext = CFContext {
+ cfIsCondition :: Bool,
+ cfIsFunction :: Bool,
+ cfLoopStack :: [(Node, Node)],
+ cfExitTarget :: Maybe Node,
+ cfReturnTarget :: Maybe Node,
+ cfParameters :: CFGParameters
+}
+newCFContext params = CFContext {
+ cfIsCondition = False,
+ cfIsFunction = False,
+ cfLoopStack = [],
+ cfExitTarget = Nothing,
+ cfReturnTarget = Nothing,
+ cfParameters = params
+}
+
+-- The monad we generate a graph in
+type CFM a = RWS CFContext ([LNode CFNode], [LEdge CFEdge], [(Id, (Node, Node))]) Int a
+
+newNode :: CFNode -> CFM Node
+newNode label = do
+ n <- get
+ put (n+1)
+ tell ([(n, label)], [], [])
+ return n
+
+newNodeRange :: CFNode -> CFM Range
+-- newNodeRange label = nodeToRange <$> newNode label
+newNodeRange label = nodeToRange <$> newNode label
+
+-- Build a disjoint piece of the graph and return a CFExecuteSubshell. The Id is used purely for debug naming.
+subshell :: Id -> String -> CFM Range -> CFM Range
+subshell id reason p = do
+ start <- newNode $ CFEntryPoint $ "Subshell " ++ show id ++ ": " ++ reason
+ end <- newNode CFStructuralNode
+ middle <- local (\c -> c { cfExitTarget = Just end, cfReturnTarget = Just end}) p
+ linkRanges [nodeToRange start, middle, nodeToRange end]
+ newNodeRange $ CFExecuteSubshell reason start end
+
+
+withFunctionScope p = do
+ end <- newNode CFStructuralNode
+ body <- local (\c -> c { cfReturnTarget = Just end, cfIsFunction = True }) p
+ linkRanges [body, nodeToRange end]
+
+
+nodeToRange :: Node -> Range
+nodeToRange n = Range n n
+
+link :: Node -> Node -> CFEdge -> CFM ()
+link from to label = do
+ tell ([], [(from, to, label)], [])
+
+registerNode :: Id -> Range -> CFM ()
+registerNode id (Range start end) = tell ([], [], [(id, (start, end))])
+
+linkRange :: Range -> Range -> CFM Range
+linkRange = linkRangeAs CFEFlow
+
+linkRangeAs :: CFEdge -> Range -> Range -> CFM Range
+linkRangeAs label (Range start mid1) (Range mid2 end) = do
+ link mid1 mid2 label
+ return (Range start end)
+
+-- Like linkRange but without actually linking
+spanRange :: Range -> Range -> Range
+spanRange (Range start mid1) (Range mid2 end) = Range start end
+
+linkRanges :: [Range] -> CFM Range
+linkRanges [] = error "Empty range"
+linkRanges (first:rest) = foldM linkRange first rest
+
+sequentially :: [Token] -> CFM Range
+sequentially list = do
+ first <- newStructuralNode
+ rest <- mapM build list
+ linkRanges (first:rest)
+
+withContext :: (CFContext -> CFContext) -> CFM a -> CFM a
+withContext = local
+
+withReturn :: Range -> CFM a -> CFM a
+withReturn _ p = p
+
+asCondition :: CFM Range -> CFM Range
+asCondition = withContext (\c -> c { cfIsCondition = True })
+
+newStructuralNode = newNodeRange CFStructuralNode
+
+buildRoot :: Token -> CFM Range
+buildRoot t = do
+ entry <- newNodeRange $ CFEntryPoint "MAIN"
+ impliedExit <- newNode CFImpliedExit
+ end <- newNode CFStructuralNode
+ start <- local (\c -> c { cfExitTarget = Just end, cfReturnTarget = Just impliedExit}) $ build t
+ range <- linkRanges [entry, start, nodeToRange impliedExit, nodeToRange end]
+ registerNode (getId t) range
+ return range
+
+applySingle e = CFApplyEffects [e]
+
+-- Build the CFG.
+build :: Token -> CFM Range
+build t = do
+ range <- build' t
+ registerNode (getId t) range
+ return range
+ where
+ build' t = case t of
+ T_Annotation _ _ list -> build list
+ T_Script _ _ list -> do
+ sequentially list
+
+ TA_Assignment id op var@(TA_Variable _ name indices) rhs -> do
+ -- value first: (( var[x=1] = (x=2) )) runs x=1 last
+ value <- build rhs
+ subscript <- sequentially indices
+ read <-
+ if op == "="
+ then none
+ -- This is += or something
+ else newNodeRange $ applySingle $ IdTagged id $ CFReadVariable name
+
+ write <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $
+ if null indices
+ then CFValueInteger
+ else CFValueArray
+
+ linkRanges [value, subscript, read, write]
+
+ TA_Assignment id op lhs rhs -> do
+ -- This is likely an invalid assignment like (( 1 = 2 )), but it
+ -- could be e.g. x=y; (( $x = 3 )); echo $y, so expand both sides
+ -- without updating anything
+ sequentially [lhs, rhs]
+
+ TA_Binary _ _ a b -> sequentially [a,b]
+ TA_Expansion _ list -> sequentially list
+ TA_Sequence _ list -> sequentially list
+
+ TA_Trinary _ cond a b -> do
+ condition <- build cond
+ ifthen <- build a
+ elsethen <- build b
+ end <- newStructuralNode
+ linkRanges [condition, ifthen, end]
+ linkRanges [condition, elsethen, end]
+
+ TA_Variable id name indices -> do
+ subscript <- sequentially indices
+ hint <-
+ if null indices
+ then none
+ else nodeToRange <$> newNode (applySingle $ IdTagged id $ CFHintArray name)
+ read <- nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable name)
+ linkRanges [subscript, hint, read]
+
+ TA_Unary id op (TA_Variable _ name indices) | "--" `isInfixOf` op || "++" `isInfixOf` op -> do
+ subscript <- sequentially indices
+ read <- newNodeRange $ applySingle $ IdTagged id $ CFReadVariable name
+ write <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $
+ if null indices
+ then CFValueInteger
+ else CFValueArray
+ linkRanges [subscript, read, write]
+ TA_Unary _ _ arg -> build arg
+
+ TC_And _ SingleBracket _ lhs rhs -> do
+ sequentially [lhs, rhs]
+
+ TC_And _ DoubleBracket _ lhs rhs -> do
+ left <- build lhs
+ right <- build rhs
+ end <- newStructuralNode
+ -- complete
+ linkRanges [left, right, end]
+ -- short circuit
+ linkRange left end
+
+ -- TODO: Handle integer ops
+ TC_Binary _ mode str lhs rhs -> do
+ left <- build lhs
+ right <- build rhs
+ linkRange left right
+
+ TC_Empty {} -> newStructuralNode
+
+ TC_Group _ _ t -> build t
+
+ -- TODO: Mark as checked
+ TC_Nullary _ _ arg -> build arg
+
+ TC_Or _ SingleBracket _ lhs rhs -> sequentially [lhs, rhs]
+
+ TC_Or _ DoubleBracket _ lhs rhs -> do
+ left <- build lhs
+ right <- build rhs
+ end <- newStructuralNode
+ -- complete
+ linkRanges [left, right, end]
+ -- short circuit
+ linkRange left end
+
+ -- TODO: Handle -v, -z, -n
+ TC_Unary _ _ op arg -> do
+ build arg
+
+ T_Arithmetic id root -> do
+ exe <- build root
+ status <- newNodeRange (CFSetExitCode id)
+ linkRange exe status
+
+ T_AndIf _ lhs rhs -> do
+ left <- build lhs
+ right <- build rhs
+ end <- newStructuralNode
+ linkRange left right
+ linkRange right end
+ linkRange left end
+
+ T_Array _ list -> sequentially list
+
+ T_Assignment {} -> buildAssignment DefaultScope t
+
+ T_Backgrounded id body -> do
+ start <- newStructuralNode
+ fork <- subshell id "backgrounding '&'" $ build body
+ pid <- newNodeRange $ CFSetBackgroundPid id
+ status <- newNodeRange $ CFSetExitCode id
+
+ linkRange start fork
+ -- Add a join from the fork to warn about variable changes
+ linkRangeAs CFEFalseFlow fork pid
+ linkRanges [start, pid, status]
+
+ T_Backticked id body ->
+ subshell id "`..` expansion" $ sequentially body
+
+ T_Banged id cmd -> do
+ main <- build cmd
+ status <- newNodeRange (CFSetExitCode id)
+ linkRange main status
+
+ T_BatsTest id _ body -> do
+ -- These are technically set by the 'run' command, but we'll just define them
+ -- up front to avoid figuring out which commands named "run" belong to Bats.
+ status <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable "status" CFValueInteger
+ output <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable "output" CFValueString
+ main <- build body
+ linkRanges [status, output, main]
+
+ T_BraceExpansion _ list -> sequentially list
+
+ T_BraceGroup id body ->
+ sequentially body
+
+ T_CaseExpression id t [] -> build t
+
+ T_CaseExpression id t list -> do
+ start <- newStructuralNode
+ token <- build t
+ branches <- mapM buildBranch list
+ end <- newStructuralNode
+
+ let neighbors = zip branches $ tail branches
+ let (_, firstCond, _) = head branches
+ let (_, lastCond, lastBody) = last branches
+
+ linkRange start token
+ linkRange token firstCond
+ mapM_ (uncurry $ linkBranch end) neighbors
+ linkRange lastBody end
+
+ unless (any hasCatchAll list) $
+ -- There's no *) branch, so assume we can fall through
+ void $ linkRange token end
+
+ return $ spanRange start end
+
+ where
+ -- for a | b | c, evaluate each in turn and allow short circuiting
+ buildCond list = do
+ start <- newStructuralNode
+ conds <- mapM build list
+ end <- newStructuralNode
+ linkRanges (start:conds)
+ mapM_ (`linkRange` end) conds
+ return $ spanRange start end
+
+ buildBranch (typ, cond, body) = do
+ c <- buildCond cond
+ b <- sequentially body
+ linkRange c b
+ return (typ, c, b)
+
+ linkBranch end (typ, cond, body) (_, nextCond, nextBody) = do
+ -- Failure case
+ linkRange cond nextCond
+ -- After body
+ case typ of
+ CaseBreak -> linkRange body end
+ CaseFallThrough -> linkRange body nextBody
+ CaseContinue -> linkRange body nextCond
+
+ -- Find a *) if any
+
+ hasCatchAll (_,cond,_) = any isCatchAll cond
+ isCatchAll c = fromMaybe False $ do
+ pg <- wordToExactPseudoGlob c
+ return $ pg `pseudoGlobIsSuperSetof` [PGMany]
+
+ T_Condition _ _ op -> build op
+
+ T_CoProc id maybeName t -> do
+ let name = fromMaybe "COPROC" maybeName
+ start <- newStructuralNode
+ parent <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name CFValueArray
+ child <- subshell id "coproc" $ build t
+ end <- newNodeRange $ CFSetExitCode id
+
+ linkRange start parent
+ linkRange start child
+ linkRange parent end
+ linkRangeAs CFEFalseFlow child end
+
+ return $ spanRange start end
+ T_CoProcBody _ t -> build t
+
+ T_DollarArithmetic _ arith -> build arith
+ T_DollarDoubleQuoted _ list -> sequentially list
+ T_DollarSingleQuoted _ _ -> none
+ T_DollarBracket _ t -> build t
+
+ T_DollarBraced id _ t -> do
+ let str = concat $ oversimplify t
+ let modifier = getBracedModifier str
+ let reference = getBracedReference str
+ let indices = getIndexReferences str
+ let offsets = getOffsetReferences str
+ vals <- build t
+ others <- mapM (\x -> nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable x)) (indices ++ offsets)
+ deps <- linkRanges (vals:others)
+ read <- nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable reference)
+ totalRead <- linkRange deps read
+
+ if any (`isPrefixOf` modifier) ["=", ":="]
+ then do
+ optionalAssign <- newNodeRange (applySingle $ IdTagged id $ CFWriteVariable reference CFValueString)
+ result <- newStructuralNode
+ linkRange optionalAssign result
+ linkRange totalRead result
+ else return totalRead
+
+ T_DoubleQuoted _ list -> sequentially list
+
+ T_DollarExpansion id body ->
+ subshell id "$(..) expansion" $ sequentially body
+
+ T_Extglob _ _ list -> sequentially list
+
+ T_FdRedirect id ('{':identifier) op -> do
+ let name = takeWhile (/= '}') identifier
+ expression <- build op
+ rw <- newNodeRange $
+ if isClosingFileOp op
+ then applySingle $ IdTagged id $ CFReadVariable name
+ else applySingle $ IdTagged id $ CFWriteVariable name CFValueInteger
+
+ linkRange expression rw
+
+
+ T_FdRedirect _ name t -> do
+ build t
+
+ T_ForArithmetic _ initT condT incT bodyT -> do
+ init <- build initT
+ cond <- build condT
+ body <- sequentially bodyT
+ inc <- build incT
+ end <- newStructuralNode
+
+ -- Forward edges
+ linkRanges [init, cond, body, inc]
+ linkRange cond end
+ -- Backward edge
+ linkRange inc cond
+ return $ spanRange init end
+
+ T_ForIn id name words body -> forInHelper id name words body
+
+ -- For functions we generate an unlinked subgraph, and mention that in its definition node
+ T_Function id _ _ name body -> do
+ range <- local (\c -> c { cfExitTarget = Nothing }) $ do
+ entry <- newNodeRange $ CFEntryPoint $ "function " ++ name
+ f <- withFunctionScope $ build body
+ linkRange entry f
+ let (Range entry exit) = range
+ definition <- newNodeRange (applySingle $ IdTagged id $ CFDefineFunction name id entry exit)
+ exe <- newNodeRange (CFSetExitCode id)
+ linkRange definition exe
+
+ T_Glob {} -> none
+
+ T_HereString _ t -> build t
+ T_HereDoc _ _ _ _ list -> sequentially list
+
+ T_IfExpression id ifs elses -> do
+ start <- newStructuralNode
+ branches <- doBranches start ifs elses []
+ end <- newStructuralNode
+ mapM_ (`linkRange` end) branches
+ return $ spanRange start end
+ where
+ doBranches start ((conds, thens):rest) elses result = do
+ cond <- asCondition $ sequentially conds
+ action <- sequentially thens
+ linkRange start cond
+ linkRange cond action
+ doBranches cond rest elses (action:result)
+ doBranches start [] elses result = do
+ rest <-
+ if null elses
+ then newNodeRange (CFSetExitCode id)
+ else sequentially elses
+ linkRange start rest
+ return (rest:result)
+
+ T_Include _ t -> build t
+
+ T_IndexedElement _ indicesT valueT -> do
+ indices <- sequentially indicesT
+ value <- build valueT
+ linkRange indices value
+
+ T_IoDuplicate _ op _ -> build op
+
+ T_IoFile _ op t -> do
+ exp <- build t
+ doesntDoMuch <- build op
+ linkRange exp doesntDoMuch
+
+ T_Literal {} -> none
+
+ T_NormalWord _ list -> sequentially list
+
+ T_OrIf _ lhs rhs -> do
+ left <- build lhs
+ right <- build rhs
+ end <- newStructuralNode
+ linkRange left right
+ linkRange right end
+ linkRange left end
+
+ T_Pipeline _ _ [cmd] -> build cmd
+ T_Pipeline id _ cmds -> do
+ start <- newStructuralNode
+ hasLastpipe <- reader $ cfLastpipe . cfParameters
+ (leading, last) <- buildPipe hasLastpipe cmds
+ end <- newStructuralNode
+
+ mapM_ (linkRange start) leading
+ mapM_ (\c -> linkRangeAs CFEFalseFlow c end) leading
+ linkRanges $ [start] ++ last ++ [end]
+ where
+ buildPipe True [x] = do
+ last <- build x
+ return ([], [last])
+ buildPipe lp (first:rest) = do
+ this <- subshell id "pipeline" $ build first
+ (leading, last) <- buildPipe lp rest
+ return (this:leading, last)
+ buildPipe _ [] = return ([], [])
+
+ T_ProcSub id op cmds -> do
+ start <- newStructuralNode
+ body <- subshell id (op ++ "() process substitution") $ sequentially cmds
+ end <- newStructuralNode
+
+ linkRange start body
+ linkRangeAs CFEFalseFlow body end
+ linkRange start end
+
+ T_Redirecting _ redirs cmd -> do
+ -- For simple commands, this is the other way around in bash
+ -- We do it in this order for comound commands like { x=name; } > "$x"
+ redir <- sequentially redirs
+ body <- build cmd
+ linkRange redir body
+
+ T_SelectIn id name words body -> forInHelper id name words body
+
+ T_SimpleCommand id vars [] -> do
+ -- Vars can also be empty, as in the command "> foo"
+ assignments <- sequentially vars
+ status <- newNodeRange (CFSetExitCode id)
+ linkRange assignments status
+
+ T_SimpleCommand id vars list@(cmd:_) ->
+ handleCommand t vars list $ getUnquotedLiteral cmd
+
+ T_SingleQuoted _ _ -> none
+
+ T_SourceCommand _ originalCommand inlinedSource -> do
+ cmd <- build originalCommand
+ end <- newStructuralNode
+ inline <- withReturn end $ build inlinedSource
+ linkRange cmd inline
+ linkRange inline end
+ return $ spanRange cmd inline
+
+ T_Subshell id body -> do
+ main <- subshell id "explicit (..) subshell" $ sequentially body
+ status <- newNodeRange (CFSetExitCode id)
+ linkRange main status
+
+ T_UntilExpression id cond body -> whileHelper id cond body
+ T_WhileExpression id cond body -> whileHelper id cond body
+
+ T_CLOBBER _ -> none
+ T_GREATAND _ -> none
+ T_LESSAND _ -> none
+ T_LESSGREAT _ -> none
+ T_DGREAT _ -> none
+ T_Greater _ -> none
+ T_Less _ -> none
+ T_ParamSubSpecialChar _ _ -> none
+
+ x -> error ("Unimplemented: " ++ show x)
+
+-- Still in `where` clause
+ forInHelper id name words body = do
+ entry <- newStructuralNode
+ expansion <- sequentially words
+ assignmentChoice <- newStructuralNode
+ assignments <-
+ if null words || any willSplit words
+ then (:[]) <$> (newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name CFValueString)
+ else mapM (\t -> newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $ CFValueComputed (getId t) $ tokenToParts t) words
+ body <- sequentially body
+ exit <- newStructuralNode
+ -- Forward edges
+ linkRanges [entry, expansion, assignmentChoice]
+ mapM_ (\t -> linkRanges [assignmentChoice, t, body]) assignments
+ linkRange body exit
+ linkRange expansion exit
+ -- Backward edge
+ linkRange body assignmentChoice
+ return $ spanRange entry exit
+
+ whileHelper id cond body = do
+ condRange <- asCondition $ sequentially cond
+ bodyRange <- sequentially body
+ end <- newNodeRange (CFSetExitCode id)
+
+ linkRange condRange bodyRange
+ linkRange bodyRange condRange
+ linkRange condRange end
+
+
+handleCommand cmd vars args literalCmd = do
+ -- TODO: Handle assignments in declaring commands
+
+ case literalCmd of
+ Just "exit" -> regularExpansion vars args $ handleExit
+ Just "return" -> regularExpansion vars args $ handleReturn
+ Just "unset" -> regularExpansionWithStatus vars args $ handleUnset args
+
+ Just "declare" -> handleDeclare args
+ Just "local" -> handleDeclare args
+ Just "typeset" -> handleDeclare args
+
+ Just "printf" -> regularExpansionWithStatus vars args $ handlePrintf args
+ Just "wait" -> regularExpansionWithStatus vars args $ handleWait args
+
+ Just "mapfile" -> regularExpansionWithStatus vars args $ handleMapfile args
+ Just "readarray" -> regularExpansionWithStatus vars args $ handleMapfile args
+
+ Just "DEFINE_boolean" -> regularExpansionWithStatus vars args $ handleDEFINE args
+ Just "DEFINE_float" -> regularExpansionWithStatus vars args $ handleDEFINE args
+ Just "DEFINE_integer" -> regularExpansionWithStatus vars args $ handleDEFINE args
+ Just "DEFINE_string" -> regularExpansionWithStatus vars args $ handleDEFINE args
+
+ -- This will mostly behave like 'command' but ok
+ Just "builtin" ->
+ case args of
+ [_] -> regular
+ (_:newargs@(newcmd:_)) ->
+ handleCommand newcmd vars newargs $ getLiteralString newcmd
+ Just "command" ->
+ case args of
+ [_] -> regular
+ (_:newargs@(newcmd:_)) ->
+ handleOthers (getId newcmd) vars newargs $ getLiteralString newcmd
+ _ -> regular
+
+ where
+ regular = handleOthers (getId cmd) vars args literalCmd
+ handleExit = do
+ exitNode <- reader cfExitTarget
+ case exitNode of
+ Just target -> do
+ exit <- newNode CFResolvedExit
+ link exit target CFEExit
+ unreachable <- newNode CFUnreachable
+ return $ Range exit unreachable
+ Nothing -> do
+ exit <- newNode CFUnresolvedExit
+ unreachable <- newNode CFUnreachable
+ return $ Range exit unreachable
+
+ handleReturn = do
+ returnTarget <- reader cfReturnTarget
+ case returnTarget of
+ Nothing -> error $ pleaseReport "missing return target"
+ Just target -> do
+ ret <- newNode CFStructuralNode
+ link ret target CFEFlow
+ unreachable <- newNode CFUnreachable
+ return $ Range ret unreachable
+
+ handleUnset (cmd:args) = do
+ case () of
+ _ | "n" `elem` flagNames -> unsetWith CFUndefineNameref
+ _ | "v" `elem` flagNames -> unsetWith CFUndefineVariable
+ _ | "f" `elem` flagNames -> unsetWith CFUndefineFunction
+ _ -> unsetWith CFUndefine
+ where
+ pairs :: [(String, Token)] -- [(Flag string, token)] e.g. [("-f", t), ("", myfunc)]
+ pairs = map (\(str, (flag, val)) -> (str, flag)) $ fromMaybe (map (\c -> ("", (c,c))) args) $ getGnuOpts "vfn" args
+ (names, flags) = partition (null . fst) pairs
+ flagNames = map fst flags
+ literalNames :: [(Token, String)] -- Literal names to unset, e.g. [(myfuncToken, "myfunc")]
+ literalNames = mapMaybe (\(_, t) -> getLiteralString t >>= (return . (,) t)) names
+ -- Apply a constructor like CFUndefineVariable to each literalName, and tag with its id
+ unsetWith c = newNodeRange $ CFApplyEffects $ map (\(token, name) -> IdTagged (getId token) $ c name) literalNames
+
+
+ variableAssignRegex = mkRegex "^([_a-zA-Z][_a-zA-Z0-9]*)="
+
+ handleDeclare (cmd:args) = do
+ isFunc <- asks cfIsFunction
+ let (evaluated, effects) = mconcat $ map (toEffects isFunc) args
+ before <- sequentially $ evaluated
+ effect <- newNodeRange $ CFApplyEffects effects
+ result <- newNodeRange $ CFSetExitCode (getId cmd)
+ linkRanges [before, effect, result]
+ where
+ opts = map fst $ getGenericOpts args
+ array = "a" `elem` opts || "A" `elem` opts
+ integer = "i" `elem` opts
+ func = "f" `elem` opts || "F" `elem` opts
+ global = "g" `elem` opts
+ writer isFunc =
+ case () of
+ _ | global -> CFWriteGlobal
+ _ | isFunc -> CFWriteLocal
+ _ -> CFWriteVariable
+
+ toEffects :: Bool -> Token -> ([Token], [IdTagged CFEffect])
+ toEffects isFunc (T_Assignment id mode var idx t) =
+ let
+ pre = idx ++ [t]
+ isArray = array || (not $ null idx)
+ asArray = [ IdTagged id $ (writer isFunc) var CFValueArray ]
+ asString = [ IdTagged id $ (writer isFunc) var $
+ if integer
+ then CFValueInteger -- TODO: Also handle integer variable property
+ else CFValueComputed (getId t) $ [ CFStringVariable var | mode == Append ] ++ tokenToParts t
+ ]
+ in
+ (pre, if isArray then asArray else asString )
+
+ toEffects isFunc t =
+ let
+ pre = [t]
+ literal = fromJust $ getLiteralStringExt (const $ Just "\0") t
+ isKnown = '\0' `notElem` literal
+ match = fmap head $ variableAssignRegex `matchRegex` literal
+ name = fromMaybe literal match
+
+ typer def =
+ if array
+ then CFValueArray
+ else
+ if integer
+ then CFValueInteger
+ else def
+
+ asLiteral = [
+ IdTagged (getId t) $ (writer isFunc) name $
+ typer $ CFValueComputed (getId t) [ CFStringLiteral $ drop 1 $ dropWhile (/= '=') $ literal ]
+ ]
+ asUnknown = [
+ IdTagged (getId t) $ (writer isFunc) name $
+ typer $ CFValueString
+ ]
+ asBlank = [
+ IdTagged (getId t) $ (writer isFunc) name $
+ typer $ CFValueComputed (getId t) []
+ ]
+ in
+ case () of
+ _ | not (isVariableName name) -> (pre, [])
+ _ | isJust match && isKnown -> (pre, asLiteral)
+ _ | isJust match -> (pre, asUnknown)
+ _ -> (pre, asBlank)
+
+ handlePrintf (cmd:args) =
+ newNodeRange $ CFApplyEffects $ maybeToList findVar
+ where
+ findVar = do
+ flags <- getBsdOpts "v:" args
+ (flag, arg) <- lookup "v" flags
+ name <- getLiteralString arg
+ return $ IdTagged (getId arg) $ CFWriteVariable name CFValueString
+
+ handleWait (cmd:args) =
+ newNodeRange $ CFApplyEffects $ maybeToList findVar
+ where
+ findVar = do
+ let flags = getGenericOpts args
+ (flag, arg) <- lookup "p" flags
+ name <- getLiteralString arg
+ return $ IdTagged (getId arg) $ CFWriteVariable name CFValueInteger
+
+ handleMapfile (cmd:args) =
+ newNodeRange $ CFApplyEffects [findVar]
+ where
+ findVar =
+ let (id, name) = fromMaybe (getId cmd, "MAPFILE") $ getFromArg `mplus` getFromFallback
+ in IdTagged id $ CFWriteVariable name CFValueArray
+
+ getFromArg = do
+ flags <- getGnuOpts "d:n:O:s:u:C:c:t" args
+ (_, arg) <- lookup "" flags
+ name <- getLiteralString arg
+ return (getId arg, name)
+
+ getFromFallback =
+ listToMaybe $ mapMaybe getIfVar $ reverse args
+ getIfVar c = do
+ name <- getLiteralString c
+ guard $ isVariableName name
+ return (getId c, name)
+
+ handleDEFINE (cmd:args) =
+ newNodeRange $ CFApplyEffects $ maybeToList findVar
+ where
+ findVar = do
+ name <- listToMaybe $ drop 1 args
+ str <- getLiteralString name
+ guard $ isVariableName str
+ return $ IdTagged (getId name) $ CFWriteVariable str CFValueString
+
+ handleOthers id vars args cmd =
+ regularExpansion vars args $ do
+ exe <- newNodeRange $ CFExecuteCommand cmd
+ status <- newNodeRange $ CFSetExitCode id
+ linkRange exe status
+
+ regularExpansion vars args p = do
+ args <- sequentially args
+ assignments <- mapM (buildAssignment PrefixScope) vars
+ exe <- p
+ dropAssignments <-
+ if null vars
+ then
+ return []
+ else do
+ drop <- newNodeRange CFDropPrefixAssignments
+ return [drop]
+
+ linkRanges $ [args] ++ assignments ++ [exe] ++ dropAssignments
+
+ regularExpansionWithStatus vars args@(cmd:_) p = do
+ initial <- regularExpansion vars args p
+ status <- newNodeRange $ CFSetExitCode (getId cmd)
+ linkRange initial status
+
+
+none = newStructuralNode
+
+data Scope = DefaultScope | GlobalScope | LocalScope | PrefixScope
+
+buildAssignment scope t = do
+ op <- case t of
+ T_Assignment id mode var indices value -> do
+ expand <- build value
+ index <- sequentially indices
+ read <- case mode of
+ Append -> newNodeRange (applySingle $ IdTagged id $ CFReadVariable var)
+ Assign -> none
+ let valueType = if null indices then f id value else CFValueArray
+ let scoper =
+ case scope of
+ PrefixScope -> CFWritePrefix
+ LocalScope -> CFWriteLocal
+ GlobalScope -> CFWriteGlobal
+ DefaultScope -> CFWriteVariable
+ write <- newNodeRange $ applySingle $ IdTagged id $ scoper var valueType
+ linkRanges [expand, index, read, write]
+ where
+ f :: Id -> Token -> CFValue
+ f id t@T_NormalWord {} = CFValueComputed id $ [CFStringVariable var | mode == Append] ++ tokenToParts t
+ f id t@(T_Literal _ str) = CFValueComputed id $ [CFStringVariable var | mode == Append] ++ tokenToParts t
+ f _ T_Array {} = CFValueArray
+
+ registerNode (getId t) op
+ return op
+
+
+tokenToParts t =
+ case t of
+ T_NormalWord _ list -> concatMap tokenToParts list
+ T_DoubleQuoted _ list -> concatMap tokenToParts list
+ T_SingleQuoted _ str -> [ CFStringLiteral str ]
+ T_Literal _ str -> [ CFStringLiteral str ]
+ T_DollarArithmetic {} -> [ CFStringInteger ]
+ T_DollarBracket {} -> [ CFStringInteger ]
+ T_DollarBraced _ _ list | isUnmodifiedParameterExpansion t -> [ CFStringVariable (getBracedReference $ concat $ oversimplify list) ]
+ -- Check if getLiteralString can handle it, if not it's unknown
+ _ -> [maybe CFStringUnknown CFStringLiteral $ getLiteralString t]
+
+return []
+runTests = $( [| $(forAllProperties) (quickCheckWithResult (stdArgs { maxSuccess = 1 }) ) |])
diff --git a/src/ShellCheck/CFGAnalysis.hs b/src/ShellCheck/CFGAnalysis.hs
new file mode 100644
index 0000000..99ce450
--- /dev/null
+++ b/src/ShellCheck/CFGAnalysis.hs
@@ -0,0 +1,1113 @@
+{-
+ Copyright 2022 Vidar Holen
+
+ This file is part of ShellCheck.
+ https://www.shellcheck.net
+
+ ShellCheck is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ShellCheck is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+-}
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE DeriveAnyClass, DeriveGeneric #-}
+
+{-
+ Data Flow Analysis on a Control Flow Graph.
+
+ This module implements a pretty standard iterative Data Flow Analysis.
+ For an overview of the process, see Wikipedia.
+
+ Since shell scripts rely heavily on global variables, this DFA includes
+ tracking the value of globals across calls. Each function invocation is
+ treated as a separate DFA problem, and a caching mechanism (hopefully)
+ avoids any exponential explosions.
+
+ To do efficient DFA join operations (or merges, as the code calls them),
+ some of the data structures have an integer version attached. On update,
+ the version is changed. If two states have the same version number,
+ a merge is skipped on the grounds that they are identical. It is easy
+ to unintentionally forget to update/invalidate the version number,
+ and bugs will ensure.
+
+ For performance reasons, the entire code runs in plain ST, with a manual
+ context object Ctx being passed around. It relies heavily on mutable
+ STRefs. However, this turned out to be literally thousands of times faster
+ than my several attempts using RWST, so it can't be helped.
+-}
+
+module ShellCheck.CFGAnalysis (
+ analyzeControlFlow
+ ,CFGParameters (..)
+ ,CFGAnalysis (..)
+ ,ProgramState (..)
+ ,VariableValue (..)
+ ,SpaceStatus (..)
+ ,getIncomingState
+ ,getOutgoingState
+ ,ShellCheck.CFGAnalysis.runTests -- STRIP
+ ) where
+
+import GHC.Generics (Generic)
+import ShellCheck.AST
+import ShellCheck.CFG
+import qualified ShellCheck.Data as Data
+import ShellCheck.Prelude
+import Control.Monad
+import Control.Monad.ST
+import Control.DeepSeq
+import Data.List hiding (map)
+import Data.STRef
+import Data.Maybe
+import qualified Data.Map as M
+import qualified Data.Set as S
+import Data.Graph.Inductive.Graph
+import Data.Graph.Inductive.Query.DFS
+import Debug.Trace -- STRIP
+
+import Test.QuickCheck
+
+
+iterationCount = 1000000
+cacheEntries = 10
+
+-- The result of the data flow analysis
+data CFGAnalysis = CFGAnalysis {
+ graph :: CFGraph,
+ tokenToNode :: M.Map Id (Node, Node),
+ nodeToData :: M.Map Node (ProgramState, ProgramState)
+} deriving (Show, Generic, NFData)
+
+-- The program state we expose externally
+data ProgramState = ProgramState {
+ variablesInScope :: M.Map String VariableValue,
+ stateIsReachable :: Bool
+-- internalState :: InternalState
+} deriving (Show, Eq, Generic, NFData)
+
+-- Conveniently get the state before a token id
+getIncomingState :: CFGAnalysis -> Id -> Maybe ProgramState
+getIncomingState analysis id = do
+ (start,end) <- M.lookup id $ tokenToNode analysis
+ fst <$> M.lookup start (nodeToData analysis)
+
+-- Conveniently get the state after a token id
+getOutgoingState :: CFGAnalysis -> Id -> Maybe ProgramState
+getOutgoingState analysis id = do
+ (start,end) <- M.lookup id $ tokenToNode analysis
+ snd <$> M.lookup end (nodeToData analysis)
+
+getDataForNode analysis node = M.lookup node $ nodeToData analysis
+
+-- The current state of data flow at a point in the program, potentially as a diff
+data InternalState = InternalState {
+ sVersion :: Integer,
+ sGlobalValues :: VersionedMap String VariableValue,
+ sLocalValues :: VersionedMap String VariableValue,
+ sPrefixValues :: VersionedMap String VariableValue,
+ sFunctionTargets :: VersionedMap String FunctionValue,
+ sIsReachable :: Maybe Bool
+} deriving (Show, Generic, NFData)
+
+newInternalState = InternalState {
+ sVersion = 0,
+ sGlobalValues = vmEmpty,
+ sLocalValues = vmEmpty,
+ sPrefixValues = vmEmpty,
+ sFunctionTargets = vmEmpty,
+ sIsReachable = Nothing
+}
+
+unreachableState = modified newInternalState {
+ sIsReachable = Just False
+}
+
+-- The default state we assume we get from the environment
+createEnvironmentState :: InternalState
+createEnvironmentState = do
+ foldl' (flip ($)) newInternalState $ concat [
+ addVars Data.internalVariables unknownVariableValue,
+ addVars Data.variablesWithoutSpaces spacelessVariableValue,
+ addVars Data.specialIntegerVariables spacelessVariableValue
+ ]
+ where
+ addVars names val = map (\name -> insertGlobal name val) names
+ spacelessVariableValue = VariableValue {
+ literalValue = Nothing,
+ spaceStatus = SpaceStatusClean
+ }
+
+
+modified s = s { sVersion = -1 }
+
+insertGlobal :: String -> VariableValue -> InternalState -> InternalState
+insertGlobal name value state = modified state {
+ sGlobalValues = vmInsert name value $ sGlobalValues state
+}
+
+insertLocal :: String -> VariableValue -> InternalState -> InternalState
+insertLocal name value state = modified state {
+ sLocalValues = vmInsert name value $ sLocalValues state
+}
+
+insertPrefix :: String -> VariableValue -> InternalState -> InternalState
+insertPrefix name value state = modified state {
+ sPrefixValues = vmInsert name value $ sPrefixValues state
+}
+
+insertFunction :: String -> FunctionValue -> InternalState -> InternalState
+insertFunction name value state = modified state {
+ sFunctionTargets = vmInsert name value $ sFunctionTargets state
+}
+
+internalToExternal :: InternalState -> ProgramState
+internalToExternal s =
+ ProgramState {
+ -- Avoid introducing dependencies on the literal value as this is only for debugging purposes right now
+ variablesInScope = M.map (\c -> c { literalValue = Nothing }) flatVars,
+ -- internalState = s, -- For debugging
+ stateIsReachable = fromMaybe True $ sIsReachable s
+ }
+ where
+ flatVars = M.unionsWith (\_ last -> last) $ map mapStorage [sGlobalValues s, sLocalValues s, sPrefixValues s]
+
+-- Dependencies on values, e.g. "if there is a global variable named 'foo' without spaces"
+-- This is used to see if the DFA of a function would result in the same state, so anything
+-- that affects DFA must be tracked.
+data StateDependency =
+ DepGlobalValue String VariableValue
+ | DepLocalValue String VariableValue
+ | DepPrefixValue String VariableValue
+ | DepFunction String (S.Set FunctionDefinition)
+ -- Whether invoking the node would result in recursion (i.e., is the function on the stack?)
+ | DepIsRecursive Node Bool
+ deriving (Show, Eq, Ord, Generic, NFData)
+
+-- A function definition, or lack thereof
+data FunctionDefinition = FunctionUnknown | FunctionDefinition String Node Node
+ deriving (Show, Eq, Ord, Generic, NFData)
+
+-- The Set of places a command name can point (it's a Set to handle conditionally defined functions)
+type FunctionValue = S.Set FunctionDefinition
+
+-- The scope of a function. ("Prefix" refers to e.g. `foo=1 env`)
+data VariableScope = PrefixVar | LocalVar | GlobalVar
+ deriving (Show, Eq, Ord, Generic, NFData)
+
+-- Create an InternalState that fulfills the given dependencies
+depsToState :: S.Set StateDependency -> InternalState
+depsToState set = foldl insert newInternalState $ S.toList set
+ where
+ insert :: InternalState -> StateDependency -> InternalState
+ insert state dep =
+ case dep of
+ DepFunction name val -> insertFunction name val state
+ DepGlobalValue name val -> insertGlobal name val state
+ DepLocalValue name val -> insertLocal name val state
+ DepPrefixValue name val -> insertPrefix name val state
+ DepIsRecursive _ _ -> state
+
+unknownFunctionValue = S.singleton FunctionUnknown
+
+-- The information about the value of a single variable
+data VariableValue = VariableValue {
+ literalValue :: Maybe String, -- TODO: For debugging. Remove me.
+ spaceStatus :: SpaceStatus
+}
+ deriving (Show, Eq, Ord, Generic, NFData)
+
+-- Whether or not the value needs quoting (has spaces/globs), or we don't know
+data SpaceStatus = SpaceStatusEmpty | SpaceStatusClean | SpaceStatusDirty deriving (Show, Eq, Ord, Generic, NFData)
+
+
+unknownVariableValue = VariableValue {
+ literalValue = Nothing,
+ spaceStatus = SpaceStatusDirty
+}
+
+emptyVariableValue = VariableValue {
+ literalValue = Just "",
+ spaceStatus = SpaceStatusEmpty
+}
+
+mergeVariableValue a b = VariableValue {
+ literalValue = if literalValue a == literalValue b then literalValue a else Nothing,
+ spaceStatus = mergeSpaceStatus (spaceStatus a) (spaceStatus b)
+}
+
+mergeSpaceStatus a b =
+ case (a,b) of
+ (SpaceStatusEmpty, y) -> y
+ (x, SpaceStatusEmpty) -> x
+ (SpaceStatusClean, SpaceStatusClean) -> SpaceStatusClean
+ _ -> SpaceStatusDirty
+
+-- A VersionedMap is a Map that keeps an additional integer version to quickly determine if it has changed.
+-- * Version -1 means it's unknown (possibly and presumably changed)
+-- * Version 0 means it's empty
+-- * Version N means it's equal to any other map with Version N (this is required but not enforced)
+data VersionedMap k v = VersionedMap {
+ mapVersion :: Integer,
+ mapStorage :: M.Map k v
+}
+ deriving (Generic, NFData)
+
+-- This makes states more readable but inhibits copy-paste
+instance (Show k, Show v) => Show (VersionedMap k v) where
+ show m = (if mapVersion m >= 0 then "V" ++ show (mapVersion m) else "U") ++ " " ++ show (mapStorage m)
+
+instance Eq InternalState where
+ (==) a b = stateIsQuickEqual a b || stateIsSlowEqual a b
+
+instance (Eq k, Eq v) => Eq (VersionedMap k v) where
+ (==) a b = vmIsQuickEqual a b || mapStorage a == mapStorage b
+
+instance (Ord k, Ord v) => Ord (VersionedMap k v) where
+ compare a b =
+ if vmIsQuickEqual a b
+ then EQ
+ else mapStorage a `compare` mapStorage b
+
+
+-- A context with STRefs manually passed around to function.
+-- This is done because it was dramatically much faster than any RWS type stack
+data Ctx s = Ctx {
+ -- The current node
+ cNode :: STRef s Node,
+ -- The current input state
+ cInput :: STRef s InternalState,
+ -- The current output state
+ cOutput :: STRef s InternalState,
+
+ -- The current functions/subshells stack
+ cStack :: [StackEntry s],
+ -- The input graph
+ cGraph :: CFGraph,
+ -- An incrementing counter to version maps
+ cCounter :: STRef s Integer,
+ -- A cache of input state dependencies to output effects
+ cCache :: STRef s (M.Map Node [(S.Set StateDependency, InternalState)]),
+ -- The states resulting from data flows per invocation path
+ cInvocations :: STRef s (M.Map [Node] (S.Set StateDependency, M.Map Node (InternalState, InternalState)))
+}
+
+-- Whenever a function (or subshell) is invoked, a value like this is pushed onto the stack
+data StackEntry s = StackEntry {
+ -- The entry point of this stack entry for the purpose of detecting recursion
+ entryPoint :: Node,
+ -- The node where this entry point was invoked
+ callSite :: Node,
+ -- A mutable set of dependencies we fetched from here or higher in the stack
+ dependencies :: STRef s (S.Set StateDependency),
+ -- The original input state for this stack entry
+ stackState :: InternalState
+}
+ deriving (Eq, Generic, NFData)
+
+
+-- Overwrite a base state with the contents of a diff state
+-- This is unrelated to join/merge.
+patchState :: InternalState -> InternalState -> InternalState
+patchState base diff =
+ case () of
+ _ | sVersion diff == 0 -> base
+ _ | sVersion base == 0 -> diff
+ _ | stateIsQuickEqual base diff -> diff
+ _ ->
+ InternalState {
+ sVersion = -1,
+ sGlobalValues = vmPatch (sGlobalValues base) (sGlobalValues diff),
+ sLocalValues = vmPatch (sLocalValues base) (sLocalValues diff),
+ sPrefixValues = vmPatch (sPrefixValues base) (sPrefixValues diff),
+ sFunctionTargets = vmPatch (sFunctionTargets base) (sFunctionTargets diff),
+ sIsReachable = sIsReachable diff `mplus` sIsReachable base
+ }
+
+patchOutputM ctx diff = do
+ let cOut = cOutput ctx
+ oldState <- readSTRef cOut
+ let newState = patchState oldState diff
+ writeSTRef cOut newState
+
+-- Merge (aka Join) two states. This is monadic because it requires looking up
+-- values from the current context. For example:
+--
+-- f() {
+-- foo || x=2
+-- HERE # This merge requires looking up the value of $x in the parent frame
+-- }
+-- x=1
+-- f
+mergeState :: forall s. Ctx s -> InternalState -> InternalState -> ST s InternalState
+mergeState ctx a b = do
+ -- Kludge: we want `readVariable` & friends not to read from an intermediate state,
+ -- so temporarily set a blank input.
+ let cin = cInput ctx
+ old <- readSTRef cin
+ writeSTRef cin newInternalState
+ x <- merge a b
+ writeSTRef cin old
+ return x
+
+ where
+
+ merge a b =
+ case () of
+ _ | sIsReachable a == Just True && sIsReachable b == Just False
+ || sIsReachable a == Just False && sIsReachable b == Just True ->
+ error $ pleaseReport "Unexpected merge of reachable and unreachable state"
+ _ | sIsReachable a == Just False && sIsReachable b == Just False ->
+ return unreachableState
+ _ | sVersion a >= 0 && sVersion b >= 0 && sVersion a == sVersion b -> return a
+ _ -> do
+ globals <- mergeMaps ctx mergeVariableValue readGlobal (sGlobalValues a) (sGlobalValues b)
+ locals <- mergeMaps ctx mergeVariableValue readVariable (sLocalValues a) (sLocalValues b)
+ prefix <- mergeMaps ctx mergeVariableValue readVariable (sPrefixValues a) (sPrefixValues b)
+ funcs <- mergeMaps ctx S.union readFunction (sFunctionTargets a) (sFunctionTargets b)
+ return $ InternalState {
+ sVersion = -1,
+ sGlobalValues = globals,
+ sLocalValues = locals,
+ sPrefixValues = prefix,
+ sFunctionTargets = funcs,
+ sIsReachable = liftM2 (&&) (sIsReachable a) (sIsReachable b)
+ }
+
+-- Merge a number of states, or return a default if there are no states
+-- (it can't fold from newInternalState because this would be equivalent of adding a new input edge).
+mergeStates :: forall s. Ctx s -> InternalState -> [InternalState] -> ST s InternalState
+mergeStates ctx def list =
+ case list of
+ [] -> return def
+ (first:rest) -> foldM (mergeState ctx) first rest
+
+-- Merge two maps, key by key. If both maps have a key, the 'merger' is used.
+-- If only one has the key, the 'reader' is used to fetch a second, and the two are merged as above.
+mergeMaps :: (Ord k) => forall s.
+ Ctx s ->
+ (v -> v -> v) ->
+ (Ctx s -> k -> ST s v) ->
+ (VersionedMap k v) ->
+ (VersionedMap k v) ->
+ ST s (VersionedMap k v)
+mergeMaps ctx merger reader a b =
+ if vmIsQuickEqual a b
+ then return a
+ else do
+ new <- M.fromDistinctAscList <$> reverse <$> f [] (M.toAscList $ mapStorage a) (M.toAscList $ mapStorage b)
+ vmFromMap ctx new
+ where
+ f l [] [] = return l
+ f l [] b = f l b []
+ f l ((k,v):rest1) [] = do
+ other <- reader ctx k
+ f ((k, merger v other):l) rest1 []
+ f l l1@((k1, v1):rest1) l2@((k2, v2):rest2) =
+ case k1 `compare` k2 of
+ EQ ->
+ f ((k1, merger v1 v2):l) rest1 rest2
+ LT -> do
+ nv2 <- reader ctx k1
+ f ((k1, merger v1 nv2):l) rest1 l2
+ GT -> do
+ nv1 <- reader ctx k2
+ f ((k2, merger nv1 v2):l) l1 rest2
+
+vmFromMap ctx map = return $ VersionedMap {
+ mapVersion = -1,
+ mapStorage = map
+}
+
+-- Give a VersionedMap a version if it does not already have one.
+versionMap ctx map =
+ if mapVersion map >= 0
+ then return map
+ else do
+ v <- nextVersion ctx
+ return map {
+ mapVersion = v
+ }
+
+-- Give an InternalState a version if it does not already have one.
+versionState ctx state =
+ if sVersion state >= 0
+ then return state
+ else do
+ self <- nextVersion ctx
+ ssGlobalValues <- versionMap ctx $ sGlobalValues state
+ ssLocalValues <- versionMap ctx $ sLocalValues state
+ ssFunctionTargets <- versionMap ctx $ sFunctionTargets state
+ return state {
+ sVersion = self,
+ sGlobalValues = ssGlobalValues,
+ sLocalValues = ssLocalValues,
+ sFunctionTargets = ssFunctionTargets
+ }
+
+-- Like 'not null' but for 2+ elements
+is2plus :: [a] -> Bool
+is2plus l = case l of
+ _:_:_ -> True
+ _ -> False
+
+-- Use versions to see if two states are trivially identical
+stateIsQuickEqual a b =
+ let
+ va = sVersion a
+ vb = sVersion b
+ in
+ va >= 0 && vb >= 0 && va == vb
+
+-- A manual slow path 'Eq' (it's not derived because it's part of the custom Eq instance)
+stateIsSlowEqual a b =
+ check sGlobalValues
+ && check sLocalValues
+ && check sPrefixValues
+ && check sFunctionTargets
+ && check sIsReachable
+ where
+ check f = f a == f b
+
+-- Check if two VersionedMaps are trivially equal
+vmIsQuickEqual :: VersionedMap k v -> VersionedMap k v -> Bool
+vmIsQuickEqual a b =
+ let
+ va = mapVersion a
+ vb = mapVersion b
+ in
+ va >= 0 && vb >= 0 && va == vb
+
+-- A new, empty VersionedMap
+vmEmpty = VersionedMap {
+ mapVersion = 0,
+ mapStorage = M.empty
+}
+
+-- Map.null for VersionedMaps
+vmNull :: VersionedMap k v -> Bool
+vmNull m = mapVersion m == 0 || (M.null $ mapStorage m)
+
+-- Map.lookup for VersionedMaps
+vmLookup name map = M.lookup name $ mapStorage map
+
+-- Map.insert for VersionedMaps
+vmInsert key val map = VersionedMap {
+ mapVersion = -1,
+ mapStorage = M.insert key val $ mapStorage map
+}
+
+-- Overwrite all keys in the first map with values from the second
+vmPatch :: (Ord k) => VersionedMap k v -> VersionedMap k v -> VersionedMap k v
+vmPatch base diff =
+ case () of
+ _ | mapVersion base == 0 -> diff
+ _ | mapVersion diff == 0 -> base
+ _ | vmIsQuickEqual base diff -> diff
+ _ -> VersionedMap {
+ mapVersion = -1,
+ mapStorage = M.unionWith (flip const) (mapStorage base) (mapStorage diff)
+ }
+
+-- Modify a variable as with x=1. This applies it to the appropriate scope.
+writeVariable :: forall s. Ctx s -> String -> VariableValue -> ST s ()
+writeVariable ctx name val = do
+ (_, typ) <- readVariableWithScope ctx name
+ case typ of
+ GlobalVar -> writeGlobal ctx name val
+ LocalVar -> writeLocal ctx name val
+ -- Prefixed variables actually become local variables in the invoked function
+ PrefixVar -> writeLocal ctx name val
+
+writeGlobal ctx name val = do
+ modifySTRef (cOutput ctx) $ insertGlobal name val
+
+writeLocal ctx name val = do
+ modifySTRef (cOutput ctx) $ insertLocal name val
+
+writePrefix ctx name val = do
+ modifySTRef (cOutput ctx) $ insertPrefix name val
+
+-- Look up a variable value, and also return its scope
+readVariableWithScope :: forall s. Ctx s -> String -> ST s (VariableValue, VariableScope)
+readVariableWithScope ctx name = lookupStack get dep def ctx name
+ where
+ def = (unknownVariableValue, GlobalVar)
+ get = getVariableWithScope
+ dep k v =
+ case v of
+ (val, GlobalVar) -> DepGlobalValue k val
+ (val, LocalVar) -> DepLocalValue k val
+ (val, PrefixVar) -> DepPrefixValue k val
+
+getVariableWithScope :: InternalState -> String -> Maybe (VariableValue, VariableScope)
+getVariableWithScope s name =
+ case (vmLookup name $ sPrefixValues s, vmLookup name $ sLocalValues s, vmLookup name $ sGlobalValues s) of
+ (Just var, _, _) -> return (var, PrefixVar)
+ (_, Just var, _) -> return (var, LocalVar)
+ (_, _, Just var) -> return (var, GlobalVar)
+ _ -> Nothing
+
+undefineFunction ctx name =
+ writeFunction ctx name $ FunctionUnknown
+
+undefineVariable ctx name =
+ writeVariable ctx name $ emptyVariableValue
+
+readVariable ctx name = fst <$> readVariableWithScope ctx name
+
+readGlobal ctx name = lookupStack get dep def ctx name
+ where
+ def = unknownVariableValue
+ get s name = vmLookup name $ sGlobalValues s
+ dep k v = DepGlobalValue k v
+
+readFunction ctx name = lookupStack get dep def ctx name
+ where
+ def = unknownFunctionValue
+ get s name = vmLookup name $ sFunctionTargets s
+ dep k v = DepFunction k v
+
+writeFunction ctx name val = do
+ modifySTRef (cOutput ctx) $ insertFunction name $ S.singleton val
+
+-- Look up each state on the stack until a value is found (or the default is used),
+-- then add this value as a StateDependency.
+lookupStack :: forall s k v.
+ -- A function that maybe finds a value from a state
+ (InternalState -> k -> Maybe v)
+ -- A function that creates a dependency on what was found
+ -> (k -> v -> StateDependency)
+ -- A default value, if the value can't be found anywhere
+ -> v
+ -- Context
+ -> Ctx s
+ -- The key to look up
+ -> k
+ -- Returning the result
+ -> ST s v
+lookupStack get dep def ctx key = do
+ top <- readSTRef $ cInput ctx
+ case get top key of
+ Just v -> return v
+ Nothing -> f (cStack ctx)
+ where
+ f [] = return def
+ f (s:rest) = do
+ -- Go up the stack until we find the value, and add
+ -- a dependency on each state (including where it was found)
+ res <- fromMaybe (f rest) (return <$> get (stackState s) key)
+ modifySTRef (dependencies s) $ S.insert $ dep key res
+ return res
+
+-- Like lookupStack but without adding dependencies
+peekStack get def ctx key = do
+ top <- readSTRef $ cInput ctx
+ case get top key of
+ Just v -> return v
+ Nothing -> f (cStack ctx)
+ where
+ f [] = return def
+ f (s:rest) =
+ case get (stackState s) key of
+ Just v -> return v
+ Nothing -> f rest
+
+-- Check if the current context fulfills a StateDependency
+fulfillsDependency ctx dep =
+ case dep of
+ DepGlobalValue name val -> (== (val, GlobalVar)) <$> peek ctx name
+ DepLocalValue name val -> (== (val, LocalVar)) <$> peek ctx name
+ DepPrefixValue name val -> (== (val, PrefixVar)) <$> peek ctx name
+ DepFunction name val -> (== val) <$> peekFunc ctx name
+ DepIsRecursive node val -> return $ val == any (\f -> entryPoint f == node) (cStack ctx)
+ -- _ -> error $ "Unknown dep " ++ show dep
+ where
+ peek = peekStack getVariableWithScope (unknownVariableValue, GlobalVar)
+ peekFunc = peekStack (\state name -> vmLookup name $ sFunctionTargets state) unknownFunctionValue
+
+-- Check if the current context fulfills all StateDependencies
+fulfillsDependencies ctx deps =
+ f $ S.toList deps
+ where
+ f [] = return True
+ f (dep:rest) = do
+ res <- fulfillsDependency ctx dep
+ if res
+ then f rest
+ else return False
+
+-- Create a brand new Ctx given a Control Flow Graph (CFG)
+newCtx g = do
+ c <- newSTRef 1
+ input <- newSTRef undefined
+ output <- newSTRef undefined
+ node <- newSTRef undefined
+ cache <- newSTRef M.empty
+ invocations <- newSTRef M.empty
+ return $ Ctx {
+ cCounter = c,
+ cInput = input,
+ cOutput = output,
+ cNode = node,
+ cCache = cache,
+ cStack = [],
+ cInvocations = invocations,
+ cGraph = g
+ }
+
+-- The next incrementing version for VersionedMaps
+nextVersion ctx = do
+ let ctr = cCounter ctx
+ n <- readSTRef ctr
+ writeSTRef ctr $! n+1
+ return n
+
+-- Create a new StackEntry
+newStackEntry ctx point = do
+ deps <- newSTRef S.empty
+ state <- readSTRef $ cOutput ctx
+ callsite <- readSTRef $ cNode ctx
+ return $ StackEntry {
+ entryPoint = point,
+ callSite = callsite,
+ dependencies = deps,
+ stackState = state
+ }
+
+-- Call a function with a new stack entry on the stack
+withNewStackFrame ctx node f = do
+ newEntry <- newStackEntry ctx node
+ newInput <- newSTRef newInternalState
+ newOutput <- newSTRef newInternalState
+ newNode <- newSTRef node
+ let newCtx = ctx {
+ cInput = newInput,
+ cOutput = newOutput,
+ cNode = newNode,
+ cStack = newEntry : cStack ctx
+ }
+ x <- f newCtx
+
+ {-
+ deps <- readSTRef $ dependencies newEntry
+ selfcheck <- fulfillsDependencies newCtx deps
+ unless selfcheck $ error $ pleaseReport $ "Unmet stack dependencies on " ++ show (node, deps)
+ -}
+
+ return (x, newEntry)
+
+-- Check if invoking this function would be a recursive loop
+-- (i.e. we already have the function on the stack)
+wouldBeRecursive ctx node = f (cStack ctx)
+ where
+ f [] = return False
+ f (s:rest) = do
+ res <-
+ if entryPoint s == node
+ then return True
+ else f rest
+ modifySTRef (dependencies s) $ S.insert $ DepIsRecursive node res
+ return res
+
+-- The main DFA 'transfer' function, applying the effects of a node to the output state
+transfer ctx label =
+ --traceShow ("Transferring", label) $
+ case label of
+ CFStructuralNode -> return ()
+ CFEntryPoint _ -> return ()
+ CFImpliedExit -> return ()
+ CFResolvedExit {} -> return ()
+
+ CFExecuteCommand cmd -> transferCommand ctx cmd
+ CFExecuteSubshell reason entry exit -> transferSubshell ctx reason entry exit
+ CFApplyEffects effects -> mapM_ (\(IdTagged _ f) -> transferEffect ctx f) effects
+
+ CFUnresolvedExit -> patchOutputM ctx unreachableState
+ CFUnreachable -> patchOutputM ctx unreachableState
+
+ -- TODO
+ CFSetBackgroundPid _ -> return ()
+ CFSetExitCode _ -> return ()
+ CFDropPrefixAssignments {} ->
+ modifySTRef (cOutput ctx) $ \c -> modified c { sPrefixValues = vmEmpty }
+-- _ -> error $ "Unknown " ++ show label
+
+
+-- Transfer the effects of a subshell invocation. This is similar to a function call
+-- to allow easily discarding the effects (otherwise the InternalState would have
+-- to represent subshell depth, while this way it can simply use the function stack).
+transferSubshell ctx reason entry exit = do
+ let cout = cOutput ctx
+ initial <- readSTRef cout
+ runCached ctx entry (f entry exit)
+ -- Clear subshell changes. TODO: track this to warn about modifications.
+ writeSTRef cout initial
+ where
+ f entry exit ctx = do
+ (states, frame) <- withNewStackFrame ctx entry (flip dataflow $ entry)
+ let (_, res) = fromMaybe (error $ pleaseReport "Subshell has no exit") $ M.lookup exit states
+ deps <- readSTRef $ dependencies frame
+ registerFlowResult ctx entry states deps
+ return (deps, res)
+
+-- Transfer the effects of executing a command, i.e. the merged union of all possible function definitions.
+transferCommand ctx Nothing = return ()
+transferCommand ctx (Just name) = do
+ targets <- readFunction ctx name
+ --traceShowM ("Transferring ",name,targets)
+ transferMultiple ctx $ map (flip transferFunctionValue) $ S.toList targets
+
+-- Transfer a set of function definitions and merge the output states.
+transferMultiple ctx funcs = do
+-- traceShowM ("Transferring set of ", length funcs)
+ original <- readSTRef out
+ branches <- mapM (apply ctx original) funcs
+ merged <- mergeStates ctx original branches
+ let patched = patchState original merged
+ writeSTRef out patched
+ where
+ out = cOutput ctx
+ apply ctx original f = do
+ writeSTRef out original
+ f ctx
+ readSTRef out
+
+-- Transfer the effects of a single function definition.
+transferFunctionValue ctx funcVal =
+ case funcVal of
+ FunctionUnknown -> return ()
+ FunctionDefinition name entry exit -> do
+ isRecursive <- wouldBeRecursive ctx entry
+ if isRecursive
+ then return () -- TODO: Find a better strategy for recursion
+ else runCached ctx entry (f name entry exit)
+ where
+ f name entry exit ctx = do
+ (states, frame) <- withNewStackFrame ctx entry (flip dataflow $ entry)
+ deps <- readSTRef $ dependencies frame
+ let res =
+ case M.lookup exit states of
+ Just (input, output) -> do
+ -- Discard local variables. TODO: track&retain variables declared local in previous scopes?
+ modified output { sLocalValues = vmEmpty }
+ Nothing -> do
+ -- e.g. f() { exit; }
+ unreachableState
+ registerFlowResult ctx entry states deps
+ return (deps, res)
+
+
+-- Register/save the result of a dataflow of a function.
+-- At the end, all the different values from different flows are merged together.
+registerFlowResult ctx entry states deps = do
+ -- This function is called in the context of a CFExecuteCommand and not its invoked function,
+ -- so manually add the current node to the stack.
+ current <- readSTRef $ cNode ctx
+ let parents = map callSite $ cStack ctx
+ -- A unique path to this flow context. The specific value doesn't matter, as long as it's
+ -- unique per invocation of the function. This is required so that 'x=1; f; x=2; f' won't
+ -- overwrite each other.
+ let path = entry : current : parents
+ modifySTRef (cInvocations ctx) $ M.insert path (deps, states)
+
+
+-- Look up a node in the cache and see if the dependencies of any entries are matched.
+-- In that case, reuse the previous result instead of doing a new data flow.
+runCached :: forall s. Ctx s -> Node -> (Ctx s -> ST s (S.Set StateDependency, InternalState)) -> ST s ()
+runCached ctx node f = do
+ cache <- getCache ctx node
+ case cache of
+ Just v -> do
+ -- traceShowM $ ("Running cached", node)
+ patchOutputM ctx v
+ Nothing -> do
+ -- traceShowM $ ("Cache failed", node)
+ (deps, diff) <- f ctx
+ modifySTRef (cCache ctx) (M.insertWith (\_ old -> (deps, diff):(take cacheEntries old)) node [(deps,diff)])
+ -- traceShowM $ ("Recomputed cache for", node, deps)
+ patchOutputM ctx diff
+
+-- Get a cached version whose dependencies are currently fulfilled, if any.
+getCache :: forall s. Ctx s -> Node -> ST s (Maybe InternalState)
+getCache ctx node = do
+ cache <- readSTRef $ cCache ctx
+ -- traceShowM $ ("Cache for", node, "length", length $ M.findWithDefault [] node cache, M.lookup node cache)
+ f $ M.findWithDefault [] node cache
+ where
+ f [] = return Nothing
+ f ((deps, value):rest) = do
+ match <- fulfillsDependencies ctx deps
+ if match
+ then return $ Just value
+ else f rest
+
+-- Transfer a single CFEffect to the output state.
+transferEffect ctx effect =
+ case effect of
+ CFReadVariable name -> do
+ void $ readVariable ctx name
+ CFWriteVariable name value -> do
+ val <- cfValueToVariableValue ctx value
+ writeVariable ctx name val
+ CFWriteGlobal name value -> do
+ val <- cfValueToVariableValue ctx value
+ writeGlobal ctx name val
+ CFWriteLocal name value -> do
+ val <- cfValueToVariableValue ctx value
+ writeLocal ctx name val
+ CFWritePrefix name value -> do
+ val <- cfValueToVariableValue ctx value
+ writePrefix ctx name val
+ CFUndefineVariable name -> undefineVariable ctx name
+ CFUndefineFunction name -> undefineFunction ctx name
+ CFUndefine name -> do
+ -- This should really just unset one or the other
+ undefineVariable ctx name
+ undefineFunction ctx name
+ CFDefineFunction name id entry exit ->
+ writeFunction ctx name $ FunctionDefinition name entry exit
+
+ -- TODO
+ CFUndefineNameref name -> undefineVariable ctx name
+ CFHintArray name -> return ()
+ CFHintDefined name -> return ()
+ CFModifyProps {} -> return ()
+-- _ -> error $ "Unknown effect " ++ show effect
+
+
+
+-- Transfer the CFG's idea of a value into our VariableState
+cfValueToVariableValue ctx val =
+ case val of
+ CFValueArray -> return unknownVariableValue -- TODO: Track array status
+ CFValueComputed _ parts -> foldM f emptyVariableValue parts
+ CFValueInteger -> return unknownIntegerValue
+ CFValueString -> return unknownVariableValue
+ CFValueUninitialized -> return emptyVariableValue
+-- _ -> error $ "Unknown value: " ++ show val
+ where
+ f val part = do
+ next <- computeValue ctx part
+ return $ val `appendVariableValue` next
+
+-- A value can be computed from 0 or more parts, such as x="literal$y$z"
+computeValue ctx part =
+ case part of
+ CFStringLiteral str -> return $ literalToVariableValue str
+ CFStringInteger -> return unknownIntegerValue
+ CFStringUnknown -> return unknownVariableValue
+ CFStringVariable name -> readVariable ctx name
+
+-- Append two VariableValues as if with z="$x$y"
+appendVariableValue :: VariableValue -> VariableValue -> VariableValue
+appendVariableValue a b =
+ VariableValue {
+ literalValue = liftM2 (++) (literalValue a) (literalValue b),
+ spaceStatus = appendSpaceStatus (spaceStatus a) (spaceStatus b)
+ }
+
+appendSpaceStatus a b =
+ case (a,b) of
+ (SpaceStatusEmpty, _) -> b
+ (_, SpaceStatusEmpty) -> a
+ (SpaceStatusClean, SpaceStatusClean) -> a
+ _ ->SpaceStatusDirty
+
+unknownIntegerValue = VariableValue {
+ literalValue = Nothing,
+ spaceStatus = SpaceStatusClean
+}
+
+literalToVariableValue str = VariableValue {
+ literalValue = Just str,
+ spaceStatus = literalToSpaceStatus str
+}
+
+withoutChanges ctx f = do
+ let inp = cInput ctx
+ let out = cOutput ctx
+ prevInput <- readSTRef inp
+ prevOutput <- readSTRef out
+ res <- f
+ writeSTRef inp prevInput
+ writeSTRef out prevOutput
+ return res
+
+-- Get the SpaceStatus for a literal string, i.e. if it needs quoting
+literalToSpaceStatus str =
+ case str of
+ "" -> SpaceStatusEmpty
+ _ | all (`notElem` " \t\n*?[") str -> SpaceStatusClean
+ _ -> SpaceStatusDirty
+
+type StateMap = M.Map Node (InternalState, InternalState)
+
+-- Classic, iterative Data Flow Analysis. See Wikipedia for a description of the process.
+dataflow :: forall s. Ctx s -> Node -> ST s StateMap
+dataflow ctx entry = do
+ pending <- newSTRef $ S.singleton entry
+ states <- newSTRef $ M.empty
+ -- Should probably be done via a stack frame instead
+ withoutChanges ctx $
+ f iterationCount pending states
+ readSTRef states
+ where
+ graph = cGraph ctx
+ f 0 _ _ = error $ pleaseReport "DFA did not reach fix point"
+ f n pending states = do
+ ps <- readSTRef pending
+ if S.null ps
+ then return ()
+ else do
+ let (next, rest) = S.deleteFindMin ps
+ nexts <- process states next
+ writeSTRef pending $ foldl (flip S.insert) rest nexts
+ f (n-1) pending states
+
+ process states node = do
+ stateMap <- readSTRef states
+ let inputs = filter (\c -> sIsReachable c /= Just False) $ mapMaybe (\c -> fmap snd $ M.lookup c stateMap) incoming
+ input <-
+ case incoming of
+ [] -> return newInternalState
+ _ ->
+ case inputs of
+ [] -> return unreachableState
+ (x:rest) -> foldM (mergeState ctx) x rest
+ writeSTRef (cInput ctx) $ input
+ writeSTRef (cOutput ctx) $ input
+ writeSTRef (cNode ctx) $ node
+ transfer ctx label
+ newOutput <- readSTRef $ cOutput ctx
+ result <-
+ if is2plus outgoing
+ then
+ -- Version the state because we split and will probably merge later
+ versionState ctx newOutput
+ else return newOutput
+ writeSTRef states $ M.insert node (input, result) stateMap
+ case M.lookup node stateMap of
+ Nothing -> return outgoing
+ Just (oldInput, oldOutput) ->
+ if oldOutput == result
+ then return []
+ else return outgoing
+ where
+ (incomingL, _, label, outgoingL) = context graph $ node
+ incoming = map snd $ filter isRegular $ incomingL
+ outgoing = map snd outgoingL
+ isRegular = ((== CFEFlow) . fst)
+
+runRoot ctx entry exit = do
+ let env = createEnvironmentState
+ writeSTRef (cInput ctx) $ env
+ writeSTRef (cOutput ctx) $ env
+ writeSTRef (cNode ctx) $ entry
+ (states, frame) <- withNewStackFrame ctx entry $ \c -> dataflow c entry
+ deps <- readSTRef $ dependencies frame
+ registerFlowResult ctx entry states deps
+ -- Return the final state, used to invoke functions that were declared but not invoked
+ return $ snd $ fromMaybe (error $ pleaseReport "Missing exit state") $ M.lookup exit states
+
+
+analyzeControlFlow :: CFGParameters -> Token -> CFGAnalysis
+analyzeControlFlow params t =
+ let
+ cfg = buildGraph params t
+ (entry, exit) = M.findWithDefault (error $ pleaseReport "Missing root") (getId t) (cfIdToNode cfg)
+ in
+ runST $ f cfg entry exit
+ where
+ f cfg entry exit = do
+ ctx <- newCtx $ cfGraph cfg
+ -- Do a dataflow analysis starting on the root node
+ exitState <- runRoot ctx entry exit
+
+ -- All nodes we've touched
+ invocations <- readSTRef $ cInvocations ctx
+ let invokedNodes = M.fromDistinctAscList $ map (\c -> (c, ())) $ S.toList $ M.keysSet $ groupByNode $ M.map snd invocations
+
+ -- Invoke all functions that were declared but not invoked
+ -- This is so that we still get warnings for dead code
+ -- (it's probably not actually dead, just used by a script that sources ours)
+ let declaredFunctions = getFunctionTargets exitState
+ let uninvoked = M.difference declaredFunctions invokedNodes
+ analyzeStragglers ctx exitState uninvoked
+
+ -- Now round up all the states from all data flows
+ -- (FIXME: this excludes functions that were defined in straggling functions)
+ invocations <- readSTRef $ cInvocations ctx
+ invokedStates <- flattenByNode ctx $ groupByNode $ M.map addDeps invocations
+
+ -- Fill in the map with unreachable states for anything we didn't get to
+ let baseStates = M.fromDistinctAscList $ map (\c -> (c, (unreachableState, unreachableState))) $ uncurry enumFromTo $ nodeRange $ cfGraph cfg
+ let allStates = M.unionWith (flip const) baseStates invokedStates
+
+ -- Convert to external states
+ let nodeToData = M.map (\(a,b) -> (internalToExternal a, internalToExternal b)) allStates
+
+ return $ nodeToData `deepseq` CFGAnalysis {
+ graph = cfGraph cfg,
+ tokenToNode = cfIdToNode cfg,
+ nodeToData = nodeToData
+ }
+
+
+ -- Include the dependencies in the state of each function, e.g. if it depends on `x=foo` then add that.
+ addDeps :: (S.Set StateDependency, M.Map Node (InternalState, InternalState)) -> M.Map Node (InternalState, InternalState)
+ addDeps (deps, m) = let base = depsToState deps in M.map (\(a,b) -> (base `patchState` a, base `patchState` b)) m
+
+ -- Collect all the states that each node has resulted in.
+ groupByNode :: forall k v. M.Map k (M.Map Node v) -> M.Map Node [v]
+ groupByNode pathMap = M.fromListWith (++) $ map (\(k,v) -> (k,[v])) $ concatMap M.toList $ M.elems pathMap
+
+ -- Merge all the pre/post states for each node. This would have been a foldM if Map had one.
+ flattenByNode ctx m = M.fromDistinctAscList <$> (mapM (mergePair ctx) $ M.toList m)
+
+ mergeAllStates ctx pairs =
+ let
+ (pres, posts) = unzip pairs
+ in do
+ pre <- mergeStates ctx (error $ pleaseReport "Null node states") pres
+ post <- mergeStates ctx (error $ pleaseReport "Null node states") posts
+ return (pre, post)
+
+ mergePair ctx (node, list) = do
+ merged <- mergeAllStates ctx list
+ return (node, merged)
+
+ -- Get the all the functions defined in an InternalState
+ getFunctionTargets :: InternalState -> M.Map Node FunctionDefinition
+ getFunctionTargets state =
+ let
+ declaredFuncs = S.unions $ mapStorage $ sFunctionTargets state
+ getFunc d =
+ case d of
+ FunctionDefinition _ entry _ -> Just (entry, d)
+ _ -> Nothing
+ funcs = mapMaybe getFunc $ S.toList declaredFuncs
+ in
+ M.fromList funcs
+
+
+analyzeStragglers ctx state stragglers = do
+ mapM_ f $ M.elems stragglers
+ where
+ f def@(FunctionDefinition name entry exit) = do
+ writeSTRef (cInput ctx) state
+ writeSTRef (cOutput ctx) state
+ writeSTRef (cNode ctx) entry
+ transferFunctionValue ctx def
+
+
+return []
+runTests = $quickCheckAll
diff --git a/src/ShellCheck/Checks/Commands.hs b/src/ShellCheck/Checks/Commands.hs
index e65dc68..cac06bc 100644
--- a/src/ShellCheck/Checks/Commands.hs
+++ b/src/ShellCheck/Checks/Commands.hs
@@ -30,6 +30,7 @@ import ShellCheck.AnalyzerLib
import ShellCheck.Data
import ShellCheck.Interface
import ShellCheck.Parser
+import ShellCheck.Prelude
import ShellCheck.Regex
import Control.Monad
diff --git a/src/ShellCheck/Checks/ControlFlow.hs b/src/ShellCheck/Checks/ControlFlow.hs
new file mode 100644
index 0000000..9b7635e
--- /dev/null
+++ b/src/ShellCheck/Checks/ControlFlow.hs
@@ -0,0 +1,101 @@
+{-
+ Copyright 2022 Vidar Holen
+
+ This file is part of ShellCheck.
+ https://www.shellcheck.net
+
+ ShellCheck is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ShellCheck is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+-}
+{-# LANGUAGE TemplateHaskell #-}
+
+-- Checks that run on the Control Flow Graph (as opposed to the AST)
+-- This is scaffolding for a work in progress.
+
+module ShellCheck.Checks.ControlFlow (checker, optionalChecks, ShellCheck.Checks.ControlFlow.runTests) where
+
+import ShellCheck.AST
+import ShellCheck.ASTLib
+import ShellCheck.CFG hiding (cfgAnalysis)
+import ShellCheck.CFGAnalysis
+import ShellCheck.AnalyzerLib
+import ShellCheck.Data
+import ShellCheck.Interface
+
+import Control.Monad
+import Control.Monad.Reader
+import Data.Graph.Inductive.Graph
+import qualified Data.Map as M
+import qualified Data.Set as S
+import Data.List
+import Data.Maybe
+
+import Test.QuickCheck.All (forAllProperties)
+import Test.QuickCheck.Test (quickCheckWithResult, stdArgs, maxSuccess)
+
+
+optionalChecks :: [CheckDescription]
+optionalChecks = []
+
+-- A check that runs on the entire graph
+type ControlFlowCheck = Analysis
+-- A check invoked once per node, with its (pre,post) data
+type ControlFlowNodeCheck = LNode CFNode -> (ProgramState, ProgramState) -> Analysis
+-- A check invoked once per effect, with its node's (pre,post) data
+type ControlFlowEffectCheck = IdTagged CFEffect -> Node -> (ProgramState, ProgramState) -> Analysis
+
+
+checker :: AnalysisSpec -> Parameters -> Checker
+checker spec params = Checker {
+ perScript = const $ sequence_ controlFlowChecks,
+ perToken = const $ return ()
+}
+
+controlFlowChecks :: [ControlFlowCheck]
+controlFlowChecks = [
+ runNodeChecks controlFlowNodeChecks
+ ]
+
+controlFlowNodeChecks :: [ControlFlowNodeCheck]
+controlFlowNodeChecks = [
+ runEffectChecks controlFlowEffectChecks
+ ]
+
+controlFlowEffectChecks :: [ControlFlowEffectCheck]
+controlFlowEffectChecks = [
+ ]
+
+runNodeChecks :: [ControlFlowNodeCheck] -> ControlFlowCheck
+runNodeChecks perNode = do
+ cfg <- asks cfgAnalysis
+ runOnAll cfg
+ where
+ getData datas n@(node, label) = do
+ (pre, post) <- M.lookup node datas
+ return (n, (pre, post))
+
+ runOn :: (LNode CFNode, (ProgramState, ProgramState)) -> Analysis
+ runOn (node, prepost) = mapM_ (\c -> c node prepost) perNode
+ runOnAll cfg = mapM_ runOn $ mapMaybe (getData $ nodeToData cfg) $ labNodes (graph cfg)
+
+runEffectChecks :: [ControlFlowEffectCheck] -> ControlFlowNodeCheck
+runEffectChecks list = checkNode
+ where
+ checkNode (node, label) prepost =
+ case label of
+ CFApplyEffects effects -> mapM_ (\effect -> mapM_ (\c -> c effect node prepost) list) effects
+ _ -> return ()
+
+
+return []
+runTests = $( [| $(forAllProperties) (quickCheckWithResult (stdArgs { maxSuccess = 1 }) ) |])
diff --git a/src/ShellCheck/Checks/ShellSupport.hs b/src/ShellCheck/Checks/ShellSupport.hs
index 22a6a5f..9ad17f5 100644
--- a/src/ShellCheck/Checks/ShellSupport.hs
+++ b/src/ShellCheck/Checks/ShellSupport.hs
@@ -25,6 +25,7 @@ import ShellCheck.AST
import ShellCheck.ASTLib
import ShellCheck.AnalyzerLib
import ShellCheck.Interface
+import ShellCheck.Prelude
import ShellCheck.Regex
import Control.Monad
diff --git a/src/ShellCheck/Data.hs b/src/ShellCheck/Data.hs
index e22b424..fb82ca8 100644
--- a/src/ShellCheck/Data.hs
+++ b/src/ShellCheck/Data.hs
@@ -2,9 +2,27 @@ module ShellCheck.Data where
import ShellCheck.Interface
import Data.Version (showVersion)
-import Paths_ShellCheck (version)
-shellcheckVersion = showVersion version -- VERSIONSTRING
+
+{-
+If you are here because you saw an error about Paths_ShellCheck in this file,
+simply comment out the import below and define the version as a constant string.
+
+Instead of:
+
+ import Paths_ShellCheck (version)
+ shellcheckVersion = showVersion version
+
+Use:
+
+ -- import Paths_ShellCheck (version)
+ shellcheckVersion = "kludge"
+
+-}
+
+import Paths_ShellCheck (version)
+shellcheckVersion = showVersion version -- VERSIONSTRING
+
internalVariables = [
-- Generic
@@ -43,9 +61,12 @@ internalVariables = [
"flags_error", "flags_return"
]
-specialVariablesWithoutSpaces = [
- "$", "-", "?", "!", "#"
+specialIntegerVariables = [
+ "$", "?", "!", "#"
]
+
+specialVariablesWithoutSpaces = "-" : specialIntegerVariables
+
variablesWithoutSpaces = specialVariablesWithoutSpaces ++ [
"BASHPID", "BASH_ARGC", "BASH_LINENO", "BASH_SUBSHELL", "EUID", "LINENO",
"OPTIND", "PPID", "RANDOM", "SECONDS", "SHELLOPTS", "SHLVL", "UID",
diff --git a/src/ShellCheck/Debug.hs b/src/ShellCheck/Debug.hs
new file mode 100644
index 0000000..c991308
--- /dev/null
+++ b/src/ShellCheck/Debug.hs
@@ -0,0 +1,313 @@
+{-
+
+This file contains useful functions for debugging and developing ShellCheck.
+
+To invoke them interactively, run:
+
+ cabal repl
+
+At the ghci prompt, enter:
+
+ :load ShellCheck.Debug
+
+You can now invoke the functions. Here are some examples:
+
+ shellcheckString "echo $1"
+ stringToAst "(( x+1 ))"
+ stringToCfg "if foo; then bar; else baz; fi"
+ writeFile "/tmp/test.dot" $ stringToCfgViz "while foo; do bar; done"
+
+The latter file can be rendered to png with GraphViz:
+
+ dot -Tpng /tmp/test.dot > /tmp/test.png
+
+To run all unit tests in a module:
+
+ ShellCheck.Parser.runTests
+ ShellCheck.Analytics.runTests
+
+To run a specific test:
+
+ :load ShellCheck.Analytics
+ prop_checkUuoc3
+
+If you make code changes, reload in seconds at any time with:
+
+ :r
+
+===========================================================================
+
+Crash course in printf debugging in Haskell:
+
+ import Debug.Trace
+
+ greet 0 = return ()
+ -- Print when a function is invoked
+ greet n | trace ("calling greet " ++ show n) False = undefined
+ greet n = do
+ putStrLn "Enter name"
+ name <- getLine
+ -- Print at some point in any monadic function
+ traceM $ "user entered " ++ name
+ putStrLn $ "Hello " ++ name
+ -- Print a value before passing it on
+ greet $ traceShowId (n - 1)
+
+
+===========================================================================
+
+If you want to invoke `ghci` directly, such as on `shellcheck.hs`, to
+debug all of ShellCheck including I/O, you may see an error like this:
+
+ src/ShellCheck/Data.hs:5:1: error:
+ Could not load module ‘Paths_ShellCheck’
+ it is a hidden module in the package ‘ShellCheck-0.8.0’
+
+This can easily be circumvented by running `./setgitversion` or manually
+editing src/ShellCheck/Data.hs to replace the auto-deduced version number
+with a constant string as indicated.
+
+Afterwards, you can run the ShellCheck tool, as if from the shell, with:
+
+ $ ghci shellcheck.hs
+ ghci> runMain ["-x", "file.sh"]
+
+-}
+
+module ShellCheck.Debug () where
+
+import ShellCheck.Analyzer
+import ShellCheck.AST
+import ShellCheck.CFG
+import ShellCheck.Checker
+import ShellCheck.CFGAnalysis as CF
+import ShellCheck.Interface
+import ShellCheck.Parser
+import ShellCheck.Prelude
+
+import Control.Monad
+import Control.Monad.Identity
+import Control.Monad.RWS
+import Control.Monad.Writer
+import Data.Graph.Inductive.Graph as G
+import Data.List
+import Data.Maybe
+import qualified Data.Map as M
+import qualified Data.Set as S
+
+
+-- Run all of ShellCheck (minus output formatters)
+shellcheckString :: String -> CheckResult
+shellcheckString scriptString =
+ runIdentity $ checkScript dummySystemInterface checkSpec
+ where
+ checkSpec :: CheckSpec
+ checkSpec = emptyCheckSpec {
+ csScript = scriptString
+ }
+
+dummySystemInterface :: SystemInterface Identity
+dummySystemInterface = mockedSystemInterface [
+ -- A tiny, fake filesystem for sourced files
+ ("lib/mylib1.sh", "foo=$(cat $1 | wc -l)"),
+ ("lib/mylib2.sh", "bar=42")
+ ]
+
+-- Parameters used when generating Control Flow Graphs
+cfgParams :: CFGParameters
+cfgParams = CFGParameters {
+ cfLastpipe = False,
+ cfPipefail = False
+}
+
+-- An example script to play with
+exampleScript :: String
+exampleScript = unlines [
+ "#!/bin/sh",
+ "count=0",
+ "for file in *",
+ "do",
+ " (( count++ ))",
+ "done",
+ "echo $count"
+ ]
+
+-- Parse the script string into ShellCheck's ParseResult
+parseScriptString :: String -> ParseResult
+parseScriptString scriptString =
+ runIdentity $ parseScript dummySystemInterface parseSpec
+ where
+ parseSpec :: ParseSpec
+ parseSpec = newParseSpec {
+ psFilename = "myscript",
+ psScript = scriptString
+ }
+
+
+-- Parse the script string into an Abstract Syntax Tree
+stringToAst :: String -> Token
+stringToAst scriptString =
+ case maybeRoot of
+ Just root -> root
+ Nothing -> error $ "Script failed to parse: " ++ show parserWarnings
+ where
+ parseResult :: ParseResult
+ parseResult = parseScriptString scriptString
+
+ maybeRoot :: Maybe Token
+ maybeRoot = prRoot parseResult
+
+ parserWarnings :: [PositionedComment]
+ parserWarnings = prComments parseResult
+
+
+astToCfgResult :: Token -> CFGResult
+astToCfgResult = buildGraph cfgParams
+
+astToDfa :: Token -> CFGAnalysis
+astToDfa = analyzeControlFlow cfgParams
+
+astToCfg :: Token -> CFGraph
+astToCfg = cfGraph . astToCfgResult
+
+stringToCfg :: String -> CFGraph
+stringToCfg = astToCfg . stringToAst
+
+stringToDfa :: String -> CFGAnalysis
+stringToDfa = astToDfa . stringToAst
+
+cfgToGraphViz :: CFGraph -> String
+cfgToGraphViz = cfgToGraphVizWith show
+
+stringToCfgViz :: String -> String
+stringToCfgViz = cfgToGraphViz . stringToCfg
+
+stringToDfaViz :: String -> String
+stringToDfaViz = dfaToGraphViz . stringToDfa
+
+-- Dump a Control Flow Graph as GraphViz with extended information
+stringToDetailedCfgViz :: String -> String
+stringToDetailedCfgViz scriptString = cfgToGraphVizWith nodeLabel graph
+ where
+ ast :: Token
+ ast = stringToAst scriptString
+
+ cfgResult :: CFGResult
+ cfgResult = astToCfgResult ast
+
+ graph :: CFGraph
+ graph = cfGraph cfgResult
+
+ idToToken :: M.Map Id Token
+ idToToken = M.fromList $ execWriter $ doAnalysis (\c -> tell [(getId c, c)]) ast
+
+ idToNode :: M.Map Id (Node, Node)
+ idToNode = cfIdToNode cfgResult
+
+ nodeToStartIds :: M.Map Node (S.Set Id)
+ nodeToStartIds =
+ M.fromListWith S.union $
+ map (\(id, (start, _)) -> (start, S.singleton id)) $
+ M.toList idToNode
+
+ nodeToEndIds :: M.Map Node (S.Set Id)
+ nodeToEndIds =
+ M.fromListWith S.union $
+ map (\(id, (_, end)) -> (end, S.singleton id)) $
+ M.toList idToNode
+
+ formatId :: Id -> String
+ formatId id = fromMaybe ("Unknown " ++ show id) $ do
+ (OuterToken _ token) <- M.lookup id idToToken
+ firstWord <- words (show token) !!! 0
+ -- Strip off "Inner_"
+ (_ : tokenName) <- return $ dropWhile (/= '_') firstWord
+ return $ tokenName ++ " " ++ show id
+
+ formatGroup :: S.Set Id -> String
+ formatGroup set = intercalate ", " $ map formatId $ S.toList set
+
+ nodeLabel (node, label) = unlines [
+ show node ++ ". " ++ show label,
+ "Begin: " ++ formatGroup (M.findWithDefault S.empty node nodeToStartIds),
+ "End: " ++ formatGroup (M.findWithDefault S.empty node nodeToEndIds)
+ ]
+
+
+-- Dump a Control Flow Graph with Data Flow Analysis as GraphViz
+dfaToGraphViz :: CF.CFGAnalysis -> String
+dfaToGraphViz analysis = cfgToGraphVizWith label $ CF.graph analysis
+ where
+ label (node, label) =
+ let
+ desc = show node ++ ". " ++ show label
+ in
+ fromMaybe ("No DFA available\n\n" ++ desc) $ do
+ (pre, post) <- M.lookup node $ CF.nodeToData analysis
+ return $ unlines [
+ "Precondition: " ++ show pre,
+ "",
+ desc,
+ "",
+ "Postcondition: " ++ show post
+ ]
+
+
+-- Dump an Control Flow Graph to GraphViz with a given node formatter
+cfgToGraphVizWith :: (LNode CFNode -> String) -> CFGraph -> String
+cfgToGraphVizWith nodeLabel graph = concat [
+ "digraph {\n",
+ concatMap dumpNode (labNodes graph),
+ concatMap dumpLink (labEdges graph),
+ tagVizEntries graph,
+ "}\n"
+ ]
+ where
+ dumpNode l@(node, label) = show node ++ " [label=" ++ quoteViz (nodeLabel l) ++ "]\n"
+ dumpLink (from, to, typ) = show from ++ " -> " ++ show to ++ " [style=" ++ quoteViz (edgeStyle typ) ++ "]\n"
+ edgeStyle CFEFlow = "solid"
+ edgeStyle CFEExit = "bold"
+ edgeStyle CFEFalseFlow = "dotted"
+
+quoteViz str = "\"" ++ escapeViz str ++ "\""
+escapeViz [] = []
+escapeViz (c:rest) =
+ case c of
+ '\"' -> '\\' : '\"' : escapeViz rest
+ '\n' -> '\\' : 'l' : escapeViz rest
+ '\\' -> '\\' : '\\' : escapeViz rest
+ _ -> c : escapeViz rest
+
+
+-- Dump an Abstract Syntax Tree (or branch thereof) to GraphViz format
+astToGraphViz :: Token -> String
+astToGraphViz token = concat [
+ "digraph {\n",
+ formatTree token,
+ "}\n"
+ ]
+ where
+ formatTree :: Token -> String
+ formatTree t = snd $ execRWS (doStackAnalysis push pop t) () []
+
+ push :: Token -> RWS () String [Int] ()
+ push (OuterToken (Id n) inner) = do
+ stack <- get
+ put (n : stack)
+ case stack of
+ [] -> return ()
+ (top:_) -> tell $ show top ++ " -> " ++ show n ++ "\n"
+ tell $ show n ++ " [label=" ++ quoteViz (show n ++ ": " ++ take 32 (show inner)) ++ "]\n"
+
+ pop :: Token -> RWS () String [Int] ()
+ pop _ = modify tail
+
+
+-- For each entry point, set the rank so that they'll align in the graph
+tagVizEntries :: CFGraph -> String
+tagVizEntries graph = "{ rank=same " ++ rank ++ " }"
+ where
+ entries = mapMaybe find $ labNodes graph
+ find (node, CFEntryPoint name) = return (node, name)
+ find _ = Nothing
+ rank = unwords $ map (\(c, _) -> show c) entries
diff --git a/src/ShellCheck/Fixer.hs b/src/ShellCheck/Fixer.hs
index 1409b24..2376842 100644
--- a/src/ShellCheck/Fixer.hs
+++ b/src/ShellCheck/Fixer.hs
@@ -22,6 +22,7 @@
module ShellCheck.Fixer (applyFix, removeTabStops, mapPositions, Ranged(..), runTests) where
import ShellCheck.Interface
+import ShellCheck.Prelude
import Control.Monad.State
import Data.Array
import Data.List
@@ -228,7 +229,7 @@ applyReplacement2 rep string = do
let (l1, l2) = tmap posLine originalPos in
when (l1 /= 1 || l2 /= 1) $
- error "ShellCheck internal error, please report: bad cross-line fix"
+ error $ pleaseReport "bad cross-line fix"
let replacer = repString rep
let shift = (length replacer) - (oldEnd - oldStart)
diff --git a/src/ShellCheck/Parser.hs b/src/ShellCheck/Parser.hs
index 3958406..9f9241c 100644
--- a/src/ShellCheck/Parser.hs
+++ b/src/ShellCheck/Parser.hs
@@ -27,6 +27,7 @@ import ShellCheck.AST
import ShellCheck.ASTLib hiding (runTests)
import ShellCheck.Data
import ShellCheck.Interface
+import ShellCheck.Prelude
import Control.Applicative ((<*), (*>))
import Control.Monad
@@ -210,7 +211,7 @@ getNextIdSpanningTokenList list =
-- Get the span covered by an id
getSpanForId :: Monad m => Id -> SCParser m (SourcePos, SourcePos)
getSpanForId id =
- Map.findWithDefault (error "Internal error: no position for id. Please report!") id <$>
+ Map.findWithDefault (error $ pleaseReport "no parser span for id") id <$>
getMap
-- Create a new id with the same span as an existing one
@@ -1918,7 +1919,7 @@ readPendingHereDocs = do
-- The end token is just a prefix
skipLine
| hasTrailer ->
- error "ShellCheck bug, please report (here doc trailer)."
+ error $ pleaseReport "unexpected heredoc trailer"
-- The following cases assume no trailing text:
| dashed == Undashed && (not $ null leadingSpace) -> do
diff --git a/src/ShellCheck/Prelude.hs b/src/ShellCheck/Prelude.hs
new file mode 100644
index 0000000..7e9011b
--- /dev/null
+++ b/src/ShellCheck/Prelude.hs
@@ -0,0 +1,48 @@
+{-
+ Copyright 2022 Vidar Holen
+
+ This file is part of ShellCheck.
+ https://www.shellcheck.net
+
+ ShellCheck is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ShellCheck is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+-}
+
+-- Generic basic utility functions
+module ShellCheck.Prelude where
+
+-- Get element 0 or a default. Like `head` but safe.
+headOrDefault _ (a:_) = a
+headOrDefault def _ = def
+
+-- Get the last element or a default. Like `last` but safe.
+lastOrDefault def [] = def
+lastOrDefault _ list = last list
+
+--- Get element n of a list, or Nothing. Like `!!` but safe.
+(!!!) list i =
+ case drop i list of
+ [] -> Nothing
+ (r:_) -> Just r
+
+
+-- Like mconcat but for Semigroups
+sconcat1 :: (Semigroup t) => [t] -> t
+sconcat1 [x] = x
+sconcat1 (x:xs) = x <> sconcat1 xs
+
+sconcatOrDefault def [] = def
+sconcatOrDefault _ list = sconcat1 list
+
+-- For more actionable "impossible" errors
+pleaseReport str = "ShellCheck internal error, please report: " ++ str
diff --git a/test/shellcheck.hs b/test/shellcheck.hs
index e463403..1a272af 100644
--- a/test/shellcheck.hs
+++ b/test/shellcheck.hs
@@ -5,8 +5,11 @@ import System.Exit
import qualified ShellCheck.Analytics
import qualified ShellCheck.AnalyzerLib
import qualified ShellCheck.ASTLib
+import qualified ShellCheck.CFG
+import qualified ShellCheck.CFGAnalysis
import qualified ShellCheck.Checker
import qualified ShellCheck.Checks.Commands
+import qualified ShellCheck.Checks.ControlFlow
import qualified ShellCheck.Checks.Custom
import qualified ShellCheck.Checks.ShellSupport
import qualified ShellCheck.Fixer
@@ -19,8 +22,11 @@ main = do
ShellCheck.Analytics.runTests
,ShellCheck.AnalyzerLib.runTests
,ShellCheck.ASTLib.runTests
+ ,ShellCheck.CFG.runTests
+ ,ShellCheck.CFGAnalysis.runTests
,ShellCheck.Checker.runTests
,ShellCheck.Checks.Commands.runTests
+ ,ShellCheck.Checks.ControlFlow.runTests
,ShellCheck.Checks.Custom.runTests
,ShellCheck.Checks.ShellSupport.runTests
,ShellCheck.Fixer.runTests