diff --git a/CHANGELOG.md b/CHANGELOG.md index fb733ce..4763ddd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ ### Fixed ### Changed +- ShellCheck now has a Data Flow Analysis engine to make smarter decisions + based on control flow rather than just syntax. Existing checks will + gradually start using it, which may cause them to trigger differently + (but more accurately). ## v0.8.0 - 2021-11-06 diff --git a/ShellCheck.cabal b/ShellCheck.cabal index 1167c82..b22b5c8 100644 --- a/ShellCheck.cabal +++ b/ShellCheck.cabal @@ -53,6 +53,7 @@ library deepseq >= 1.4.0.0, Diff >= 0.2.0, directory >= 1.2.3.0, + fgl, mtl >= 2.2.1, filepath, parsec, @@ -66,11 +67,15 @@ library ShellCheck.Analytics ShellCheck.Analyzer ShellCheck.AnalyzerLib + ShellCheck.CFG + ShellCheck.CFGAnalysis ShellCheck.Checker ShellCheck.Checks.Commands + ShellCheck.Checks.ControlFlow ShellCheck.Checks.Custom ShellCheck.Checks.ShellSupport ShellCheck.Data + ShellCheck.Debug ShellCheck.Fixer ShellCheck.Formatter.Format ShellCheck.Formatter.CheckStyle @@ -82,6 +87,7 @@ library ShellCheck.Formatter.Quiet ShellCheck.Interface ShellCheck.Parser + ShellCheck.Prelude ShellCheck.Regex other-modules: Paths_ShellCheck @@ -100,6 +106,7 @@ executable shellcheck deepseq >= 1.4.0.0, Diff >= 0.2.0, directory >= 1.2.3.0, + fgl, mtl >= 2.2.1, filepath, parsec >= 3.0, @@ -120,6 +127,7 @@ test-suite test-shellcheck deepseq >= 1.4.0.0, Diff >= 0.2.0, directory >= 1.2.3.0, + fgl, mtl >= 2.2.1, filepath, parsec, diff --git a/src/ShellCheck/ASTLib.hs b/src/ShellCheck/ASTLib.hs index 7c88432..7cc5af2 100644 --- a/src/ShellCheck/ASTLib.hs +++ b/src/ShellCheck/ASTLib.hs @@ -21,6 +21,7 @@ module ShellCheck.ASTLib where import ShellCheck.AST +import ShellCheck.Prelude import ShellCheck.Regex import Control.Monad.Writer @@ -138,7 +139,7 @@ getFlagsUntil stopCondition (T_SimpleCommand _ _ (_:args)) = flag (x, '-':'-':arg) = [ (x, takeWhile (/= '=') arg) ] flag (x, '-':args) = map (\v -> (x, [v])) args flag (x, _) = [ (x, "") ] -getFlagsUntil _ _ = error "Internal shellcheck error, please report! (getFlags on non-command)" +getFlagsUntil _ _ = error $ pleaseReport "getFlags on non-command" -- Get all flags in a GNU way, up until -- getAllFlags :: Token -> [(Token, String)] @@ -785,5 +786,118 @@ executableFromShebang = shellFor basename s = reverse . takeWhile (/= '/') . reverse $ s skipFlags = dropWhile ("-" `isPrefixOf`) + +-- Determining if a name is a variable +isVariableStartChar x = x == '_' || isAsciiLower x || isAsciiUpper x +isVariableChar x = isVariableStartChar x || isDigit x +isSpecialVariableChar = (`elem` "*@#?-$!") +variableNameRegex = mkRegex "[_a-zA-Z][_a-zA-Z0-9]*" + +prop_isVariableName1 = isVariableName "_fo123" +prop_isVariableName2 = not $ isVariableName "4" +prop_isVariableName3 = not $ isVariableName "test: " +isVariableName (x:r) = isVariableStartChar x && all isVariableChar r +isVariableName _ = False + + +-- Get the variable name from an expansion like ${var:-foo} +prop_getBracedReference1 = getBracedReference "foo" == "foo" +prop_getBracedReference2 = getBracedReference "#foo" == "foo" +prop_getBracedReference3 = getBracedReference "#" == "#" +prop_getBracedReference4 = getBracedReference "##" == "#" +prop_getBracedReference5 = getBracedReference "#!" == "!" +prop_getBracedReference6 = getBracedReference "!#" == "#" +prop_getBracedReference7 = getBracedReference "!foo#?" == "foo" +prop_getBracedReference8 = getBracedReference "foo-bar" == "foo" +prop_getBracedReference9 = getBracedReference "foo:-bar" == "foo" +prop_getBracedReference10= getBracedReference "foo: -1" == "foo" +prop_getBracedReference11= getBracedReference "!os*" == "" +prop_getBracedReference11b= getBracedReference "!os@" == "" +prop_getBracedReference12= getBracedReference "!os?bar**" == "" +prop_getBracedReference13= getBracedReference "foo[bar]" == "foo" +getBracedReference s = fromMaybe s $ + nameExpansion s `mplus` takeName noPrefix `mplus` getSpecial noPrefix `mplus` getSpecial s + where + noPrefix = dropPrefix s + dropPrefix (c:rest) | c `elem` "!#" = rest + dropPrefix cs = cs + takeName s = do + let name = takeWhile isVariableChar s + guard . not $ null name + return name + getSpecial (c:_) | isSpecialVariableChar c = return [c] + getSpecial _ = fail "empty or not special" + + nameExpansion ('!':next:rest) = do -- e.g. ${!foo*bar*} + guard $ isVariableChar next -- e.g. ${!@} + first <- find (not . isVariableChar) rest + guard $ first `elem` "*?@" + return "" + nameExpansion _ = Nothing + +-- Get the variable modifier like /a/b in ${var/a/b} +prop_getBracedModifier1 = getBracedModifier "foo:bar:baz" == ":bar:baz" +prop_getBracedModifier2 = getBracedModifier "!var:-foo" == ":-foo" +prop_getBracedModifier3 = getBracedModifier "foo[bar]" == "[bar]" +prop_getBracedModifier4 = getBracedModifier "foo[@]@Q" == "[@]@Q" +prop_getBracedModifier5 = getBracedModifier "@@Q" == "@Q" +getBracedModifier s = headOrDefault "" $ do + let var = getBracedReference s + a <- dropModifier s + dropPrefix var a + where + dropPrefix [] t = return t + dropPrefix (a:b) (c:d) | a == c = dropPrefix b d + dropPrefix _ _ = [] + + dropModifier (c:rest) | c `elem` "#!" = [rest, c:rest] + dropModifier x = [x] + +-- Get the variables from indices like ["x", "y"] in ${var[x+y+1]} +prop_getIndexReferences1 = getIndexReferences "var[x+y+1]" == ["x", "y"] +getIndexReferences s = fromMaybe [] $ do + match <- matchRegex re s + index <- match !!! 0 + return $ matchAllStrings variableNameRegex index + where + re = mkRegex "(\\[.*\\])" + +prop_getOffsetReferences1 = getOffsetReferences ":bar" == ["bar"] +prop_getOffsetReferences2 = getOffsetReferences ":bar:baz" == ["bar", "baz"] +prop_getOffsetReferences3 = getOffsetReferences "[foo]:bar" == ["bar"] +prop_getOffsetReferences4 = getOffsetReferences "[foo]:bar:baz" == ["bar", "baz"] +getOffsetReferences mods = fromMaybe [] $ do +-- if mods start with [, then drop until ] + match <- matchRegex re mods + offsets <- match !!! 1 + return $ matchAllStrings variableNameRegex offsets + where + re = mkRegex "^(\\[.+\\])? *:([^-=?+].*)" + + +-- Returns whether a token is a parameter expansion without any modifiers. +-- True for $var ${var} $1 $# +-- False for ${#var} ${var[x]} ${var:-0} +isUnmodifiedParameterExpansion t = + case t of + T_DollarBraced _ False _ -> True + T_DollarBraced _ _ list -> + let str = concat $ oversimplify list + in getBracedReference str == str + _ -> False + +--- A list of the element and all its parents up to the root node. +getPath tree t = t : + case Map.lookup (getId t) tree of + Nothing -> [] + Just parent -> getPath tree parent + +isClosingFileOp op = + case op of + T_IoDuplicate _ (T_GREATAND _) "-" -> True + T_IoDuplicate _ (T_LESSAND _) "-" -> True + _ -> False + + return [] runTests = $quickCheckAll diff --git a/src/ShellCheck/Analytics.hs b/src/ShellCheck/Analytics.hs index f5ff4df..bf5d179 100644 --- a/src/ShellCheck/Analytics.hs +++ b/src/ShellCheck/Analytics.hs @@ -24,8 +24,10 @@ module ShellCheck.Analytics (runAnalytics, optionalChecks, ShellCheck.Analytics. import ShellCheck.AST import ShellCheck.ASTLib import ShellCheck.AnalyzerLib hiding (producesComments) +import qualified ShellCheck.CFGAnalysis as CF import ShellCheck.Data import ShellCheck.Parser +import ShellCheck.Prelude import ShellCheck.Interface import ShellCheck.Regex diff --git a/src/ShellCheck/Analyzer.hs b/src/ShellCheck/Analyzer.hs index eb231c2..ff2e457 100644 --- a/src/ShellCheck/Analyzer.hs +++ b/src/ShellCheck/Analyzer.hs @@ -25,6 +25,7 @@ import ShellCheck.Interface import Data.List import Data.Monoid import qualified ShellCheck.Checks.Commands +import qualified ShellCheck.Checks.ControlFlow import qualified ShellCheck.Checks.Custom import qualified ShellCheck.Checks.ShellSupport @@ -42,11 +43,13 @@ analyzeScript spec = newAnalysisResult { checkers spec params = mconcat $ map ($ params) [ ShellCheck.Checks.Commands.checker spec, + ShellCheck.Checks.ControlFlow.checker spec, ShellCheck.Checks.Custom.checker, ShellCheck.Checks.ShellSupport.checker ] optionalChecks = mconcat $ [ ShellCheck.Analytics.optionalChecks, - ShellCheck.Checks.Commands.optionalChecks + ShellCheck.Checks.Commands.optionalChecks, + ShellCheck.Checks.ControlFlow.optionalChecks ] diff --git a/src/ShellCheck/AnalyzerLib.hs b/src/ShellCheck/AnalyzerLib.hs index 67c35b4..e998f2c 100644 --- a/src/ShellCheck/AnalyzerLib.hs +++ b/src/ShellCheck/AnalyzerLib.hs @@ -23,9 +23,11 @@ module ShellCheck.AnalyzerLib where import ShellCheck.AST import ShellCheck.ASTLib +import qualified ShellCheck.CFGAnalysis as CF import ShellCheck.Data import ShellCheck.Interface import ShellCheck.Parser +import ShellCheck.Prelude import ShellCheck.Regex import Control.Arrow (first) @@ -96,7 +98,9 @@ data Parameters = Parameters { -- The root node of the AST rootNode :: Token, -- map from token id to start and end position - tokenPositions :: Map.Map Id (Position, Position) + tokenPositions :: Map.Map Id (Position, Position), + -- Result from Control Flow Graph analysis (including data flow analysis) + cfgAnalysis :: CF.CFGAnalysis } deriving (Show) -- TODO: Cache results of common AST ops here @@ -189,8 +193,9 @@ makeCommentWithFix severity id code str fix = } in force withFix -makeParameters spec = - let params = Parameters { +makeParameters spec = params + where + params = Parameters { rootNode = root, shellType = fromMaybe (determineShell (asFallbackShell spec) root) $ asShellType spec, hasSetE = containsSetE root, @@ -215,9 +220,14 @@ makeParameters spec = shellTypeSpecified = isJust (asShellType spec) || isJust (asFallbackShell spec), parentMap = getParentTree root, variableFlow = getVariableFlow params root, - tokenPositions = asTokenPositions spec - } in params - where root = asScript spec + tokenPositions = asTokenPositions spec, + cfgAnalysis = CF.analyzeControlFlow cfParams root + } + cfParams = CF.CFGParameters { + CF.cfLastpipe = hasLastpipe params, + CF.cfPipefail = hasPipefail params + } + root = asScript spec -- Does this script mention 'set -e' anywhere? @@ -408,12 +418,6 @@ usedAsCommandName tree token = go (getId token) (tail $ getPath tree token) getId word == currentId || getId (getCommandTokenOrThis t) == currentId go _ _ = False --- A list of the element and all its parents up to the root node. -getPath tree t = t : - case Map.lookup (getId t) tree of - Nothing -> [] - Just parent -> getPath tree parent - -- Version of the above taking the map from the current context -- Todo: give this the name "getPath" getPathM t = do @@ -559,12 +563,6 @@ getModifiedVariables t = return (place, t, str, DataString SourceChecked) _ -> Nothing -isClosingFileOp op = - case op of - T_IoDuplicate _ (T_GREATAND _) "-" -> True - T_IoDuplicate _ (T_LESSAND _) "-" -> True - _ -> False - -- Consider 'export/declare -x' a reference, since it makes the var available getReferencedVariableCommand base@(T_SimpleCommand _ _ (T_NormalWord _ (T_Literal _ x:_):rest)) = @@ -746,13 +744,6 @@ getModifiedVariableCommand base@(T_SimpleCommand id cmdPrefix (T_NormalWord _ (T getModifiedVariableCommand _ = [] -getIndexReferences s = fromMaybe [] $ do - match <- matchRegex re s - index <- match !!! 0 - return $ matchAllStrings variableNameRegex index - where - re = mkRegex "(\\[.*\\])" - -- Given a NormalWord like foo or foo[$bar], get foo. -- Primarily used to get references for [[ -v foo[bar] ]] getVariableForTestDashV :: Token -> Maybe String @@ -767,18 +758,6 @@ getVariableForTestDashV t = do -- in a non-constant expression (while filtering out foo$x[$y]) toStr _ = return "\0" -prop_getOffsetReferences1 = getOffsetReferences ":bar" == ["bar"] -prop_getOffsetReferences2 = getOffsetReferences ":bar:baz" == ["bar", "baz"] -prop_getOffsetReferences3 = getOffsetReferences "[foo]:bar" == ["bar"] -prop_getOffsetReferences4 = getOffsetReferences "[foo]:bar:baz" == ["bar", "baz"] -getOffsetReferences mods = fromMaybe [] $ do --- if mods start with [, then drop until ] - match <- matchRegex re mods - offsets <- match !!! 1 - return $ matchAllStrings variableNameRegex offsets - where - re = mkRegex "^(\\[.+\\])? *:([^-=?+].*)" - getReferencedVariables parents t = case t of T_DollarBraced id _ l -> let str = concat $ oversimplify l in @@ -857,17 +836,6 @@ isConfusedGlobRegex ('*':_) = True isConfusedGlobRegex [x,'*'] | x `notElem` "\\." = True isConfusedGlobRegex _ = False -isVariableStartChar x = x == '_' || isAsciiLower x || isAsciiUpper x -isVariableChar x = isVariableStartChar x || isDigit x -isSpecialVariableChar = (`elem` "*@#?-$!") -variableNameRegex = mkRegex "[_a-zA-Z][_a-zA-Z0-9]*" - -prop_isVariableName1 = isVariableName "_fo123" -prop_isVariableName2 = not $ isVariableName "4" -prop_isVariableName3 = not $ isVariableName "test: " -isVariableName (x:r) = isVariableStartChar x && all isVariableChar r -isVariableName _ = False - getVariablesFromLiteralToken token = getVariablesFromLiteral (getLiteralStringDef " " token) @@ -880,73 +848,6 @@ getVariablesFromLiteral string = where variableRegex = mkRegex "\\$\\{?([A-Za-z0-9_]+)" --- Get the variable name from an expansion like ${var:-foo} -prop_getBracedReference1 = getBracedReference "foo" == "foo" -prop_getBracedReference2 = getBracedReference "#foo" == "foo" -prop_getBracedReference3 = getBracedReference "#" == "#" -prop_getBracedReference4 = getBracedReference "##" == "#" -prop_getBracedReference5 = getBracedReference "#!" == "!" -prop_getBracedReference6 = getBracedReference "!#" == "#" -prop_getBracedReference7 = getBracedReference "!foo#?" == "foo" -prop_getBracedReference8 = getBracedReference "foo-bar" == "foo" -prop_getBracedReference9 = getBracedReference "foo:-bar" == "foo" -prop_getBracedReference10= getBracedReference "foo: -1" == "foo" -prop_getBracedReference11= getBracedReference "!os*" == "" -prop_getBracedReference11b= getBracedReference "!os@" == "" -prop_getBracedReference12= getBracedReference "!os?bar**" == "" -prop_getBracedReference13= getBracedReference "foo[bar]" == "foo" -getBracedReference s = fromMaybe s $ - nameExpansion s `mplus` takeName noPrefix `mplus` getSpecial noPrefix `mplus` getSpecial s - where - noPrefix = dropPrefix s - dropPrefix (c:rest) | c `elem` "!#" = rest - dropPrefix cs = cs - takeName s = do - let name = takeWhile isVariableChar s - guard . not $ null name - return name - getSpecial (c:_) | isSpecialVariableChar c = return [c] - getSpecial _ = fail "empty or not special" - - nameExpansion ('!':next:rest) = do -- e.g. ${!foo*bar*} - guard $ isVariableChar next -- e.g. ${!@} - first <- find (not . isVariableChar) rest - guard $ first `elem` "*?@" - return "" - nameExpansion _ = Nothing - -prop_getBracedModifier1 = getBracedModifier "foo:bar:baz" == ":bar:baz" -prop_getBracedModifier2 = getBracedModifier "!var:-foo" == ":-foo" -prop_getBracedModifier3 = getBracedModifier "foo[bar]" == "[bar]" -prop_getBracedModifier4 = getBracedModifier "foo[@]@Q" == "[@]@Q" -prop_getBracedModifier5 = getBracedModifier "@@Q" == "@Q" -getBracedModifier s = headOrDefault "" $ do - let var = getBracedReference s - a <- dropModifier s - dropPrefix var a - where - dropPrefix [] t = return t - dropPrefix (a:b) (c:d) | a == c = dropPrefix b d - dropPrefix _ _ = [] - - dropModifier (c:rest) | c `elem` "#!" = [rest, c:rest] - dropModifier x = [x] - --- Useful generic functions. - --- Get element 0 or a default. Like `head` but safe. -headOrDefault _ (a:_) = a -headOrDefault def _ = def - --- Get the last element or a default. Like `last` but safe. -lastOrDefault def [] = def -lastOrDefault _ list = last list - ---- Get element n of a list, or Nothing. Like `!!` but safe. -(!!!) list i = - case drop i list of - [] -> Nothing - (r:_) -> Just r -- Run a command if the shell is in the given list whenShell l c = do @@ -999,17 +900,6 @@ isBashLike params = Dash -> False Sh -> False --- Returns whether a token is a parameter expansion without any modifiers. --- True for $var ${var} $1 $# --- False for ${#var} ${var[x]} ${var:-0} -isUnmodifiedParameterExpansion t = - case t of - T_DollarBraced _ False _ -> True - T_DollarBraced _ _ list -> - let str = concat $ oversimplify list - in getBracedReference str == str - _ -> False - isTrueAssignmentSource c = case c of DataString SourceChecked -> False diff --git a/src/ShellCheck/CFG.hs b/src/ShellCheck/CFG.hs new file mode 100644 index 0000000..101a0d7 --- /dev/null +++ b/src/ShellCheck/CFG.hs @@ -0,0 +1,1147 @@ +{- + Copyright 2022 Vidar Holen + + This file is part of ShellCheck. + https://www.shellcheck.net + + ShellCheck is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ShellCheck is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE DeriveAnyClass, DeriveGeneric #-} + +-- Constructs a Control Flow Graph from an AST +module ShellCheck.CFG ( + CFNode (..), + CFEdge (..), + CFEffect (..), + CFStringPart (..), + CFVariableProp (..), + CFGResult (..), + CFValue (..), + CFGraph, + CFGParameters (..), + IdTagged (..), + buildGraph + , ShellCheck.CFG.runTests -- STRIP + ) + where + +import GHC.Generics (Generic) +import ShellCheck.AST +import ShellCheck.ASTLib +import ShellCheck.Interface +import ShellCheck.Prelude +import ShellCheck.Regex +import Control.DeepSeq +import Control.Monad +import Control.Monad.Identity +import Data.List hiding (map) +import Data.Maybe +import qualified Data.Map as M +import qualified Data.Set as S +import Control.Monad.RWS.Lazy +import Data.Graph.Inductive.Graph +import Data.Graph.Inductive.Query.DFS +import Data.Graph.Inductive.PatriciaTree as G +import Debug.Trace -- STRIP + +import Test.QuickCheck.All (forAllProperties) +import Test.QuickCheck.Test (quickCheckWithResult, stdArgs, maxSuccess) + + +-- Our basic Graph type +type CFGraph = G.Gr CFNode CFEdge + +-- Node labels in a Control Flow Graph +data CFNode = + -- A no-op node for structural purposes + CFStructuralNode + -- A no-op for graph inspection purposes + | CFEntryPoint String + -- Drop current prefix assignments + | CFDropPrefixAssignments + -- A node with a certain effect on program state + | CFApplyEffects [IdTagged CFEffect] + -- The execution of a command or function by literal string if possible + | CFExecuteCommand (Maybe String) + -- Execute a subshell. These are represented by disjoint graphs just like + -- functions, but they don't require any form of name resolution + | CFExecuteSubshell String Node Node + -- Assignment of $? + | CFSetExitCode Id + -- The virtual 'exit' at the natural end of a subshell + | CFImpliedExit + -- An exit statement resolvable at CFG build time + | CFResolvedExit + -- An exit statement only resolvable at DFA time + | CFUnresolvedExit + -- An unreachable node, serving as the unconnected end point of a range + | CFUnreachable + -- Assignment of $! + | CFSetBackgroundPid Id + deriving (Eq, Ord, Show, Generic, NFData) + +-- Edge labels in a Control Flow Graph +data CFEdge = + CFEErrExit + -- Regular control flow edge + | CFEFlow + -- An edge that a human might think exists (e.g. from a backgrounded process to its parent) + | CFEFalseFlow + -- An edge followed on exit + | CFEExit + deriving (Eq, Ord, Show, Generic, NFData) + +-- Actions we track +data CFEffect = + CFModifyProps String [CFVariableProp] + | CFReadVariable String + | CFWriteVariable String CFValue + | CFWriteGlobal String CFValue + | CFWriteLocal String CFValue + | CFWritePrefix String CFValue + | CFDefineFunction String Id Node Node + | CFUndefine String + | CFUndefineVariable String + | CFUndefineFunction String + | CFUndefineNameref String + -- Usage implies that this is an array (e.g. it's expanded with index) + | CFHintArray String + -- Operation implies that the variable will be defined (e.g. [ -z "$var" ]) + | CFHintDefined String + deriving (Eq, Ord, Show, Generic, NFData) + +data IdTagged a = IdTagged Id a + deriving (Eq, Ord, Show, Generic, NFData) + +-- Where a variable's value comes from +data CFValue = + -- The special 'uninitialized' value + CFValueUninitialized + -- An arbitrary array value + | CFValueArray + -- An arbitrary string value + | CFValueString + -- An arbitrary integer + | CFValueInteger + -- Token 'Id' concatenates and assigns the given parts + | CFValueComputed Id [CFStringPart] + deriving (Eq, Ord, Show, Generic, NFData) + +-- Simplified computed strings +data CFStringPart = + -- A known literal string value, like 'foo' + CFStringLiteral String + -- The contents of a variable, like $foo + | CFStringVariable String + -- An value that is unknown but an integer + | CFStringInteger + -- An unknown string value, for things we can't handle + | CFStringUnknown + deriving (Eq, Ord, Show, Generic, NFData) + +-- The properties of a variable +data CFVariableProp = CFVPExport | CFVPArray + deriving (Eq, Ord, Show, Generic, NFData) + +-- Options when generating CFG +data CFGParameters = CFGParameters { + -- Whether the last element in a pipeline runs in the current shell + cfLastpipe :: Bool, + -- Whether all elements in a pipeline count towards the exit status + cfPipefail :: Bool +} + +data CFGResult = CFGResult { + -- The graph itself + cfGraph :: CFGraph, + -- Map from Id to start/end node + cfIdToNode :: M.Map Id (Node, Node) +} + deriving (Show) + +buildGraph :: CFGParameters -> Token -> CFGResult +buildGraph params root = + let + (nextNode, base) = execRWS (buildRoot root) (newCFContext params) 0 + (nodes, edges, mapping) = +-- renumberTopologically $ + removeUnnecessaryStructuralNodes + base + in + CFGResult { + cfGraph = mkGraph nodes edges, + cfIdToNode = M.fromList mapping + } + +remapGraph remap (nodes, edges, mapping) = + ( + map (remapNode remap) nodes, + map (remapEdge remap) edges, + map (\(id, (a,b)) -> (id, (remapHelper remap a, remapHelper remap b))) mapping + ) + +prop_testRenumbering = + let + s = CFStructuralNode + before = ( + [(1,s), (3,s), (4, s), (8,s)], + [(1,3,CFEFlow), (3,4, CFEFlow), (4,8,CFEFlow)], + [(Id 0, (3,4))] + ) + after = ( + [(0,s), (1,s), (2,s), (3,s)], + [(0,1,CFEFlow), (1,2, CFEFlow), (2,3,CFEFlow)], + [(Id 0, (1,2))] + ) + in after == renumberGraph before + +-- Renumber the graph for prettiness, so there are no gaps in node numbers +renumberGraph g@(nodes, edges, mapping) = + let renumbering = M.fromList (flip zip [0..] $ sort $ map fst nodes) + in remapGraph renumbering g + +prop_testRenumberTopologically = + let + s = CFStructuralNode + before = ( + [(4,s), (2,s), (3, s)], + [(4,2,CFEFlow), (2,3, CFEFlow)], + [(Id 0, (4,2))] + ) + after = ( + [(0,s), (1,s), (2,s)], + [(0,1,CFEFlow), (1,2, CFEFlow)], + [(Id 0, (0,1))] + ) + in after == renumberTopologically before + +-- Renumber the graph in topological order +renumberTopologically g@(nodes, edges, mapping) = + let renumbering = M.fromList (flip zip [0..] $ topsort (mkGraph nodes edges :: CFGraph)) + in remapGraph renumbering g + +prop_testRemoveStructural = + let + s = CFStructuralNode + before = ( + [(1,s), (2,s), (3, s), (4,s)], + [(1,2,CFEFlow), (2,3, CFEFlow), (3,4,CFEFlow)], + [(Id 0, (2,3))] + ) + after = ( + [(1,s), (2,s), (4,s)], + [(1,2,CFEFlow), (2,4,CFEFlow)], + [(Id 0, (2,2))] + ) + in after == removeUnnecessaryStructuralNodes before + +-- Collapse structural nodes that just form long chains like x->x->x. +-- This way we can generate them with abandon, without making DFA slower. +-- +-- Note in particular that we can't remove a structural node x in +-- foo -> x -> bar , because then the pre/post-condition for tokens +-- previously pointing to x would be wrong. +removeUnnecessaryStructuralNodes (nodes, edges, mapping) = + remapGraph recursiveRemapping + ( + filter (\(n, _) -> n `M.notMember` recursiveRemapping) nodes, + filter (`S.notMember` edgesToCollapse) edges, + mapping + ) + where + regularEdges = filter isRegularEdge edges + inDegree = counter $ map (\(from,to,_) -> from) regularEdges + outDegree = counter $ map (\(from,to,_) -> to) regularEdges + structuralNodes = S.fromList $ map fst $ filter isStructural nodes + candidateNodes = S.filter isLinear structuralNodes + edgesToCollapse = S.fromList $ filter filterEdges regularEdges + + remapping :: M.Map Node Node + remapping = foldl' (\m (new, old) -> M.insert old new m) M.empty $ map orderEdge $ S.toList edgesToCollapse + recursiveRemapping = M.fromList $ map (\c -> (c, recursiveLookup remapping c)) $ M.keys remapping + + filterEdges (a,b,_) = + a `S.member` candidateNodes && b `S.member` candidateNodes + + orderEdge (a,b,_) = if a < b then (a,b) else (b,a) + counter = foldl' (\map key -> M.insertWith (+) key 1 map) M.empty + isRegularEdge (_, _, CFEFlow) = True + isRegularEdge _ = False + + recursiveLookup :: M.Map Node Node -> Node -> Node + recursiveLookup map node = + case M.lookup node map of + Nothing -> node + Just x -> recursiveLookup map x + + isStructural (node, label) = + case label of + CFStructuralNode -> True + _ -> False + + isLinear node = + M.findWithDefault 0 node inDegree == 1 + && M.findWithDefault 0 node outDegree == 1 + + +remapNode :: M.Map Node Node -> LNode CFNode -> LNode CFNode +remapNode m (node, label) = + (remapHelper m node, newLabel) + where + newLabel = case label of + CFApplyEffects effects -> CFApplyEffects (map (remapEffect m) effects) + CFExecuteSubshell s a b -> CFExecuteSubshell s (remapHelper m a) (remapHelper m b) +-- CFSubShellStart reason node -> CFSubShellStart reason (remapHelper m node) + + _ -> label + +remapEffect map old@(IdTagged id effect) = + case effect of + CFDefineFunction name id start end -> IdTagged id $ CFDefineFunction name id (remapHelper map start) (remapHelper map end) + _ -> old + +remapEdge :: M.Map Node Node -> LEdge CFEdge -> LEdge CFEdge +remapEdge map (from, to, label) = (remapHelper map from, remapHelper map to, label) +remapHelper map n = M.findWithDefault n n map + +data Range = Range Node Node + deriving (Eq, Show) + +data CFContext = CFContext { + cfIsCondition :: Bool, + cfIsFunction :: Bool, + cfLoopStack :: [(Node, Node)], + cfExitTarget :: Maybe Node, + cfReturnTarget :: Maybe Node, + cfParameters :: CFGParameters +} +newCFContext params = CFContext { + cfIsCondition = False, + cfIsFunction = False, + cfLoopStack = [], + cfExitTarget = Nothing, + cfReturnTarget = Nothing, + cfParameters = params +} + +-- The monad we generate a graph in +type CFM a = RWS CFContext ([LNode CFNode], [LEdge CFEdge], [(Id, (Node, Node))]) Int a + +newNode :: CFNode -> CFM Node +newNode label = do + n <- get + put (n+1) + tell ([(n, label)], [], []) + return n + +newNodeRange :: CFNode -> CFM Range +-- newNodeRange label = nodeToRange <$> newNode label +newNodeRange label = nodeToRange <$> newNode label + +-- Build a disjoint piece of the graph and return a CFExecuteSubshell. The Id is used purely for debug naming. +subshell :: Id -> String -> CFM Range -> CFM Range +subshell id reason p = do + start <- newNode $ CFEntryPoint $ "Subshell " ++ show id ++ ": " ++ reason + end <- newNode CFStructuralNode + middle <- local (\c -> c { cfExitTarget = Just end, cfReturnTarget = Just end}) p + linkRanges [nodeToRange start, middle, nodeToRange end] + newNodeRange $ CFExecuteSubshell reason start end + + +withFunctionScope p = do + end <- newNode CFStructuralNode + body <- local (\c -> c { cfReturnTarget = Just end, cfIsFunction = True }) p + linkRanges [body, nodeToRange end] + + +nodeToRange :: Node -> Range +nodeToRange n = Range n n + +link :: Node -> Node -> CFEdge -> CFM () +link from to label = do + tell ([], [(from, to, label)], []) + +registerNode :: Id -> Range -> CFM () +registerNode id (Range start end) = tell ([], [], [(id, (start, end))]) + +linkRange :: Range -> Range -> CFM Range +linkRange = linkRangeAs CFEFlow + +linkRangeAs :: CFEdge -> Range -> Range -> CFM Range +linkRangeAs label (Range start mid1) (Range mid2 end) = do + link mid1 mid2 label + return (Range start end) + +-- Like linkRange but without actually linking +spanRange :: Range -> Range -> Range +spanRange (Range start mid1) (Range mid2 end) = Range start end + +linkRanges :: [Range] -> CFM Range +linkRanges [] = error "Empty range" +linkRanges (first:rest) = foldM linkRange first rest + +sequentially :: [Token] -> CFM Range +sequentially list = do + first <- newStructuralNode + rest <- mapM build list + linkRanges (first:rest) + +withContext :: (CFContext -> CFContext) -> CFM a -> CFM a +withContext = local + +withReturn :: Range -> CFM a -> CFM a +withReturn _ p = p + +asCondition :: CFM Range -> CFM Range +asCondition = withContext (\c -> c { cfIsCondition = True }) + +newStructuralNode = newNodeRange CFStructuralNode + +buildRoot :: Token -> CFM Range +buildRoot t = do + entry <- newNodeRange $ CFEntryPoint "MAIN" + impliedExit <- newNode CFImpliedExit + end <- newNode CFStructuralNode + start <- local (\c -> c { cfExitTarget = Just end, cfReturnTarget = Just impliedExit}) $ build t + range <- linkRanges [entry, start, nodeToRange impliedExit, nodeToRange end] + registerNode (getId t) range + return range + +applySingle e = CFApplyEffects [e] + +-- Build the CFG. +build :: Token -> CFM Range +build t = do + range <- build' t + registerNode (getId t) range + return range + where + build' t = case t of + T_Annotation _ _ list -> build list + T_Script _ _ list -> do + sequentially list + + TA_Assignment id op var@(TA_Variable _ name indices) rhs -> do + -- value first: (( var[x=1] = (x=2) )) runs x=1 last + value <- build rhs + subscript <- sequentially indices + read <- + if op == "=" + then none + -- This is += or something + else newNodeRange $ applySingle $ IdTagged id $ CFReadVariable name + + write <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $ + if null indices + then CFValueInteger + else CFValueArray + + linkRanges [value, subscript, read, write] + + TA_Assignment id op lhs rhs -> do + -- This is likely an invalid assignment like (( 1 = 2 )), but it + -- could be e.g. x=y; (( $x = 3 )); echo $y, so expand both sides + -- without updating anything + sequentially [lhs, rhs] + + TA_Binary _ _ a b -> sequentially [a,b] + TA_Expansion _ list -> sequentially list + TA_Sequence _ list -> sequentially list + + TA_Trinary _ cond a b -> do + condition <- build cond + ifthen <- build a + elsethen <- build b + end <- newStructuralNode + linkRanges [condition, ifthen, end] + linkRanges [condition, elsethen, end] + + TA_Variable id name indices -> do + subscript <- sequentially indices + hint <- + if null indices + then none + else nodeToRange <$> newNode (applySingle $ IdTagged id $ CFHintArray name) + read <- nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable name) + linkRanges [subscript, hint, read] + + TA_Unary id op (TA_Variable _ name indices) | "--" `isInfixOf` op || "++" `isInfixOf` op -> do + subscript <- sequentially indices + read <- newNodeRange $ applySingle $ IdTagged id $ CFReadVariable name + write <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $ + if null indices + then CFValueInteger + else CFValueArray + linkRanges [subscript, read, write] + TA_Unary _ _ arg -> build arg + + TC_And _ SingleBracket _ lhs rhs -> do + sequentially [lhs, rhs] + + TC_And _ DoubleBracket _ lhs rhs -> do + left <- build lhs + right <- build rhs + end <- newStructuralNode + -- complete + linkRanges [left, right, end] + -- short circuit + linkRange left end + + -- TODO: Handle integer ops + TC_Binary _ mode str lhs rhs -> do + left <- build lhs + right <- build rhs + linkRange left right + + TC_Empty {} -> newStructuralNode + + TC_Group _ _ t -> build t + + -- TODO: Mark as checked + TC_Nullary _ _ arg -> build arg + + TC_Or _ SingleBracket _ lhs rhs -> sequentially [lhs, rhs] + + TC_Or _ DoubleBracket _ lhs rhs -> do + left <- build lhs + right <- build rhs + end <- newStructuralNode + -- complete + linkRanges [left, right, end] + -- short circuit + linkRange left end + + -- TODO: Handle -v, -z, -n + TC_Unary _ _ op arg -> do + build arg + + T_Arithmetic id root -> do + exe <- build root + status <- newNodeRange (CFSetExitCode id) + linkRange exe status + + T_AndIf _ lhs rhs -> do + left <- build lhs + right <- build rhs + end <- newStructuralNode + linkRange left right + linkRange right end + linkRange left end + + T_Array _ list -> sequentially list + + T_Assignment {} -> buildAssignment DefaultScope t + + T_Backgrounded id body -> do + start <- newStructuralNode + fork <- subshell id "backgrounding '&'" $ build body + pid <- newNodeRange $ CFSetBackgroundPid id + status <- newNodeRange $ CFSetExitCode id + + linkRange start fork + -- Add a join from the fork to warn about variable changes + linkRangeAs CFEFalseFlow fork pid + linkRanges [start, pid, status] + + T_Backticked id body -> + subshell id "`..` expansion" $ sequentially body + + T_Banged id cmd -> do + main <- build cmd + status <- newNodeRange (CFSetExitCode id) + linkRange main status + + T_BatsTest id _ body -> do + -- These are technically set by the 'run' command, but we'll just define them + -- up front to avoid figuring out which commands named "run" belong to Bats. + status <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable "status" CFValueInteger + output <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable "output" CFValueString + main <- build body + linkRanges [status, output, main] + + T_BraceExpansion _ list -> sequentially list + + T_BraceGroup id body -> + sequentially body + + T_CaseExpression id t [] -> build t + + T_CaseExpression id t list -> do + start <- newStructuralNode + token <- build t + branches <- mapM buildBranch list + end <- newStructuralNode + + let neighbors = zip branches $ tail branches + let (_, firstCond, _) = head branches + let (_, lastCond, lastBody) = last branches + + linkRange start token + linkRange token firstCond + mapM_ (uncurry $ linkBranch end) neighbors + linkRange lastBody end + + unless (any hasCatchAll list) $ + -- There's no *) branch, so assume we can fall through + void $ linkRange token end + + return $ spanRange start end + + where + -- for a | b | c, evaluate each in turn and allow short circuiting + buildCond list = do + start <- newStructuralNode + conds <- mapM build list + end <- newStructuralNode + linkRanges (start:conds) + mapM_ (`linkRange` end) conds + return $ spanRange start end + + buildBranch (typ, cond, body) = do + c <- buildCond cond + b <- sequentially body + linkRange c b + return (typ, c, b) + + linkBranch end (typ, cond, body) (_, nextCond, nextBody) = do + -- Failure case + linkRange cond nextCond + -- After body + case typ of + CaseBreak -> linkRange body end + CaseFallThrough -> linkRange body nextBody + CaseContinue -> linkRange body nextCond + + -- Find a *) if any + + hasCatchAll (_,cond,_) = any isCatchAll cond + isCatchAll c = fromMaybe False $ do + pg <- wordToExactPseudoGlob c + return $ pg `pseudoGlobIsSuperSetof` [PGMany] + + T_Condition _ _ op -> build op + + T_CoProc id maybeName t -> do + let name = fromMaybe "COPROC" maybeName + start <- newStructuralNode + parent <- newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name CFValueArray + child <- subshell id "coproc" $ build t + end <- newNodeRange $ CFSetExitCode id + + linkRange start parent + linkRange start child + linkRange parent end + linkRangeAs CFEFalseFlow child end + + return $ spanRange start end + T_CoProcBody _ t -> build t + + T_DollarArithmetic _ arith -> build arith + T_DollarDoubleQuoted _ list -> sequentially list + T_DollarSingleQuoted _ _ -> none + T_DollarBracket _ t -> build t + + T_DollarBraced id _ t -> do + let str = concat $ oversimplify t + let modifier = getBracedModifier str + let reference = getBracedReference str + let indices = getIndexReferences str + let offsets = getOffsetReferences str + vals <- build t + others <- mapM (\x -> nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable x)) (indices ++ offsets) + deps <- linkRanges (vals:others) + read <- nodeToRange <$> newNode (applySingle $ IdTagged id $ CFReadVariable reference) + totalRead <- linkRange deps read + + if any (`isPrefixOf` modifier) ["=", ":="] + then do + optionalAssign <- newNodeRange (applySingle $ IdTagged id $ CFWriteVariable reference CFValueString) + result <- newStructuralNode + linkRange optionalAssign result + linkRange totalRead result + else return totalRead + + T_DoubleQuoted _ list -> sequentially list + + T_DollarExpansion id body -> + subshell id "$(..) expansion" $ sequentially body + + T_Extglob _ _ list -> sequentially list + + T_FdRedirect id ('{':identifier) op -> do + let name = takeWhile (/= '}') identifier + expression <- build op + rw <- newNodeRange $ + if isClosingFileOp op + then applySingle $ IdTagged id $ CFReadVariable name + else applySingle $ IdTagged id $ CFWriteVariable name CFValueInteger + + linkRange expression rw + + + T_FdRedirect _ name t -> do + build t + + T_ForArithmetic _ initT condT incT bodyT -> do + init <- build initT + cond <- build condT + body <- sequentially bodyT + inc <- build incT + end <- newStructuralNode + + -- Forward edges + linkRanges [init, cond, body, inc] + linkRange cond end + -- Backward edge + linkRange inc cond + return $ spanRange init end + + T_ForIn id name words body -> forInHelper id name words body + + -- For functions we generate an unlinked subgraph, and mention that in its definition node + T_Function id _ _ name body -> do + range <- local (\c -> c { cfExitTarget = Nothing }) $ do + entry <- newNodeRange $ CFEntryPoint $ "function " ++ name + f <- withFunctionScope $ build body + linkRange entry f + let (Range entry exit) = range + definition <- newNodeRange (applySingle $ IdTagged id $ CFDefineFunction name id entry exit) + exe <- newNodeRange (CFSetExitCode id) + linkRange definition exe + + T_Glob {} -> none + + T_HereString _ t -> build t + T_HereDoc _ _ _ _ list -> sequentially list + + T_IfExpression id ifs elses -> do + start <- newStructuralNode + branches <- doBranches start ifs elses [] + end <- newStructuralNode + mapM_ (`linkRange` end) branches + return $ spanRange start end + where + doBranches start ((conds, thens):rest) elses result = do + cond <- asCondition $ sequentially conds + action <- sequentially thens + linkRange start cond + linkRange cond action + doBranches cond rest elses (action:result) + doBranches start [] elses result = do + rest <- + if null elses + then newNodeRange (CFSetExitCode id) + else sequentially elses + linkRange start rest + return (rest:result) + + T_Include _ t -> build t + + T_IndexedElement _ indicesT valueT -> do + indices <- sequentially indicesT + value <- build valueT + linkRange indices value + + T_IoDuplicate _ op _ -> build op + + T_IoFile _ op t -> do + exp <- build t + doesntDoMuch <- build op + linkRange exp doesntDoMuch + + T_Literal {} -> none + + T_NormalWord _ list -> sequentially list + + T_OrIf _ lhs rhs -> do + left <- build lhs + right <- build rhs + end <- newStructuralNode + linkRange left right + linkRange right end + linkRange left end + + T_Pipeline _ _ [cmd] -> build cmd + T_Pipeline id _ cmds -> do + start <- newStructuralNode + hasLastpipe <- reader $ cfLastpipe . cfParameters + (leading, last) <- buildPipe hasLastpipe cmds + end <- newStructuralNode + + mapM_ (linkRange start) leading + mapM_ (\c -> linkRangeAs CFEFalseFlow c end) leading + linkRanges $ [start] ++ last ++ [end] + where + buildPipe True [x] = do + last <- build x + return ([], [last]) + buildPipe lp (first:rest) = do + this <- subshell id "pipeline" $ build first + (leading, last) <- buildPipe lp rest + return (this:leading, last) + buildPipe _ [] = return ([], []) + + T_ProcSub id op cmds -> do + start <- newStructuralNode + body <- subshell id (op ++ "() process substitution") $ sequentially cmds + end <- newStructuralNode + + linkRange start body + linkRangeAs CFEFalseFlow body end + linkRange start end + + T_Redirecting _ redirs cmd -> do + -- For simple commands, this is the other way around in bash + -- We do it in this order for comound commands like { x=name; } > "$x" + redir <- sequentially redirs + body <- build cmd + linkRange redir body + + T_SelectIn id name words body -> forInHelper id name words body + + T_SimpleCommand id vars [] -> do + -- Vars can also be empty, as in the command "> foo" + assignments <- sequentially vars + status <- newNodeRange (CFSetExitCode id) + linkRange assignments status + + T_SimpleCommand id vars list@(cmd:_) -> + handleCommand t vars list $ getUnquotedLiteral cmd + + T_SingleQuoted _ _ -> none + + T_SourceCommand _ originalCommand inlinedSource -> do + cmd <- build originalCommand + end <- newStructuralNode + inline <- withReturn end $ build inlinedSource + linkRange cmd inline + linkRange inline end + return $ spanRange cmd inline + + T_Subshell id body -> do + main <- subshell id "explicit (..) subshell" $ sequentially body + status <- newNodeRange (CFSetExitCode id) + linkRange main status + + T_UntilExpression id cond body -> whileHelper id cond body + T_WhileExpression id cond body -> whileHelper id cond body + + T_CLOBBER _ -> none + T_GREATAND _ -> none + T_LESSAND _ -> none + T_LESSGREAT _ -> none + T_DGREAT _ -> none + T_Greater _ -> none + T_Less _ -> none + T_ParamSubSpecialChar _ _ -> none + + x -> error ("Unimplemented: " ++ show x) + +-- Still in `where` clause + forInHelper id name words body = do + entry <- newStructuralNode + expansion <- sequentially words + assignmentChoice <- newStructuralNode + assignments <- + if null words || any willSplit words + then (:[]) <$> (newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name CFValueString) + else mapM (\t -> newNodeRange $ applySingle $ IdTagged id $ CFWriteVariable name $ CFValueComputed (getId t) $ tokenToParts t) words + body <- sequentially body + exit <- newStructuralNode + -- Forward edges + linkRanges [entry, expansion, assignmentChoice] + mapM_ (\t -> linkRanges [assignmentChoice, t, body]) assignments + linkRange body exit + linkRange expansion exit + -- Backward edge + linkRange body assignmentChoice + return $ spanRange entry exit + + whileHelper id cond body = do + condRange <- asCondition $ sequentially cond + bodyRange <- sequentially body + end <- newNodeRange (CFSetExitCode id) + + linkRange condRange bodyRange + linkRange bodyRange condRange + linkRange condRange end + + +handleCommand cmd vars args literalCmd = do + -- TODO: Handle assignments in declaring commands + + case literalCmd of + Just "exit" -> regularExpansion vars args $ handleExit + Just "return" -> regularExpansion vars args $ handleReturn + Just "unset" -> regularExpansionWithStatus vars args $ handleUnset args + + Just "declare" -> handleDeclare args + Just "local" -> handleDeclare args + Just "typeset" -> handleDeclare args + + Just "printf" -> regularExpansionWithStatus vars args $ handlePrintf args + Just "wait" -> regularExpansionWithStatus vars args $ handleWait args + + Just "mapfile" -> regularExpansionWithStatus vars args $ handleMapfile args + Just "readarray" -> regularExpansionWithStatus vars args $ handleMapfile args + + Just "DEFINE_boolean" -> regularExpansionWithStatus vars args $ handleDEFINE args + Just "DEFINE_float" -> regularExpansionWithStatus vars args $ handleDEFINE args + Just "DEFINE_integer" -> regularExpansionWithStatus vars args $ handleDEFINE args + Just "DEFINE_string" -> regularExpansionWithStatus vars args $ handleDEFINE args + + -- This will mostly behave like 'command' but ok + Just "builtin" -> + case args of + [_] -> regular + (_:newargs@(newcmd:_)) -> + handleCommand newcmd vars newargs $ getLiteralString newcmd + Just "command" -> + case args of + [_] -> regular + (_:newargs@(newcmd:_)) -> + handleOthers (getId newcmd) vars newargs $ getLiteralString newcmd + _ -> regular + + where + regular = handleOthers (getId cmd) vars args literalCmd + handleExit = do + exitNode <- reader cfExitTarget + case exitNode of + Just target -> do + exit <- newNode CFResolvedExit + link exit target CFEExit + unreachable <- newNode CFUnreachable + return $ Range exit unreachable + Nothing -> do + exit <- newNode CFUnresolvedExit + unreachable <- newNode CFUnreachable + return $ Range exit unreachable + + handleReturn = do + returnTarget <- reader cfReturnTarget + case returnTarget of + Nothing -> error $ pleaseReport "missing return target" + Just target -> do + ret <- newNode CFStructuralNode + link ret target CFEFlow + unreachable <- newNode CFUnreachable + return $ Range ret unreachable + + handleUnset (cmd:args) = do + case () of + _ | "n" `elem` flagNames -> unsetWith CFUndefineNameref + _ | "v" `elem` flagNames -> unsetWith CFUndefineVariable + _ | "f" `elem` flagNames -> unsetWith CFUndefineFunction + _ -> unsetWith CFUndefine + where + pairs :: [(String, Token)] -- [(Flag string, token)] e.g. [("-f", t), ("", myfunc)] + pairs = map (\(str, (flag, val)) -> (str, flag)) $ fromMaybe (map (\c -> ("", (c,c))) args) $ getGnuOpts "vfn" args + (names, flags) = partition (null . fst) pairs + flagNames = map fst flags + literalNames :: [(Token, String)] -- Literal names to unset, e.g. [(myfuncToken, "myfunc")] + literalNames = mapMaybe (\(_, t) -> getLiteralString t >>= (return . (,) t)) names + -- Apply a constructor like CFUndefineVariable to each literalName, and tag with its id + unsetWith c = newNodeRange $ CFApplyEffects $ map (\(token, name) -> IdTagged (getId token) $ c name) literalNames + + + variableAssignRegex = mkRegex "^([_a-zA-Z][_a-zA-Z0-9]*)=" + + handleDeclare (cmd:args) = do + isFunc <- asks cfIsFunction + let (evaluated, effects) = mconcat $ map (toEffects isFunc) args + before <- sequentially $ evaluated + effect <- newNodeRange $ CFApplyEffects effects + result <- newNodeRange $ CFSetExitCode (getId cmd) + linkRanges [before, effect, result] + where + opts = map fst $ getGenericOpts args + array = "a" `elem` opts || "A" `elem` opts + integer = "i" `elem` opts + func = "f" `elem` opts || "F" `elem` opts + global = "g" `elem` opts + writer isFunc = + case () of + _ | global -> CFWriteGlobal + _ | isFunc -> CFWriteLocal + _ -> CFWriteVariable + + toEffects :: Bool -> Token -> ([Token], [IdTagged CFEffect]) + toEffects isFunc (T_Assignment id mode var idx t) = + let + pre = idx ++ [t] + isArray = array || (not $ null idx) + asArray = [ IdTagged id $ (writer isFunc) var CFValueArray ] + asString = [ IdTagged id $ (writer isFunc) var $ + if integer + then CFValueInteger -- TODO: Also handle integer variable property + else CFValueComputed (getId t) $ [ CFStringVariable var | mode == Append ] ++ tokenToParts t + ] + in + (pre, if isArray then asArray else asString ) + + toEffects isFunc t = + let + pre = [t] + literal = fromJust $ getLiteralStringExt (const $ Just "\0") t + isKnown = '\0' `notElem` literal + match = fmap head $ variableAssignRegex `matchRegex` literal + name = fromMaybe literal match + + typer def = + if array + then CFValueArray + else + if integer + then CFValueInteger + else def + + asLiteral = [ + IdTagged (getId t) $ (writer isFunc) name $ + typer $ CFValueComputed (getId t) [ CFStringLiteral $ drop 1 $ dropWhile (/= '=') $ literal ] + ] + asUnknown = [ + IdTagged (getId t) $ (writer isFunc) name $ + typer $ CFValueString + ] + asBlank = [ + IdTagged (getId t) $ (writer isFunc) name $ + typer $ CFValueComputed (getId t) [] + ] + in + case () of + _ | not (isVariableName name) -> (pre, []) + _ | isJust match && isKnown -> (pre, asLiteral) + _ | isJust match -> (pre, asUnknown) + _ -> (pre, asBlank) + + handlePrintf (cmd:args) = + newNodeRange $ CFApplyEffects $ maybeToList findVar + where + findVar = do + flags <- getBsdOpts "v:" args + (flag, arg) <- lookup "v" flags + name <- getLiteralString arg + return $ IdTagged (getId arg) $ CFWriteVariable name CFValueString + + handleWait (cmd:args) = + newNodeRange $ CFApplyEffects $ maybeToList findVar + where + findVar = do + let flags = getGenericOpts args + (flag, arg) <- lookup "p" flags + name <- getLiteralString arg + return $ IdTagged (getId arg) $ CFWriteVariable name CFValueInteger + + handleMapfile (cmd:args) = + newNodeRange $ CFApplyEffects [findVar] + where + findVar = + let (id, name) = fromMaybe (getId cmd, "MAPFILE") $ getFromArg `mplus` getFromFallback + in IdTagged id $ CFWriteVariable name CFValueArray + + getFromArg = do + flags <- getGnuOpts "d:n:O:s:u:C:c:t" args + (_, arg) <- lookup "" flags + name <- getLiteralString arg + return (getId arg, name) + + getFromFallback = + listToMaybe $ mapMaybe getIfVar $ reverse args + getIfVar c = do + name <- getLiteralString c + guard $ isVariableName name + return (getId c, name) + + handleDEFINE (cmd:args) = + newNodeRange $ CFApplyEffects $ maybeToList findVar + where + findVar = do + name <- listToMaybe $ drop 1 args + str <- getLiteralString name + guard $ isVariableName str + return $ IdTagged (getId name) $ CFWriteVariable str CFValueString + + handleOthers id vars args cmd = + regularExpansion vars args $ do + exe <- newNodeRange $ CFExecuteCommand cmd + status <- newNodeRange $ CFSetExitCode id + linkRange exe status + + regularExpansion vars args p = do + args <- sequentially args + assignments <- mapM (buildAssignment PrefixScope) vars + exe <- p + dropAssignments <- + if null vars + then + return [] + else do + drop <- newNodeRange CFDropPrefixAssignments + return [drop] + + linkRanges $ [args] ++ assignments ++ [exe] ++ dropAssignments + + regularExpansionWithStatus vars args@(cmd:_) p = do + initial <- regularExpansion vars args p + status <- newNodeRange $ CFSetExitCode (getId cmd) + linkRange initial status + + +none = newStructuralNode + +data Scope = DefaultScope | GlobalScope | LocalScope | PrefixScope + +buildAssignment scope t = do + op <- case t of + T_Assignment id mode var indices value -> do + expand <- build value + index <- sequentially indices + read <- case mode of + Append -> newNodeRange (applySingle $ IdTagged id $ CFReadVariable var) + Assign -> none + let valueType = if null indices then f id value else CFValueArray + let scoper = + case scope of + PrefixScope -> CFWritePrefix + LocalScope -> CFWriteLocal + GlobalScope -> CFWriteGlobal + DefaultScope -> CFWriteVariable + write <- newNodeRange $ applySingle $ IdTagged id $ scoper var valueType + linkRanges [expand, index, read, write] + where + f :: Id -> Token -> CFValue + f id t@T_NormalWord {} = CFValueComputed id $ [CFStringVariable var | mode == Append] ++ tokenToParts t + f id t@(T_Literal _ str) = CFValueComputed id $ [CFStringVariable var | mode == Append] ++ tokenToParts t + f _ T_Array {} = CFValueArray + + registerNode (getId t) op + return op + + +tokenToParts t = + case t of + T_NormalWord _ list -> concatMap tokenToParts list + T_DoubleQuoted _ list -> concatMap tokenToParts list + T_SingleQuoted _ str -> [ CFStringLiteral str ] + T_Literal _ str -> [ CFStringLiteral str ] + T_DollarArithmetic {} -> [ CFStringInteger ] + T_DollarBracket {} -> [ CFStringInteger ] + T_DollarBraced _ _ list | isUnmodifiedParameterExpansion t -> [ CFStringVariable (getBracedReference $ concat $ oversimplify list) ] + -- Check if getLiteralString can handle it, if not it's unknown + _ -> [maybe CFStringUnknown CFStringLiteral $ getLiteralString t] + +return [] +runTests = $( [| $(forAllProperties) (quickCheckWithResult (stdArgs { maxSuccess = 1 }) ) |]) diff --git a/src/ShellCheck/CFGAnalysis.hs b/src/ShellCheck/CFGAnalysis.hs new file mode 100644 index 0000000..99ce450 --- /dev/null +++ b/src/ShellCheck/CFGAnalysis.hs @@ -0,0 +1,1113 @@ +{- + Copyright 2022 Vidar Holen + + This file is part of ShellCheck. + https://www.shellcheck.net + + ShellCheck is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ShellCheck is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE DeriveAnyClass, DeriveGeneric #-} + +{- + Data Flow Analysis on a Control Flow Graph. + + This module implements a pretty standard iterative Data Flow Analysis. + For an overview of the process, see Wikipedia. + + Since shell scripts rely heavily on global variables, this DFA includes + tracking the value of globals across calls. Each function invocation is + treated as a separate DFA problem, and a caching mechanism (hopefully) + avoids any exponential explosions. + + To do efficient DFA join operations (or merges, as the code calls them), + some of the data structures have an integer version attached. On update, + the version is changed. If two states have the same version number, + a merge is skipped on the grounds that they are identical. It is easy + to unintentionally forget to update/invalidate the version number, + and bugs will ensure. + + For performance reasons, the entire code runs in plain ST, with a manual + context object Ctx being passed around. It relies heavily on mutable + STRefs. However, this turned out to be literally thousands of times faster + than my several attempts using RWST, so it can't be helped. +-} + +module ShellCheck.CFGAnalysis ( + analyzeControlFlow + ,CFGParameters (..) + ,CFGAnalysis (..) + ,ProgramState (..) + ,VariableValue (..) + ,SpaceStatus (..) + ,getIncomingState + ,getOutgoingState + ,ShellCheck.CFGAnalysis.runTests -- STRIP + ) where + +import GHC.Generics (Generic) +import ShellCheck.AST +import ShellCheck.CFG +import qualified ShellCheck.Data as Data +import ShellCheck.Prelude +import Control.Monad +import Control.Monad.ST +import Control.DeepSeq +import Data.List hiding (map) +import Data.STRef +import Data.Maybe +import qualified Data.Map as M +import qualified Data.Set as S +import Data.Graph.Inductive.Graph +import Data.Graph.Inductive.Query.DFS +import Debug.Trace -- STRIP + +import Test.QuickCheck + + +iterationCount = 1000000 +cacheEntries = 10 + +-- The result of the data flow analysis +data CFGAnalysis = CFGAnalysis { + graph :: CFGraph, + tokenToNode :: M.Map Id (Node, Node), + nodeToData :: M.Map Node (ProgramState, ProgramState) +} deriving (Show, Generic, NFData) + +-- The program state we expose externally +data ProgramState = ProgramState { + variablesInScope :: M.Map String VariableValue, + stateIsReachable :: Bool +-- internalState :: InternalState +} deriving (Show, Eq, Generic, NFData) + +-- Conveniently get the state before a token id +getIncomingState :: CFGAnalysis -> Id -> Maybe ProgramState +getIncomingState analysis id = do + (start,end) <- M.lookup id $ tokenToNode analysis + fst <$> M.lookup start (nodeToData analysis) + +-- Conveniently get the state after a token id +getOutgoingState :: CFGAnalysis -> Id -> Maybe ProgramState +getOutgoingState analysis id = do + (start,end) <- M.lookup id $ tokenToNode analysis + snd <$> M.lookup end (nodeToData analysis) + +getDataForNode analysis node = M.lookup node $ nodeToData analysis + +-- The current state of data flow at a point in the program, potentially as a diff +data InternalState = InternalState { + sVersion :: Integer, + sGlobalValues :: VersionedMap String VariableValue, + sLocalValues :: VersionedMap String VariableValue, + sPrefixValues :: VersionedMap String VariableValue, + sFunctionTargets :: VersionedMap String FunctionValue, + sIsReachable :: Maybe Bool +} deriving (Show, Generic, NFData) + +newInternalState = InternalState { + sVersion = 0, + sGlobalValues = vmEmpty, + sLocalValues = vmEmpty, + sPrefixValues = vmEmpty, + sFunctionTargets = vmEmpty, + sIsReachable = Nothing +} + +unreachableState = modified newInternalState { + sIsReachable = Just False +} + +-- The default state we assume we get from the environment +createEnvironmentState :: InternalState +createEnvironmentState = do + foldl' (flip ($)) newInternalState $ concat [ + addVars Data.internalVariables unknownVariableValue, + addVars Data.variablesWithoutSpaces spacelessVariableValue, + addVars Data.specialIntegerVariables spacelessVariableValue + ] + where + addVars names val = map (\name -> insertGlobal name val) names + spacelessVariableValue = VariableValue { + literalValue = Nothing, + spaceStatus = SpaceStatusClean + } + + +modified s = s { sVersion = -1 } + +insertGlobal :: String -> VariableValue -> InternalState -> InternalState +insertGlobal name value state = modified state { + sGlobalValues = vmInsert name value $ sGlobalValues state +} + +insertLocal :: String -> VariableValue -> InternalState -> InternalState +insertLocal name value state = modified state { + sLocalValues = vmInsert name value $ sLocalValues state +} + +insertPrefix :: String -> VariableValue -> InternalState -> InternalState +insertPrefix name value state = modified state { + sPrefixValues = vmInsert name value $ sPrefixValues state +} + +insertFunction :: String -> FunctionValue -> InternalState -> InternalState +insertFunction name value state = modified state { + sFunctionTargets = vmInsert name value $ sFunctionTargets state +} + +internalToExternal :: InternalState -> ProgramState +internalToExternal s = + ProgramState { + -- Avoid introducing dependencies on the literal value as this is only for debugging purposes right now + variablesInScope = M.map (\c -> c { literalValue = Nothing }) flatVars, + -- internalState = s, -- For debugging + stateIsReachable = fromMaybe True $ sIsReachable s + } + where + flatVars = M.unionsWith (\_ last -> last) $ map mapStorage [sGlobalValues s, sLocalValues s, sPrefixValues s] + +-- Dependencies on values, e.g. "if there is a global variable named 'foo' without spaces" +-- This is used to see if the DFA of a function would result in the same state, so anything +-- that affects DFA must be tracked. +data StateDependency = + DepGlobalValue String VariableValue + | DepLocalValue String VariableValue + | DepPrefixValue String VariableValue + | DepFunction String (S.Set FunctionDefinition) + -- Whether invoking the node would result in recursion (i.e., is the function on the stack?) + | DepIsRecursive Node Bool + deriving (Show, Eq, Ord, Generic, NFData) + +-- A function definition, or lack thereof +data FunctionDefinition = FunctionUnknown | FunctionDefinition String Node Node + deriving (Show, Eq, Ord, Generic, NFData) + +-- The Set of places a command name can point (it's a Set to handle conditionally defined functions) +type FunctionValue = S.Set FunctionDefinition + +-- The scope of a function. ("Prefix" refers to e.g. `foo=1 env`) +data VariableScope = PrefixVar | LocalVar | GlobalVar + deriving (Show, Eq, Ord, Generic, NFData) + +-- Create an InternalState that fulfills the given dependencies +depsToState :: S.Set StateDependency -> InternalState +depsToState set = foldl insert newInternalState $ S.toList set + where + insert :: InternalState -> StateDependency -> InternalState + insert state dep = + case dep of + DepFunction name val -> insertFunction name val state + DepGlobalValue name val -> insertGlobal name val state + DepLocalValue name val -> insertLocal name val state + DepPrefixValue name val -> insertPrefix name val state + DepIsRecursive _ _ -> state + +unknownFunctionValue = S.singleton FunctionUnknown + +-- The information about the value of a single variable +data VariableValue = VariableValue { + literalValue :: Maybe String, -- TODO: For debugging. Remove me. + spaceStatus :: SpaceStatus +} + deriving (Show, Eq, Ord, Generic, NFData) + +-- Whether or not the value needs quoting (has spaces/globs), or we don't know +data SpaceStatus = SpaceStatusEmpty | SpaceStatusClean | SpaceStatusDirty deriving (Show, Eq, Ord, Generic, NFData) + + +unknownVariableValue = VariableValue { + literalValue = Nothing, + spaceStatus = SpaceStatusDirty +} + +emptyVariableValue = VariableValue { + literalValue = Just "", + spaceStatus = SpaceStatusEmpty +} + +mergeVariableValue a b = VariableValue { + literalValue = if literalValue a == literalValue b then literalValue a else Nothing, + spaceStatus = mergeSpaceStatus (spaceStatus a) (spaceStatus b) +} + +mergeSpaceStatus a b = + case (a,b) of + (SpaceStatusEmpty, y) -> y + (x, SpaceStatusEmpty) -> x + (SpaceStatusClean, SpaceStatusClean) -> SpaceStatusClean + _ -> SpaceStatusDirty + +-- A VersionedMap is a Map that keeps an additional integer version to quickly determine if it has changed. +-- * Version -1 means it's unknown (possibly and presumably changed) +-- * Version 0 means it's empty +-- * Version N means it's equal to any other map with Version N (this is required but not enforced) +data VersionedMap k v = VersionedMap { + mapVersion :: Integer, + mapStorage :: M.Map k v +} + deriving (Generic, NFData) + +-- This makes states more readable but inhibits copy-paste +instance (Show k, Show v) => Show (VersionedMap k v) where + show m = (if mapVersion m >= 0 then "V" ++ show (mapVersion m) else "U") ++ " " ++ show (mapStorage m) + +instance Eq InternalState where + (==) a b = stateIsQuickEqual a b || stateIsSlowEqual a b + +instance (Eq k, Eq v) => Eq (VersionedMap k v) where + (==) a b = vmIsQuickEqual a b || mapStorage a == mapStorage b + +instance (Ord k, Ord v) => Ord (VersionedMap k v) where + compare a b = + if vmIsQuickEqual a b + then EQ + else mapStorage a `compare` mapStorage b + + +-- A context with STRefs manually passed around to function. +-- This is done because it was dramatically much faster than any RWS type stack +data Ctx s = Ctx { + -- The current node + cNode :: STRef s Node, + -- The current input state + cInput :: STRef s InternalState, + -- The current output state + cOutput :: STRef s InternalState, + + -- The current functions/subshells stack + cStack :: [StackEntry s], + -- The input graph + cGraph :: CFGraph, + -- An incrementing counter to version maps + cCounter :: STRef s Integer, + -- A cache of input state dependencies to output effects + cCache :: STRef s (M.Map Node [(S.Set StateDependency, InternalState)]), + -- The states resulting from data flows per invocation path + cInvocations :: STRef s (M.Map [Node] (S.Set StateDependency, M.Map Node (InternalState, InternalState))) +} + +-- Whenever a function (or subshell) is invoked, a value like this is pushed onto the stack +data StackEntry s = StackEntry { + -- The entry point of this stack entry for the purpose of detecting recursion + entryPoint :: Node, + -- The node where this entry point was invoked + callSite :: Node, + -- A mutable set of dependencies we fetched from here or higher in the stack + dependencies :: STRef s (S.Set StateDependency), + -- The original input state for this stack entry + stackState :: InternalState +} + deriving (Eq, Generic, NFData) + + +-- Overwrite a base state with the contents of a diff state +-- This is unrelated to join/merge. +patchState :: InternalState -> InternalState -> InternalState +patchState base diff = + case () of + _ | sVersion diff == 0 -> base + _ | sVersion base == 0 -> diff + _ | stateIsQuickEqual base diff -> diff + _ -> + InternalState { + sVersion = -1, + sGlobalValues = vmPatch (sGlobalValues base) (sGlobalValues diff), + sLocalValues = vmPatch (sLocalValues base) (sLocalValues diff), + sPrefixValues = vmPatch (sPrefixValues base) (sPrefixValues diff), + sFunctionTargets = vmPatch (sFunctionTargets base) (sFunctionTargets diff), + sIsReachable = sIsReachable diff `mplus` sIsReachable base + } + +patchOutputM ctx diff = do + let cOut = cOutput ctx + oldState <- readSTRef cOut + let newState = patchState oldState diff + writeSTRef cOut newState + +-- Merge (aka Join) two states. This is monadic because it requires looking up +-- values from the current context. For example: +-- +-- f() { +-- foo || x=2 +-- HERE # This merge requires looking up the value of $x in the parent frame +-- } +-- x=1 +-- f +mergeState :: forall s. Ctx s -> InternalState -> InternalState -> ST s InternalState +mergeState ctx a b = do + -- Kludge: we want `readVariable` & friends not to read from an intermediate state, + -- so temporarily set a blank input. + let cin = cInput ctx + old <- readSTRef cin + writeSTRef cin newInternalState + x <- merge a b + writeSTRef cin old + return x + + where + + merge a b = + case () of + _ | sIsReachable a == Just True && sIsReachable b == Just False + || sIsReachable a == Just False && sIsReachable b == Just True -> + error $ pleaseReport "Unexpected merge of reachable and unreachable state" + _ | sIsReachable a == Just False && sIsReachable b == Just False -> + return unreachableState + _ | sVersion a >= 0 && sVersion b >= 0 && sVersion a == sVersion b -> return a + _ -> do + globals <- mergeMaps ctx mergeVariableValue readGlobal (sGlobalValues a) (sGlobalValues b) + locals <- mergeMaps ctx mergeVariableValue readVariable (sLocalValues a) (sLocalValues b) + prefix <- mergeMaps ctx mergeVariableValue readVariable (sPrefixValues a) (sPrefixValues b) + funcs <- mergeMaps ctx S.union readFunction (sFunctionTargets a) (sFunctionTargets b) + return $ InternalState { + sVersion = -1, + sGlobalValues = globals, + sLocalValues = locals, + sPrefixValues = prefix, + sFunctionTargets = funcs, + sIsReachable = liftM2 (&&) (sIsReachable a) (sIsReachable b) + } + +-- Merge a number of states, or return a default if there are no states +-- (it can't fold from newInternalState because this would be equivalent of adding a new input edge). +mergeStates :: forall s. Ctx s -> InternalState -> [InternalState] -> ST s InternalState +mergeStates ctx def list = + case list of + [] -> return def + (first:rest) -> foldM (mergeState ctx) first rest + +-- Merge two maps, key by key. If both maps have a key, the 'merger' is used. +-- If only one has the key, the 'reader' is used to fetch a second, and the two are merged as above. +mergeMaps :: (Ord k) => forall s. + Ctx s -> + (v -> v -> v) -> + (Ctx s -> k -> ST s v) -> + (VersionedMap k v) -> + (VersionedMap k v) -> + ST s (VersionedMap k v) +mergeMaps ctx merger reader a b = + if vmIsQuickEqual a b + then return a + else do + new <- M.fromDistinctAscList <$> reverse <$> f [] (M.toAscList $ mapStorage a) (M.toAscList $ mapStorage b) + vmFromMap ctx new + where + f l [] [] = return l + f l [] b = f l b [] + f l ((k,v):rest1) [] = do + other <- reader ctx k + f ((k, merger v other):l) rest1 [] + f l l1@((k1, v1):rest1) l2@((k2, v2):rest2) = + case k1 `compare` k2 of + EQ -> + f ((k1, merger v1 v2):l) rest1 rest2 + LT -> do + nv2 <- reader ctx k1 + f ((k1, merger v1 nv2):l) rest1 l2 + GT -> do + nv1 <- reader ctx k2 + f ((k2, merger nv1 v2):l) l1 rest2 + +vmFromMap ctx map = return $ VersionedMap { + mapVersion = -1, + mapStorage = map +} + +-- Give a VersionedMap a version if it does not already have one. +versionMap ctx map = + if mapVersion map >= 0 + then return map + else do + v <- nextVersion ctx + return map { + mapVersion = v + } + +-- Give an InternalState a version if it does not already have one. +versionState ctx state = + if sVersion state >= 0 + then return state + else do + self <- nextVersion ctx + ssGlobalValues <- versionMap ctx $ sGlobalValues state + ssLocalValues <- versionMap ctx $ sLocalValues state + ssFunctionTargets <- versionMap ctx $ sFunctionTargets state + return state { + sVersion = self, + sGlobalValues = ssGlobalValues, + sLocalValues = ssLocalValues, + sFunctionTargets = ssFunctionTargets + } + +-- Like 'not null' but for 2+ elements +is2plus :: [a] -> Bool +is2plus l = case l of + _:_:_ -> True + _ -> False + +-- Use versions to see if two states are trivially identical +stateIsQuickEqual a b = + let + va = sVersion a + vb = sVersion b + in + va >= 0 && vb >= 0 && va == vb + +-- A manual slow path 'Eq' (it's not derived because it's part of the custom Eq instance) +stateIsSlowEqual a b = + check sGlobalValues + && check sLocalValues + && check sPrefixValues + && check sFunctionTargets + && check sIsReachable + where + check f = f a == f b + +-- Check if two VersionedMaps are trivially equal +vmIsQuickEqual :: VersionedMap k v -> VersionedMap k v -> Bool +vmIsQuickEqual a b = + let + va = mapVersion a + vb = mapVersion b + in + va >= 0 && vb >= 0 && va == vb + +-- A new, empty VersionedMap +vmEmpty = VersionedMap { + mapVersion = 0, + mapStorage = M.empty +} + +-- Map.null for VersionedMaps +vmNull :: VersionedMap k v -> Bool +vmNull m = mapVersion m == 0 || (M.null $ mapStorage m) + +-- Map.lookup for VersionedMaps +vmLookup name map = M.lookup name $ mapStorage map + +-- Map.insert for VersionedMaps +vmInsert key val map = VersionedMap { + mapVersion = -1, + mapStorage = M.insert key val $ mapStorage map +} + +-- Overwrite all keys in the first map with values from the second +vmPatch :: (Ord k) => VersionedMap k v -> VersionedMap k v -> VersionedMap k v +vmPatch base diff = + case () of + _ | mapVersion base == 0 -> diff + _ | mapVersion diff == 0 -> base + _ | vmIsQuickEqual base diff -> diff + _ -> VersionedMap { + mapVersion = -1, + mapStorage = M.unionWith (flip const) (mapStorage base) (mapStorage diff) + } + +-- Modify a variable as with x=1. This applies it to the appropriate scope. +writeVariable :: forall s. Ctx s -> String -> VariableValue -> ST s () +writeVariable ctx name val = do + (_, typ) <- readVariableWithScope ctx name + case typ of + GlobalVar -> writeGlobal ctx name val + LocalVar -> writeLocal ctx name val + -- Prefixed variables actually become local variables in the invoked function + PrefixVar -> writeLocal ctx name val + +writeGlobal ctx name val = do + modifySTRef (cOutput ctx) $ insertGlobal name val + +writeLocal ctx name val = do + modifySTRef (cOutput ctx) $ insertLocal name val + +writePrefix ctx name val = do + modifySTRef (cOutput ctx) $ insertPrefix name val + +-- Look up a variable value, and also return its scope +readVariableWithScope :: forall s. Ctx s -> String -> ST s (VariableValue, VariableScope) +readVariableWithScope ctx name = lookupStack get dep def ctx name + where + def = (unknownVariableValue, GlobalVar) + get = getVariableWithScope + dep k v = + case v of + (val, GlobalVar) -> DepGlobalValue k val + (val, LocalVar) -> DepLocalValue k val + (val, PrefixVar) -> DepPrefixValue k val + +getVariableWithScope :: InternalState -> String -> Maybe (VariableValue, VariableScope) +getVariableWithScope s name = + case (vmLookup name $ sPrefixValues s, vmLookup name $ sLocalValues s, vmLookup name $ sGlobalValues s) of + (Just var, _, _) -> return (var, PrefixVar) + (_, Just var, _) -> return (var, LocalVar) + (_, _, Just var) -> return (var, GlobalVar) + _ -> Nothing + +undefineFunction ctx name = + writeFunction ctx name $ FunctionUnknown + +undefineVariable ctx name = + writeVariable ctx name $ emptyVariableValue + +readVariable ctx name = fst <$> readVariableWithScope ctx name + +readGlobal ctx name = lookupStack get dep def ctx name + where + def = unknownVariableValue + get s name = vmLookup name $ sGlobalValues s + dep k v = DepGlobalValue k v + +readFunction ctx name = lookupStack get dep def ctx name + where + def = unknownFunctionValue + get s name = vmLookup name $ sFunctionTargets s + dep k v = DepFunction k v + +writeFunction ctx name val = do + modifySTRef (cOutput ctx) $ insertFunction name $ S.singleton val + +-- Look up each state on the stack until a value is found (or the default is used), +-- then add this value as a StateDependency. +lookupStack :: forall s k v. + -- A function that maybe finds a value from a state + (InternalState -> k -> Maybe v) + -- A function that creates a dependency on what was found + -> (k -> v -> StateDependency) + -- A default value, if the value can't be found anywhere + -> v + -- Context + -> Ctx s + -- The key to look up + -> k + -- Returning the result + -> ST s v +lookupStack get dep def ctx key = do + top <- readSTRef $ cInput ctx + case get top key of + Just v -> return v + Nothing -> f (cStack ctx) + where + f [] = return def + f (s:rest) = do + -- Go up the stack until we find the value, and add + -- a dependency on each state (including where it was found) + res <- fromMaybe (f rest) (return <$> get (stackState s) key) + modifySTRef (dependencies s) $ S.insert $ dep key res + return res + +-- Like lookupStack but without adding dependencies +peekStack get def ctx key = do + top <- readSTRef $ cInput ctx + case get top key of + Just v -> return v + Nothing -> f (cStack ctx) + where + f [] = return def + f (s:rest) = + case get (stackState s) key of + Just v -> return v + Nothing -> f rest + +-- Check if the current context fulfills a StateDependency +fulfillsDependency ctx dep = + case dep of + DepGlobalValue name val -> (== (val, GlobalVar)) <$> peek ctx name + DepLocalValue name val -> (== (val, LocalVar)) <$> peek ctx name + DepPrefixValue name val -> (== (val, PrefixVar)) <$> peek ctx name + DepFunction name val -> (== val) <$> peekFunc ctx name + DepIsRecursive node val -> return $ val == any (\f -> entryPoint f == node) (cStack ctx) + -- _ -> error $ "Unknown dep " ++ show dep + where + peek = peekStack getVariableWithScope (unknownVariableValue, GlobalVar) + peekFunc = peekStack (\state name -> vmLookup name $ sFunctionTargets state) unknownFunctionValue + +-- Check if the current context fulfills all StateDependencies +fulfillsDependencies ctx deps = + f $ S.toList deps + where + f [] = return True + f (dep:rest) = do + res <- fulfillsDependency ctx dep + if res + then f rest + else return False + +-- Create a brand new Ctx given a Control Flow Graph (CFG) +newCtx g = do + c <- newSTRef 1 + input <- newSTRef undefined + output <- newSTRef undefined + node <- newSTRef undefined + cache <- newSTRef M.empty + invocations <- newSTRef M.empty + return $ Ctx { + cCounter = c, + cInput = input, + cOutput = output, + cNode = node, + cCache = cache, + cStack = [], + cInvocations = invocations, + cGraph = g + } + +-- The next incrementing version for VersionedMaps +nextVersion ctx = do + let ctr = cCounter ctx + n <- readSTRef ctr + writeSTRef ctr $! n+1 + return n + +-- Create a new StackEntry +newStackEntry ctx point = do + deps <- newSTRef S.empty + state <- readSTRef $ cOutput ctx + callsite <- readSTRef $ cNode ctx + return $ StackEntry { + entryPoint = point, + callSite = callsite, + dependencies = deps, + stackState = state + } + +-- Call a function with a new stack entry on the stack +withNewStackFrame ctx node f = do + newEntry <- newStackEntry ctx node + newInput <- newSTRef newInternalState + newOutput <- newSTRef newInternalState + newNode <- newSTRef node + let newCtx = ctx { + cInput = newInput, + cOutput = newOutput, + cNode = newNode, + cStack = newEntry : cStack ctx + } + x <- f newCtx + + {- + deps <- readSTRef $ dependencies newEntry + selfcheck <- fulfillsDependencies newCtx deps + unless selfcheck $ error $ pleaseReport $ "Unmet stack dependencies on " ++ show (node, deps) + -} + + return (x, newEntry) + +-- Check if invoking this function would be a recursive loop +-- (i.e. we already have the function on the stack) +wouldBeRecursive ctx node = f (cStack ctx) + where + f [] = return False + f (s:rest) = do + res <- + if entryPoint s == node + then return True + else f rest + modifySTRef (dependencies s) $ S.insert $ DepIsRecursive node res + return res + +-- The main DFA 'transfer' function, applying the effects of a node to the output state +transfer ctx label = + --traceShow ("Transferring", label) $ + case label of + CFStructuralNode -> return () + CFEntryPoint _ -> return () + CFImpliedExit -> return () + CFResolvedExit {} -> return () + + CFExecuteCommand cmd -> transferCommand ctx cmd + CFExecuteSubshell reason entry exit -> transferSubshell ctx reason entry exit + CFApplyEffects effects -> mapM_ (\(IdTagged _ f) -> transferEffect ctx f) effects + + CFUnresolvedExit -> patchOutputM ctx unreachableState + CFUnreachable -> patchOutputM ctx unreachableState + + -- TODO + CFSetBackgroundPid _ -> return () + CFSetExitCode _ -> return () + CFDropPrefixAssignments {} -> + modifySTRef (cOutput ctx) $ \c -> modified c { sPrefixValues = vmEmpty } +-- _ -> error $ "Unknown " ++ show label + + +-- Transfer the effects of a subshell invocation. This is similar to a function call +-- to allow easily discarding the effects (otherwise the InternalState would have +-- to represent subshell depth, while this way it can simply use the function stack). +transferSubshell ctx reason entry exit = do + let cout = cOutput ctx + initial <- readSTRef cout + runCached ctx entry (f entry exit) + -- Clear subshell changes. TODO: track this to warn about modifications. + writeSTRef cout initial + where + f entry exit ctx = do + (states, frame) <- withNewStackFrame ctx entry (flip dataflow $ entry) + let (_, res) = fromMaybe (error $ pleaseReport "Subshell has no exit") $ M.lookup exit states + deps <- readSTRef $ dependencies frame + registerFlowResult ctx entry states deps + return (deps, res) + +-- Transfer the effects of executing a command, i.e. the merged union of all possible function definitions. +transferCommand ctx Nothing = return () +transferCommand ctx (Just name) = do + targets <- readFunction ctx name + --traceShowM ("Transferring ",name,targets) + transferMultiple ctx $ map (flip transferFunctionValue) $ S.toList targets + +-- Transfer a set of function definitions and merge the output states. +transferMultiple ctx funcs = do +-- traceShowM ("Transferring set of ", length funcs) + original <- readSTRef out + branches <- mapM (apply ctx original) funcs + merged <- mergeStates ctx original branches + let patched = patchState original merged + writeSTRef out patched + where + out = cOutput ctx + apply ctx original f = do + writeSTRef out original + f ctx + readSTRef out + +-- Transfer the effects of a single function definition. +transferFunctionValue ctx funcVal = + case funcVal of + FunctionUnknown -> return () + FunctionDefinition name entry exit -> do + isRecursive <- wouldBeRecursive ctx entry + if isRecursive + then return () -- TODO: Find a better strategy for recursion + else runCached ctx entry (f name entry exit) + where + f name entry exit ctx = do + (states, frame) <- withNewStackFrame ctx entry (flip dataflow $ entry) + deps <- readSTRef $ dependencies frame + let res = + case M.lookup exit states of + Just (input, output) -> do + -- Discard local variables. TODO: track&retain variables declared local in previous scopes? + modified output { sLocalValues = vmEmpty } + Nothing -> do + -- e.g. f() { exit; } + unreachableState + registerFlowResult ctx entry states deps + return (deps, res) + + +-- Register/save the result of a dataflow of a function. +-- At the end, all the different values from different flows are merged together. +registerFlowResult ctx entry states deps = do + -- This function is called in the context of a CFExecuteCommand and not its invoked function, + -- so manually add the current node to the stack. + current <- readSTRef $ cNode ctx + let parents = map callSite $ cStack ctx + -- A unique path to this flow context. The specific value doesn't matter, as long as it's + -- unique per invocation of the function. This is required so that 'x=1; f; x=2; f' won't + -- overwrite each other. + let path = entry : current : parents + modifySTRef (cInvocations ctx) $ M.insert path (deps, states) + + +-- Look up a node in the cache and see if the dependencies of any entries are matched. +-- In that case, reuse the previous result instead of doing a new data flow. +runCached :: forall s. Ctx s -> Node -> (Ctx s -> ST s (S.Set StateDependency, InternalState)) -> ST s () +runCached ctx node f = do + cache <- getCache ctx node + case cache of + Just v -> do + -- traceShowM $ ("Running cached", node) + patchOutputM ctx v + Nothing -> do + -- traceShowM $ ("Cache failed", node) + (deps, diff) <- f ctx + modifySTRef (cCache ctx) (M.insertWith (\_ old -> (deps, diff):(take cacheEntries old)) node [(deps,diff)]) + -- traceShowM $ ("Recomputed cache for", node, deps) + patchOutputM ctx diff + +-- Get a cached version whose dependencies are currently fulfilled, if any. +getCache :: forall s. Ctx s -> Node -> ST s (Maybe InternalState) +getCache ctx node = do + cache <- readSTRef $ cCache ctx + -- traceShowM $ ("Cache for", node, "length", length $ M.findWithDefault [] node cache, M.lookup node cache) + f $ M.findWithDefault [] node cache + where + f [] = return Nothing + f ((deps, value):rest) = do + match <- fulfillsDependencies ctx deps + if match + then return $ Just value + else f rest + +-- Transfer a single CFEffect to the output state. +transferEffect ctx effect = + case effect of + CFReadVariable name -> do + void $ readVariable ctx name + CFWriteVariable name value -> do + val <- cfValueToVariableValue ctx value + writeVariable ctx name val + CFWriteGlobal name value -> do + val <- cfValueToVariableValue ctx value + writeGlobal ctx name val + CFWriteLocal name value -> do + val <- cfValueToVariableValue ctx value + writeLocal ctx name val + CFWritePrefix name value -> do + val <- cfValueToVariableValue ctx value + writePrefix ctx name val + CFUndefineVariable name -> undefineVariable ctx name + CFUndefineFunction name -> undefineFunction ctx name + CFUndefine name -> do + -- This should really just unset one or the other + undefineVariable ctx name + undefineFunction ctx name + CFDefineFunction name id entry exit -> + writeFunction ctx name $ FunctionDefinition name entry exit + + -- TODO + CFUndefineNameref name -> undefineVariable ctx name + CFHintArray name -> return () + CFHintDefined name -> return () + CFModifyProps {} -> return () +-- _ -> error $ "Unknown effect " ++ show effect + + + +-- Transfer the CFG's idea of a value into our VariableState +cfValueToVariableValue ctx val = + case val of + CFValueArray -> return unknownVariableValue -- TODO: Track array status + CFValueComputed _ parts -> foldM f emptyVariableValue parts + CFValueInteger -> return unknownIntegerValue + CFValueString -> return unknownVariableValue + CFValueUninitialized -> return emptyVariableValue +-- _ -> error $ "Unknown value: " ++ show val + where + f val part = do + next <- computeValue ctx part + return $ val `appendVariableValue` next + +-- A value can be computed from 0 or more parts, such as x="literal$y$z" +computeValue ctx part = + case part of + CFStringLiteral str -> return $ literalToVariableValue str + CFStringInteger -> return unknownIntegerValue + CFStringUnknown -> return unknownVariableValue + CFStringVariable name -> readVariable ctx name + +-- Append two VariableValues as if with z="$x$y" +appendVariableValue :: VariableValue -> VariableValue -> VariableValue +appendVariableValue a b = + VariableValue { + literalValue = liftM2 (++) (literalValue a) (literalValue b), + spaceStatus = appendSpaceStatus (spaceStatus a) (spaceStatus b) + } + +appendSpaceStatus a b = + case (a,b) of + (SpaceStatusEmpty, _) -> b + (_, SpaceStatusEmpty) -> a + (SpaceStatusClean, SpaceStatusClean) -> a + _ ->SpaceStatusDirty + +unknownIntegerValue = VariableValue { + literalValue = Nothing, + spaceStatus = SpaceStatusClean +} + +literalToVariableValue str = VariableValue { + literalValue = Just str, + spaceStatus = literalToSpaceStatus str +} + +withoutChanges ctx f = do + let inp = cInput ctx + let out = cOutput ctx + prevInput <- readSTRef inp + prevOutput <- readSTRef out + res <- f + writeSTRef inp prevInput + writeSTRef out prevOutput + return res + +-- Get the SpaceStatus for a literal string, i.e. if it needs quoting +literalToSpaceStatus str = + case str of + "" -> SpaceStatusEmpty + _ | all (`notElem` " \t\n*?[") str -> SpaceStatusClean + _ -> SpaceStatusDirty + +type StateMap = M.Map Node (InternalState, InternalState) + +-- Classic, iterative Data Flow Analysis. See Wikipedia for a description of the process. +dataflow :: forall s. Ctx s -> Node -> ST s StateMap +dataflow ctx entry = do + pending <- newSTRef $ S.singleton entry + states <- newSTRef $ M.empty + -- Should probably be done via a stack frame instead + withoutChanges ctx $ + f iterationCount pending states + readSTRef states + where + graph = cGraph ctx + f 0 _ _ = error $ pleaseReport "DFA did not reach fix point" + f n pending states = do + ps <- readSTRef pending + if S.null ps + then return () + else do + let (next, rest) = S.deleteFindMin ps + nexts <- process states next + writeSTRef pending $ foldl (flip S.insert) rest nexts + f (n-1) pending states + + process states node = do + stateMap <- readSTRef states + let inputs = filter (\c -> sIsReachable c /= Just False) $ mapMaybe (\c -> fmap snd $ M.lookup c stateMap) incoming + input <- + case incoming of + [] -> return newInternalState + _ -> + case inputs of + [] -> return unreachableState + (x:rest) -> foldM (mergeState ctx) x rest + writeSTRef (cInput ctx) $ input + writeSTRef (cOutput ctx) $ input + writeSTRef (cNode ctx) $ node + transfer ctx label + newOutput <- readSTRef $ cOutput ctx + result <- + if is2plus outgoing + then + -- Version the state because we split and will probably merge later + versionState ctx newOutput + else return newOutput + writeSTRef states $ M.insert node (input, result) stateMap + case M.lookup node stateMap of + Nothing -> return outgoing + Just (oldInput, oldOutput) -> + if oldOutput == result + then return [] + else return outgoing + where + (incomingL, _, label, outgoingL) = context graph $ node + incoming = map snd $ filter isRegular $ incomingL + outgoing = map snd outgoingL + isRegular = ((== CFEFlow) . fst) + +runRoot ctx entry exit = do + let env = createEnvironmentState + writeSTRef (cInput ctx) $ env + writeSTRef (cOutput ctx) $ env + writeSTRef (cNode ctx) $ entry + (states, frame) <- withNewStackFrame ctx entry $ \c -> dataflow c entry + deps <- readSTRef $ dependencies frame + registerFlowResult ctx entry states deps + -- Return the final state, used to invoke functions that were declared but not invoked + return $ snd $ fromMaybe (error $ pleaseReport "Missing exit state") $ M.lookup exit states + + +analyzeControlFlow :: CFGParameters -> Token -> CFGAnalysis +analyzeControlFlow params t = + let + cfg = buildGraph params t + (entry, exit) = M.findWithDefault (error $ pleaseReport "Missing root") (getId t) (cfIdToNode cfg) + in + runST $ f cfg entry exit + where + f cfg entry exit = do + ctx <- newCtx $ cfGraph cfg + -- Do a dataflow analysis starting on the root node + exitState <- runRoot ctx entry exit + + -- All nodes we've touched + invocations <- readSTRef $ cInvocations ctx + let invokedNodes = M.fromDistinctAscList $ map (\c -> (c, ())) $ S.toList $ M.keysSet $ groupByNode $ M.map snd invocations + + -- Invoke all functions that were declared but not invoked + -- This is so that we still get warnings for dead code + -- (it's probably not actually dead, just used by a script that sources ours) + let declaredFunctions = getFunctionTargets exitState + let uninvoked = M.difference declaredFunctions invokedNodes + analyzeStragglers ctx exitState uninvoked + + -- Now round up all the states from all data flows + -- (FIXME: this excludes functions that were defined in straggling functions) + invocations <- readSTRef $ cInvocations ctx + invokedStates <- flattenByNode ctx $ groupByNode $ M.map addDeps invocations + + -- Fill in the map with unreachable states for anything we didn't get to + let baseStates = M.fromDistinctAscList $ map (\c -> (c, (unreachableState, unreachableState))) $ uncurry enumFromTo $ nodeRange $ cfGraph cfg + let allStates = M.unionWith (flip const) baseStates invokedStates + + -- Convert to external states + let nodeToData = M.map (\(a,b) -> (internalToExternal a, internalToExternal b)) allStates + + return $ nodeToData `deepseq` CFGAnalysis { + graph = cfGraph cfg, + tokenToNode = cfIdToNode cfg, + nodeToData = nodeToData + } + + + -- Include the dependencies in the state of each function, e.g. if it depends on `x=foo` then add that. + addDeps :: (S.Set StateDependency, M.Map Node (InternalState, InternalState)) -> M.Map Node (InternalState, InternalState) + addDeps (deps, m) = let base = depsToState deps in M.map (\(a,b) -> (base `patchState` a, base `patchState` b)) m + + -- Collect all the states that each node has resulted in. + groupByNode :: forall k v. M.Map k (M.Map Node v) -> M.Map Node [v] + groupByNode pathMap = M.fromListWith (++) $ map (\(k,v) -> (k,[v])) $ concatMap M.toList $ M.elems pathMap + + -- Merge all the pre/post states for each node. This would have been a foldM if Map had one. + flattenByNode ctx m = M.fromDistinctAscList <$> (mapM (mergePair ctx) $ M.toList m) + + mergeAllStates ctx pairs = + let + (pres, posts) = unzip pairs + in do + pre <- mergeStates ctx (error $ pleaseReport "Null node states") pres + post <- mergeStates ctx (error $ pleaseReport "Null node states") posts + return (pre, post) + + mergePair ctx (node, list) = do + merged <- mergeAllStates ctx list + return (node, merged) + + -- Get the all the functions defined in an InternalState + getFunctionTargets :: InternalState -> M.Map Node FunctionDefinition + getFunctionTargets state = + let + declaredFuncs = S.unions $ mapStorage $ sFunctionTargets state + getFunc d = + case d of + FunctionDefinition _ entry _ -> Just (entry, d) + _ -> Nothing + funcs = mapMaybe getFunc $ S.toList declaredFuncs + in + M.fromList funcs + + +analyzeStragglers ctx state stragglers = do + mapM_ f $ M.elems stragglers + where + f def@(FunctionDefinition name entry exit) = do + writeSTRef (cInput ctx) state + writeSTRef (cOutput ctx) state + writeSTRef (cNode ctx) entry + transferFunctionValue ctx def + + +return [] +runTests = $quickCheckAll diff --git a/src/ShellCheck/Checks/Commands.hs b/src/ShellCheck/Checks/Commands.hs index e65dc68..cac06bc 100644 --- a/src/ShellCheck/Checks/Commands.hs +++ b/src/ShellCheck/Checks/Commands.hs @@ -30,6 +30,7 @@ import ShellCheck.AnalyzerLib import ShellCheck.Data import ShellCheck.Interface import ShellCheck.Parser +import ShellCheck.Prelude import ShellCheck.Regex import Control.Monad diff --git a/src/ShellCheck/Checks/ControlFlow.hs b/src/ShellCheck/Checks/ControlFlow.hs new file mode 100644 index 0000000..9b7635e --- /dev/null +++ b/src/ShellCheck/Checks/ControlFlow.hs @@ -0,0 +1,101 @@ +{- + Copyright 2022 Vidar Holen + + This file is part of ShellCheck. + https://www.shellcheck.net + + ShellCheck is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ShellCheck is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +-} +{-# LANGUAGE TemplateHaskell #-} + +-- Checks that run on the Control Flow Graph (as opposed to the AST) +-- This is scaffolding for a work in progress. + +module ShellCheck.Checks.ControlFlow (checker, optionalChecks, ShellCheck.Checks.ControlFlow.runTests) where + +import ShellCheck.AST +import ShellCheck.ASTLib +import ShellCheck.CFG hiding (cfgAnalysis) +import ShellCheck.CFGAnalysis +import ShellCheck.AnalyzerLib +import ShellCheck.Data +import ShellCheck.Interface + +import Control.Monad +import Control.Monad.Reader +import Data.Graph.Inductive.Graph +import qualified Data.Map as M +import qualified Data.Set as S +import Data.List +import Data.Maybe + +import Test.QuickCheck.All (forAllProperties) +import Test.QuickCheck.Test (quickCheckWithResult, stdArgs, maxSuccess) + + +optionalChecks :: [CheckDescription] +optionalChecks = [] + +-- A check that runs on the entire graph +type ControlFlowCheck = Analysis +-- A check invoked once per node, with its (pre,post) data +type ControlFlowNodeCheck = LNode CFNode -> (ProgramState, ProgramState) -> Analysis +-- A check invoked once per effect, with its node's (pre,post) data +type ControlFlowEffectCheck = IdTagged CFEffect -> Node -> (ProgramState, ProgramState) -> Analysis + + +checker :: AnalysisSpec -> Parameters -> Checker +checker spec params = Checker { + perScript = const $ sequence_ controlFlowChecks, + perToken = const $ return () +} + +controlFlowChecks :: [ControlFlowCheck] +controlFlowChecks = [ + runNodeChecks controlFlowNodeChecks + ] + +controlFlowNodeChecks :: [ControlFlowNodeCheck] +controlFlowNodeChecks = [ + runEffectChecks controlFlowEffectChecks + ] + +controlFlowEffectChecks :: [ControlFlowEffectCheck] +controlFlowEffectChecks = [ + ] + +runNodeChecks :: [ControlFlowNodeCheck] -> ControlFlowCheck +runNodeChecks perNode = do + cfg <- asks cfgAnalysis + runOnAll cfg + where + getData datas n@(node, label) = do + (pre, post) <- M.lookup node datas + return (n, (pre, post)) + + runOn :: (LNode CFNode, (ProgramState, ProgramState)) -> Analysis + runOn (node, prepost) = mapM_ (\c -> c node prepost) perNode + runOnAll cfg = mapM_ runOn $ mapMaybe (getData $ nodeToData cfg) $ labNodes (graph cfg) + +runEffectChecks :: [ControlFlowEffectCheck] -> ControlFlowNodeCheck +runEffectChecks list = checkNode + where + checkNode (node, label) prepost = + case label of + CFApplyEffects effects -> mapM_ (\effect -> mapM_ (\c -> c effect node prepost) list) effects + _ -> return () + + +return [] +runTests = $( [| $(forAllProperties) (quickCheckWithResult (stdArgs { maxSuccess = 1 }) ) |]) diff --git a/src/ShellCheck/Checks/ShellSupport.hs b/src/ShellCheck/Checks/ShellSupport.hs index 22a6a5f..9ad17f5 100644 --- a/src/ShellCheck/Checks/ShellSupport.hs +++ b/src/ShellCheck/Checks/ShellSupport.hs @@ -25,6 +25,7 @@ import ShellCheck.AST import ShellCheck.ASTLib import ShellCheck.AnalyzerLib import ShellCheck.Interface +import ShellCheck.Prelude import ShellCheck.Regex import Control.Monad diff --git a/src/ShellCheck/Data.hs b/src/ShellCheck/Data.hs index e22b424..fb82ca8 100644 --- a/src/ShellCheck/Data.hs +++ b/src/ShellCheck/Data.hs @@ -2,9 +2,27 @@ module ShellCheck.Data where import ShellCheck.Interface import Data.Version (showVersion) -import Paths_ShellCheck (version) -shellcheckVersion = showVersion version -- VERSIONSTRING + +{- +If you are here because you saw an error about Paths_ShellCheck in this file, +simply comment out the import below and define the version as a constant string. + +Instead of: + + import Paths_ShellCheck (version) + shellcheckVersion = showVersion version + +Use: + + -- import Paths_ShellCheck (version) + shellcheckVersion = "kludge" + +-} + +import Paths_ShellCheck (version) +shellcheckVersion = showVersion version -- VERSIONSTRING + internalVariables = [ -- Generic @@ -43,9 +61,12 @@ internalVariables = [ "flags_error", "flags_return" ] -specialVariablesWithoutSpaces = [ - "$", "-", "?", "!", "#" +specialIntegerVariables = [ + "$", "?", "!", "#" ] + +specialVariablesWithoutSpaces = "-" : specialIntegerVariables + variablesWithoutSpaces = specialVariablesWithoutSpaces ++ [ "BASHPID", "BASH_ARGC", "BASH_LINENO", "BASH_SUBSHELL", "EUID", "LINENO", "OPTIND", "PPID", "RANDOM", "SECONDS", "SHELLOPTS", "SHLVL", "UID", diff --git a/src/ShellCheck/Debug.hs b/src/ShellCheck/Debug.hs new file mode 100644 index 0000000..c991308 --- /dev/null +++ b/src/ShellCheck/Debug.hs @@ -0,0 +1,313 @@ +{- + +This file contains useful functions for debugging and developing ShellCheck. + +To invoke them interactively, run: + + cabal repl + +At the ghci prompt, enter: + + :load ShellCheck.Debug + +You can now invoke the functions. Here are some examples: + + shellcheckString "echo $1" + stringToAst "(( x+1 ))" + stringToCfg "if foo; then bar; else baz; fi" + writeFile "/tmp/test.dot" $ stringToCfgViz "while foo; do bar; done" + +The latter file can be rendered to png with GraphViz: + + dot -Tpng /tmp/test.dot > /tmp/test.png + +To run all unit tests in a module: + + ShellCheck.Parser.runTests + ShellCheck.Analytics.runTests + +To run a specific test: + + :load ShellCheck.Analytics + prop_checkUuoc3 + +If you make code changes, reload in seconds at any time with: + + :r + +=========================================================================== + +Crash course in printf debugging in Haskell: + + import Debug.Trace + + greet 0 = return () + -- Print when a function is invoked + greet n | trace ("calling greet " ++ show n) False = undefined + greet n = do + putStrLn "Enter name" + name <- getLine + -- Print at some point in any monadic function + traceM $ "user entered " ++ name + putStrLn $ "Hello " ++ name + -- Print a value before passing it on + greet $ traceShowId (n - 1) + + +=========================================================================== + +If you want to invoke `ghci` directly, such as on `shellcheck.hs`, to +debug all of ShellCheck including I/O, you may see an error like this: + + src/ShellCheck/Data.hs:5:1: error: + Could not load module ‘Paths_ShellCheck’ + it is a hidden module in the package ‘ShellCheck-0.8.0’ + +This can easily be circumvented by running `./setgitversion` or manually +editing src/ShellCheck/Data.hs to replace the auto-deduced version number +with a constant string as indicated. + +Afterwards, you can run the ShellCheck tool, as if from the shell, with: + + $ ghci shellcheck.hs + ghci> runMain ["-x", "file.sh"] + +-} + +module ShellCheck.Debug () where + +import ShellCheck.Analyzer +import ShellCheck.AST +import ShellCheck.CFG +import ShellCheck.Checker +import ShellCheck.CFGAnalysis as CF +import ShellCheck.Interface +import ShellCheck.Parser +import ShellCheck.Prelude + +import Control.Monad +import Control.Monad.Identity +import Control.Monad.RWS +import Control.Monad.Writer +import Data.Graph.Inductive.Graph as G +import Data.List +import Data.Maybe +import qualified Data.Map as M +import qualified Data.Set as S + + +-- Run all of ShellCheck (minus output formatters) +shellcheckString :: String -> CheckResult +shellcheckString scriptString = + runIdentity $ checkScript dummySystemInterface checkSpec + where + checkSpec :: CheckSpec + checkSpec = emptyCheckSpec { + csScript = scriptString + } + +dummySystemInterface :: SystemInterface Identity +dummySystemInterface = mockedSystemInterface [ + -- A tiny, fake filesystem for sourced files + ("lib/mylib1.sh", "foo=$(cat $1 | wc -l)"), + ("lib/mylib2.sh", "bar=42") + ] + +-- Parameters used when generating Control Flow Graphs +cfgParams :: CFGParameters +cfgParams = CFGParameters { + cfLastpipe = False, + cfPipefail = False +} + +-- An example script to play with +exampleScript :: String +exampleScript = unlines [ + "#!/bin/sh", + "count=0", + "for file in *", + "do", + " (( count++ ))", + "done", + "echo $count" + ] + +-- Parse the script string into ShellCheck's ParseResult +parseScriptString :: String -> ParseResult +parseScriptString scriptString = + runIdentity $ parseScript dummySystemInterface parseSpec + where + parseSpec :: ParseSpec + parseSpec = newParseSpec { + psFilename = "myscript", + psScript = scriptString + } + + +-- Parse the script string into an Abstract Syntax Tree +stringToAst :: String -> Token +stringToAst scriptString = + case maybeRoot of + Just root -> root + Nothing -> error $ "Script failed to parse: " ++ show parserWarnings + where + parseResult :: ParseResult + parseResult = parseScriptString scriptString + + maybeRoot :: Maybe Token + maybeRoot = prRoot parseResult + + parserWarnings :: [PositionedComment] + parserWarnings = prComments parseResult + + +astToCfgResult :: Token -> CFGResult +astToCfgResult = buildGraph cfgParams + +astToDfa :: Token -> CFGAnalysis +astToDfa = analyzeControlFlow cfgParams + +astToCfg :: Token -> CFGraph +astToCfg = cfGraph . astToCfgResult + +stringToCfg :: String -> CFGraph +stringToCfg = astToCfg . stringToAst + +stringToDfa :: String -> CFGAnalysis +stringToDfa = astToDfa . stringToAst + +cfgToGraphViz :: CFGraph -> String +cfgToGraphViz = cfgToGraphVizWith show + +stringToCfgViz :: String -> String +stringToCfgViz = cfgToGraphViz . stringToCfg + +stringToDfaViz :: String -> String +stringToDfaViz = dfaToGraphViz . stringToDfa + +-- Dump a Control Flow Graph as GraphViz with extended information +stringToDetailedCfgViz :: String -> String +stringToDetailedCfgViz scriptString = cfgToGraphVizWith nodeLabel graph + where + ast :: Token + ast = stringToAst scriptString + + cfgResult :: CFGResult + cfgResult = astToCfgResult ast + + graph :: CFGraph + graph = cfGraph cfgResult + + idToToken :: M.Map Id Token + idToToken = M.fromList $ execWriter $ doAnalysis (\c -> tell [(getId c, c)]) ast + + idToNode :: M.Map Id (Node, Node) + idToNode = cfIdToNode cfgResult + + nodeToStartIds :: M.Map Node (S.Set Id) + nodeToStartIds = + M.fromListWith S.union $ + map (\(id, (start, _)) -> (start, S.singleton id)) $ + M.toList idToNode + + nodeToEndIds :: M.Map Node (S.Set Id) + nodeToEndIds = + M.fromListWith S.union $ + map (\(id, (_, end)) -> (end, S.singleton id)) $ + M.toList idToNode + + formatId :: Id -> String + formatId id = fromMaybe ("Unknown " ++ show id) $ do + (OuterToken _ token) <- M.lookup id idToToken + firstWord <- words (show token) !!! 0 + -- Strip off "Inner_" + (_ : tokenName) <- return $ dropWhile (/= '_') firstWord + return $ tokenName ++ " " ++ show id + + formatGroup :: S.Set Id -> String + formatGroup set = intercalate ", " $ map formatId $ S.toList set + + nodeLabel (node, label) = unlines [ + show node ++ ". " ++ show label, + "Begin: " ++ formatGroup (M.findWithDefault S.empty node nodeToStartIds), + "End: " ++ formatGroup (M.findWithDefault S.empty node nodeToEndIds) + ] + + +-- Dump a Control Flow Graph with Data Flow Analysis as GraphViz +dfaToGraphViz :: CF.CFGAnalysis -> String +dfaToGraphViz analysis = cfgToGraphVizWith label $ CF.graph analysis + where + label (node, label) = + let + desc = show node ++ ". " ++ show label + in + fromMaybe ("No DFA available\n\n" ++ desc) $ do + (pre, post) <- M.lookup node $ CF.nodeToData analysis + return $ unlines [ + "Precondition: " ++ show pre, + "", + desc, + "", + "Postcondition: " ++ show post + ] + + +-- Dump an Control Flow Graph to GraphViz with a given node formatter +cfgToGraphVizWith :: (LNode CFNode -> String) -> CFGraph -> String +cfgToGraphVizWith nodeLabel graph = concat [ + "digraph {\n", + concatMap dumpNode (labNodes graph), + concatMap dumpLink (labEdges graph), + tagVizEntries graph, + "}\n" + ] + where + dumpNode l@(node, label) = show node ++ " [label=" ++ quoteViz (nodeLabel l) ++ "]\n" + dumpLink (from, to, typ) = show from ++ " -> " ++ show to ++ " [style=" ++ quoteViz (edgeStyle typ) ++ "]\n" + edgeStyle CFEFlow = "solid" + edgeStyle CFEExit = "bold" + edgeStyle CFEFalseFlow = "dotted" + +quoteViz str = "\"" ++ escapeViz str ++ "\"" +escapeViz [] = [] +escapeViz (c:rest) = + case c of + '\"' -> '\\' : '\"' : escapeViz rest + '\n' -> '\\' : 'l' : escapeViz rest + '\\' -> '\\' : '\\' : escapeViz rest + _ -> c : escapeViz rest + + +-- Dump an Abstract Syntax Tree (or branch thereof) to GraphViz format +astToGraphViz :: Token -> String +astToGraphViz token = concat [ + "digraph {\n", + formatTree token, + "}\n" + ] + where + formatTree :: Token -> String + formatTree t = snd $ execRWS (doStackAnalysis push pop t) () [] + + push :: Token -> RWS () String [Int] () + push (OuterToken (Id n) inner) = do + stack <- get + put (n : stack) + case stack of + [] -> return () + (top:_) -> tell $ show top ++ " -> " ++ show n ++ "\n" + tell $ show n ++ " [label=" ++ quoteViz (show n ++ ": " ++ take 32 (show inner)) ++ "]\n" + + pop :: Token -> RWS () String [Int] () + pop _ = modify tail + + +-- For each entry point, set the rank so that they'll align in the graph +tagVizEntries :: CFGraph -> String +tagVizEntries graph = "{ rank=same " ++ rank ++ " }" + where + entries = mapMaybe find $ labNodes graph + find (node, CFEntryPoint name) = return (node, name) + find _ = Nothing + rank = unwords $ map (\(c, _) -> show c) entries diff --git a/src/ShellCheck/Fixer.hs b/src/ShellCheck/Fixer.hs index 1409b24..2376842 100644 --- a/src/ShellCheck/Fixer.hs +++ b/src/ShellCheck/Fixer.hs @@ -22,6 +22,7 @@ module ShellCheck.Fixer (applyFix, removeTabStops, mapPositions, Ranged(..), runTests) where import ShellCheck.Interface +import ShellCheck.Prelude import Control.Monad.State import Data.Array import Data.List @@ -228,7 +229,7 @@ applyReplacement2 rep string = do let (l1, l2) = tmap posLine originalPos in when (l1 /= 1 || l2 /= 1) $ - error "ShellCheck internal error, please report: bad cross-line fix" + error $ pleaseReport "bad cross-line fix" let replacer = repString rep let shift = (length replacer) - (oldEnd - oldStart) diff --git a/src/ShellCheck/Parser.hs b/src/ShellCheck/Parser.hs index 3958406..9f9241c 100644 --- a/src/ShellCheck/Parser.hs +++ b/src/ShellCheck/Parser.hs @@ -27,6 +27,7 @@ import ShellCheck.AST import ShellCheck.ASTLib hiding (runTests) import ShellCheck.Data import ShellCheck.Interface +import ShellCheck.Prelude import Control.Applicative ((<*), (*>)) import Control.Monad @@ -210,7 +211,7 @@ getNextIdSpanningTokenList list = -- Get the span covered by an id getSpanForId :: Monad m => Id -> SCParser m (SourcePos, SourcePos) getSpanForId id = - Map.findWithDefault (error "Internal error: no position for id. Please report!") id <$> + Map.findWithDefault (error $ pleaseReport "no parser span for id") id <$> getMap -- Create a new id with the same span as an existing one @@ -1918,7 +1919,7 @@ readPendingHereDocs = do -- The end token is just a prefix skipLine | hasTrailer -> - error "ShellCheck bug, please report (here doc trailer)." + error $ pleaseReport "unexpected heredoc trailer" -- The following cases assume no trailing text: | dashed == Undashed && (not $ null leadingSpace) -> do diff --git a/src/ShellCheck/Prelude.hs b/src/ShellCheck/Prelude.hs new file mode 100644 index 0000000..7e9011b --- /dev/null +++ b/src/ShellCheck/Prelude.hs @@ -0,0 +1,48 @@ +{- + Copyright 2022 Vidar Holen + + This file is part of ShellCheck. + https://www.shellcheck.net + + ShellCheck is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ShellCheck is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +-} + +-- Generic basic utility functions +module ShellCheck.Prelude where + +-- Get element 0 or a default. Like `head` but safe. +headOrDefault _ (a:_) = a +headOrDefault def _ = def + +-- Get the last element or a default. Like `last` but safe. +lastOrDefault def [] = def +lastOrDefault _ list = last list + +--- Get element n of a list, or Nothing. Like `!!` but safe. +(!!!) list i = + case drop i list of + [] -> Nothing + (r:_) -> Just r + + +-- Like mconcat but for Semigroups +sconcat1 :: (Semigroup t) => [t] -> t +sconcat1 [x] = x +sconcat1 (x:xs) = x <> sconcat1 xs + +sconcatOrDefault def [] = def +sconcatOrDefault _ list = sconcat1 list + +-- For more actionable "impossible" errors +pleaseReport str = "ShellCheck internal error, please report: " ++ str diff --git a/test/shellcheck.hs b/test/shellcheck.hs index e463403..1a272af 100644 --- a/test/shellcheck.hs +++ b/test/shellcheck.hs @@ -5,8 +5,11 @@ import System.Exit import qualified ShellCheck.Analytics import qualified ShellCheck.AnalyzerLib import qualified ShellCheck.ASTLib +import qualified ShellCheck.CFG +import qualified ShellCheck.CFGAnalysis import qualified ShellCheck.Checker import qualified ShellCheck.Checks.Commands +import qualified ShellCheck.Checks.ControlFlow import qualified ShellCheck.Checks.Custom import qualified ShellCheck.Checks.ShellSupport import qualified ShellCheck.Fixer @@ -19,8 +22,11 @@ main = do ShellCheck.Analytics.runTests ,ShellCheck.AnalyzerLib.runTests ,ShellCheck.ASTLib.runTests + ,ShellCheck.CFG.runTests + ,ShellCheck.CFGAnalysis.runTests ,ShellCheck.Checker.runTests ,ShellCheck.Checks.Commands.runTests + ,ShellCheck.Checks.ControlFlow.runTests ,ShellCheck.Checks.Custom.runTests ,ShellCheck.Checks.ShellSupport.runTests ,ShellCheck.Fixer.runTests