shellcheck/src/ShellCheck/CFGAnalysis.hs

1373 lines
51 KiB
Haskell

{-
Copyright 2022 Vidar Holen
This file is part of ShellCheck.
https://www.shellcheck.net
ShellCheck is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
ShellCheck is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE DeriveAnyClass, DeriveGeneric #-}
{-
Data Flow Analysis on a Control Flow Graph.
This module implements a pretty standard iterative Data Flow Analysis.
For an overview of the process, see Wikipedia.
Since shell scripts rely heavily on global variables, this DFA includes
tracking the value of globals across calls. Each function invocation is
treated as a separate DFA problem, and a caching mechanism (hopefully)
avoids any exponential explosions.
To do efficient DFA join operations (or merges, as the code calls them),
some of the data structures have an integer version attached. On update,
the version is changed. If two states have the same version number,
a merge is skipped on the grounds that they are identical. It is easy
to unintentionally forget to update/invalidate the version number,
and bugs will ensure.
For performance reasons, the entire code runs in plain ST, with a manual
context object Ctx being passed around. It relies heavily on mutable
STRefs. However, this turned out to be literally thousands of times faster
than my several attempts using RWST, so it can't be helped.
-}
module ShellCheck.CFGAnalysis (
analyzeControlFlow
,CFGParameters (..)
,CFGAnalysis (..)
,ProgramState (..)
,VariableState (..)
,VariableValue (..)
,VariableProperties
,SpaceStatus (..)
,getIncomingState
,getOutgoingState
,doesPostDominate
,ShellCheck.CFGAnalysis.runTests -- STRIP
) where
import GHC.Generics (Generic)
import ShellCheck.AST
import ShellCheck.CFG
import qualified ShellCheck.Data as Data
import ShellCheck.Prelude
import Control.Monad
import Control.Monad.ST
import Control.DeepSeq
import Data.List hiding (map)
import Data.STRef
import Data.Maybe
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Graph.Inductive.Graph
import Data.Graph.Inductive.Query.DFS
import Debug.Trace -- STRIP
import Test.QuickCheck
-- The number of iterations for DFA to stabilize
iterationCount = 1000000
-- There have been multiple bugs where bad caching caused oscillations.
-- As a precaution, disable caching if there's this many iterations left.
fallbackThreshold = 10000
-- The number of cache entries to keep per node
cacheEntries = 10
logVerbose log = do
-- traceShowM log
return ()
logInfo log = do
-- traceShowM log
return ()
-- The result of the data flow analysis
data CFGAnalysis = CFGAnalysis {
graph :: CFGraph,
tokenToRange :: M.Map Id (Node, Node),
tokenToNodes :: M.Map Id (S.Set Node),
postDominators :: M.Map Node (S.Set Node),
nodeToData :: M.Map Node (ProgramState, ProgramState)
} deriving (Show, Generic, NFData)
-- The program state we expose externally
data ProgramState = ProgramState {
-- internalState :: InternalState, -- For debugging
variablesInScope :: M.Map String VariableState,
exitCodes :: S.Set Id,
stateIsReachable :: Bool
} deriving (Show, Eq, Generic, NFData)
internalToExternal :: InternalState -> ProgramState
internalToExternal s =
ProgramState {
-- Censor the literal value to avoid introducing dependencies on it. It's just for debugging.
variablesInScope = M.map censor flatVars,
-- internalState = s, -- For debugging
exitCodes = fromMaybe S.empty $ sExitCodes s,
stateIsReachable = fromMaybe True $ sIsReachable s
}
where
censor s = s {
variableValue = (variableValue s) {
literalValue = Nothing
}
}
flatVars = M.unionsWith (\_ last -> last) $ map mapStorage [sGlobalValues s, sLocalValues s, sPrefixValues s]
-- Conveniently get the state before a token id
getIncomingState :: CFGAnalysis -> Id -> Maybe ProgramState
getIncomingState analysis id = do
(start,end) <- M.lookup id $ tokenToRange analysis
fst <$> M.lookup start (nodeToData analysis)
-- Conveniently get the state after a token id
getOutgoingState :: CFGAnalysis -> Id -> Maybe ProgramState
getOutgoingState analysis id = do
(start,end) <- M.lookup id $ tokenToRange analysis
snd <$> M.lookup end (nodeToData analysis)
-- Conveniently determine whether one node postdominates another,
-- i.e. whether 'target' always unconditionally runs after 'base'.
doesPostDominate :: CFGAnalysis -> Id -> Id -> Bool
doesPostDominate analysis target base = fromMaybe False $ do
(_, baseEnd) <- M.lookup base $ tokenToRange analysis
(targetStart, _) <- M.lookup target $ tokenToRange analysis
postDoms <- M.lookup baseEnd $ postDominators analysis
return $ S.member targetStart postDoms
getDataForNode analysis node = M.lookup node $ nodeToData analysis
-- The current state of data flow at a point in the program, potentially as a diff
data InternalState = InternalState {
sVersion :: Integer,
sGlobalValues :: VersionedMap String VariableState,
sLocalValues :: VersionedMap String VariableState,
sPrefixValues :: VersionedMap String VariableState,
sFunctionTargets :: VersionedMap String FunctionValue,
sExitCodes :: Maybe (S.Set Id),
sIsReachable :: Maybe Bool
} deriving (Show, Generic, NFData)
newInternalState = InternalState {
sVersion = 0,
sGlobalValues = vmEmpty,
sLocalValues = vmEmpty,
sPrefixValues = vmEmpty,
sFunctionTargets = vmEmpty,
sExitCodes = Nothing,
sIsReachable = Nothing
}
unreachableState = modified newInternalState {
sIsReachable = Just False
}
-- The default state we assume we get from the environment
createEnvironmentState :: InternalState
createEnvironmentState = do
foldl' (flip ($)) newInternalState $ concat [
addVars Data.internalVariables unknownVariableState,
addVars Data.variablesWithoutSpaces spacelessVariableState,
addVars Data.specialIntegerVariables spacelessVariableState
]
where
addVars names val = map (\name -> insertGlobal name val) names
spacelessVariableState = unknownVariableState {
variableValue = VariableValue {
literalValue = Nothing,
spaceStatus = SpaceStatusClean
}
}
modified s = s { sVersion = -1 }
insertGlobal :: String -> VariableState -> InternalState -> InternalState
insertGlobal name value state = modified state {
sGlobalValues = vmInsert name value $ sGlobalValues state
}
insertLocal :: String -> VariableState -> InternalState -> InternalState
insertLocal name value state = modified state {
sLocalValues = vmInsert name value $ sLocalValues state
}
insertPrefix :: String -> VariableState -> InternalState -> InternalState
insertPrefix name value state = modified state {
sPrefixValues = vmInsert name value $ sPrefixValues state
}
insertFunction :: String -> FunctionValue -> InternalState -> InternalState
insertFunction name value state = modified state {
sFunctionTargets = vmInsert name value $ sFunctionTargets state
}
addProperties :: S.Set CFVariableProp -> VariableState -> VariableState
addProperties props state = state {
variableProperties = S.map (S.union props) $ variableProperties state
}
removeProperties :: S.Set CFVariableProp -> VariableState -> VariableState
removeProperties props state = state {
variableProperties = S.map (\s -> S.difference s props) $ variableProperties state
}
setExitCode id = setExitCodes (S.singleton id)
setExitCodes set state = modified state {
sExitCodes = Just $ set
}
-- Dependencies on values, e.g. "if there is a global variable named 'foo' without spaces"
-- This is used to see if the DFA of a function would result in the same state, so anything
-- that affects DFA must be tracked.
data StateDependency =
-- Complete variable state
DepState Scope String VariableState
-- Only variable properties (we need properties but not values for x=1)
| DepProperties Scope String VariableProperties
-- Function definition
| DepFunction String (S.Set FunctionDefinition)
-- Whether invoking the node would result in recursion (i.e., is the function on the stack?)
| DepIsRecursive Node Bool
-- The set of commands that could have provided the exit code $?
| DepExitCodes (S.Set Id)
deriving (Show, Eq, Ord, Generic, NFData)
-- A function definition, or lack thereof
data FunctionDefinition = FunctionUnknown | FunctionDefinition String Node Node
deriving (Show, Eq, Ord, Generic, NFData)
-- The Set of places a command name can point (it's a Set to handle conditionally defined functions)
type FunctionValue = S.Set FunctionDefinition
-- Create an InternalState that fulfills the given dependencies
depsToState :: S.Set StateDependency -> InternalState
depsToState set = foldl insert newInternalState $ S.toList set
where
insert :: InternalState -> StateDependency -> InternalState
insert state dep =
case dep of
DepFunction name val -> insertFunction name val state
DepState scope name val -> insertIn True scope name val state
-- State includes properties and more, so don't overwrite a state with properties
DepProperties scope name props -> insertIn False scope name unknownVariableState { variableProperties = props } state
DepIsRecursive _ _ -> state
DepExitCodes s -> setExitCodes s state
insertIn overwrite scope name val state =
let
(mapToCheck, inserter) =
case scope of
PrefixScope -> (sPrefixValues, insertPrefix)
LocalScope -> (sLocalValues, insertLocal)
GlobalScope -> (sGlobalValues, insertGlobal)
DefaultScope -> error $ pleaseReport "Unresolved scope in dependency"
alreadyExists = isJust $ vmLookup name $ mapToCheck state
in
if overwrite || not alreadyExists
then inserter name val state
else state
unknownFunctionValue = S.singleton FunctionUnknown
-- The information about the value of a single variable
data VariableValue = VariableValue {
literalValue :: Maybe String, -- TODO: For debugging. Remove me.
spaceStatus :: SpaceStatus
}
deriving (Show, Eq, Ord, Generic, NFData)
data VariableState = VariableState {
variableValue :: VariableValue,
variableProperties :: VariableProperties
}
deriving (Show, Eq, Ord, Generic, NFData)
-- Whether or not the value needs quoting (has spaces/globs), or we don't know
data SpaceStatus = SpaceStatusEmpty | SpaceStatusClean | SpaceStatusDirty deriving (Show, Eq, Ord, Generic, NFData)
-- The set of possible sets of properties for this variable
type VariableProperties = S.Set (S.Set CFVariableProp)
defaultProperties = S.singleton S.empty
unknownVariableState = VariableState {
variableValue = unknownVariableValue,
variableProperties = defaultProperties
}
unknownVariableValue = VariableValue {
literalValue = Nothing,
spaceStatus = SpaceStatusDirty
}
emptyVariableValue = unknownVariableValue {
literalValue = Just "",
spaceStatus = SpaceStatusEmpty
}
unsetVariableState = VariableState {
variableValue = emptyVariableValue,
variableProperties = defaultProperties
}
mergeVariableState a b = VariableState {
variableValue = mergeVariableValue (variableValue a) (variableValue b),
variableProperties = S.union (variableProperties a) (variableProperties b)
}
mergeVariableValue a b = VariableValue {
literalValue = if literalValue a == literalValue b then literalValue a else Nothing,
spaceStatus = mergeSpaceStatus (spaceStatus a) (spaceStatus b)
}
mergeSpaceStatus a b =
case (a,b) of
(SpaceStatusEmpty, y) -> y
(x, SpaceStatusEmpty) -> x
(SpaceStatusClean, SpaceStatusClean) -> SpaceStatusClean
_ -> SpaceStatusDirty
-- A VersionedMap is a Map that keeps an additional integer version to quickly determine if it has changed.
-- * Version -1 means it's unknown (possibly and presumably changed)
-- * Version 0 means it's empty
-- * Version N means it's equal to any other map with Version N (this is required but not enforced)
data VersionedMap k v = VersionedMap {
mapVersion :: Integer,
mapStorage :: M.Map k v
}
deriving (Generic, NFData)
-- This makes states more readable but inhibits copy-paste
instance (Show k, Show v) => Show (VersionedMap k v) where
show m = (if mapVersion m >= 0 then "V" ++ show (mapVersion m) else "U") ++ " " ++ show (mapStorage m)
instance Eq InternalState where
(==) a b = stateIsQuickEqual a b || stateIsSlowEqual a b
instance (Eq k, Eq v) => Eq (VersionedMap k v) where
(==) a b = vmIsQuickEqual a b || mapStorage a == mapStorage b
instance (Ord k, Ord v) => Ord (VersionedMap k v) where
compare a b =
if vmIsQuickEqual a b
then EQ
else mapStorage a `compare` mapStorage b
-- A context with STRefs manually passed around to function.
-- This is done because it was dramatically much faster than any RWS type stack
data Ctx s = Ctx {
-- The current node
cNode :: STRef s Node,
-- The current input state
cInput :: STRef s InternalState,
-- The current output state
cOutput :: STRef s InternalState,
-- The current functions/subshells stack
cStack :: [StackEntry s],
-- The input graph
cGraph :: CFGraph,
-- An incrementing counter to version maps
cCounter :: STRef s Integer,
-- A cache of input state dependencies to output effects
cCache :: STRef s (M.Map Node [(S.Set StateDependency, InternalState)]),
-- Whether the cache is enabled (see fallbackThreshold)
cEnableCache :: STRef s Bool,
-- The states resulting from data flows per invocation path
cInvocations :: STRef s (M.Map [Node] (S.Set StateDependency, M.Map Node (InternalState, InternalState)))
}
-- Whenever a function (or subshell) is invoked, a value like this is pushed onto the stack
data StackEntry s = StackEntry {
-- The entry point of this stack entry for the purpose of detecting recursion
entryPoint :: Node,
-- Whether this is a function call (as opposed to a subshell)
isFunctionCall :: Bool,
-- The node where this entry point was invoked
callSite :: Node,
-- A mutable set of dependencies we fetched from here or higher in the stack
dependencies :: STRef s (S.Set StateDependency),
-- The original input state for this stack entry
stackState :: InternalState
}
deriving (Eq, Generic, NFData)
-- Overwrite a base state with the contents of a diff state
-- This is unrelated to join/merge.
patchState :: InternalState -> InternalState -> InternalState
patchState base diff =
case () of
_ | sVersion diff == 0 -> base
_ | sVersion base == 0 -> diff
_ | stateIsQuickEqual base diff -> diff
_ ->
InternalState {
sVersion = -1,
sGlobalValues = vmPatch (sGlobalValues base) (sGlobalValues diff),
sLocalValues = vmPatch (sLocalValues base) (sLocalValues diff),
sPrefixValues = vmPatch (sPrefixValues base) (sPrefixValues diff),
sFunctionTargets = vmPatch (sFunctionTargets base) (sFunctionTargets diff),
sExitCodes = sExitCodes diff `mplus` sExitCodes base,
sIsReachable = sIsReachable diff `mplus` sIsReachable base
}
patchOutputM ctx diff = do
let cOut = cOutput ctx
oldState <- readSTRef cOut
let newState = patchState oldState diff
writeSTRef cOut newState
-- Merge (aka Join) two states. This is monadic because it requires looking up
-- values from the current context. For example:
--
-- f() {
-- foo || x=2
-- HERE # This merge requires looking up the value of $x in the parent frame
-- }
-- x=1
-- f
mergeState :: forall s. Ctx s -> InternalState -> InternalState -> ST s InternalState
mergeState ctx a b = do
-- Kludge: we want `readVariable` & friends not to read from an intermediate state,
-- so temporarily set a blank input.
let cin = cInput ctx
old <- readSTRef cin
writeSTRef cin newInternalState
x <- merge a b
writeSTRef cin old
return x
where
merge a b =
case () of
_ | sIsReachable a == Just True && sIsReachable b == Just False
|| sIsReachable a == Just False && sIsReachable b == Just True ->
error $ pleaseReport "Unexpected merge of reachable and unreachable state"
_ | sIsReachable a == Just False && sIsReachable b == Just False ->
return unreachableState
_ | sVersion a >= 0 && sVersion b >= 0 && sVersion a == sVersion b -> return a
_ -> do
globals <- mergeMaps ctx mergeVariableState readGlobal (sGlobalValues a) (sGlobalValues b)
locals <- mergeMaps ctx mergeVariableState readVariable (sLocalValues a) (sLocalValues b)
prefix <- mergeMaps ctx mergeVariableState readVariable (sPrefixValues a) (sPrefixValues b)
funcs <- mergeMaps ctx S.union readFunction (sFunctionTargets a) (sFunctionTargets b)
exitCodes <- mergeMaybes ctx S.union readExitCodes (sExitCodes a) (sExitCodes b)
return $ InternalState {
sVersion = -1,
sGlobalValues = globals,
sLocalValues = locals,
sPrefixValues = prefix,
sFunctionTargets = funcs,
sExitCodes = exitCodes,
sIsReachable = liftM2 (&&) (sIsReachable a) (sIsReachable b)
}
-- Merge a number of states, or return a default if there are no states
-- (it can't fold from newInternalState because this would be equivalent of adding a new input edge).
mergeStates :: forall s. Ctx s -> InternalState -> [InternalState] -> ST s InternalState
mergeStates ctx def list =
case list of
[] -> return def
(first:rest) -> foldM (mergeState ctx) first rest
-- Merge two maps, key by key. If both maps have a key, the 'merger' is used.
-- If only one has the key, the 'reader' is used to fetch a second, and the two are merged as above.
mergeMaps :: (Ord k) => forall s.
Ctx s ->
(v -> v -> v) ->
(Ctx s -> k -> ST s v) ->
(VersionedMap k v) ->
(VersionedMap k v) ->
ST s (VersionedMap k v)
mergeMaps ctx merger reader a b =
if vmIsQuickEqual a b
then return a
else do
new <- M.fromDistinctAscList <$> reverse <$> f [] (M.toAscList $ mapStorage a) (M.toAscList $ mapStorage b)
vmFromMap ctx new
where
f l [] [] = return l
f l [] b = f l b []
f l ((k,v):rest1) [] = do
other <- reader ctx k
f ((k, merger v other):l) rest1 []
f l l1@((k1, v1):rest1) l2@((k2, v2):rest2) =
case k1 `compare` k2 of
EQ ->
f ((k1, merger v1 v2):l) rest1 rest2
LT -> do
nv2 <- reader ctx k1
f ((k1, merger v1 nv2):l) rest1 l2
GT -> do
nv1 <- reader ctx k2
f ((k2, merger nv1 v2):l) l1 rest2
-- Merge two Maybes, like mergeMaps for a single element
mergeMaybes ctx merger reader a b =
case (a, b) of
(Nothing, Nothing) -> return Nothing
(Just v1, Nothing) -> single v1
(Nothing, Just v2) -> single v2
(Just v1, Just v2) -> return $ Just $ merger v1 v2
where
single val = do
result <- merger val <$> reader ctx
return $ Just result
vmFromMap ctx map = return $ VersionedMap {
mapVersion = -1,
mapStorage = map
}
-- Give a VersionedMap a version if it does not already have one.
versionMap ctx map =
if mapVersion map >= 0
then return map
else do
v <- nextVersion ctx
return map {
mapVersion = v
}
-- Give an InternalState a version if it does not already have one.
versionState ctx state =
if sVersion state >= 0
then return state
else do
self <- nextVersion ctx
ssGlobalValues <- versionMap ctx $ sGlobalValues state
ssLocalValues <- versionMap ctx $ sLocalValues state
ssFunctionTargets <- versionMap ctx $ sFunctionTargets state
return state {
sVersion = self,
sGlobalValues = ssGlobalValues,
sLocalValues = ssLocalValues,
sFunctionTargets = ssFunctionTargets
}
-- Like 'not null' but for 2+ elements
is2plus :: [a] -> Bool
is2plus l = case l of
_:_:_ -> True
_ -> False
-- Use versions to see if two states are trivially identical
stateIsQuickEqual a b =
let
va = sVersion a
vb = sVersion b
in
va >= 0 && vb >= 0 && va == vb
-- A manual slow path 'Eq' (it's not derived because it's part of the custom Eq instance)
stateIsSlowEqual a b =
check sGlobalValues
&& check sLocalValues
&& check sPrefixValues
&& check sFunctionTargets
&& check sIsReachable
where
check f = f a == f b
-- Check if two VersionedMaps are trivially equal
vmIsQuickEqual :: VersionedMap k v -> VersionedMap k v -> Bool
vmIsQuickEqual a b =
let
va = mapVersion a
vb = mapVersion b
in
va >= 0 && vb >= 0 && va == vb
-- A new, empty VersionedMap
vmEmpty = VersionedMap {
mapVersion = 0,
mapStorage = M.empty
}
-- Map.null for VersionedMaps
vmNull :: VersionedMap k v -> Bool
vmNull m = mapVersion m == 0 || (M.null $ mapStorage m)
-- Map.lookup for VersionedMaps
vmLookup name map = M.lookup name $ mapStorage map
-- Map.insert for VersionedMaps
vmInsert key val map = VersionedMap {
mapVersion = -1,
mapStorage = M.insert key val $ mapStorage map
}
-- Overwrite all keys in the first map with values from the second
vmPatch :: (Ord k) => VersionedMap k v -> VersionedMap k v -> VersionedMap k v
vmPatch base diff =
case () of
_ | mapVersion base == 0 -> diff
_ | mapVersion diff == 0 -> base
_ | vmIsQuickEqual base diff -> diff
_ -> VersionedMap {
mapVersion = -1,
mapStorage = M.unionWith (flip const) (mapStorage base) (mapStorage diff)
}
-- Set a variable. This includes properties. Applies it to the appropriate scope.
writeVariable :: forall s. Ctx s -> String -> VariableState -> ST s ()
writeVariable ctx name val = do
typ <- readVariableScope ctx name
case typ of
GlobalScope -> writeGlobal ctx name val
LocalScope -> writeLocal ctx name val
-- Prefixed variables actually become local variables in the invoked function
PrefixScope -> writeLocal ctx name val
writeGlobal ctx name val = do
modifySTRef (cOutput ctx) $ insertGlobal name val
writeLocal ctx name val = do
modifySTRef (cOutput ctx) $ insertLocal name val
writePrefix ctx name val = do
modifySTRef (cOutput ctx) $ insertPrefix name val
updateVariableValue ctx name val = do
(props, scope) <- readVariablePropertiesWithScope ctx name
let f = case scope of
GlobalScope -> writeGlobal
LocalScope -> writeLocal
PrefixScope -> writeLocal -- Updates become local
f ctx name $ VariableState { variableValue = val, variableProperties = props }
updateGlobalValue ctx name val = do
props <- readGlobalProperties ctx name
writeGlobal ctx name VariableState { variableValue = val, variableProperties = props }
updateLocalValue ctx name val = do
props <- readLocalProperties ctx name
writeLocal ctx name VariableState { variableValue = val, variableProperties = props }
updatePrefixValue ctx name val = do
-- Prefix variables don't inherit properties
writePrefix ctx name VariableState { variableValue = val, variableProperties = defaultProperties }
-- Look up a variable value, and also return its scope
readVariableWithScope :: forall s. Ctx s -> String -> ST s (VariableState, Scope)
readVariableWithScope ctx name = lookupStack get dep def ctx name
where
def = (unknownVariableState, GlobalScope)
get = getVariableWithScope
dep k (val, scope) = DepState scope k val
-- Look up the variable's properties. This can be done independently to avoid incurring a dependency on the value.
readVariablePropertiesWithScope :: forall s. Ctx s -> String -> ST s (VariableProperties, Scope)
readVariablePropertiesWithScope ctx name = lookupStack get dep def ctx name
where
def = (defaultProperties, GlobalScope)
get s k = do
(val, scope) <- getVariableWithScope s k
return (variableProperties val, scope)
dep k (val, scope) = DepProperties scope k val
readVariableScope ctx name = snd <$> readVariablePropertiesWithScope ctx name
getVariableWithScope :: InternalState -> String -> Maybe (VariableState, Scope)
getVariableWithScope s name =
case (vmLookup name $ sPrefixValues s, vmLookup name $ sLocalValues s, vmLookup name $ sGlobalValues s) of
(Just var, _, _) -> return (var, PrefixScope)
(_, Just var, _) -> return (var, LocalScope)
(_, _, Just var) -> return (var, GlobalScope)
_ -> Nothing
undefineFunction ctx name =
writeFunction ctx name $ FunctionUnknown
undefineVariable ctx name =
writeVariable ctx name $ unsetVariableState
readVariable ctx name = fst <$> readVariableWithScope ctx name
readVariableProperties ctx name = fst <$> readVariablePropertiesWithScope ctx name
readGlobal ctx name = lookupStack get dep def ctx name
where
def = unknownVariableState -- could come from the environment
get s name = vmLookup name $ sGlobalValues s
dep k v = DepState GlobalScope k v
readGlobalProperties ctx name = lookupStack get dep def ctx name
where
def = defaultProperties
get s name = variableProperties <$> (vmLookup name $ sGlobalValues s)
-- This dependency will fail to match if it's shadowed by a local variable,
-- such as in x=1; f() { local -i x; declare -ag x; } because we'll look at
-- x and find it to be local and not global. FIXME?
dep k v = DepProperties GlobalScope k v
readLocal ctx name = lookupStackUntilFunction get dep def ctx name
where
def = unsetVariableState -- can't come from the environment
get s name = vmLookup name $ sLocalValues s
dep k v = DepState LocalScope k v
-- We only want to look up the local properties of the current function,
-- though preferably even if we're in a subshell. FIXME?
readLocalProperties ctx name = fst <$> lookupStackUntilFunction get dep def ctx name
where
def = (defaultProperties, LocalScope)
with tag f = do
val <- variableProperties <$> f
return (val, tag)
get s name = (with LocalScope $ vmLookup name $ sLocalValues s) `mplus` (with PrefixScope $ vmLookup name $ sPrefixValues s)
dep k (val, scope) = DepProperties scope k val
readFunction ctx name = lookupStack get dep def ctx name
where
def = unknownFunctionValue
get s name = vmLookup name $ sFunctionTargets s
dep k v = DepFunction k v
writeFunction ctx name val = do
modifySTRef (cOutput ctx) $ insertFunction name $ S.singleton val
readExitCodes ctx = lookupStack get dep def ctx ()
where
get s () = sExitCodes s
def = S.empty
dep () v = DepExitCodes v
-- Look up each state on the stack until a value is found (or the default is used),
-- then add this value as a StateDependency.
lookupStack' :: forall s k v.
-- Whether to stop at function boundaries
Bool
-- A function that maybe finds a value from a state
-> (InternalState -> k -> Maybe v)
-- A function that creates a dependency on what was found
-> (k -> v -> StateDependency)
-- A default value, if the value can't be found anywhere
-> v
-- Context
-> Ctx s
-- The key to look up
-> k
-- Returning the result
-> ST s v
lookupStack' functionOnly get dep def ctx key = do
top <- readSTRef $ cInput ctx
case get top key of
Just v -> return v
Nothing -> f (cStack ctx)
where
f [] = return def
f (s:_) | functionOnly && isFunctionCall s = return def
f (s:rest) = do
-- Go up the stack until we find the value, and add
-- a dependency on each state (including where it was found)
res <- fromMaybe (f rest) (return <$> get (stackState s) key)
modifySTRef (dependencies s) $ S.insert $ dep key res
return res
lookupStack = lookupStack' False
lookupStackUntilFunction = lookupStack' True
-- Like lookupStack but without adding dependencies
peekStack get def ctx key = do
top <- readSTRef $ cInput ctx
case get top key of
Just v -> return v
Nothing -> f (cStack ctx)
where
f [] = return def
f (s:rest) =
case get (stackState s) key of
Just v -> return v
Nothing -> f rest
-- Check if the current context fulfills a StateDependency if entering `entry`
fulfillsDependency ctx entry dep =
case dep of
DepState scope name val -> (== (val, scope)) <$> peek scope ctx name
DepProperties scope name props -> do
(state, s) <- peek scope ctx name
return $ scope == s && variableProperties state == props
DepFunction name val -> (== val) <$> peekFunc ctx name
-- Hack. Since we haven't pushed the soon-to-be invoked function on the stack,
-- it won't be found by the normal check.
DepIsRecursive node val | node == entry -> return True
DepIsRecursive node val -> return $ val == any (\f -> entryPoint f == node) (cStack ctx)
DepExitCodes val -> (== val) <$> peekStack (\s k -> sExitCodes s) S.empty ctx ()
-- _ -> error $ "Unknown dep " ++ show dep
where
peek scope = peekStack getVariableWithScope $ if scope == GlobalScope then (unknownVariableState, GlobalScope) else (unsetVariableState, LocalScope)
peekFunc = peekStack (\state name -> vmLookup name $ sFunctionTargets state) unknownFunctionValue
-- Check if the current context fulfills all StateDependencies
fulfillsDependencies ctx entry deps =
f $ S.toList deps
where
f [] = return True
f (dep:rest) = do
res <- fulfillsDependency ctx entry dep
if res
then f rest
else return False
-- Create a brand new Ctx given a Control Flow Graph (CFG)
newCtx g = do
c <- newSTRef 1
input <- newSTRef undefined
output <- newSTRef undefined
node <- newSTRef undefined
cache <- newSTRef M.empty
enableCache <- newSTRef True
invocations <- newSTRef M.empty
return $ Ctx {
cCounter = c,
cInput = input,
cOutput = output,
cNode = node,
cCache = cache,
cEnableCache = enableCache,
cStack = [],
cInvocations = invocations,
cGraph = g
}
-- The next incrementing version for VersionedMaps
nextVersion ctx = do
let ctr = cCounter ctx
n <- readSTRef ctr
writeSTRef ctr $! n+1
return n
-- Create a new StackEntry
newStackEntry ctx point isCall = do
deps <- newSTRef S.empty
state <- readSTRef $ cOutput ctx
callsite <- readSTRef $ cNode ctx
return $ StackEntry {
entryPoint = point,
isFunctionCall = isCall,
callSite = callsite,
dependencies = deps,
stackState = state
}
-- Call a function with a new stack entry on the stack
withNewStackFrame ctx node isCall f = do
newEntry <- newStackEntry ctx node isCall
newInput <- newSTRef newInternalState
newOutput <- newSTRef newInternalState
newNode <- newSTRef node
let newCtx = ctx {
cInput = newInput,
cOutput = newOutput,
cNode = newNode,
cStack = newEntry : cStack ctx
}
x <- f newCtx
{-
deps <- readSTRef $ dependencies newEntry
selfcheck <- fulfillsDependencies newCtx deps
unless selfcheck $ error $ pleaseReport $ "Unmet stack dependencies on " ++ show (node, deps)
-}
return (x, newEntry)
-- Check if invoking this function would be a recursive loop
-- (i.e. we already have the function on the stack)
wouldBeRecursive ctx node = f (cStack ctx)
where
f [] = return False
f (s:rest) = do
res <-
if entryPoint s == node
then return True
else f rest
modifySTRef (dependencies s) $ S.insert $ DepIsRecursive node res
return res
-- The main DFA 'transfer' function, applying the effects of a node to the output state
transfer ctx label =
--traceShow ("Transferring", label) $
case label of
CFStructuralNode -> return ()
CFEntryPoint _ -> return ()
CFImpliedExit -> return ()
CFResolvedExit {} -> return ()
CFExecuteCommand cmd -> transferCommand ctx cmd
CFExecuteSubshell reason entry exit -> transferSubshell ctx reason entry exit
CFApplyEffects effects -> mapM_ (\(IdTagged _ f) -> transferEffect ctx f) effects
CFSetExitCode id -> transferExitCode ctx id
CFUnresolvedExit -> patchOutputM ctx unreachableState
CFUnreachable -> patchOutputM ctx unreachableState
-- TODO
CFSetBackgroundPid _ -> return ()
CFDropPrefixAssignments {} ->
modifySTRef (cOutput ctx) $ \c -> modified c { sPrefixValues = vmEmpty }
-- _ -> error $ "Unknown " ++ show label
-- Transfer the effects of a subshell invocation. This is similar to a function call
-- to allow easily discarding the effects (otherwise the InternalState would have
-- to represent subshell depth, while this way it can simply use the function stack).
transferSubshell ctx reason entry exit = do
let cout = cOutput ctx
initial <- readSTRef cout
runCached ctx entry (f entry exit)
res <- readSTRef cout
-- Clear subshell changes. TODO: track this to warn about modifications.
writeSTRef cout $ initial {
sExitCodes = sExitCodes res
}
where
f entry exit ctx = do
(states, frame) <- withNewStackFrame ctx entry False (flip dataflow $ entry)
let (_, res) = fromMaybe (error $ pleaseReport "Subshell has no exit") $ M.lookup exit states
deps <- readSTRef $ dependencies frame
registerFlowResult ctx entry states deps
return (deps, res)
-- Transfer the effects of executing a command, i.e. the merged union of all possible function definitions.
transferCommand ctx Nothing = return ()
transferCommand ctx (Just name) = do
targets <- readFunction ctx name
logVerbose ("Transferring ",name,targets)
transferMultiple ctx $ map (flip transferFunctionValue) $ S.toList targets
-- Transfer a set of function definitions and merge the output states.
transferMultiple ctx funcs = do
logVerbose ("Transferring set of ", length funcs)
original <- readSTRef out
branches <- mapM (apply ctx original) funcs
merged <- mergeStates ctx original branches
let patched = patchState original merged
writeSTRef out patched
where
out = cOutput ctx
apply ctx original f = do
writeSTRef out original
f ctx
readSTRef out
-- Transfer the effects of a single function definition.
transferFunctionValue ctx funcVal =
case funcVal of
FunctionUnknown -> return ()
FunctionDefinition name entry exit -> do
isRecursive <- wouldBeRecursive ctx entry
if isRecursive
then return () -- TODO: Find a better strategy for recursion
else runCached ctx entry (f name entry exit)
where
f name entry exit ctx = do
(states, frame) <- withNewStackFrame ctx entry True (flip dataflow $ entry)
deps <- readSTRef $ dependencies frame
let res =
case M.lookup exit states of
Just (input, output) -> do
-- Discard local variables. TODO: track&retain variables declared local in previous scopes?
modified output { sLocalValues = vmEmpty }
Nothing -> do
-- e.g. f() { exit; }
unreachableState
registerFlowResult ctx entry states deps
return (deps, res)
transferExitCode ctx id = do
modifySTRef (cOutput ctx) $ setExitCode id
-- Register/save the result of a dataflow of a function.
-- At the end, all the different values from different flows are merged together.
registerFlowResult ctx entry states deps = do
-- This function is called in the context of a CFExecuteCommand and not its invoked function,
-- so manually add the current node to the stack.
current <- readSTRef $ cNode ctx
let parents = map callSite $ cStack ctx
-- A unique path to this flow context. The specific value doesn't matter, as long as it's
-- unique per invocation of the function. This is required so that 'x=1; f; x=2; f' won't
-- overwrite each other.
let path = entry : current : parents
modifySTRef (cInvocations ctx) $ M.insert path (deps, states)
-- Look up a node in the cache and see if the dependencies of any entries are matched.
-- In that case, reuse the previous result instead of doing a new data flow.
runCached :: forall s. Ctx s -> Node -> (Ctx s -> ST s (S.Set StateDependency, InternalState)) -> ST s ()
runCached ctx node f = do
cache <- getCache ctx node
case cache of
Just v -> do
logInfo ("Running cached", node)
-- do { (deps, diff) <- f ctx; unless (v == diff) $ traceShowM ("Cache FAILED to match actual result", node, deps, diff); }
patchOutputM ctx v
Nothing -> do
logInfo ("Cache failed", node)
(deps, diff) <- f ctx
modifySTRef (cCache ctx) (M.insertWith (\_ old -> (deps, diff):(take cacheEntries old)) node [(deps,diff)])
logVerbose ("Recomputed cache for", node, deps)
-- do { f <- fulfillsDependencies ctx node deps; unless (f) $ traceShowM ("New dependencies FAILED to match", node, deps); }
patchOutputM ctx diff
-- Get a cached version whose dependencies are currently fulfilled, if any.
getCache :: forall s. Ctx s -> Node -> ST s (Maybe InternalState)
getCache ctx node = do
cache <- readSTRef $ cCache ctx
enable <- readSTRef $ cEnableCache ctx
logVerbose ("Cache for", node, "length", length $ M.findWithDefault [] node cache, M.lookup node cache)
if enable
then f $ M.findWithDefault [] node cache
else return Nothing
where
f [] = return Nothing
f ((deps, value):rest) = do
match <- fulfillsDependencies ctx node deps
if match
then return $ Just value
else f rest
-- Transfer a single CFEffect to the output state.
transferEffect ctx effect =
case effect of
CFReadVariable name ->
case name of
"?" -> void $ readExitCodes ctx
_ -> void $ readVariable ctx name
CFWriteVariable name value -> do
val <- cfValueToVariableValue ctx value
updateVariableValue ctx name val
CFWriteGlobal name value -> do
val <- cfValueToVariableValue ctx value
updateGlobalValue ctx name val
CFWriteLocal name value -> do
val <- cfValueToVariableValue ctx value
updateLocalValue ctx name val
CFWritePrefix name value -> do
val <- cfValueToVariableValue ctx value
updatePrefixValue ctx name val
CFSetProps scope name props ->
case scope of
DefaultScope -> do
state <- readVariable ctx name
writeVariable ctx name $ addProperties props state
GlobalScope -> do
state <- readGlobal ctx name
writeGlobal ctx name $ addProperties props state
LocalScope -> do
out <- readSTRef (cOutput ctx)
state <- readLocal ctx name
writeLocal ctx name $ addProperties props state
PrefixScope -> do
-- Prefix values become local
state <- readLocal ctx name
writeLocal ctx name $ addProperties props state
CFUnsetProps scope name props ->
case scope of
DefaultScope -> do
state <- readVariable ctx name
writeVariable ctx name $ removeProperties props state
GlobalScope -> do
state <- readGlobal ctx name
writeGlobal ctx name $ removeProperties props state
LocalScope -> do
out <- readSTRef (cOutput ctx)
state <- readLocal ctx name
writeLocal ctx name $ removeProperties props state
PrefixScope -> do
-- Prefix values become local
state <- readLocal ctx name
writeLocal ctx name $ removeProperties props state
CFUndefineVariable name -> undefineVariable ctx name
CFUndefineFunction name -> undefineFunction ctx name
CFUndefine name -> do
-- This should really just unset one or the other
undefineVariable ctx name
undefineFunction ctx name
CFDefineFunction name id entry exit ->
writeFunction ctx name $ FunctionDefinition name entry exit
-- TODO
CFUndefineNameref name -> undefineVariable ctx name
CFHintArray name -> return ()
CFHintDefined name -> return ()
-- _ -> error $ "Unknown effect " ++ show effect
-- Transfer the CFG's idea of a value into our VariableState
cfValueToVariableValue ctx val =
case val of
CFValueArray -> return unknownVariableValue -- TODO: Track array status
CFValueComputed _ parts -> foldM f emptyVariableValue parts
CFValueInteger -> return unknownIntegerValue
CFValueString -> return unknownVariableValue
CFValueUninitialized -> return emptyVariableValue
-- _ -> error $ "Unknown value: " ++ show val
where
f val part = do
next <- computeValue ctx part
return $ val `appendVariableValue` next
-- A value can be computed from 0 or more parts, such as x="literal$y$z"
computeValue ctx part =
case part of
CFStringLiteral str -> return $ literalToVariableValue str
CFStringInteger -> return unknownIntegerValue
CFStringUnknown -> return unknownVariableValue
CFStringVariable name -> variableStateToValue <$> readVariable ctx name
where
variableStateToValue state =
case () of
_ | all (CFVPInteger `S.member`) $ variableProperties state -> unknownIntegerValue
_ -> variableValue state
-- Append two VariableValues as if with z="$x$y"
appendVariableValue :: VariableValue -> VariableValue -> VariableValue
appendVariableValue a b =
unknownVariableValue {
literalValue = liftM2 (++) (literalValue a) (literalValue b),
spaceStatus = appendSpaceStatus (spaceStatus a) (spaceStatus b)
}
appendSpaceStatus a b =
case (a,b) of
(SpaceStatusEmpty, _) -> b
(_, SpaceStatusEmpty) -> a
(SpaceStatusClean, SpaceStatusClean) -> a
_ ->SpaceStatusDirty
unknownIntegerValue = unknownVariableValue {
literalValue = Nothing,
spaceStatus = SpaceStatusClean
}
literalToVariableValue str = unknownVariableValue {
literalValue = Just str,
spaceStatus = literalToSpaceStatus str
}
withoutChanges ctx f = do
let inp = cInput ctx
let out = cOutput ctx
prevInput <- readSTRef inp
prevOutput <- readSTRef out
res <- f
writeSTRef inp prevInput
writeSTRef out prevOutput
return res
-- Get the SpaceStatus for a literal string, i.e. if it needs quoting
literalToSpaceStatus str =
case str of
"" -> SpaceStatusEmpty
_ | all (`notElem` " \t\n*?[") str -> SpaceStatusClean
_ -> SpaceStatusDirty
type StateMap = M.Map Node (InternalState, InternalState)
-- Classic, iterative Data Flow Analysis. See Wikipedia for a description of the process.
dataflow :: forall s. Ctx s -> Node -> ST s StateMap
dataflow ctx entry = do
pending <- newSTRef $ S.singleton entry
states <- newSTRef $ M.empty
-- Should probably be done via a stack frame instead
withoutChanges ctx $
f iterationCount pending states
readSTRef states
where
graph = cGraph ctx
f 0 _ _ = error $ pleaseReport "DFA did not reach fix point"
f n pending states = do
ps <- readSTRef pending
when (n == fallbackThreshold) $ do
-- This should never happen, but has historically been due to caching bugs.
-- Try disabling the cache and continuing.
logInfo "DFA is not stabilizing! Disabling cache."
writeSTRef (cEnableCache ctx) False
if S.null ps
then return ()
else do
let (next, rest) = S.deleteFindMin ps
nexts <- process states next
writeSTRef pending $ foldl (flip S.insert) rest nexts
f (n-1) pending states
process states node = do
stateMap <- readSTRef states
let inputs = filter (\c -> sIsReachable c /= Just False) $ mapMaybe (\c -> fmap snd $ M.lookup c stateMap) incoming
input <-
case incoming of
[] -> return newInternalState
_ ->
case inputs of
[] -> return unreachableState
(x:rest) -> foldM (mergeState ctx) x rest
writeSTRef (cInput ctx) $ input
writeSTRef (cOutput ctx) $ input
writeSTRef (cNode ctx) $ node
transfer ctx label
newOutput <- readSTRef $ cOutput ctx
result <-
if is2plus outgoing
then
-- Version the state because we split and will probably merge later
versionState ctx newOutput
else return newOutput
writeSTRef states $ M.insert node (input, result) stateMap
case M.lookup node stateMap of
Nothing -> return outgoing
Just (oldInput, oldOutput) ->
if oldOutput == result
then return []
else return outgoing
where
(incomingL, _, label, outgoingL) = context graph $ node
incoming = map snd $ filter isRegular $ incomingL
outgoing = map snd outgoingL
isRegular = ((== CFEFlow) . fst)
runRoot ctx entry exit = do
let env = createEnvironmentState
writeSTRef (cInput ctx) $ env
writeSTRef (cOutput ctx) $ env
writeSTRef (cNode ctx) $ entry
(states, frame) <- withNewStackFrame ctx entry False $ \c -> dataflow c entry
deps <- readSTRef $ dependencies frame
registerFlowResult ctx entry states deps
-- Return the final state, used to invoke functions that were declared but not invoked
return $ snd $ fromMaybe (error $ pleaseReport "Missing exit state") $ M.lookup exit states
analyzeControlFlow :: CFGParameters -> Token -> CFGAnalysis
analyzeControlFlow params t =
let
cfg = buildGraph params t
(entry, exit) = M.findWithDefault (error $ pleaseReport "Missing root") (getId t) (cfIdToRange cfg)
in
runST $ f cfg entry exit
where
f cfg entry exit = do
ctx <- newCtx $ cfGraph cfg
-- Do a dataflow analysis starting on the root node
exitState <- runRoot ctx entry exit
-- All nodes we've touched
invocations <- readSTRef $ cInvocations ctx
let invokedNodes = M.fromDistinctAscList $ map (\c -> (c, ())) $ S.toList $ M.keysSet $ groupByNode $ M.map snd invocations
-- Invoke all functions that were declared but not invoked
-- This is so that we still get warnings for dead code
-- (it's probably not actually dead, just used by a script that sources ours)
let declaredFunctions = getFunctionTargets exitState
let uninvoked = M.difference declaredFunctions invokedNodes
let stragglerInput =
exitState {
-- We don't want `die() { exit $?; }; echo "Sourced"` to assume $? is always echo
sExitCodes = Nothing
}
analyzeStragglers ctx stragglerInput uninvoked
-- Now round up all the states from all data flows
-- (FIXME: this excludes functions that were defined in straggling functions)
invocations <- readSTRef $ cInvocations ctx
invokedStates <- flattenByNode ctx $ groupByNode $ M.map addDeps invocations
-- Fill in the map with unreachable states for anything we didn't get to
let baseStates = M.fromDistinctAscList $ map (\c -> (c, (unreachableState, unreachableState))) $ uncurry enumFromTo $ nodeRange $ cfGraph cfg
let allStates = M.unionWith (flip const) baseStates invokedStates
-- Convert to external states
let nodeToData = M.map (\(a,b) -> (internalToExternal a, internalToExternal b)) allStates
return $ nodeToData `deepseq` CFGAnalysis {
graph = cfGraph cfg,
tokenToRange = cfIdToRange cfg,
tokenToNodes = cfIdToNodes cfg,
nodeToData = nodeToData,
postDominators = cfPostDominators cfg
}
-- Include the dependencies in the state of each function, e.g. if it depends on `x=foo` then add that.
addDeps :: (S.Set StateDependency, M.Map Node (InternalState, InternalState)) -> M.Map Node (InternalState, InternalState)
addDeps (deps, m) = let base = depsToState deps in M.map (\(a,b) -> (base `patchState` a, base `patchState` b)) m
-- Collect all the states that each node has resulted in.
groupByNode :: forall k v. M.Map k (M.Map Node v) -> M.Map Node [v]
groupByNode pathMap = M.fromListWith (++) $ map (\(k,v) -> (k,[v])) $ concatMap M.toList $ M.elems pathMap
-- Merge all the pre/post states for each node. This would have been a foldM if Map had one.
flattenByNode ctx m = M.fromDistinctAscList <$> (mapM (mergePair ctx) $ M.toList m)
mergeAllStates ctx pairs =
let
(pres, posts) = unzip pairs
in do
pre <- mergeStates ctx (error $ pleaseReport "Null node states") pres
post <- mergeStates ctx (error $ pleaseReport "Null node states") posts
return (pre, post)
mergePair ctx (node, list) = do
merged <- mergeAllStates ctx list
return (node, merged)
-- Get the all the functions defined in an InternalState
getFunctionTargets :: InternalState -> M.Map Node FunctionDefinition
getFunctionTargets state =
let
declaredFuncs = S.unions $ mapStorage $ sFunctionTargets state
getFunc d =
case d of
FunctionDefinition _ entry _ -> Just (entry, d)
_ -> Nothing
funcs = mapMaybe getFunc $ S.toList declaredFuncs
in
M.fromList funcs
analyzeStragglers ctx state stragglers = do
mapM_ f $ M.elems stragglers
where
f def@(FunctionDefinition name entry exit) = do
writeSTRef (cInput ctx) state
writeSTRef (cOutput ctx) state
writeSTRef (cNode ctx) entry
transferFunctionValue ctx def
return []
runTests = $quickCheckAll