{-
    Copyright 2022 Vidar Holen

    This file is part of ShellCheck.
    https://www.shellcheck.net

    ShellCheck is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    ShellCheck is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE DeriveAnyClass, DeriveGeneric #-}
{-# LANGUAGE CPP #-}

{-
    Data Flow Analysis on a Control Flow Graph.

    This module implements a pretty standard iterative Data Flow Analysis.
    For an overview of the process, see Wikipedia.

    Since shell scripts rely heavily on global variables, this DFA includes
    tracking the value of globals across calls. Each function invocation is
    treated as a separate DFA problem, and a caching mechanism (hopefully)
    avoids any exponential explosions.

    To do efficient DFA join operations (or merges, as the code calls them),
    some of the data structures have an integer version attached. On update,
    the version is changed. If two states have the same version number,
    a merge is skipped on the grounds that they are identical. It is easy
    to unintentionally forget to update/invalidate the version number,
    and bugs will ensure.

    For performance reasons, the entire code runs in plain ST, with a manual
    context object Ctx being passed around. It relies heavily on mutable
    STRefs. However, this turned out to be literally thousands of times faster
    than my several attempts using RWST, so it can't be helped.
-}

module ShellCheck.CFGAnalysis (
    analyzeControlFlow
    ,CFGParameters (..)
    ,CFGAnalysis (..)
    ,ProgramState (..)
    ,VariableState (..)
    ,VariableValue (..)
    ,VariableProperties
    ,SpaceStatus (..)
    ,NumericalStatus (..)
    ,getIncomingState
    ,getOutgoingState
    ,doesPostDominate
    ,variableMayBeDeclaredInteger
    ,variableMayBeAssignedInteger
    ,ShellCheck.CFGAnalysis.runTests -- STRIP
    ) where

import Control.DeepSeq
import Control.Monad
import Control.Monad.ST
import Data.Array.Unboxed
import Data.Char
import Data.Graph.Inductive.Graph
import Data.Graph.Inductive.Query.DFS
import Data.List hiding (map)
import Data.Maybe
import Data.STRef
import Debug.Trace -- STRIP
import GHC.Generics (Generic)
import qualified Data.Map as M
import qualified Data.Set as S
import qualified ShellCheck.Data as Data
import ShellCheck.AST
import ShellCheck.CFG
import ShellCheck.Prelude

import Test.QuickCheck


-- The number of iterations for DFA to stabilize
iterationCount = 1000000
-- There have been multiple bugs where bad caching caused oscillations.
-- As a precaution, disable caching if there's this many iterations left.
fallbackThreshold = 10000
-- The number of cache entries to keep per node
cacheEntries = 10

logVerbose log = do
    -- traceShowM log
    return ()
logInfo log = do
    -- traceShowM log
    return ()

-- The result of the data flow analysis
data CFGAnalysis = CFGAnalysis {
    graph :: CFGraph,
    tokenToRange :: M.Map Id (Node, Node),
    tokenToNodes :: M.Map Id (S.Set Node),
    postDominators :: Array Node [Node],
    nodeToData :: M.Map Node (ProgramState, ProgramState)
} deriving (Show)

-- The program state we expose externally
data ProgramState = ProgramState {
    -- internalState :: InternalState, -- For debugging
    variablesInScope :: M.Map String VariableState,
    exitCodes :: S.Set Id,
    stateIsReachable :: Bool
} deriving (Show, Eq, Generic, NFData)

internalToExternal :: InternalState -> ProgramState
internalToExternal s =
    ProgramState {
        -- Censor the literal value to avoid introducing dependencies on it. It's just for debugging.
        variablesInScope = M.map censor flatVars,
        -- internalState = s, -- For debugging
        exitCodes = fromMaybe S.empty $ sExitCodes s,
        stateIsReachable = fromMaybe True $ sIsReachable s
    }
  where
    censor s = s {
        variableValue = (variableValue s) {
            literalValue = Nothing
        }
    }
    flatVars = M.unionsWith (\_ last -> last) $ map mapStorage [sGlobalValues s, sLocalValues s, sPrefixValues s]

-- Conveniently get the state before a token id
getIncomingState :: CFGAnalysis -> Id -> Maybe ProgramState
getIncomingState analysis id = do
    (start,end) <- M.lookup id $ tokenToRange analysis
    fst <$> M.lookup start (nodeToData analysis)

-- Conveniently get the state after a token id
getOutgoingState :: CFGAnalysis -> Id -> Maybe ProgramState
getOutgoingState analysis id = do
    (start,end) <- M.lookup id $ tokenToRange analysis
    snd <$> M.lookup end (nodeToData analysis)

-- Conveniently determine whether one node postdominates another,
-- i.e. whether 'target' always unconditionally runs after 'base'.
doesPostDominate :: CFGAnalysis -> Id -> Id -> Bool
doesPostDominate analysis target base = fromMaybe False $ do
    (_, baseEnd) <- M.lookup base $ tokenToRange analysis
    (targetStart, _) <- M.lookup target $ tokenToRange analysis
    return $ targetStart `elem` (postDominators analysis ! baseEnd)

-- See if any execution path results in the variable containing a state
variableMayHaveState :: ProgramState -> String -> CFVariableProp -> Maybe Bool
variableMayHaveState state var property = do
    value <- M.lookup var $ variablesInScope state
    return $ any (S.member property) $ variableProperties value

-- See if any execution path declares the variable an integer (declare -i).
variableMayBeDeclaredInteger state var = variableMayHaveState state var CFVPInteger

-- See if any execution path suggests the variable may contain an integer value
variableMayBeAssignedInteger state var = do
    value <- M.lookup var $ variablesInScope state
    return $ (numericalStatus $ variableValue value) >= NumericalStatusMaybe

getDataForNode analysis node = M.lookup node $ nodeToData analysis

-- The current state of data flow at a point in the program, potentially as a diff
data InternalState = InternalState {
    sVersion :: Integer,
    sGlobalValues :: VersionedMap String VariableState,
    sLocalValues :: VersionedMap String VariableState,
    sPrefixValues :: VersionedMap String VariableState,
    sFunctionTargets :: VersionedMap String FunctionValue,
    sExitCodes :: Maybe (S.Set Id),
    sIsReachable :: Maybe Bool
} deriving (Show, Generic, NFData)

newInternalState = InternalState {
    sVersion = 0,
    sGlobalValues = vmEmpty,
    sLocalValues = vmEmpty,
    sPrefixValues = vmEmpty,
    sFunctionTargets = vmEmpty,
    sExitCodes = Nothing,
    sIsReachable = Nothing
}

unreachableState = modified newInternalState {
    sIsReachable = Just False
}

-- The default state we assume we get from the environment
createEnvironmentState :: InternalState
createEnvironmentState = do
    foldl' (flip ($)) newInternalState $ concat [
        addVars Data.internalVariables unknownVariableState,
        addVars Data.variablesWithoutSpaces spacelessVariableState,
        addVars Data.specialIntegerVariables integerVariableState
        ]
  where
    addVars names val = map (\name -> insertGlobal name val) names
    spacelessVariableState = unknownVariableState {
        variableValue = VariableValue {
            literalValue = Nothing,
            spaceStatus = SpaceStatusClean,
            numericalStatus = NumericalStatusUnknown
        }
    }
    integerVariableState = unknownVariableState {
        variableValue = unknownIntegerValue
    }


modified s = s { sVersion = -1 }

insertGlobal :: String -> VariableState -> InternalState -> InternalState
insertGlobal name value state = modified state {
    sGlobalValues = vmInsert name value $ sGlobalValues state
}

insertLocal :: String -> VariableState -> InternalState -> InternalState
insertLocal name value state = modified state {
    sLocalValues = vmInsert name value $ sLocalValues state
}

insertPrefix :: String -> VariableState -> InternalState -> InternalState
insertPrefix name value state = modified state {
    sPrefixValues = vmInsert name value $ sPrefixValues state
}

insertFunction :: String -> FunctionValue -> InternalState -> InternalState
insertFunction name value state = modified state {
    sFunctionTargets = vmInsert name value $ sFunctionTargets state
}

addProperties :: S.Set CFVariableProp -> VariableState -> VariableState
addProperties props state = state {
    variableProperties = S.map (S.union props) $ variableProperties state
}

removeProperties :: S.Set CFVariableProp -> VariableState -> VariableState
removeProperties props state = state {
    variableProperties = S.map (\s -> S.difference s props) $ variableProperties state
}

setExitCode id = setExitCodes (S.singleton id)
setExitCodes set state = modified state {
    sExitCodes = Just $ set
}

-- Dependencies on values, e.g. "if there is a global variable named 'foo' without spaces"
-- This is used to see if the DFA of a function would result in the same state, so anything
-- that affects DFA must be tracked.
data StateDependency =
    -- Complete variable state
    DepState Scope String VariableState
    -- Only variable properties (we need properties but not values for x=1)
    | DepProperties Scope String VariableProperties
    -- Function definition
    | DepFunction String (S.Set FunctionDefinition)
    -- Whether invoking the node would result in recursion (i.e., is the function on the stack?)
    | DepIsRecursive Node Bool
    -- The set of commands that could have provided the exit code $?
    | DepExitCodes (S.Set Id)
    deriving (Show, Eq, Ord, Generic, NFData)

-- A function definition, or lack thereof
data FunctionDefinition = FunctionUnknown | FunctionDefinition String Node Node
    deriving (Show, Eq, Ord, Generic, NFData)

-- The Set of places a command name can point (it's a Set to handle conditionally defined functions)
type FunctionValue = S.Set FunctionDefinition

-- Create an InternalState that fulfills the given dependencies
depsToState :: S.Set StateDependency -> InternalState
depsToState set = foldl insert newInternalState $ S.toList set
  where
    insert :: InternalState -> StateDependency -> InternalState
    insert state dep =
        case dep of
            DepFunction name val -> insertFunction name val state
            DepState scope name val -> insertIn True scope name val state
            -- State includes properties and more, so don't overwrite a state with properties
            DepProperties scope name props -> insertIn False scope name unknownVariableState { variableProperties = props } state
            DepIsRecursive _ _ -> state
            DepExitCodes s -> setExitCodes s state

    insertIn overwrite scope name val state =
        let
            (mapToCheck, inserter) =
                case scope of
                    PrefixScope -> (sPrefixValues, insertPrefix)
                    LocalScope -> (sLocalValues, insertLocal)
                    GlobalScope -> (sGlobalValues, insertGlobal)
                    DefaultScope -> error $ pleaseReport "Unresolved scope in dependency"

            alreadyExists = isJust $ vmLookup name $ mapToCheck state
        in
            if overwrite || not alreadyExists
            then inserter name val state
            else state

unknownFunctionValue = S.singleton FunctionUnknown

-- The information about the value of a single variable
data VariableValue = VariableValue {
    literalValue :: Maybe String, -- TODO: For debugging. Remove me.
    spaceStatus :: SpaceStatus,
    numericalStatus :: NumericalStatus
}
    deriving (Show, Eq, Ord, Generic, NFData)

data VariableState = VariableState {
    variableValue :: VariableValue,
    variableProperties :: VariableProperties
}
    deriving (Show, Eq, Ord, Generic, NFData)

-- Whether or not the value needs quoting (has spaces/globs), or we don't know
data SpaceStatus = SpaceStatusEmpty | SpaceStatusClean | SpaceStatusDirty deriving (Show, Eq, Ord, Generic, NFData)
--
-- Whether or not the value needs quoting (has spaces/globs), or we don't know
data NumericalStatus = NumericalStatusUnknown | NumericalStatusEmpty | NumericalStatusMaybe | NumericalStatusDefinitely deriving (Show, Eq, Ord, Generic, NFData)

-- The set of possible sets of properties for this variable
type VariableProperties = S.Set (S.Set CFVariableProp)

defaultProperties = S.singleton S.empty

unknownVariableState = VariableState {
    variableValue = unknownVariableValue,
    variableProperties = defaultProperties
}

unknownVariableValue = VariableValue {
    literalValue = Nothing,
    spaceStatus = SpaceStatusDirty,
    numericalStatus = NumericalStatusUnknown
}

emptyVariableValue = unknownVariableValue {
    literalValue = Just "",
    spaceStatus = SpaceStatusEmpty,
    numericalStatus = NumericalStatusEmpty
}

unsetVariableState = VariableState {
    variableValue = emptyVariableValue,
    variableProperties = defaultProperties
}

mergeVariableState a b = VariableState {
    variableValue = mergeVariableValue (variableValue a) (variableValue b),
    variableProperties = S.union (variableProperties a) (variableProperties b)
}

mergeVariableValue a b = VariableValue {
    literalValue = if literalValue a == literalValue b then literalValue a else Nothing,
    spaceStatus = mergeSpaceStatus (spaceStatus a) (spaceStatus b),
    numericalStatus = mergeNumericalStatus (numericalStatus a) (numericalStatus b)
}

mergeSpaceStatus a b =
    case (a,b) of
        (SpaceStatusEmpty, y) -> y
        (x, SpaceStatusEmpty) -> x
        (SpaceStatusClean, SpaceStatusClean) -> SpaceStatusClean
        _ -> SpaceStatusDirty

mergeNumericalStatus a b =
    case (a,b) of
        (NumericalStatusDefinitely, NumericalStatusDefinitely) -> NumericalStatusDefinitely
        (NumericalStatusDefinitely, _) -> NumericalStatusMaybe
        (_, NumericalStatusDefinitely) -> NumericalStatusMaybe
        (NumericalStatusMaybe, _) -> NumericalStatusMaybe
        (_, NumericalStatusMaybe) -> NumericalStatusMaybe
        (NumericalStatusEmpty, NumericalStatusEmpty) -> NumericalStatusEmpty
        _ -> NumericalStatusUnknown

-- A VersionedMap is a Map that keeps an additional integer version to quickly determine if it has changed.
-- * Version -1 means it's unknown (possibly and presumably changed)
-- * Version 0 means it's empty
-- * Version N means it's equal to any other map with Version N (this is required but not enforced)
data VersionedMap k v = VersionedMap {
    mapVersion :: Integer,
    mapStorage :: M.Map k v
}
    deriving (Generic, NFData)

-- This makes states more readable but inhibits copy-paste
instance (Show k, Show v) => Show (VersionedMap k v) where
    show m = (if mapVersion m >= 0 then "V" ++ show (mapVersion m) else "U") ++ " " ++ show (mapStorage m)

instance Eq InternalState where
    (==) a b = stateIsQuickEqual a b || stateIsSlowEqual a b

instance (Eq k, Eq v) => Eq (VersionedMap k v) where
    (==) a b = vmIsQuickEqual a b || mapStorage a == mapStorage b

instance (Ord k, Ord v) => Ord (VersionedMap k v) where
    compare a b =
        if vmIsQuickEqual a b
        then EQ
        else mapStorage a `compare` mapStorage b


-- A context with STRefs manually passed around to function.
-- This is done because it was dramatically much faster than any RWS type stack
data Ctx s = Ctx {
    -- The current node
    cNode :: STRef s Node,
    -- The current input state
    cInput :: STRef s InternalState,
    -- The current output state
    cOutput :: STRef s InternalState,

    -- The current functions/subshells stack
    cStack :: [StackEntry s],
    -- The input graph
    cGraph :: CFGraph,
    -- An incrementing counter to version maps
    cCounter :: STRef s Integer,
    -- A cache of input state dependencies to output effects
    cCache :: STRef s (M.Map Node [(S.Set StateDependency, InternalState)]),
    -- Whether the cache is enabled (see fallbackThreshold)
    cEnableCache :: STRef s Bool,
    -- The states resulting from data flows per invocation path
    cInvocations :: STRef s (M.Map [Node] (S.Set StateDependency, M.Map Node (InternalState, InternalState)))
}

-- Whenever a function (or subshell) is invoked, a value like this is pushed onto the stack
data StackEntry s = StackEntry {
    -- The entry point of this stack entry for the purpose of detecting recursion
    entryPoint :: Node,
    -- Whether this is a function call (as opposed to a subshell)
    isFunctionCall :: Bool,
    -- The node where this entry point was invoked
    callSite :: Node,
    -- A mutable set of dependencies we fetched from here or higher in the stack
    dependencies :: STRef s (S.Set StateDependency),
    -- The original input state for this stack entry
    stackState :: InternalState
}
    deriving (Eq, Generic, NFData)

#if MIN_VERSION_deepseq(1,4,2)
-- Our deepseq already has a STRef instance
#else
-- Older deepseq (for GHC < 8) lacks this instance
instance NFData (STRef s a) where
    rnf = (`seq` ())
#endif

-- Overwrite a base state with the contents of a diff state
-- This is unrelated to join/merge.
patchState :: InternalState -> InternalState -> InternalState
patchState base diff =
    case () of
        _ | sVersion diff == 0 -> base
        _ | sVersion base == 0 -> diff
        _ | stateIsQuickEqual base diff -> diff
        _ ->
            InternalState {
                sVersion = -1,
                sGlobalValues = vmPatch (sGlobalValues base) (sGlobalValues diff),
                sLocalValues = vmPatch (sLocalValues base) (sLocalValues diff),
                sPrefixValues = vmPatch (sPrefixValues base) (sPrefixValues diff),
                sFunctionTargets = vmPatch (sFunctionTargets base) (sFunctionTargets diff),
                sExitCodes = sExitCodes diff `mplus` sExitCodes base,
                sIsReachable = sIsReachable diff `mplus` sIsReachable base
            }

patchOutputM ctx diff = do
    let cOut = cOutput ctx
    oldState <- readSTRef cOut
    let newState = patchState oldState diff
    writeSTRef cOut newState

-- Merge (aka Join) two states. This is monadic because it requires looking up
-- values from the current context. For example:
--
--   f() {
--     foo || x=2
--     HERE         # This merge requires looking up the value of $x in the parent frame
--   }
--   x=1
--   f
mergeState :: forall s. Ctx s -> InternalState -> InternalState -> ST s InternalState
mergeState ctx a b = do
    -- Kludge: we want `readVariable` & friends not to read from an intermediate state,
    --         so temporarily set a blank input.
    let cin = cInput ctx
    old <- readSTRef cin
    writeSTRef cin newInternalState
    x <- merge a b
    writeSTRef cin old
    return x

  where

    merge a b =
        case () of
            _ | sIsReachable a == Just True && sIsReachable b == Just False
                    || sIsReachable a == Just False && sIsReachable b == Just True ->
                error $ pleaseReport "Unexpected merge of reachable and unreachable state"
            _ | sIsReachable a == Just False && sIsReachable b == Just False ->
                return unreachableState
            _ | sVersion a >= 0 && sVersion b >= 0 && sVersion a == sVersion b -> return a
            _ -> do
                globals <- mergeMaps ctx mergeVariableState readGlobal (sGlobalValues a) (sGlobalValues b)
                locals <- mergeMaps ctx mergeVariableState readVariable (sLocalValues a) (sLocalValues b)
                prefix <- mergeMaps ctx mergeVariableState readVariable (sPrefixValues a) (sPrefixValues b)
                funcs <- mergeMaps ctx S.union readFunction (sFunctionTargets a) (sFunctionTargets b)
                exitCodes <- mergeMaybes ctx S.union readExitCodes (sExitCodes a) (sExitCodes b)
                return $ InternalState {
                    sVersion = -1,
                    sGlobalValues = globals,
                    sLocalValues = locals,
                    sPrefixValues = prefix,
                    sFunctionTargets = funcs,
                    sExitCodes = exitCodes,
                    sIsReachable = liftM2 (&&) (sIsReachable a) (sIsReachable b)
                }

-- Merge a number of states, or return a default if there are no states
-- (it can't fold from newInternalState because this would be equivalent of adding a new input edge).
mergeStates :: forall s. Ctx s -> InternalState -> [InternalState] -> ST s InternalState
mergeStates ctx def list =
    case list of
        [] -> return def
        (first:rest) -> foldM (mergeState ctx) first rest

-- Merge two maps, key by key. If both maps have a key, the 'merger' is used.
-- If only one has the key, the 'reader' is used to fetch a second, and the two are merged as above.
mergeMaps :: (Ord k) => forall s.
    Ctx s ->
    (v -> v -> v) ->
    (Ctx s -> k -> ST s v) ->
    (VersionedMap k v) ->
    (VersionedMap k v) ->
    ST s (VersionedMap k v)
mergeMaps ctx merger reader a b =
    if vmIsQuickEqual a b
    then return a
    else do
        new <- M.fromDistinctAscList <$> reverse <$> f [] (M.toAscList $ mapStorage a) (M.toAscList $ mapStorage b)
        vmFromMap ctx new
  where
    f l [] [] = return l
    f l [] b = f l b []
    f l ((k,v):rest1) [] = do
        other <- reader ctx k
        f ((k, merger v other):l) rest1 []
    f l l1@((k1, v1):rest1) l2@((k2, v2):rest2) =
        case k1 `compare` k2 of
            EQ ->
                f ((k1, merger v1 v2):l) rest1 rest2
            LT -> do
                nv2 <- reader ctx k1
                f ((k1, merger v1 nv2):l) rest1 l2
            GT -> do
                nv1 <- reader ctx k2
                f ((k2, merger nv1 v2):l) l1 rest2

-- Merge two Maybes, like mergeMaps for a single element
mergeMaybes ctx merger reader a b =
    case (a, b) of
        (Nothing, Nothing) -> return Nothing
        (Just v1, Nothing) -> single v1
        (Nothing, Just v2) -> single v2
        (Just v1, Just v2) -> return $ Just $ merger v1 v2
  where
    single val = do
        result <- merger val <$> reader ctx
        return $ Just result

vmFromMap ctx map = return $ VersionedMap {
    mapVersion = -1,
    mapStorage = map
}

-- Give a VersionedMap a version if it does not already have one.
versionMap ctx map =
    if mapVersion map >= 0
    then return map
    else do
        v <- nextVersion ctx
        return map {
            mapVersion = v
        }

-- Give an InternalState a version if it does not already have one.
versionState ctx state =
    if sVersion state >= 0
    then return state
    else do
        self <- nextVersion ctx
        ssGlobalValues <- versionMap ctx $ sGlobalValues state
        ssLocalValues <- versionMap ctx $ sLocalValues state
        ssFunctionTargets <- versionMap ctx $ sFunctionTargets state
        return state {
            sVersion = self,
            sGlobalValues = ssGlobalValues,
            sLocalValues = ssLocalValues,
            sFunctionTargets = ssFunctionTargets
        }

-- Like 'not null' but for 2+ elements
is2plus :: [a] -> Bool
is2plus l = case l of
    _:_:_ -> True
    _ -> False

-- Use versions to see if two states are trivially identical
stateIsQuickEqual a b =
    let
        va = sVersion a
        vb = sVersion b
    in
        va >= 0 && vb >= 0 && va == vb

-- A manual slow path 'Eq' (it's not derived because it's part of the custom Eq instance)
stateIsSlowEqual a b =
    check sGlobalValues
    && check sLocalValues
    && check sPrefixValues
    && check sFunctionTargets
    && check sIsReachable
  where
    check f = f a == f b

-- Check if two VersionedMaps are trivially equal
vmIsQuickEqual :: VersionedMap k v -> VersionedMap k v -> Bool
vmIsQuickEqual a b =
    let
        va = mapVersion a
        vb = mapVersion b
    in
        va >= 0 && vb >= 0 && va == vb

-- A new, empty VersionedMap
vmEmpty = VersionedMap {
    mapVersion = 0,
    mapStorage = M.empty
}

-- Map.null for VersionedMaps
vmNull :: VersionedMap k v -> Bool
vmNull m = mapVersion m == 0 || (M.null $ mapStorage m)

-- Map.lookup for VersionedMaps
vmLookup name map = M.lookup name $ mapStorage map

-- Map.insert for VersionedMaps
vmInsert key val map = VersionedMap {
    mapVersion = -1,
    mapStorage = M.insert key val $ mapStorage map
}

-- Overwrite all keys in the first map with values from the second
vmPatch :: (Ord k) => VersionedMap k v -> VersionedMap k v -> VersionedMap k v
vmPatch base diff =
    case () of
        _ | mapVersion base == 0 -> diff
        _ | mapVersion diff == 0 -> base
        _ | vmIsQuickEqual base diff -> diff
        _ -> VersionedMap {
            mapVersion = -1,
            mapStorage = M.unionWith (flip const) (mapStorage base) (mapStorage diff)
        }

-- Set a variable. This includes properties. Applies it to the appropriate scope.
writeVariable :: forall s. Ctx s -> String -> VariableState -> ST s ()
writeVariable ctx name val = do
    typ <- readVariableScope ctx name
    case typ of
        GlobalScope -> writeGlobal ctx name val
        LocalScope -> writeLocal ctx name val
        -- Prefixed variables actually become local variables in the invoked function
        PrefixScope -> writeLocal ctx name val

writeGlobal ctx name val = do
    modifySTRef (cOutput ctx) $ insertGlobal name val

writeLocal ctx name val = do
    modifySTRef (cOutput ctx) $ insertLocal name val

writePrefix ctx name val = do
    modifySTRef (cOutput ctx) $ insertPrefix name val

updateVariableValue ctx name val = do
    (props, scope) <- readVariablePropertiesWithScope ctx name
    let f = case scope of
                GlobalScope -> writeGlobal
                LocalScope -> writeLocal
                PrefixScope -> writeLocal -- Updates become local
    f ctx name $ VariableState { variableValue = val, variableProperties = props }

updateGlobalValue ctx name val = do
    props <- readGlobalProperties ctx name
    writeGlobal ctx name VariableState { variableValue = val, variableProperties = props }

updateLocalValue ctx name val = do
    props <- readLocalProperties ctx name
    writeLocal ctx name VariableState { variableValue = val, variableProperties = props }

updatePrefixValue ctx name val = do
    -- Prefix variables don't inherit properties
    writePrefix ctx name VariableState { variableValue = val, variableProperties = defaultProperties }


-- Look up a variable value, and also return its scope
readVariableWithScope :: forall s. Ctx s -> String -> ST s (VariableState, Scope)
readVariableWithScope ctx name = lookupStack get dep def ctx name
  where
    def = (unknownVariableState, GlobalScope)
    get = getVariableWithScope
    dep k (val, scope) = DepState scope k val

-- Look up the variable's properties. This can be done independently to avoid incurring a dependency on the value.
readVariablePropertiesWithScope :: forall s. Ctx s -> String -> ST s (VariableProperties, Scope)
readVariablePropertiesWithScope ctx name = lookupStack get dep def ctx name
  where
    def = (defaultProperties, GlobalScope)
    get s k = do
        (val, scope) <- getVariableWithScope s k
        return (variableProperties val, scope)
    dep k (val, scope) = DepProperties scope k val

readVariableScope ctx name = snd <$> readVariablePropertiesWithScope ctx name

getVariableWithScope :: InternalState -> String -> Maybe (VariableState, Scope)
getVariableWithScope s name =
    case (vmLookup name $ sPrefixValues s, vmLookup name $ sLocalValues s, vmLookup name $ sGlobalValues s) of
        (Just var, _, _) -> return (var, PrefixScope)
        (_, Just var, _) -> return (var, LocalScope)
        (_, _, Just var) -> return (var, GlobalScope)
        _ -> Nothing

undefineFunction ctx name =
    writeFunction ctx name $ FunctionUnknown

undefineVariable ctx name =
    writeVariable ctx name $ unsetVariableState

readVariable ctx name = fst <$> readVariableWithScope ctx name
readVariableProperties ctx name = fst <$> readVariablePropertiesWithScope ctx name

readGlobal ctx name = lookupStack get dep def ctx name
  where
    def = unknownVariableState -- could come from the environment
    get s name = vmLookup name $ sGlobalValues s
    dep k v = DepState GlobalScope k v


readGlobalProperties ctx name = lookupStack get dep def ctx name
  where
    def = defaultProperties
    get s name = variableProperties <$> (vmLookup name $ sGlobalValues s)
    -- This dependency will fail to match if it's shadowed by a local variable,
    -- such as in  x=1; f() { local -i x; declare -ag x; } because we'll look at
    -- x and find it to be local and not global. FIXME?
    dep k v = DepProperties GlobalScope k v

readLocal ctx name = lookupStackUntilFunction get dep def ctx name
  where
    def = unsetVariableState -- can't come from the environment
    get s name = vmLookup name $ sLocalValues s
    dep k v = DepState LocalScope k v

-- We only want to look up the local properties of the current function,
-- though preferably even if we're in a subshell.  FIXME?
readLocalProperties ctx name = fst <$> lookupStackUntilFunction get dep def ctx name
  where
    def = (defaultProperties, LocalScope)
    with tag f = do
        val <- variableProperties <$> f
        return (val, tag)

    get s name = (with LocalScope $ vmLookup name $ sLocalValues s) `mplus` (with PrefixScope $ vmLookup name $ sPrefixValues s)
    dep k (val, scope) = DepProperties scope k val

readFunction ctx name = lookupStack get dep def ctx name
  where
    def = unknownFunctionValue
    get s name = vmLookup name $ sFunctionTargets s
    dep k v = DepFunction k v

writeFunction ctx name val = do
    modifySTRef (cOutput ctx) $ insertFunction name $ S.singleton val

readExitCodes ctx = lookupStack get dep def ctx ()
  where
    get s () = sExitCodes s
    def = S.empty
    dep () v = DepExitCodes v

-- Look up each state on the stack until a value is found (or the default is used),
-- then add this value as a StateDependency.
lookupStack' :: forall s k v.
    -- Whether to stop at function boundaries
    Bool
    -- A function that maybe finds a value from a state
    -> (InternalState -> k -> Maybe v)
    -- A function that creates a dependency on what was found
    -> (k -> v -> StateDependency)
    -- A default value, if the value can't be found anywhere
    -> v
    -- Context
    -> Ctx s
    -- The key to look up
    -> k
    -- Returning the result
    -> ST s v
lookupStack' functionOnly get dep def ctx key = do
    top <- readSTRef $ cInput ctx
    case get top key of
        Just v -> return v
        Nothing -> f (cStack ctx)
  where
    f [] = return def
    f (s:_) | functionOnly && isFunctionCall s = return def
    f (s:rest) = do
        -- Go up the stack until we find the value, and add
        -- a dependency on each state (including where it was found)
        res <- fromMaybe (f rest) (return <$> get (stackState s) key)
        modifySTRef (dependencies s) $ S.insert $ dep key res
        return res

lookupStack = lookupStack' False
lookupStackUntilFunction = lookupStack' True

-- Like lookupStack but without adding dependencies
peekStack get def ctx key = do
    top <- readSTRef $ cInput ctx
    case get top key of
        Just v -> return v
        Nothing -> f (cStack ctx)
  where
    f [] = return def
    f (s:rest) =
        case get (stackState s) key of
            Just v -> return v
            Nothing -> f rest

-- Check if the current context fulfills a StateDependency if entering `entry`
fulfillsDependency ctx entry dep =
    case dep of
        DepState scope name val -> (== (val, scope)) <$> peek scope ctx name
        DepProperties scope name props -> do
            (state, s) <- peek scope ctx name
            return $ scope == s && variableProperties state == props
        DepFunction name val -> (== val) <$> peekFunc ctx name
        -- Hack. Since we haven't pushed the soon-to-be invoked function on the stack,
        -- it won't be found by the normal check.
        DepIsRecursive node val | node == entry -> return True
        DepIsRecursive node val -> return $ val == any (\f -> entryPoint f == node) (cStack ctx)
        DepExitCodes val -> (== val) <$> peekStack (\s k -> sExitCodes s) S.empty ctx ()
  --      _ -> error $ "Unknown dep " ++ show dep
  where
    peek scope = peekStack getVariableWithScope $ if scope == GlobalScope then (unknownVariableState, GlobalScope) else (unsetVariableState, LocalScope)
    peekFunc = peekStack (\state name -> vmLookup name $ sFunctionTargets state) unknownFunctionValue

-- Check if the current context fulfills all StateDependencies
fulfillsDependencies ctx entry deps =
    f $ S.toList deps
  where
    f [] = return True
    f (dep:rest) = do
        res <- fulfillsDependency ctx entry dep
        if res
            then f rest
            else return False

-- Create a brand new Ctx given a Control Flow Graph (CFG)
newCtx g = do
    c <- newSTRef 1
    input <- newSTRef undefined
    output <- newSTRef undefined
    node <- newSTRef undefined
    cache <- newSTRef M.empty
    enableCache <- newSTRef True
    invocations <- newSTRef M.empty
    return $ Ctx {
        cCounter = c,
        cInput = input,
        cOutput = output,
        cNode = node,
        cCache = cache,
        cEnableCache = enableCache,
        cStack = [],
        cInvocations = invocations,
        cGraph = g
    }

-- The next incrementing version for VersionedMaps
nextVersion ctx = do
    let ctr = cCounter ctx
    n <- readSTRef ctr
    writeSTRef ctr $! n+1
    return n

-- Create a new StackEntry
newStackEntry ctx point isCall = do
    deps <- newSTRef S.empty
    state <- readSTRef $ cOutput ctx
    callsite <- readSTRef $ cNode ctx
    return $ StackEntry {
        entryPoint = point,
        isFunctionCall = isCall,
        callSite = callsite,
        dependencies = deps,
        stackState = state
    }

-- Call a function with a new stack entry on the stack
withNewStackFrame ctx node isCall f = do
    newEntry <- newStackEntry ctx node isCall
    newInput <- newSTRef newInternalState
    newOutput <- newSTRef newInternalState
    newNode <- newSTRef node
    let newCtx = ctx {
        cInput = newInput,
        cOutput = newOutput,
        cNode = newNode,
        cStack = newEntry : cStack ctx
    }
    x <- f newCtx

    {-
    deps <- readSTRef $ dependencies newEntry
    selfcheck <- fulfillsDependencies newCtx deps
    unless selfcheck $ error $ pleaseReport $ "Unmet stack dependencies on " ++ show (node, deps)
    -}

    return (x, newEntry)

-- Check if invoking this function would be a recursive loop
-- (i.e. we already have the function on the stack)
wouldBeRecursive ctx node = f (cStack ctx)
  where
    f [] = return False
    f (s:rest) = do
        res <-
            if entryPoint s == node
            then return True
            else f rest
        modifySTRef (dependencies s) $ S.insert $ DepIsRecursive node res
        return res

-- The main DFA 'transfer' function, applying the effects of a node to the output state
transfer ctx label =
  --traceShow ("Transferring", label) $
    case label of
        CFStructuralNode -> return ()
        CFEntryPoint _ -> return ()
        CFImpliedExit -> return ()
        CFResolvedExit {} -> return ()

        CFExecuteCommand cmd -> transferCommand ctx cmd
        CFExecuteSubshell reason entry exit -> transferSubshell ctx reason entry exit
        CFApplyEffects effects -> mapM_ (\(IdTagged _ f) -> transferEffect ctx f) effects
        CFSetExitCode id -> transferExitCode ctx id

        CFUnresolvedExit -> patchOutputM ctx unreachableState
        CFUnreachable -> patchOutputM ctx unreachableState

        -- TODO
        CFSetBackgroundPid _ -> return ()
        CFDropPrefixAssignments {} ->
            modifySTRef (cOutput ctx) $ \c -> modified c { sPrefixValues = vmEmpty }
--        _ -> error $ "Unknown " ++ show label


-- Transfer the effects of a subshell invocation. This is similar to a function call
-- to allow easily discarding the effects (otherwise the InternalState would have
-- to represent subshell depth, while this way it can simply use the function stack).
transferSubshell ctx reason entry exit = do
    let cout = cOutput ctx
    initial <- readSTRef cout
    runCached ctx entry (f entry exit)
    res <- readSTRef cout
    -- Clear subshell changes. TODO: track this to warn about modifications.
    writeSTRef cout $ initial {
        sExitCodes = sExitCodes res
    }
  where
    f entry exit ctx = do
        (states, frame) <- withNewStackFrame ctx entry False (flip dataflow $ entry)
        let (_, res) = fromMaybe (error $ pleaseReport "Subshell has no exit") $ M.lookup exit states
        deps <- readSTRef $ dependencies frame
        registerFlowResult ctx entry states deps
        return (deps, res)

-- Transfer the effects of executing a command, i.e. the merged union of all possible function definitions.
transferCommand ctx Nothing = return ()
transferCommand ctx (Just name) = do
    targets <- readFunction ctx name
    logVerbose ("Transferring ",name,targets)
    transferMultiple ctx $ map (flip transferFunctionValue) $ S.toList targets

-- Transfer a set of function definitions and merge the output states.
transferMultiple ctx funcs = do
    logVerbose ("Transferring set of ", length funcs)
    original <- readSTRef out
    branches <- mapM (apply ctx original) funcs
    merged <- mergeStates ctx original branches
    let patched = patchState original merged
    writeSTRef out patched
  where
    out = cOutput ctx
    apply ctx original f = do
        writeSTRef out original
        f ctx
        readSTRef out

-- Transfer the effects of a single function definition.
transferFunctionValue ctx funcVal =
    case funcVal of
        FunctionUnknown -> return ()
        FunctionDefinition name entry exit -> do
            isRecursive <- wouldBeRecursive ctx entry
            if isRecursive
                then return () -- TODO: Find a better strategy for recursion
                else runCached ctx entry (f name entry exit)
  where
    f name entry exit ctx = do
        (states, frame) <- withNewStackFrame ctx entry True (flip dataflow $ entry)
        deps <- readSTRef $ dependencies frame
        let res =
                case M.lookup exit states of
                    Just (input, output) -> do
                        -- Discard local variables. TODO: track&retain variables declared local in previous scopes?
                        modified output { sLocalValues = vmEmpty }
                    Nothing -> do
                        -- e.g. f() { exit; }
                        unreachableState
        registerFlowResult ctx entry states deps
        return (deps, res)

transferExitCode ctx id = do
    modifySTRef (cOutput ctx) $ setExitCode id

-- Register/save the result of a dataflow of a function.
-- At the end, all the different values from different flows are merged together.
registerFlowResult ctx entry states deps = do
    -- This function is called in the context of a CFExecuteCommand and not its invoked function,
    -- so manually add the current node to the stack.
    current <- readSTRef $ cNode ctx
    let parents = map callSite $ cStack ctx
    -- A unique path to this flow context. The specific value doesn't matter, as long as it's
    -- unique per invocation of the function. This is required so that 'x=1; f; x=2; f' won't
    -- overwrite each other.
    let path = entry : current : parents
    modifySTRef (cInvocations ctx) $ M.insert path (deps, states)


-- Look up a node in the cache and see if the dependencies of any entries are matched.
-- In that case, reuse the previous result instead of doing a new data flow.
runCached :: forall s. Ctx s -> Node -> (Ctx s -> ST s (S.Set StateDependency, InternalState)) -> ST s ()
runCached ctx node f = do
    cache <- getCache ctx node
    case cache of
        Just v -> do
            logInfo ("Running cached", node)
            -- do { (deps, diff) <- f ctx; unless (v == diff) $ traceShowM ("Cache FAILED to match actual result", node, deps, diff); }
            patchOutputM ctx v

        Nothing -> do
            logInfo ("Cache failed", node)
            (deps, diff) <- f ctx
            modifySTRef (cCache ctx) (M.insertWith (\_ old -> (deps, diff):(take cacheEntries old)) node [(deps,diff)])
            logVerbose ("Recomputed cache for", node, deps)
            -- do { f <- fulfillsDependencies ctx node deps; unless (f) $ traceShowM ("New dependencies FAILED to match", node, deps); }
            patchOutputM ctx diff

-- Get a cached version whose dependencies are currently fulfilled, if any.
getCache :: forall s. Ctx s -> Node -> ST s (Maybe InternalState)
getCache ctx node = do
    cache <- readSTRef $ cCache ctx
    enable <- readSTRef $ cEnableCache ctx
    logVerbose ("Cache for", node, "length", length $ M.findWithDefault [] node cache, M.lookup node cache)
    if enable
        then f $ M.findWithDefault [] node cache
        else return Nothing
  where
    f [] = return Nothing
    f ((deps, value):rest) = do
        match <- fulfillsDependencies ctx node deps
        if match
            then return $ Just value
            else f rest

-- Transfer a single CFEffect to the output state.
transferEffect ctx effect =
    case effect of
        CFReadVariable name ->
            case name of
                "?" -> void $ readExitCodes ctx
                _ -> void $ readVariable ctx name
        CFWriteVariable name value -> do
            val <- cfValueToVariableValue ctx value
            updateVariableValue ctx name val
        CFWriteGlobal name value -> do
            val <- cfValueToVariableValue ctx value
            updateGlobalValue ctx name val
        CFWriteLocal name value -> do
            val <- cfValueToVariableValue ctx value
            updateLocalValue ctx name val
        CFWritePrefix name value -> do
            val <- cfValueToVariableValue ctx value
            updatePrefixValue ctx name val

        CFSetProps scope name props ->
            case scope of
                DefaultScope -> do
                    state <- readVariable ctx name
                    writeVariable ctx name $ addProperties props state
                GlobalScope -> do
                    state <- readGlobal ctx name
                    writeGlobal ctx name $ addProperties props state
                LocalScope -> do
                    out <- readSTRef (cOutput ctx)
                    state <- readLocal ctx name
                    writeLocal ctx name $ addProperties props state
                PrefixScope -> do
                    -- Prefix values become local
                    state <- readLocal ctx name
                    writeLocal ctx name $ addProperties props state

        CFUnsetProps scope name props ->
            case scope of
                DefaultScope -> do
                    state <- readVariable ctx name
                    writeVariable ctx name $ removeProperties props state
                GlobalScope -> do
                    state <- readGlobal ctx name
                    writeGlobal ctx name $ removeProperties props state
                LocalScope -> do
                    out <- readSTRef (cOutput ctx)
                    state <- readLocal ctx name
                    writeLocal ctx name $ removeProperties props state
                PrefixScope -> do
                    -- Prefix values become local
                    state <- readLocal ctx name
                    writeLocal ctx name $ removeProperties props state


        CFUndefineVariable name -> undefineVariable ctx name
        CFUndefineFunction name -> undefineFunction ctx name
        CFUndefine name -> do
            -- This should really just unset one or the other
            undefineVariable ctx name
            undefineFunction ctx name
        CFDefineFunction name id entry exit ->
            writeFunction ctx name $ FunctionDefinition name entry exit

        -- TODO
        CFUndefineNameref name -> undefineVariable ctx name
        CFHintArray name -> return ()
        CFHintDefined name -> return ()
--        _ -> error $ "Unknown effect " ++ show effect


-- Transfer the CFG's idea of a value into our VariableState
cfValueToVariableValue ctx val =
    case val of
        CFValueArray -> return unknownVariableValue -- TODO: Track array status
        CFValueComputed _ parts -> foldM f emptyVariableValue parts
        CFValueInteger -> return unknownIntegerValue
        CFValueString -> return unknownVariableValue
        CFValueUninitialized -> return emptyVariableValue
--        _ -> error $ "Unknown value: " ++ show val
  where
    f val part = do
        next <- computeValue ctx part
        return $ val `appendVariableValue` next

-- A value can be computed from 0 or more parts, such as x="literal$y$z"
computeValue ctx part =
    case part of
        CFStringLiteral str -> return $ literalToVariableValue str
        CFStringInteger -> return unknownIntegerValue
        CFStringUnknown -> return unknownVariableValue
        CFStringVariable name -> variableStateToValue <$> readVariable ctx name
   where
    variableStateToValue state =
        case () of
            _ | all (CFVPInteger `S.member`) $ variableProperties state -> unknownIntegerValue
            _ -> variableValue state

-- Append two VariableValues as if with z="$x$y"
appendVariableValue :: VariableValue -> VariableValue -> VariableValue
appendVariableValue a b =
    unknownVariableValue {
        literalValue = liftM2 (++) (literalValue a) (literalValue b),
        spaceStatus = appendSpaceStatus (spaceStatus a) (spaceStatus b),
        numericalStatus = appendNumericalStatus (numericalStatus a) (numericalStatus b)
    }

appendSpaceStatus a b =
    case (a,b) of
        (SpaceStatusEmpty, _) -> b
        (_, SpaceStatusEmpty) -> a
        (SpaceStatusClean, SpaceStatusClean) -> a
        _ ->SpaceStatusDirty

appendNumericalStatus a b =
    case (a,b) of
        (NumericalStatusEmpty, x) -> x
        (x, NumericalStatusEmpty) -> x
        (NumericalStatusDefinitely, NumericalStatusDefinitely) -> NumericalStatusDefinitely
        (NumericalStatusUnknown, _) -> NumericalStatusUnknown
        (_, NumericalStatusUnknown) -> NumericalStatusUnknown
        _ -> NumericalStatusMaybe

unknownIntegerValue = unknownVariableValue {
    literalValue = Nothing,
    spaceStatus = SpaceStatusClean,
    numericalStatus = NumericalStatusDefinitely
}

literalToVariableValue str = unknownVariableValue {
    literalValue = Just str,
    spaceStatus = literalToSpaceStatus str,
    numericalStatus = literalToNumericalStatus str
}

withoutChanges ctx f = do
    let inp = cInput ctx
    let out = cOutput ctx
    prevInput <- readSTRef inp
    prevOutput <- readSTRef out
    res <- f
    writeSTRef inp prevInput
    writeSTRef out prevOutput
    return res

-- Get the SpaceStatus for a literal string, i.e. if it needs quoting
literalToSpaceStatus str =
    case str of
        "" -> SpaceStatusEmpty
        _ | all (`notElem` " \t\n*?[") str -> SpaceStatusClean
        _ -> SpaceStatusDirty

-- Get the NumericalStatus for a literal string, i.e. whether it's an integer
literalToNumericalStatus str =
    case str of
        "" -> NumericalStatusEmpty
        '-':rest -> if isNumeric rest then NumericalStatusDefinitely else NumericalStatusUnknown
        rest -> if isNumeric rest then NumericalStatusDefinitely else NumericalStatusUnknown
  where
    isNumeric = all isDigit

type StateMap = M.Map Node (InternalState, InternalState)

-- Classic, iterative Data Flow Analysis. See Wikipedia for a description of the process.
dataflow :: forall s. Ctx s -> Node -> ST s StateMap
dataflow ctx entry = do
    pending <- newSTRef $ S.singleton entry
    states <- newSTRef $ M.empty
    -- Should probably be done via a stack frame instead
    withoutChanges ctx $
        f iterationCount pending states
    readSTRef states
  where
    graph = cGraph ctx
    f 0 _ _ = error $ pleaseReport "DFA did not reach fix point"
    f n pending states = do
        ps <- readSTRef pending

        when (n == fallbackThreshold) $ do
            -- This should never happen, but has historically been due to caching bugs.
            -- Try disabling the cache and continuing.
            logInfo "DFA is not stabilizing! Disabling cache."
            writeSTRef (cEnableCache ctx) False

        if S.null ps
            then return ()
            else do
                let (next, rest) = S.deleteFindMin ps
                nexts <- process states next
                writeSTRef pending $ foldl (flip S.insert) rest nexts
                f (n-1) pending states

    process states node = do
        stateMap <- readSTRef states
        let inputs = filter (\c -> sIsReachable c /= Just False) $ mapMaybe (\c -> fmap snd $ M.lookup c stateMap) incoming
        input <-
            case incoming of
                [] -> return newInternalState
                _ ->
                    case inputs of
                        [] -> return unreachableState
                        (x:rest) -> foldM (mergeState ctx) x rest
        writeSTRef (cInput ctx) $ input
        writeSTRef (cOutput ctx) $ input
        writeSTRef (cNode ctx) $ node
        transfer ctx label
        newOutput <- readSTRef $ cOutput ctx
        result <-
            if is2plus outgoing
            then
                -- Version the state because we split and will probably merge later
                versionState ctx newOutput
            else return newOutput
        writeSTRef states $ M.insert node (input, result) stateMap
        case M.lookup node stateMap of
            Nothing -> return outgoing
            Just (oldInput, oldOutput) ->
                if oldOutput == result
                then return []
                else return outgoing
      where
        (incomingL, _, label, outgoingL) = context graph $ node
        incoming = map snd $ filter isRegular $ incomingL
        outgoing = map snd outgoingL
        isRegular = ((== CFEFlow) . fst)

runRoot ctx env entry exit = do
    writeSTRef (cInput ctx) $ env
    writeSTRef (cOutput ctx) $ env
    writeSTRef (cNode ctx) $ entry
    (states, frame) <- withNewStackFrame ctx entry False $ \c -> dataflow c entry
    deps <- readSTRef $ dependencies frame
    registerFlowResult ctx entry states deps
    -- Return the final state, used to invoke functions that were declared but not invoked
    return $ snd $ fromMaybe (error $ pleaseReport "Missing exit state") $ M.lookup exit states


analyzeControlFlow :: CFGParameters -> Token -> CFGAnalysis
analyzeControlFlow params t =
    let
        cfg = buildGraph params t
        (entry, exit) = M.findWithDefault (error $ pleaseReport "Missing root") (getId t) (cfIdToRange cfg)
    in
        runST $ f cfg entry exit
  where
    f cfg entry exit = do
        let env = createEnvironmentState
        ctx <- newCtx $ cfGraph cfg
        -- Do a dataflow analysis starting on the root node
        exitState <- runRoot ctx env entry exit

        -- All nodes we've touched
        invocations <- readSTRef $ cInvocations ctx
        let invokedNodes = M.fromDistinctAscList $ map (\c -> (c, ())) $ S.toList $ M.keysSet $ groupByNode $ M.map snd invocations

        -- Invoke all functions that were declared but not invoked
        -- This is so that we still get warnings for dead code
        -- (it's probably not actually dead, just used by a script that sources ours)
        let declaredFunctions = getFunctionTargets exitState
        let uninvoked = M.difference declaredFunctions invokedNodes

        let stragglerInput =
                (env `patchState` exitState) {
                    -- We don't want `die() { exit $?; }; echo "Sourced"` to assume $? is always echo
                    sExitCodes = Nothing
                }

        analyzeStragglers ctx stragglerInput uninvoked

        -- Now round up all the states from all data flows
        -- (FIXME: this excludes functions that were defined in straggling functions)
        invocations <- readSTRef $ cInvocations ctx
        invokedStates <- flattenByNode ctx $ groupByNode $ M.map addDeps invocations

        -- Fill in the map with unreachable states for anything we didn't get to
        let baseStates = M.fromDistinctAscList $ map (\c -> (c, (unreachableState, unreachableState))) $ uncurry enumFromTo $ nodeRange $ cfGraph cfg
        let allStates = M.unionWith (flip const) baseStates invokedStates

        -- Convert to external states
        let nodeToData = M.map (\(a,b) -> (internalToExternal a, internalToExternal b)) allStates

        return $ nodeToData `deepseq` CFGAnalysis {
            graph = cfGraph cfg,
            tokenToRange = cfIdToRange cfg,
            tokenToNodes = cfIdToNodes cfg,
            nodeToData = nodeToData,
            postDominators = cfPostDominators cfg
        }


    -- Include the dependencies in the state of each function, e.g. if it depends on `x=foo` then add that.
    addDeps :: (S.Set StateDependency, M.Map Node (InternalState, InternalState)) -> M.Map Node (InternalState, InternalState)
    addDeps (deps, m) = let base = depsToState deps in M.map (\(a,b) -> (base `patchState` a, base `patchState` b)) m

    -- Collect all the states that each node has resulted in.
    groupByNode :: forall k v. M.Map k (M.Map Node v) -> M.Map Node [v]
    groupByNode pathMap = M.fromListWith (++) $ map (\(k,v) -> (k,[v])) $ concatMap M.toList $ M.elems pathMap

    -- Merge all the pre/post states for each node. This would have been a foldM if Map had one.
    flattenByNode ctx m = M.fromDistinctAscList <$> (mapM (mergePair ctx) $ M.toList m)

    mergeAllStates ctx pairs =
        let
            (pres, posts) = unzip pairs
        in do
            pre <- mergeStates ctx (error $ pleaseReport "Null node states") pres
            post <- mergeStates ctx (error $ pleaseReport "Null node states") posts
            return (pre, post)

    mergePair ctx (node, list) = do
        merged <- mergeAllStates ctx list
        return (node, merged)

    -- Get the all the functions defined in an InternalState
    getFunctionTargets :: InternalState -> M.Map Node FunctionDefinition
    getFunctionTargets state =
        let
            declaredFuncs = S.unions $ M.elems $ mapStorage $ sFunctionTargets state
            getFunc d =
                case d of
                    FunctionDefinition _ entry _ -> Just (entry, d)
                    _ -> Nothing
            funcs = mapMaybe getFunc $ S.toList declaredFuncs
        in
            M.fromList funcs


analyzeStragglers ctx state stragglers = do
    mapM_ f $ M.elems stragglers
  where
    f def@(FunctionDefinition name entry exit) = do
        writeSTRef (cInput ctx) state
        writeSTRef (cOutput ctx) state
        writeSTRef (cNode ctx) entry
        transferFunctionValue ctx def



return []
runTests = $quickCheckAll