{- Copyright 2012-2019 Vidar Holen This file is part of ShellCheck. https://www.shellcheck.net ShellCheck is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. ShellCheck is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} module ShellCheck.ASTLib where import ShellCheck.AST import Control.Monad.Writer import Control.Monad import Data.Char import Data.Functor import Data.Functor.Identity import Data.List import Data.Maybe import qualified Data.Map as Map arguments (T_SimpleCommand _ _ (cmd:args)) = args -- Is this a type of loop? isLoop t = case t of T_WhileExpression {} -> True T_UntilExpression {} -> True T_ForIn {} -> True T_ForArithmetic {} -> True T_SelectIn {} -> True _ -> False -- Will this split into multiple words when used as an argument? willSplit x = case x of T_DollarBraced {} -> True T_DollarExpansion {} -> True T_Backticked {} -> True T_BraceExpansion {} -> True T_Glob {} -> True T_Extglob {} -> True T_DoubleQuoted _ l -> any willBecomeMultipleArgs l T_NormalWord _ l -> any willSplit l _ -> False isGlob T_Extglob {} = True isGlob T_Glob {} = True isGlob (T_NormalWord _ l) = any isGlob l isGlob _ = False -- Is this shell word a constant? isConstant token = case token of -- This ignores some cases like ~"foo": T_NormalWord _ (T_Literal _ ('~':_) : _) -> False T_NormalWord _ l -> all isConstant l T_DoubleQuoted _ l -> all isConstant l T_SingleQuoted _ _ -> True T_Literal _ _ -> True _ -> False -- Is this an empty literal? isEmpty token = case token of T_NormalWord _ l -> all isEmpty l T_DoubleQuoted _ l -> all isEmpty l T_SingleQuoted _ "" -> True T_Literal _ "" -> True _ -> False -- Quick&lazy oversimplification of commands, throwing away details -- and returning a list like ["find", ".", "-name", "${VAR}*" ]. oversimplify token = case token of (T_NormalWord _ l) -> [concat (concatMap oversimplify l)] (T_DoubleQuoted _ l) -> [concat (concatMap oversimplify l)] (T_SingleQuoted _ s) -> [s] (T_DollarBraced _ _ _) -> ["${VAR}"] (T_DollarArithmetic _ _) -> ["${VAR}"] (T_DollarExpansion _ _) -> ["${VAR}"] (T_Backticked _ _) -> ["${VAR}"] (T_Glob _ s) -> [s] (T_Pipeline _ _ [x]) -> oversimplify x (T_Literal _ x) -> [x] (T_ParamSubSpecialChar _ x) -> [x] (T_SimpleCommand _ vars words) -> concatMap oversimplify words (T_Redirecting _ _ foo) -> oversimplify foo (T_DollarSingleQuoted _ s) -> [s] (T_Annotation _ _ s) -> oversimplify s -- Workaround for let "foo = bar" parsing (TA_Sequence _ [TA_Expansion _ v]) -> concatMap oversimplify v _ -> [] -- Turn a SimpleCommand foo -avz --bar=baz into args "a", "v", "z", "bar", -- each in a tuple of (token, stringFlag). Non-flag arguments are added with -- stringFlag == "". getFlagsUntil stopCondition (T_SimpleCommand _ _ (_:args)) = let tokenAndText = map (\x -> (x, concat $ oversimplify x)) args (flagArgs, rest) = break (stopCondition . snd) tokenAndText in concatMap flag flagArgs ++ map (\(t, _) -> (t, "")) rest where flag (x, '-':'-':arg) = [ (x, takeWhile (/= '=') arg) ] flag (x, '-':args) = map (\v -> (x, [v])) args flag (x, _) = [ (x, "") ] getFlagsUntil _ _ = error "Internal shellcheck error, please report! (getFlags on non-command)" -- Get all flags in a GNU way, up until -- getAllFlags :: Token -> [(Token, String)] getAllFlags = getFlagsUntil (== "--") -- Get all flags in a BSD way, up until first non-flag argument or -- getLeadingFlags = getFlagsUntil (\x -> x == "--" || (not $ "-" `isPrefixOf` x)) -- Check if a command has a flag. hasFlag cmd str = str `elem` (map snd $ getAllFlags cmd) -- Is this token a word that starts with a dash? isFlag token = case getWordParts token of T_Literal _ ('-':_) : _ -> True _ -> False -- Is this token a flag where the - is unquoted? isUnquotedFlag token = fromMaybe False $ do str <- getLeadingUnquotedString token return $ "-" `isPrefixOf` str -- getGnuOpts "erd:u:" will parse a list of arguments tokens like `read` -- -re -d : -u 3 bar -- into -- Just [("r", (-re, -re)), ("e", (-re, -re)), ("d", (-d,:)), ("u", (-u,3)), ("", (bar,bar))] -- -- Each string flag maps to a tuple of (flag, argument), where argument=flag if it -- doesn't take a specific one. -- -- Any unrecognized flag will result in Nothing. The exception is if arbitraryLongOpts -- is set, in which case --anything will map to "anything". getGnuOpts :: String -> [Token] -> Maybe [(String, (Token, Token))] getGnuOpts str args = getOpts (True, False) str [] args -- As above, except the first non-arg string will treat the rest as arguments getBsdOpts :: String -> [Token] -> Maybe [(String, (Token, Token))] getBsdOpts str args = getOpts (False, False) str [] args -- Tests for this are in Commands.hs where it's more frequently used getOpts :: -- Behavioral config: gnu style, allow arbitrary long options (Bool, Bool) -- A getopts style string -> String -- List of long options and whether they take arguments -> [(String, Bool)] -- List of arguments (excluding command) -> [Token] -- List of flags to tuple of (optionToken, valueToken) -> Maybe [(String, (Token, Token))] getOpts (gnu, arbitraryLongOpts) string longopts args = process args where flagList (c:':':rest) = ([c], True) : flagList rest flagList (c:rest) = ([c], False) : flagList rest flagList [] = longopts flagMap = Map.fromList $ ("", False) : flagList string process [] = return [] process (token:rest) = do case getLiteralStringDef "\0" token of '-':'-':[] -> return $ listToArgs rest '-':'-':word -> do let (name, arg) = span (/= '=') word needsArg <- if arbitraryLongOpts then return $ Map.findWithDefault False name flagMap else Map.lookup name flagMap if needsArg && null arg then case rest of (arg:rest2) -> do more <- process rest2 return $ (name, (token, arg)) : more _ -> fail "Missing arg" else do more <- process rest -- Consider splitting up token to get arg return $ (name, (token, token)) : more '-':opts -> shortToOpts opts token rest arg -> if gnu then do more <- process rest return $ ("", (token, token)):more else return $ listToArgs (token:rest) shortToOpts opts token args = case opts of c:rest -> do needsArg <- Map.lookup [c] flagMap case () of _ | needsArg && null rest -> do (next:restArgs) <- return args more <- process restArgs return $ ([c], (token, next)):more _ | needsArg -> do more <- process args return $ ([c], (token, token)):more _ -> do more <- shortToOpts rest token args return $ ([c], (token, token)):more [] -> process args listToArgs = map (\x -> ("", (x, x))) -- Is this an expansion of multiple items of an array? isArrayExpansion (T_DollarBraced _ _ l) = let string = concat $ oversimplify l in "@" `isPrefixOf` string || not ("#" `isPrefixOf` string) && "[@]" `isInfixOf` string isArrayExpansion _ = False -- Is it possible that this arg becomes multiple args? mayBecomeMultipleArgs t = willBecomeMultipleArgs t || f t where f (T_DollarBraced _ _ l) = let string = concat $ oversimplify l in "!" `isPrefixOf` string f (T_DoubleQuoted _ parts) = any f parts f (T_NormalWord _ parts) = any f parts f _ = False -- Is it certain that this word will becomes multiple words? willBecomeMultipleArgs t = willConcatInAssignment t || f t where f T_Extglob {} = True f T_Glob {} = True f T_BraceExpansion {} = True f (T_DoubleQuoted _ parts) = any f parts f (T_NormalWord _ parts) = any f parts f _ = False -- This does token cause implicit concatenation in assignments? willConcatInAssignment token = case token of t@T_DollarBraced {} -> isArrayExpansion t (T_DoubleQuoted _ parts) -> any willConcatInAssignment parts (T_NormalWord _ parts) -> any willConcatInAssignment parts _ -> False -- Maybe get the literal string corresponding to this token getLiteralString :: Token -> Maybe String getLiteralString = getLiteralStringExt (const Nothing) -- Definitely get a literal string, with a given default for all non-literals getLiteralStringDef :: String -> Token -> String getLiteralStringDef x = runIdentity . getLiteralStringExt (const $ return x) -- Definitely get a literal string, skipping over all non-literals onlyLiteralString :: Token -> String onlyLiteralString = getLiteralStringDef "" -- Maybe get a literal string, but only if it's an unquoted argument. getUnquotedLiteral (T_NormalWord _ list) = concat <$> mapM str list where str (T_Literal _ s) = return s str _ = Nothing getUnquotedLiteral _ = Nothing -- Get the last unquoted T_Literal in a word like "${var}foo"THIS -- or nothing if the word does not end in an unquoted literal. getTrailingUnquotedLiteral :: Token -> Maybe Token getTrailingUnquotedLiteral t = case t of (T_NormalWord _ list@(_:_)) -> from (last list) _ -> Nothing where from t = case t of T_Literal {} -> return t _ -> Nothing -- Get the leading, unquoted, literal string of a token (if any). getLeadingUnquotedString :: Token -> Maybe String getLeadingUnquotedString t = case t of T_NormalWord _ ((T_Literal _ s) : _) -> return s _ -> Nothing -- Maybe get the literal string of this token and any globs in it. getGlobOrLiteralString = getLiteralStringExt f where f (T_Glob _ str) = return str f _ = Nothing -- Maybe get the literal value of a token, using a custom function -- to map unrecognized Tokens into strings. getLiteralStringExt :: Monad m => (Token -> m String) -> Token -> m String getLiteralStringExt more = g where allInList = fmap concat . mapM g g (T_DoubleQuoted _ l) = allInList l g (T_DollarDoubleQuoted _ l) = allInList l g (T_NormalWord _ l) = allInList l g (TA_Expansion _ l) = allInList l g (T_SingleQuoted _ s) = return s g (T_Literal _ s) = return s g (T_ParamSubSpecialChar _ s) = return s g (T_DollarSingleQuoted _ s) = return $ decodeEscapes s g x = more x -- Bash style $'..' decoding decodeEscapes ('\\':c:cs) = case c of 'a' -> '\a' : rest 'b' -> '\b' : rest 'e' -> '\x1B' : rest 'f' -> '\f' : rest 'n' -> '\n' : rest 'r' -> '\r' : rest 't' -> '\t' : rest 'v' -> '\v' : rest '\'' -> '\'' : rest '"' -> '"' : rest '\\' -> '\\' : rest 'x' -> case cs of (x:y:more) -> if isHexDigit x && isHexDigit y then chr (16*(digitToInt x) + (digitToInt y)) : rest else '\\':c:rest _ | isOctDigit c -> let digits = take 3 $ takeWhile isOctDigit (c:cs) num = parseOct digits in (if num < 256 then chr num else '?') : rest _ -> '\\' : c : rest where rest = decodeEscapes cs parseOct = f 0 where f n "" = n f n (c:rest) = f (n * 8 + digitToInt c) rest decodeEscapes (c:cs) = c : decodeEscapes cs decodeEscapes [] = [] -- Is this token a string literal? isLiteral t = isJust $ getLiteralString t -- Turn a NormalWord like foo="bar $baz" into a series of constituent elements like [foo=,bar ,$baz] getWordParts (T_NormalWord _ l) = concatMap getWordParts l getWordParts (T_DoubleQuoted _ l) = l -- TA_Expansion is basically T_NormalWord for arithmetic expressions getWordParts (TA_Expansion _ l) = concatMap getWordParts l getWordParts other = [other] -- Return a list of NormalWords that would result from brace expansion braceExpand (T_NormalWord id list) = take 1000 $ do items <- mapM part list return $ T_NormalWord id items where part (T_BraceExpansion id items) = do item <- items braceExpand item part x = return x -- Maybe get a SimpleCommand from immediate wrappers like T_Redirections getCommand t = case t of T_Redirecting _ _ w -> getCommand w T_SimpleCommand _ _ (w:_) -> return t T_Annotation _ _ t -> getCommand t _ -> Nothing -- Maybe get the command name string of a token representing a command getCommandName :: Token -> Maybe String getCommandName = fst . getCommandNameAndToken False -- Maybe get the name+arguments of a command. getCommandArgv t = do (T_SimpleCommand _ _ args@(_:_)) <- getCommand t return args -- Get the command name token from a command, i.e. -- the token representing 'ls' in 'ls -la 2> foo'. -- If it can't be determined, return the original token. getCommandTokenOrThis = snd . getCommandNameAndToken False -- Given a command, get the string and token that represents the command name. -- If direct, return the actual command (e.g. exec in 'exec ls') -- If not, return the logical command (e.g. 'ls' in 'exec ls') getCommandNameAndToken :: Bool -> Token -> (Maybe String, Token) getCommandNameAndToken direct t = fromMaybe (Nothing, t) $ do cmd@(T_SimpleCommand _ _ (w:rest)) <- getCommand t s <- getLiteralString w return $ fromMaybe (Just s, w) $ do guard $ not direct actual <- getEffectiveCommandToken s cmd rest return (getLiteralString actual, actual) where getEffectiveCommandToken str cmd args = let firstArg = do arg <- listToMaybe args guard . not $ isFlag arg return arg in case str of "busybox" -> firstArg "builtin" -> firstArg "command" -> firstArg "run" -> firstArg -- Used by bats "exec" -> do opts <- getBsdOpts "cla:" args (_, (t, _)) <- listToMaybe $ filter (null . fst) opts return t _ -> fail "" -- If a command substitution is a single command, get its name. -- $(date +%s) = Just "date" getCommandNameFromExpansion :: Token -> Maybe String getCommandNameFromExpansion t = case t of T_DollarExpansion _ [c] -> extract c T_Backticked _ [c] -> extract c T_DollarBraceCommandExpansion _ [c] -> extract c _ -> Nothing where extract (T_Pipeline _ _ [cmd]) = getCommandName cmd extract _ = Nothing -- Get the basename of a token representing a command getCommandBasename = fmap basename . getCommandName basename = reverse . takeWhile (/= '/') . reverse isAssignment t = case t of T_Redirecting _ _ w -> isAssignment w T_SimpleCommand _ (w:_) [] -> True T_Assignment {} -> True T_Annotation _ _ w -> isAssignment w _ -> False isOnlyRedirection t = case t of T_Pipeline _ _ [x] -> isOnlyRedirection x T_Annotation _ _ w -> isOnlyRedirection w T_Redirecting _ (_:_) c -> isOnlyRedirection c T_SimpleCommand _ [] [] -> True _ -> False isFunction t = case t of T_Function {} -> True; _ -> False -- Bats tests are functions for the purpose of 'local' and such isFunctionLike t = case t of T_Function {} -> True T_BatsTest {} -> True _ -> False isBraceExpansion t = case t of T_BraceExpansion {} -> True; _ -> False -- Get the lists of commands from tokens that contain them, such as -- the conditions and bodies of while loops or branches of if statements. getCommandSequences :: Token -> [[Token]] getCommandSequences t = case t of T_Script _ _ cmds -> [cmds] T_BraceGroup _ cmds -> [cmds] T_Subshell _ cmds -> [cmds] T_WhileExpression _ cond cmds -> [cond, cmds] T_UntilExpression _ cond cmds -> [cond, cmds] T_ForIn _ _ _ cmds -> [cmds] T_ForArithmetic _ _ _ _ cmds -> [cmds] T_IfExpression _ thens elses -> (concatMap (\(a,b) -> [a,b]) thens) ++ [elses] T_Annotation _ _ t -> getCommandSequences t T_DollarExpansion _ cmds -> [cmds] T_DollarBraceCommandExpansion _ cmds -> [cmds] T_Backticked _ cmds -> [cmds] _ -> [] -- Get a list of names of associative arrays getAssociativeArrays t = nub . execWriter $ doAnalysis f t where f :: Token -> Writer [String] () f t@T_SimpleCommand {} = sequence_ $ do name <- getCommandName t let assocNames = ["declare","local","typeset"] guard $ name `elem` assocNames let flags = getAllFlags t guard $ "A" `elem` map snd flags let args = [arg | (arg, "") <- flags] let names = mapMaybe (getLiteralStringExt nameAssignments) args return $ tell names f _ = return () nameAssignments t = case t of T_Assignment _ _ name _ _ -> return name _ -> Nothing -- A Pseudoglob is a wildcard pattern used for checking if a match can succeed. -- For example, [[ $(cmd).jpg == [a-z] ]] will give the patterns *.jpg and ?, which -- can be proven never to match. data PseudoGlob = PGAny | PGMany | PGChar Char deriving (Eq, Show) -- Turn a word into a PG pattern, replacing all unknown/runtime values with -- PGMany. wordToPseudoGlob :: Token -> [PseudoGlob] wordToPseudoGlob = fromMaybe [PGMany] . wordToPseudoGlob' False -- Turn a word into a PG pattern, but only if we can preserve -- exact semantics. wordToExactPseudoGlob :: Token -> Maybe [PseudoGlob] wordToExactPseudoGlob = wordToPseudoGlob' True wordToPseudoGlob' :: Bool -> Token -> Maybe [PseudoGlob] wordToPseudoGlob' exact word = simplifyPseudoGlob <$> toGlob word where toGlob :: Token -> Maybe [PseudoGlob] toGlob word = case word of T_NormalWord _ (T_Literal _ ('~':str):rest) -> do guard $ not exact let this = (PGMany : (map PGChar $ dropWhile (/= '/') str)) tail <- concat <$> (mapM f $ concatMap getWordParts rest) return $ this ++ tail _ -> concat <$> (mapM f $ getWordParts word) f x = case x of T_Literal _ s -> return $ map PGChar s T_SingleQuoted _ s -> return $ map PGChar s T_Glob _ "?" -> return [PGAny] T_Glob _ "*" -> return [PGMany] T_Glob _ ('[':_) | not exact -> return [PGAny] _ -> if exact then fail "" else return [PGMany] -- Reorder a PseudoGlob for more efficient matching, e.g. -- f?*?**g -> f??*g simplifyPseudoGlob :: [PseudoGlob] -> [PseudoGlob] simplifyPseudoGlob = f where f [] = [] f (x@(PGChar _) : rest ) = x : f rest f list = let (anys, rest) = span (\x -> x == PGMany || x == PGAny) list in order anys ++ f rest order s = let (any, many) = partition (== PGAny) s in any ++ take 1 many -- Check whether the two patterns can ever overlap. pseudoGlobsCanOverlap :: [PseudoGlob] -> [PseudoGlob] -> Bool pseudoGlobsCanOverlap = matchable where matchable x@(xf:xs) y@(yf:ys) = case (xf, yf) of (PGMany, _) -> matchable x ys || matchable xs y (_, PGMany) -> matchable x ys || matchable xs y (PGAny, _) -> matchable xs ys (_, PGAny) -> matchable xs ys (_, _) -> xf == yf && matchable xs ys matchable [] [] = True matchable (PGMany : rest) [] = matchable rest [] matchable (_:_) [] = False matchable [] r = matchable r [] -- Check whether the first pattern always overlaps the second. pseudoGlobIsSuperSetof :: [PseudoGlob] -> [PseudoGlob] -> Bool pseudoGlobIsSuperSetof = matchable where matchable x@(xf:xs) y@(yf:ys) = case (xf, yf) of (PGMany, PGMany) -> matchable x ys (PGMany, _) -> matchable x ys || matchable xs y (_, PGMany) -> False (PGAny, _) -> matchable xs ys (_, PGAny) -> False (_, _) -> xf == yf && matchable xs ys matchable [] [] = True matchable (PGMany : rest) [] = matchable rest [] matchable _ _ = False wordsCanBeEqual x y = pseudoGlobsCanOverlap (wordToPseudoGlob x) (wordToPseudoGlob y) -- Is this an expansion that can be quoted, -- e.g. $(foo) `foo` $foo (but not {foo,})? isQuoteableExpansion t = case t of T_DollarBraced {} -> True _ -> isCommandSubstitution t isCommandSubstitution t = case t of T_DollarExpansion {} -> True T_DollarBraceCommandExpansion {} -> True T_Backticked {} -> True _ -> False -- Is this an expansion that results in a simple string? isStringExpansion t = isCommandSubstitution t || case t of T_DollarArithmetic {} -> True T_DollarBraced {} -> not (isArrayExpansion t) _ -> False -- Is this a T_Annotation that ignores a specific code? isAnnotationIgnoringCode code t = case t of T_Annotation _ anns _ -> any hasNum anns _ -> False where hasNum (DisableComment from to) = code >= from && code < to hasNum _ = False