From 3e5ecaa2629f21027f47f905882b4ef90dd27a6b Mon Sep 17 00:00:00 2001 From: Vidar Holen Date: Sun, 26 Jun 2016 14:39:49 -0700 Subject: [PATCH] Parse indices of associative arrays properly --- ShellCheck/AST.hs | 14 ++++++--- ShellCheck/ASTLib.hs | 14 +++++++++ ShellCheck/Analytics.hs | 6 +++- ShellCheck/Parser.hs | 68 +++++++++++++++++++++++++++++++++++------ 4 files changed, 86 insertions(+), 16 deletions(-) diff --git a/ShellCheck/AST.hs b/ShellCheck/AST.hs index 896d6d0..a7a42ba 100644 --- a/ShellCheck/AST.hs +++ b/ShellCheck/AST.hs @@ -21,6 +21,7 @@ module ShellCheck.AST where import Control.Monad import Control.Monad.Identity +import Text.Parsec import qualified ShellCheck.Regex as Re data Id = Id Int deriving (Show, Eq, Ord) @@ -51,6 +52,8 @@ data Token = | T_Arithmetic Id Token | T_Array Id [Token] | T_IndexedElement Id Token Token + -- Store the index as string, and parse as arithmetic or string later + | T_UnparsedIndex Id SourcePos String | T_Assignment Id AssignmentMode String (Maybe Token) Token | T_Backgrounded Id Token | T_Backticked Id [Token] @@ -145,7 +148,7 @@ tokenEquals a b = kludge a == kludge b instance Eq Token where (==) = tokenEquals -analyze :: Monad m => (Token -> m ()) -> (Token -> m ()) -> (Token -> Token) -> Token -> m Token +analyze :: Monad m => (Token -> m ()) -> (Token -> m ()) -> (Token -> m Token) -> Token -> m Token analyze f g i = round where @@ -153,7 +156,7 @@ analyze f g i = f t newT <- delve t g t - return . i $ newT + i newT roundAll = mapM round roundMaybe Nothing = return Nothing @@ -363,10 +366,11 @@ getId t = case t of T_CoProc id _ _ -> id T_CoProcBody id _ -> id T_Include id _ _ -> id + T_UnparsedIndex id _ _ -> id blank :: Monad m => Token -> m () blank = const $ return () -doAnalysis f = analyze f blank id -doStackAnalysis startToken endToken = analyze startToken endToken id -doTransform i = runIdentity . analyze blank blank i +doAnalysis f = analyze f blank (return . id) +doStackAnalysis startToken endToken = analyze startToken endToken (return . id) +doTransform i = runIdentity . analyze blank blank (return . i) diff --git a/ShellCheck/ASTLib.hs b/ShellCheck/ASTLib.hs index c1a64e0..f0c54fe 100644 --- a/ShellCheck/ASTLib.hs +++ b/ShellCheck/ASTLib.hs @@ -21,6 +21,7 @@ module ShellCheck.ASTLib where import ShellCheck.AST +import Control.Monad.Writer import Control.Monad import Data.List import Data.Maybe @@ -251,3 +252,16 @@ getCommandSequences t = T_IfExpression _ thens elses -> map snd thens ++ [elses] otherwise -> [] +getAssociativeArrays t = + nub . execWriter $ doAnalysis f t + where + f :: Token -> Writer [String] () + f t@(T_SimpleCommand {}) = fromMaybe (return ()) $ do + name <- getCommandName t + guard $ name == "declare" + let flags = getAllFlags t + guard $ elem "A" $ map snd flags + let args = map fst . filter ((==) "" . snd) $ flags + let names = mapMaybe getLiteralString args + return $ tell names + f _ = return () diff --git a/ShellCheck/Analytics.hs b/ShellCheck/Analytics.hs index 6ab59ed..1afa9b4 100644 --- a/ShellCheck/Analytics.hs +++ b/ShellCheck/Analytics.hs @@ -1322,7 +1322,9 @@ checkBraceExpansionVars params t@(T_BraceExpansion id list) = mapM_ check list (`isUnqualifiedCommand` "eval") <$> getClosestCommand (parentMap params) t checkBraceExpansionVars _ _ = return () -prop_checkForDecimals = verify checkForDecimals "((3.14*c))" +prop_checkForDecimals1 = verify checkForDecimals "((3.14*c))" +prop_checkForDecimals2 = verify checkForDecimals "foo[1.2]=bar" +prop_checkForDecimals3 = verifyNot checkForDecimals "declare -A foo; foo[1.2]=bar" checkForDecimals params t@(TA_Expansion id _) = potentially $ do guard $ not (hasFloatingPoint params) str <- getLiteralString t @@ -2116,6 +2118,8 @@ prop_checkUnassignedReferences19= verifyNotTree checkUnassignedReferences "reado prop_checkUnassignedReferences20= verifyNotTree checkUnassignedReferences "printf -v foo bar; echo $foo" prop_checkUnassignedReferences21= verifyTree checkUnassignedReferences "echo ${#foo}" prop_checkUnassignedReferences22= verifyNotTree checkUnassignedReferences "echo ${!os*}" +prop_checkUnassignedReferences23= verifyTree checkUnassignedReferences "declare -a foo; foo[bar]=42;" +prop_checkUnassignedReferences24= verifyNotTree checkUnassignedReferences "declare -A foo; foo[bar]=42;" checkUnassignedReferences params t = warnings where (readMap, writeMap) = execState (mapM tally $ variableFlow params) (Map.empty, Map.empty) diff --git a/ShellCheck/Parser.hs b/ShellCheck/Parser.hs index eaf81a8..6f3e1bf 100644 --- a/ShellCheck/Parser.hs +++ b/ShellCheck/Parser.hs @@ -897,6 +897,20 @@ readNormalishWord end = do checkPossibleTermination pos x return $ T_NormalWord id x +readIndexSpan = do + id <- getNextId + x <- many (readNormalWordPart "]" <|> someSpace <|> otherLiteral) + return $ T_NormalWord id x + where + someSpace = do + id <- getNextId + str <- spacing1 + return $ T_Literal id str + otherLiteral = do + id <- getNextId + str <- many1 $ oneOf quotableChars + return $ T_Literal id str + checkPossibleTermination pos [T_Literal _ x] = when (x `elem` ["do", "done", "then", "fi", "esac"]) $ parseProblemAt pos WarningC 1010 $ "Use semicolon or linefeed before '" ++ x ++ "' (or quote to make it literal)." @@ -2251,7 +2265,9 @@ readEvalSuffix = many1 (readIoRedirect <|> readCmdWord <|> evalFallback) -- Get whatever a parser would parse as a string readStringForParser parser = do + state <- Ms.get pos <- lookAhead (parser >> getPosition) + Ms.put state readUntil pos where readUntil endPos = anyChar `reluctantlyTill` (getPosition >>= guard . (== endPos)) @@ -2316,11 +2332,12 @@ readAssignmentWord = try $ do return $ T_Literal id "" readArrayIndex = do + id <- getNextId char '[' - optional space - x <- readArithmeticContents + pos <- getPosition + str <- readStringForParser readIndexSpan char ']' - return x + return $ T_UnparsedIndex id pos str readArray = called "array assignment" $ do id <- getNextId @@ -2477,12 +2494,12 @@ verifyEof = eof <|> choice [ try (lookAhead p) action -prop_readScript1 = isOk readScript "#!/bin/bash\necho hello world\n" -prop_readScript2 = isWarning readScript "#!/bin/bash\r\necho hello world\n" -prop_readScript3 = isWarning readScript "#!/bin/bash\necho hello\xA0world" -prop_readScript4 = isWarning readScript "#!/usr/bin/perl\nfoo=(" -prop_readScript5 = isOk readScript "#!/bin/bash\n#This is an empty script\n\n" -readScript = do +prop_readScript1 = isOk readScriptFile "#!/bin/bash\necho hello world\n" +prop_readScript2 = isWarning readScriptFile "#!/bin/bash\r\necho hello world\n" +prop_readScript3 = isWarning readScriptFile "#!/bin/bash\necho hello\xA0world" +prop_readScript4 = isWarning readScriptFile "#!/usr/bin/perl\nfoo=(" +prop_readScript5 = isOk readScriptFile "#!/bin/bash\n#This is an empty script\n\n" +readScriptFile = do id <- getNextId pos <- getPosition optional $ do @@ -2497,7 +2514,8 @@ readScript = do annotations <- readAnnotations commands <- withAnnotations annotations readCompoundListOrEmpty verifyEof - return $ T_Annotation annotationId annotations $ T_Script id sb commands + let script = T_Annotation annotationId annotations $ T_Script id sb commands + reparseIndices script else do many anyChar return $ T_Script id sb [] @@ -2549,6 +2567,9 @@ readScript = do readUtf8Bom = called "Byte Order Mark" $ string "\xFEFF" +readScript = do + script <- readScriptFile + reparseIndices script isWarning p s = parsesCleanly p s == Just False isOk p s = parsesCleanly p s == Just True @@ -2635,6 +2656,33 @@ parseShell sys name contents = do second (ContextName pos str) = ParseNote pos InfoC 1009 $ "The mentioned parser error was in this " ++ str ++ "." +-- Go over all T_UnparsedIndex and reparse them as either arithmetic or text +-- depending on declare -A statements. +reparseIndices root = + analyze blank blank f root + where + associative = getAssociativeArrays root + isAssociative s = s `elem` associative + f (T_Assignment id mode name (Just (T_UnparsedIndex _ pos src)) value) = do + new <- parsed name pos src + return $ T_Assignment id mode name (Just new) value + f (T_Assignment id mode name Nothing (T_Array id2 words)) = do + newwords <- mapM (fix name) words + return $ T_Assignment id mode name Nothing (T_Array id2 newwords) + f t = return t + + fix name word = + case word of + T_IndexedElement id (T_UnparsedIndex _ pos src) value -> do + new <- parsed name pos src + return $ T_IndexedElement id new value + otherwise -> return word + + parsed name pos src = + if isAssociative name + then subParse pos readIndexSpan src + else subParse pos (optional space >> readArithmeticContents) src + reattachHereDocs root map = doTransform f root where