Parse the contents of unquoted here documents

2025-10-17 12:29:30 +08:00 · 2013-08-31 17:03:15 -07:00
parent de1fa61560
commit bb49cf8e65
3 changed files with 62 additions and 37 deletions
--- a/ShellCheck/AST.hs
+++ b/ShellCheck/AST.hs
@@ -23,6 +23,9 @@ import qualified Text.Regex as Re
 data Id = Id Int deriving (Show, Eq, Ord)
 data Quoted = Quoted | Unquoted deriving (Show, Eq)
 data Dashed = Dashed | Undashed deriving (Show, Eq)
 data Token =
    TA_Base Id String Token
    | TA_Binary Id String Token Token
@@ -80,7 +83,7 @@ data Token =
    | T_GREATAND Id
    | T_Glob Id String
    | T_Greater Id
-    | T_HereDoc Id Bool Bool String String
+    | T_HereDoc Id Dashed Quoted String [Token]
    | T_HereString Id Token
    | T_If Id
    | T_IfExpression Id [([Token],[Token])] [Token]
@@ -208,6 +211,7 @@ analyze f g i t =
    delve (T_Condition id typ token) = d1 token $ T_Condition id typ
    delve (T_Extglob id str l) = dl l $ T_Extglob id str
    delve (T_DollarBraced id op) = d1 op $ T_DollarBraced id
    delve (T_HereDoc id d q str l) = dl l $ T_HereDoc id d q str
    delve (TC_And id typ str t1 t2) = d2 t1 t2 $ TC_And id typ str
    delve (TC_Or id typ str t1 t2) = d2 t1 t2 $ TC_Or id typ str
--- a/ShellCheck/Analytics.hs
+++ b/ShellCheck/Analytics.hs
@@ -549,6 +549,7 @@ prop_checkUnquotedExpansions3a= verifyTree checkUnquotedExpansions "[ ! $(foo) ]
 prop_checkUnquotedExpansions4 = verifyNotTree checkUnquotedExpansions "[[ $(foo) == cow ]]"
 prop_checkUnquotedExpansions5 = verifyNotTree checkUnquotedExpansions "for f in $(cmd); do echo $f; done"
 prop_checkUnquotedExpansions6 = verifyNotTree checkUnquotedExpansions "$(cmd)"
 prop_checkUnquotedExpansions7 = verifyNotTree checkUnquotedExpansions "cat << foo\n$(ls)\nfoo"
 checkUnquotedExpansions t tree =
    check t
  where
@@ -870,6 +871,7 @@ inUnquotableContext tree t =
        T_Redirecting _ _ _ -> or $ map (isCommand t) ["local", "declare"]
        T_DoubleQuoted _ _ -> True
        T_CaseExpression _ _ _ -> True
        T_HereDoc _ _ _ _ _ -> True
        T_ForIn _ _ _ _ -> True -- Pragmatically assume it's desirable here
        x -> case Map.lookup (getId x) tree of
                Nothing -> False
@@ -1296,8 +1298,8 @@ checkSshHereDoc (T_Redirecting _ redirs cmd)
    mapM_ checkHereDoc redirs
  where
    hasVariables = mkRegex "[`$]"
-    checkHereDoc (T_FdRedirect _ _ (T_HereDoc id _ False token str))
+    checkHereDoc (T_FdRedirect _ _ (T_HereDoc id _ Unquoted token tokens))
-        | isJust $ matchRegex hasVariables str =
+        | not (all isConstant tokens) =
        warn id $ "Quote '" ++ token ++ "' to make here document expansions happen on the server side rather than on the client."
    checkHereDoc _ = return ()
 checkSshHereDoc _ = return ()
--- a/ShellCheck/Parser.hs
+++ b/ShellCheck/Parser.hs
@@ -681,24 +681,24 @@ readBackTicked = called "backtick expansion" $ do
    subStart <- getPosition
    subString <- readGenericLiteral (char '`')
    char '`'
    -- Result positions may be off due to escapes
    result <- subParse subStart readCompoundList (unEscape subString)
    return $ T_Backticked id result
  where
    -- Position may be off due to escapes
    subParse pos parser input = do
        lastPosition <- getPosition
        lastInput <- getInput
        setPosition pos
        setInput input
        result <- parser
        setInput lastInput
        setPosition lastPosition
        return result
    unEscape [] = []
    unEscape ('\\':x:rest) | x `elem` "\"$`\\" = x : unEscape rest
    unEscape ('\\':'\n':rest) = unEscape rest
    unEscape (c:rest) = c : unEscape rest
 subParse pos parser input = do
    lastPosition <- getPosition
    lastInput <- getInput
    setPosition pos
    setInput input
    result <- parser
    setInput lastInput
    setPosition lastPosition
    return result
 prop_readDoubleQuoted = isOk readDoubleQuoted "\"Hello $FOO\""
 prop_readDoubleQuoted2 = isOk readDoubleQuoted "\"$'\""
@@ -969,7 +969,7 @@ readHereDoc = called "here document" $ do
    fid <- getNextId
    pos <- getPosition
    try $ string "<<"
-    dashed <- (char '-' >> return True) <|> return False
+    dashed <- (char '-' >> return Dashed) <|> return Undashed
    tokenPosition <- getPosition
    sp <- spacing
    optional $ do
@@ -977,37 +977,56 @@ readHereDoc = called "here document" $ do
        let message = "Shells are space sensitive. Use '< <(cmd)', not '<<" ++ sp ++ "(cmd)'."
        parseProblemAt pos ErrorC message
    hid <- getNextId
-    (quoted, endToken) <- (readNormalLiteral "" >>= (\x -> return (False, stripLiteral x)) )
+    (quoted, endToken) <- (readNormalLiteral "" >>= (\x -> return (Unquoted, stripLiteral x)) )
-                            <|> (readDoubleQuotedLiteral >>= return . (\x -> (True, stripLiteral x)))
+                            <|> (readDoubleQuotedLiteral >>= return . (\x -> (Quoted, stripLiteral x)))
-                            <|> (readSingleQuotedLiteral >>= return . (\x -> (True, x)))
+                            <|> (readSingleQuotedLiteral >>= return . (\x -> (Quoted, x)))
    spacing
-    hereInfo <- anyChar `reluctantlyTill` (linefeed >> spacing >> (string endToken) >> (disregard whitespace <|> eof))
+    startPos <- getPosition
    hereData <- anyChar `reluctantlyTill` (linefeed >> spacing >> (string endToken) >> (disregard whitespace <|> eof))
    do
        linefeed
        spaces <- spacing
-        verifyHereDoc dashed quoted spaces hereInfo
+        verifyHereDoc dashed quoted spaces hereData
-        token <- string endToken
+        string endToken
-        return $ T_FdRedirect fid "" $ T_HereDoc hid dashed quoted endToken hereInfo
+        parsedData <- parseHereData quoted startPos hereData
-     `attempting` (eof >> debugHereDoc tokenPosition endToken hereInfo)
+        return $ T_FdRedirect fid "" $ T_HereDoc hid dashed quoted endToken parsedData
     `attempting` (eof >> debugHereDoc tokenPosition endToken hereData)
-verifyHereDoc dashed quoted spacing hereInfo = do
+  where
-    when (not dashed && spacing /= "") $ parseNote ErrorC "Use <<- instead of << if you want to indent the end token."
+    parseHereData Quoted startPos hereData = do
-    when (dashed && filter (/= '\t') spacing /= "" ) $ parseNote ErrorC "When using <<-, you can only indent with tabs."
+        id <- getNextIdAt startPos
-    return ()
+        return $ [T_Literal id hereData]
-debugHereDoc pos endToken doc =
+    parseHereData Unquoted startPos hereData = do
-    if endToken `isInfixOf` doc
+        subParse startPos readHereData hereData
-        then
+
-              let lookAt line = when (endToken `isInfixOf` line) $
+    readHereData = many $ readNormalDollar <|> readBackTicked <|> readHereLiteral
-                        parseProblemAt pos ErrorC ("Close matches include '" ++ line ++ "' (!= '" ++ endToken ++ "').")
+
-              in do
+    readHereLiteral = do
-                    parseProblemAt pos ErrorC ("Found '" ++ endToken ++ "' further down, but not entirely by itself.")
+        id <- getNextId
-                    mapM_ lookAt (lines doc)
+        chars <- anyChar `reluctantlyTill1` oneOf "`$"
-        else if (map toLower endToken) `isInfixOf` (map toLower doc)
+        return $ T_Literal id chars
-            then parseProblemAt pos ErrorC ("Found " ++ endToken ++ " further down, but with wrong casing.")
+
-            else parseProblemAt pos ErrorC ("Couldn't find end token `" ++ endToken ++ "' in the here document.")
+    verifyHereDoc dashed quoted spacing hereInfo = do
        when (dashed == Undashed && spacing /= "") $
            parseNote ErrorC "Use <<- instead of << if you want to indent the end token."
        when (dashed == Dashed && filter (/= '\t') spacing /= "" ) $
            parseNote ErrorC "When using <<-, you can only indent with tabs."
        return ()
    debugHereDoc pos endToken doc =
        if endToken `isInfixOf` doc
            then
                  let lookAt line = when (endToken `isInfixOf` line) $
                            parseProblemAt pos ErrorC ("Close matches include '" ++ line ++ "' (!= '" ++ endToken ++ "').")
                  in do
                        parseProblemAt pos ErrorC ("Found '" ++ endToken ++ "' further down, but not entirely by itself.")
                        mapM_ lookAt (lines doc)
            else if (map toLower endToken) `isInfixOf` (map toLower doc)
                then parseProblemAt pos ErrorC ("Found " ++ endToken ++ " further down, but with wrong casing.")
                else parseProblemAt pos ErrorC ("Couldn't find end token `" ++ endToken ++ "' in the here document.")
 readFilename = readNormalWord