Parse the contents of unquoted here documents

This commit is contained in:
Vidar Holen 2013-08-31 17:03:15 -07:00
parent de1fa61560
commit bb49cf8e65
3 changed files with 62 additions and 37 deletions

View File

@ -23,6 +23,9 @@ import qualified Text.Regex as Re
data Id = Id Int deriving (Show, Eq, Ord) data Id = Id Int deriving (Show, Eq, Ord)
data Quoted = Quoted | Unquoted deriving (Show, Eq)
data Dashed = Dashed | Undashed deriving (Show, Eq)
data Token = data Token =
TA_Base Id String Token TA_Base Id String Token
| TA_Binary Id String Token Token | TA_Binary Id String Token Token
@ -80,7 +83,7 @@ data Token =
| T_GREATAND Id | T_GREATAND Id
| T_Glob Id String | T_Glob Id String
| T_Greater Id | T_Greater Id
| T_HereDoc Id Bool Bool String String | T_HereDoc Id Dashed Quoted String [Token]
| T_HereString Id Token | T_HereString Id Token
| T_If Id | T_If Id
| T_IfExpression Id [([Token],[Token])] [Token] | T_IfExpression Id [([Token],[Token])] [Token]
@ -208,6 +211,7 @@ analyze f g i t =
delve (T_Condition id typ token) = d1 token $ T_Condition id typ delve (T_Condition id typ token) = d1 token $ T_Condition id typ
delve (T_Extglob id str l) = dl l $ T_Extglob id str delve (T_Extglob id str l) = dl l $ T_Extglob id str
delve (T_DollarBraced id op) = d1 op $ T_DollarBraced id delve (T_DollarBraced id op) = d1 op $ T_DollarBraced id
delve (T_HereDoc id d q str l) = dl l $ T_HereDoc id d q str
delve (TC_And id typ str t1 t2) = d2 t1 t2 $ TC_And id typ str delve (TC_And id typ str t1 t2) = d2 t1 t2 $ TC_And id typ str
delve (TC_Or id typ str t1 t2) = d2 t1 t2 $ TC_Or id typ str delve (TC_Or id typ str t1 t2) = d2 t1 t2 $ TC_Or id typ str

View File

@ -549,6 +549,7 @@ prop_checkUnquotedExpansions3a= verifyTree checkUnquotedExpansions "[ ! $(foo) ]
prop_checkUnquotedExpansions4 = verifyNotTree checkUnquotedExpansions "[[ $(foo) == cow ]]" prop_checkUnquotedExpansions4 = verifyNotTree checkUnquotedExpansions "[[ $(foo) == cow ]]"
prop_checkUnquotedExpansions5 = verifyNotTree checkUnquotedExpansions "for f in $(cmd); do echo $f; done" prop_checkUnquotedExpansions5 = verifyNotTree checkUnquotedExpansions "for f in $(cmd); do echo $f; done"
prop_checkUnquotedExpansions6 = verifyNotTree checkUnquotedExpansions "$(cmd)" prop_checkUnquotedExpansions6 = verifyNotTree checkUnquotedExpansions "$(cmd)"
prop_checkUnquotedExpansions7 = verifyNotTree checkUnquotedExpansions "cat << foo\n$(ls)\nfoo"
checkUnquotedExpansions t tree = checkUnquotedExpansions t tree =
check t check t
where where
@ -870,6 +871,7 @@ inUnquotableContext tree t =
T_Redirecting _ _ _ -> or $ map (isCommand t) ["local", "declare"] T_Redirecting _ _ _ -> or $ map (isCommand t) ["local", "declare"]
T_DoubleQuoted _ _ -> True T_DoubleQuoted _ _ -> True
T_CaseExpression _ _ _ -> True T_CaseExpression _ _ _ -> True
T_HereDoc _ _ _ _ _ -> True
T_ForIn _ _ _ _ -> True -- Pragmatically assume it's desirable here T_ForIn _ _ _ _ -> True -- Pragmatically assume it's desirable here
x -> case Map.lookup (getId x) tree of x -> case Map.lookup (getId x) tree of
Nothing -> False Nothing -> False
@ -1296,8 +1298,8 @@ checkSshHereDoc (T_Redirecting _ redirs cmd)
mapM_ checkHereDoc redirs mapM_ checkHereDoc redirs
where where
hasVariables = mkRegex "[`$]" hasVariables = mkRegex "[`$]"
checkHereDoc (T_FdRedirect _ _ (T_HereDoc id _ False token str)) checkHereDoc (T_FdRedirect _ _ (T_HereDoc id _ Unquoted token tokens))
| isJust $ matchRegex hasVariables str = | not (all isConstant tokens) =
warn id $ "Quote '" ++ token ++ "' to make here document expansions happen on the server side rather than on the client." warn id $ "Quote '" ++ token ++ "' to make here document expansions happen on the server side rather than on the client."
checkHereDoc _ = return () checkHereDoc _ = return ()
checkSshHereDoc _ = return () checkSshHereDoc _ = return ()

View File

@ -681,24 +681,24 @@ readBackTicked = called "backtick expansion" $ do
subStart <- getPosition subStart <- getPosition
subString <- readGenericLiteral (char '`') subString <- readGenericLiteral (char '`')
char '`' char '`'
-- Result positions may be off due to escapes
result <- subParse subStart readCompoundList (unEscape subString) result <- subParse subStart readCompoundList (unEscape subString)
return $ T_Backticked id result return $ T_Backticked id result
where where
-- Position may be off due to escapes
subParse pos parser input = do
lastPosition <- getPosition
lastInput <- getInput
setPosition pos
setInput input
result <- parser
setInput lastInput
setPosition lastPosition
return result
unEscape [] = [] unEscape [] = []
unEscape ('\\':x:rest) | x `elem` "\"$`\\" = x : unEscape rest unEscape ('\\':x:rest) | x `elem` "\"$`\\" = x : unEscape rest
unEscape ('\\':'\n':rest) = unEscape rest unEscape ('\\':'\n':rest) = unEscape rest
unEscape (c:rest) = c : unEscape rest unEscape (c:rest) = c : unEscape rest
subParse pos parser input = do
lastPosition <- getPosition
lastInput <- getInput
setPosition pos
setInput input
result <- parser
setInput lastInput
setPosition lastPosition
return result
prop_readDoubleQuoted = isOk readDoubleQuoted "\"Hello $FOO\"" prop_readDoubleQuoted = isOk readDoubleQuoted "\"Hello $FOO\""
prop_readDoubleQuoted2 = isOk readDoubleQuoted "\"$'\"" prop_readDoubleQuoted2 = isOk readDoubleQuoted "\"$'\""
@ -969,7 +969,7 @@ readHereDoc = called "here document" $ do
fid <- getNextId fid <- getNextId
pos <- getPosition pos <- getPosition
try $ string "<<" try $ string "<<"
dashed <- (char '-' >> return True) <|> return False dashed <- (char '-' >> return Dashed) <|> return Undashed
tokenPosition <- getPosition tokenPosition <- getPosition
sp <- spacing sp <- spacing
optional $ do optional $ do
@ -977,37 +977,56 @@ readHereDoc = called "here document" $ do
let message = "Shells are space sensitive. Use '< <(cmd)', not '<<" ++ sp ++ "(cmd)'." let message = "Shells are space sensitive. Use '< <(cmd)', not '<<" ++ sp ++ "(cmd)'."
parseProblemAt pos ErrorC message parseProblemAt pos ErrorC message
hid <- getNextId hid <- getNextId
(quoted, endToken) <- (readNormalLiteral "" >>= (\x -> return (False, stripLiteral x)) ) (quoted, endToken) <- (readNormalLiteral "" >>= (\x -> return (Unquoted, stripLiteral x)) )
<|> (readDoubleQuotedLiteral >>= return . (\x -> (True, stripLiteral x))) <|> (readDoubleQuotedLiteral >>= return . (\x -> (Quoted, stripLiteral x)))
<|> (readSingleQuotedLiteral >>= return . (\x -> (True, x))) <|> (readSingleQuotedLiteral >>= return . (\x -> (Quoted, x)))
spacing spacing
hereInfo <- anyChar `reluctantlyTill` (linefeed >> spacing >> (string endToken) >> (disregard whitespace <|> eof)) startPos <- getPosition
hereData <- anyChar `reluctantlyTill` (linefeed >> spacing >> (string endToken) >> (disregard whitespace <|> eof))
do do
linefeed linefeed
spaces <- spacing spaces <- spacing
verifyHereDoc dashed quoted spaces hereInfo verifyHereDoc dashed quoted spaces hereData
token <- string endToken string endToken
return $ T_FdRedirect fid "" $ T_HereDoc hid dashed quoted endToken hereInfo parsedData <- parseHereData quoted startPos hereData
`attempting` (eof >> debugHereDoc tokenPosition endToken hereInfo) return $ T_FdRedirect fid "" $ T_HereDoc hid dashed quoted endToken parsedData
`attempting` (eof >> debugHereDoc tokenPosition endToken hereData)
verifyHereDoc dashed quoted spacing hereInfo = do where
when (not dashed && spacing /= "") $ parseNote ErrorC "Use <<- instead of << if you want to indent the end token." parseHereData Quoted startPos hereData = do
when (dashed && filter (/= '\t') spacing /= "" ) $ parseNote ErrorC "When using <<-, you can only indent with tabs." id <- getNextIdAt startPos
return () return $ [T_Literal id hereData]
debugHereDoc pos endToken doc = parseHereData Unquoted startPos hereData = do
if endToken `isInfixOf` doc subParse startPos readHereData hereData
then
let lookAt line = when (endToken `isInfixOf` line) $ readHereData = many $ readNormalDollar <|> readBackTicked <|> readHereLiteral
parseProblemAt pos ErrorC ("Close matches include '" ++ line ++ "' (!= '" ++ endToken ++ "').")
in do readHereLiteral = do
parseProblemAt pos ErrorC ("Found '" ++ endToken ++ "' further down, but not entirely by itself.") id <- getNextId
mapM_ lookAt (lines doc) chars <- anyChar `reluctantlyTill1` oneOf "`$"
else if (map toLower endToken) `isInfixOf` (map toLower doc) return $ T_Literal id chars
then parseProblemAt pos ErrorC ("Found " ++ endToken ++ " further down, but with wrong casing.")
else parseProblemAt pos ErrorC ("Couldn't find end token `" ++ endToken ++ "' in the here document.") verifyHereDoc dashed quoted spacing hereInfo = do
when (dashed == Undashed && spacing /= "") $
parseNote ErrorC "Use <<- instead of << if you want to indent the end token."
when (dashed == Dashed && filter (/= '\t') spacing /= "" ) $
parseNote ErrorC "When using <<-, you can only indent with tabs."
return ()
debugHereDoc pos endToken doc =
if endToken `isInfixOf` doc
then
let lookAt line = when (endToken `isInfixOf` line) $
parseProblemAt pos ErrorC ("Close matches include '" ++ line ++ "' (!= '" ++ endToken ++ "').")
in do
parseProblemAt pos ErrorC ("Found '" ++ endToken ++ "' further down, but not entirely by itself.")
mapM_ lookAt (lines doc)
else if (map toLower endToken) `isInfixOf` (map toLower doc)
then parseProblemAt pos ErrorC ("Found " ++ endToken ++ " further down, but with wrong casing.")
else parseProblemAt pos ErrorC ("Couldn't find end token `" ++ endToken ++ "' in the here document.")
readFilename = readNormalWord readFilename = readNormalWord