Added slightly better ${..} parsing

This commit is contained in:
Vidar Holen 2012-11-27 23:05:39 -08:00
parent 77a3e3b331
commit 0cbbee7b89
3 changed files with 46 additions and 15 deletions

View File

@ -23,7 +23,7 @@ import qualified Text.Regex as Re
data Id = Id Int deriving (Show, Eq, Ord)
data Token = T_AND_IF Id | T_OR_IF Id | T_DSEMI Id | T_Semi Id | T_DLESS Id | T_DGREAT Id | T_LESSAND Id | T_GREATAND Id | T_LESSGREAT Id | T_DLESSDASH Id | T_CLOBBER Id | T_If Id | T_Then Id | T_Else Id | T_Elif Id | T_Fi Id | T_Do Id | T_Done Id | T_Case Id | T_Esac Id | T_While Id | T_Until Id | T_For Id | T_Lbrace Id | T_Rbrace Id | T_Lparen Id | T_Rparen Id | T_Bang Id | T_In Id | T_NEWLINE Id | T_EOF Id | T_Less Id | T_Greater Id | T_SingleQuoted Id String | T_Literal Id String | T_NormalWord Id [Token] | T_DoubleQuoted Id [Token] | T_DollarExpansion Id [Token] | T_DollarBraced Id String | T_DollarArithmetic Id Token | T_BraceExpansion Id String | T_IoFile Id Token Token | T_HereDoc Id Bool Bool String | T_HereString Id Token | T_FdRedirect Id String Token | T_Assignment Id String Token | T_Array Id [Token] | T_Redirecting Id [Token] Token | T_SimpleCommand Id [Token] [Token] | T_Pipeline Id [Token] | T_Banged Id Token | T_AndIf Id (Token) (Token) | T_OrIf Id (Token) (Token) | T_Backgrounded Id Token | T_IfExpression Id [([Token],[Token])] [Token] | T_Subshell Id [Token] | T_BraceGroup Id [Token] | T_WhileExpression Id [Token] [Token] | T_UntilExpression Id [Token] [Token] | T_ForIn Id String [Token] [Token] | T_CaseExpression Id Token [([Token],[Token])] | T_Function Id String Token | T_Arithmetic Id Token | T_Script Id [Token] | T_Condition Id ConditionType Token | T_Extglob Id String [Token] | TC_And Id ConditionType String Token Token | TC_Or Id ConditionType String Token Token | TC_Not Id ConditionType Token | TC_Group Id ConditionType Token | TC_Binary Id ConditionType String Token Token | TC_Unary Id ConditionType String Token | TC_Noary Id ConditionType Token | TA_Binary Id String Token Token | TA_Unary Id String Token | TA_Sequence Id [Token] | TA_Variable Id String | TA_Trinary Id Token Token Token | TA_Expansion Id Token | TA_Literal Id String
data Token = T_AND_IF Id | T_OR_IF Id | T_DSEMI Id | T_Semi Id | T_DLESS Id | T_DGREAT Id | T_LESSAND Id | T_GREATAND Id | T_LESSGREAT Id | T_DLESSDASH Id | T_CLOBBER Id | T_If Id | T_Then Id | T_Else Id | T_Elif Id | T_Fi Id | T_Do Id | T_Done Id | T_Case Id | T_Esac Id | T_While Id | T_Until Id | T_For Id | T_Lbrace Id | T_Rbrace Id | T_Lparen Id | T_Rparen Id | T_Bang Id | T_In Id | T_NEWLINE Id | T_EOF Id | T_Less Id | T_Greater Id | T_SingleQuoted Id String | T_Literal Id String | T_NormalWord Id [Token] | T_DoubleQuoted Id [Token] | T_DollarExpansion Id [Token] | T_DollarBraced Id Token | T_DollarArithmetic Id Token | T_BraceExpansion Id String | T_IoFile Id Token Token | T_HereDoc Id Bool Bool String | T_HereString Id Token | T_FdRedirect Id String Token | T_Assignment Id String Token | T_Array Id [Token] | T_Redirecting Id [Token] Token | T_SimpleCommand Id [Token] [Token] | T_Pipeline Id [Token] | T_Banged Id Token | T_AndIf Id (Token) (Token) | T_OrIf Id (Token) (Token) | T_Backgrounded Id Token | T_IfExpression Id [([Token],[Token])] [Token] | T_Subshell Id [Token] | T_BraceGroup Id [Token] | T_WhileExpression Id [Token] [Token] | T_UntilExpression Id [Token] [Token] | T_ForIn Id String [Token] [Token] | T_CaseExpression Id Token [([Token],[Token])] | T_Function Id String Token | T_Arithmetic Id Token | T_Script Id [Token] | T_Condition Id ConditionType Token | T_Extglob Id String [Token] | TC_And Id ConditionType String Token Token | TC_Or Id ConditionType String Token Token | TC_Not Id ConditionType Token | TC_Group Id ConditionType Token | TC_Binary Id ConditionType String Token Token | TC_Unary Id ConditionType String Token | TC_Noary Id ConditionType Token | TA_Binary Id String Token Token | TA_Unary Id String Token | TA_Sequence Id [Token] | TA_Variable Id String | TA_Trinary Id Token Token Token | TA_Expansion Id Token | TA_Literal Id String
deriving (Show)
@ -107,6 +107,7 @@ analyze f g i t =
delve (T_Function id name body) = d1 body $ T_Function id name
delve (T_Condition id typ token) = d1 token $ T_Condition id typ
delve (T_Extglob id str l) = dl l $ T_Extglob id str
delve (T_DollarBraced id op) = d1 op $ T_DollarBraced id
delve (TC_And id typ str t1 t2) = d2 t1 t2 $ TC_And id typ str
delve (TC_Or id typ str t1 t2) = d2 t1 t2 $ TC_Or id typ str

View File

@ -205,7 +205,8 @@ indexOfSublists sub all = f 0 all
match _ _ = False
isMagicInQuotes (T_DollarBraced _ s) | '@' `elem` s = True
bracedString l = concat $ deadSimple l
isMagicInQuotes (T_DollarBraced _ l) | '@' `elem` (bracedString l) = True
isMagicInQuotes _ = False
prop_checkForInQuoted = verify checkForInQuoted "for f in \"$(ls)\"; do echo foo; done"
@ -272,15 +273,15 @@ checkShorthandIf _ = return ()
prop_checkDollarStar = verify checkDollarStar "for f in $*; do ..; done"
checkDollarStar (T_NormalWord _ [(T_DollarBraced id "*")]) =
checkDollarStar (T_NormalWord _ [(T_DollarBraced id l)]) | (bracedString l) == "*" =
warn id $ "Use \"$@\" (with quotes) to prevent whitespace problems."
checkDollarStar _ = return ()
prop_checkUnquotedDollarAt = verify checkUnquotedDollarAt "ls $@"
prop_checkUnquotedDollarAt2 = verifyNot checkUnquotedDollarAt "ls \"$@\""
checkUnquotedDollarAt (T_NormalWord _ [T_DollarBraced id "@"]) =
err id $ "Add double quotes around $@, otherwise it's just like $* and breaks on spaces."
checkUnquotedDollarAt (T_NormalWord _ [T_DollarBraced id l]) | '@' `elem` (bracedString l) =
err id $ "Add double quotes around ${" ++ (bracedString l) ++ "}, otherwise it's just like $* and breaks on spaces."
checkUnquotedDollarAt _ = return ()
prop_checkStderrRedirect = verify checkStderrRedirect "test 2>&1 > cow"
@ -376,7 +377,7 @@ prop_checkArithmeticDeref = verify checkArithmeticDeref "echo $((3+$foo))"
prop_checkArithmeticDeref2 = verify checkArithmeticDeref "cow=14; (( s+= $cow ))"
prop_checkArithmeticDeref3 = verifyNot checkArithmeticDeref "cow=1/40; (( s+= ${cow%%/*} ))"
prop_checkArithmeticDeref4 = verifyNot checkArithmeticDeref "(( ! $? ))"
checkArithmeticDeref (TA_Expansion _ (T_DollarBraced id str)) | not $ any (`elem` "/.:#%?*@") $ str =
checkArithmeticDeref (TA_Expansion _ (T_DollarBraced id l)) | not $ any (`elem` "/.:#%?*@") $ bracedString l =
style id $ "Don't use $ on variables in (( ))."
checkArithmeticDeref _ = return ()
@ -600,7 +601,7 @@ isSpaceful spacefulF x =
T_Extglob _ _ _ -> True
T_Literal _ s -> s `containsAny` globspace
T_SingleQuoted _ s -> s `containsAny` globspace
T_DollarBraced _ s -> spacefulF $ getBracedReference s
T_DollarBraced _ l -> spacefulF $ getBracedReference $ bracedString l
T_NormalWord _ w -> isSpacefulWord spacefulF w
T_DoubleQuoted _ w -> isSpacefulWord spacefulF w
_ -> False
@ -633,7 +634,7 @@ getBracedReference s = takeWhile (\x -> not $ x `elem` ":[#%/^,") $ dropWhile (=
getReferencedVariables t =
case t of
T_DollarBraced id str -> map (\x -> (id, x)) $ [getBracedReference str]
T_DollarBraced id l -> map (\x -> (id, x)) $ [getBracedReference $ bracedString l]
TA_Variable id str -> [(id,str)]
x -> []

View File

@ -44,6 +44,7 @@ variableChars = upper <|> lower <|> digit <|> oneOf "_"
specialVariable = oneOf "@*#?-$!"
tokenDelimiter = oneOf "&|;<> \t\n"
quotable = oneOf "#|&;<>()$`\\ \"'\t\n"
bracedQuotable = oneOf "}\"$`'"
doubleQuotable = oneOf "\"$`"
whitespace = oneOf " \t\n"
linewhitespace = oneOf " \t"
@ -476,6 +477,18 @@ readSpacePart = do
x <- many1 whitespace
return $ T_Literal id x
readDollarBracedWord = do
id <- getNextId
list <- many readDollarBracedPart
return $ T_NormalWord id list
readDollarBracedPart = readSingleQuoted <|> readDoubleQuoted <|> readExtglob <|> readDollar <|> readBackTicked <|> readDollarBracedLiteral
readDollarBracedLiteral = do
id <- getNextId
vars <- (readBraceEscaped <|> (anyChar >>= \x -> return [x])) `reluctantlyTill1` bracedQuotable
return $ T_Literal id $ concat vars
prop_readSingleQuoted = isOk readSingleQuoted "'foo bar'"
prop_readSingleQuoted2 = isWarning readSingleQuoted "'foo bar\\'"
readSingleQuoted = do
@ -599,6 +612,12 @@ readDoubleEscaped = do
<|> (doubleQuotable >>= return . return)
<|> (anyChar >>= (return . \x -> [bs, x]))
readBraceEscaped = do
bs <- backslash
(linefeed >> return "")
<|> (bracedQuotable >>= return . return)
<|> (anyChar >>= (return . \x -> [bs, x]))
readGenericLiteral endExp = do
strings <- many (readGenericEscaped <|> anyChar `reluctantlyTill1` endExp)
@ -652,15 +671,17 @@ readArithmeticExpression = do
string "))"
return (T_Arithmetic id c)
prop_readDollarBraced = isOk readDollarBraced "${foo//bar/baz}"
prop_readDollarBraced1 = isOk readDollarBraced "${foo//bar/baz}"
prop_readDollarBraced2 = isOk readDollarBraced "${foo/'{cow}'}"
prop_readDollarBraced3 = isOk readDollarBraced "${foo%%$(echo cow})}"
prop_readDollarBraced4 = isOk readDollarBraced "${foo#\\}}"
readDollarBraced = do
id <- getNextId
try (string "${")
-- TODO
str <- readGenericLiteral (char '}')
word <- readDollarBracedWord
char '}' <?> "matching }"
return $ (T_DollarBraced id str)
return $ T_DollarBraced id word
prop_readDollarExpansion = isOk readDollarExpansion "$(echo foo; ls\n)"
readDollarExpansion = do
id <- getNextId
@ -674,7 +695,8 @@ readDollarVariable = do
id <- getNextId
let singleCharred p = do
n <- p
return (T_DollarBraced id [n]) `attempting` do
value <- wrap [n]
return (T_DollarBraced id value) `attempting` do
pos <- getPosition
num <- lookAhead $ many1 p
parseNoteAt pos ErrorC $ "$" ++ (n:num) ++ " is equivalent to ${" ++ [n] ++ "}"++ num ++"."
@ -684,10 +706,17 @@ readDollarVariable = do
let regular = do
name <- readVariableName
return $ T_DollarBraced id (name)
value <- wrap name
return $ T_DollarBraced id value
try $ char '$' >> (positional <|> special <|> regular)
where
wrap s = do
x <- getNextId
y <- getNextId
return $ T_NormalWord x [T_Literal y s]
readVariableName = do
f <- variableStart
rest <- many variableChars