From b718e5f10857108ab5de7aa13681ef830b5d2c66 Mon Sep 17 00:00:00 2001 From: Vidar Holen Date: Thu, 29 Nov 2012 19:20:44 -0800 Subject: [PATCH] Parser help with globs, fixed message for grep foo\* --- ShellCheck/AST.hs | 3 ++- ShellCheck/Analytics.hs | 13 ++++++++----- ShellCheck/Parser.hs | 39 ++++++++++++++++++++++++++++++--------- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/ShellCheck/AST.hs b/ShellCheck/AST.hs index 4963cf7..5615a07 100644 --- a/ShellCheck/AST.hs +++ b/ShellCheck/AST.hs @@ -23,7 +23,7 @@ import qualified Text.Regex as Re data Id = Id Int deriving (Show, Eq, Ord) -data Token = T_AND_IF Id | T_OR_IF Id | T_DSEMI Id | T_Semi Id | T_DLESS Id | T_DGREAT Id | T_LESSAND Id | T_GREATAND Id | T_LESSGREAT Id | T_DLESSDASH Id | T_CLOBBER Id | T_If Id | T_Then Id | T_Else Id | T_Elif Id | T_Fi Id | T_Do Id | T_Done Id | T_Case Id | T_Esac Id | T_While Id | T_Until Id | T_For Id | T_Lbrace Id | T_Rbrace Id | T_Lparen Id | T_Rparen Id | T_Bang Id | T_In Id | T_NEWLINE Id | T_EOF Id | T_Less Id | T_Greater Id | T_SingleQuoted Id String | T_Literal Id String | T_NormalWord Id [Token] | T_DoubleQuoted Id [Token] | T_DollarExpansion Id [Token] | T_DollarBraced Id Token | T_DollarArithmetic Id Token | T_BraceExpansion Id String | T_IoFile Id Token Token | T_HereDoc Id Bool Bool String | T_HereString Id Token | T_FdRedirect Id String Token | T_Assignment Id String Token | T_Array Id [Token] | T_Redirecting Id [Token] Token | T_SimpleCommand Id [Token] [Token] | T_Pipeline Id [Token] | T_Banged Id Token | T_AndIf Id (Token) (Token) | T_OrIf Id (Token) (Token) | T_Backgrounded Id Token | T_IfExpression Id [([Token],[Token])] [Token] | T_Subshell Id [Token] | T_BraceGroup Id [Token] | T_WhileExpression Id [Token] [Token] | T_UntilExpression Id [Token] [Token] | T_ForIn Id String [Token] [Token] | T_CaseExpression Id Token [([Token],[Token])] | T_Function Id String Token | T_Arithmetic Id Token | T_Script Id [Token] | T_Condition Id ConditionType Token | T_Extglob Id String [Token] | TC_And Id ConditionType String Token Token | TC_Or Id ConditionType String Token Token | TC_Group Id ConditionType Token | TC_Binary Id ConditionType String Token Token | TC_Unary Id ConditionType String Token | TC_Noary Id ConditionType Token | TA_Binary Id String Token Token | TA_Unary Id String Token | TA_Sequence Id [Token] | TA_Variable Id String | TA_Trinary Id Token Token Token | TA_Expansion Id Token | TA_Literal Id String | T_Backticked Id String | T_ProcSub Id String [Token] +data Token = T_AND_IF Id | T_OR_IF Id | T_DSEMI Id | T_Semi Id | T_DLESS Id | T_DGREAT Id | T_LESSAND Id | T_GREATAND Id | T_LESSGREAT Id | T_DLESSDASH Id | T_CLOBBER Id | T_If Id | T_Then Id | T_Else Id | T_Elif Id | T_Fi Id | T_Do Id | T_Done Id | T_Case Id | T_Esac Id | T_While Id | T_Until Id | T_For Id | T_Lbrace Id | T_Rbrace Id | T_Lparen Id | T_Rparen Id | T_Bang Id | T_In Id | T_NEWLINE Id | T_EOF Id | T_Less Id | T_Greater Id | T_SingleQuoted Id String | T_Literal Id String | T_NormalWord Id [Token] | T_DoubleQuoted Id [Token] | T_DollarExpansion Id [Token] | T_DollarBraced Id Token | T_DollarArithmetic Id Token | T_BraceExpansion Id String | T_IoFile Id Token Token | T_HereDoc Id Bool Bool String | T_HereString Id Token | T_FdRedirect Id String Token | T_Assignment Id String Token | T_Array Id [Token] | T_Redirecting Id [Token] Token | T_SimpleCommand Id [Token] [Token] | T_Pipeline Id [Token] | T_Banged Id Token | T_AndIf Id (Token) (Token) | T_OrIf Id (Token) (Token) | T_Backgrounded Id Token | T_IfExpression Id [([Token],[Token])] [Token] | T_Subshell Id [Token] | T_BraceGroup Id [Token] | T_WhileExpression Id [Token] [Token] | T_UntilExpression Id [Token] [Token] | T_ForIn Id String [Token] [Token] | T_CaseExpression Id Token [([Token],[Token])] | T_Function Id String Token | T_Arithmetic Id Token | T_Script Id [Token] | T_Condition Id ConditionType Token | T_Extglob Id String [Token] | TC_And Id ConditionType String Token Token | TC_Or Id ConditionType String Token Token | TC_Group Id ConditionType Token | TC_Binary Id ConditionType String Token Token | TC_Unary Id ConditionType String Token | TC_Noary Id ConditionType Token | TA_Binary Id String Token Token | TA_Unary Id String Token | TA_Sequence Id [Token] | TA_Variable Id String | TA_Trinary Id Token Token Token | TA_Expansion Id Token | TA_Literal Id String | T_Backticked Id String | T_ProcSub Id String [Token] | T_Glob Id String deriving (Show) @@ -210,6 +210,7 @@ getId t = case t of TA_Expansion id _ -> id TA_Literal id _ -> id T_ProcSub id _ _ -> id + T_Glob id _ -> id blank :: Monad m => Token -> m () blank = const $ return () diff --git a/ShellCheck/Analytics.hs b/ShellCheck/Analytics.hs index c475a55..d46d5ca 100644 --- a/ShellCheck/Analytics.hs +++ b/ShellCheck/Analytics.hs @@ -89,15 +89,13 @@ willSplit x = T_DollarExpansion _ _ -> True T_Backticked _ _ -> True T_BraceExpansion _ s -> True + T_Glob _ _ -> True T_Extglob _ _ _ -> True T_NormalWord _ l -> any willSplit l - T_Literal _ s -> isGlobLiteral s _ -> False -isGlobLiteral str = any (`elem` str) "*?" - isGlob (T_Extglob _ _ _) = True -isGlob (T_Literal _ s) = isGlobLiteral s +isGlob (T_Glob _ _) = True isGlob (T_NormalWord _ l) = any isGlob l isGlob _ = False @@ -571,10 +569,13 @@ checkUuoe = checkCommand "echo" f where prop_checkTrAZ1 = verify checkTrAZ "tr [a-f] [A-F]" prop_checkTrAZ2 = verify checkTrAZ "tr 'a-z' 'A-Z'" +prop_checkTrAZ2a= verify checkTrAZ "tr '[a-z]' '[A-Z]'" prop_checkTrAZ3 = verifyNot checkTrAZ "tr -d '[:lower:]'" prop_checkTrAZ4 = verifyNot checkTrAZ "ls [a-z]" checkTrAZ = checkCommand "tr" (mapM_ f) where + f w | isGlob w = do -- The user will go [ab] -> '[ab]' -> 'ab'. Fixme? + warn (getId w) $ "Quote the parameter to tr to prevent glob expansion." f word = case getLiteralString word of Just "a-z" -> info (getId word) "Use '[:lower:]' to support accents and foreign alphabets." Just "A-Z" -> info (getId word) "Use '[:upper:]' to support accents and foreign alphabets." @@ -603,6 +604,7 @@ prop_checkGrepRe2 = verify checkGrepRe "grep -Ev cow*test *.mp3" prop_checkGrepRe3 = verify checkGrepRe "grep --regex=*.mp3 file" prop_checkGrepRe4 = verifyNot checkGrepRe "grep foo *.mp3" prop_checkGrepRe5 = verifyNot checkGrepRe "grep-v --regex=moo *" +prop_checkGrepRe6 = verifyNot checkGrepRe "grep foo \\*.mp3" checkGrepRe = checkCommand "grep" f where -- --regex=*(extglob) doesn't work. Fixme? skippable (Just s) = not ("--regex=" `isPrefixOf` s) && "-" `isPrefixOf` s @@ -675,6 +677,7 @@ isSpaceful :: (String -> Bool) -> Token -> Bool isSpaceful spacefulF x = case x of T_DollarExpansion _ _ -> True + T_Glob _ _ -> True T_Extglob _ _ _ -> True T_Literal _ s -> s `containsAny` globspace T_SingleQuoted _ s -> s `containsAny` globspace @@ -683,7 +686,7 @@ isSpaceful spacefulF x = T_DoubleQuoted _ w -> isSpacefulWord spacefulF w _ -> False where - globspace = "* \t\n" + globspace = "*? \t\n" containsAny s chars = any (\c -> c `elem` s) chars isSpacefulWord :: (String -> Bool) -> [Token] -> Bool diff --git a/ShellCheck/Parser.hs b/ShellCheck/Parser.hs index 3f933b2..27f24c7 100644 --- a/ShellCheck/Parser.hs +++ b/ShellCheck/Parser.hs @@ -471,7 +471,7 @@ checkPossibleTermination pos [T_Literal _ x] = checkPossibleTermination _ _ = return () -readNormalWordPart = readSingleQuoted <|> readDoubleQuoted <|> readExtglob <|> readDollar <|> readBraced <|> readBackTicked <|> readProcSub <|> readNormalLiteral +readNormalWordPart = readSingleQuoted <|> readDoubleQuoted <|> readGlob <|> readDollar <|> readBraced <|> readBackTicked <|> readProcSub <|> readNormalLiteral readSpacePart = do id <- getNextId x <- many1 whitespace @@ -569,8 +569,29 @@ readNormalLiteral = do s <- many1 readNormalLiteralPart return $ T_Literal id (concat s) +prop_readGlob1 = isOk readGlob "*" +prop_readGlob2 = isOk readGlob "[^0-9]" +readGlob = readExtglob <|> readSimple <|> readClass <|> readGlobbyLiteral + where + readSimple = do + id <- getNextId + c <- oneOf "*?" + return $ T_Glob id [c] + -- Doesn't handle weird things like [^]a] and [$foo]. fixme? + readClass = try $ do + id <- getNextId + char '[' + s <- many1 (letter <|> digit <|> oneOf "^-_:") + char ']' + return $ T_Glob id $ "[" ++ s ++ "]" + + readGlobbyLiteral = do + id <- getNextId + c <- extglobStart <|> char '[' + return $ T_Literal id [c] + readNormalLiteralPart = do - readNormalEscaped <|> (anyChar `reluctantlyTill1` (quotable <|> extglobStart)) + readNormalEscaped <|> (anyChar `reluctantlyTill1` (quotable <|> extglobStart <|> char '[')) readNormalEscaped = do pos <- getPosition @@ -591,13 +612,13 @@ prop_readExtglob4 = isOk readExtglob "+(foo \\) bar)" prop_readExtglob5 = isOk readExtglob "+(!(foo *(bar)))" readExtglob = do id <- getNextId - c <- extglobStart - ( try $ do - char '(' - contents <- readExtglobPart `sepBy` (char '|') - char ')' - return $ T_Extglob id [c] contents - ) <|> (return $ T_Literal id [c]) + c <- try $ do + f <- extglobStart + char '(' + return f + contents <- readExtglobPart `sepBy` (char '|') + char ')' + return $ T_Extglob id [c] contents readExtglobPart = do id <- getNextId