Parser help with globs, fixed message for grep foo\*

This commit is contained in:
Vidar Holen 2012-11-29 19:20:44 -08:00
parent 1bc6086aec
commit b718e5f108
3 changed files with 40 additions and 15 deletions

View File

@ -23,7 +23,7 @@ import qualified Text.Regex as Re
data Id = Id Int deriving (Show, Eq, Ord)
data Token = T_AND_IF Id | T_OR_IF Id | T_DSEMI Id | T_Semi Id | T_DLESS Id | T_DGREAT Id | T_LESSAND Id | T_GREATAND Id | T_LESSGREAT Id | T_DLESSDASH Id | T_CLOBBER Id | T_If Id | T_Then Id | T_Else Id | T_Elif Id | T_Fi Id | T_Do Id | T_Done Id | T_Case Id | T_Esac Id | T_While Id | T_Until Id | T_For Id | T_Lbrace Id | T_Rbrace Id | T_Lparen Id | T_Rparen Id | T_Bang Id | T_In Id | T_NEWLINE Id | T_EOF Id | T_Less Id | T_Greater Id | T_SingleQuoted Id String | T_Literal Id String | T_NormalWord Id [Token] | T_DoubleQuoted Id [Token] | T_DollarExpansion Id [Token] | T_DollarBraced Id Token | T_DollarArithmetic Id Token | T_BraceExpansion Id String | T_IoFile Id Token Token | T_HereDoc Id Bool Bool String | T_HereString Id Token | T_FdRedirect Id String Token | T_Assignment Id String Token | T_Array Id [Token] | T_Redirecting Id [Token] Token | T_SimpleCommand Id [Token] [Token] | T_Pipeline Id [Token] | T_Banged Id Token | T_AndIf Id (Token) (Token) | T_OrIf Id (Token) (Token) | T_Backgrounded Id Token | T_IfExpression Id [([Token],[Token])] [Token] | T_Subshell Id [Token] | T_BraceGroup Id [Token] | T_WhileExpression Id [Token] [Token] | T_UntilExpression Id [Token] [Token] | T_ForIn Id String [Token] [Token] | T_CaseExpression Id Token [([Token],[Token])] | T_Function Id String Token | T_Arithmetic Id Token | T_Script Id [Token] | T_Condition Id ConditionType Token | T_Extglob Id String [Token] | TC_And Id ConditionType String Token Token | TC_Or Id ConditionType String Token Token | TC_Group Id ConditionType Token | TC_Binary Id ConditionType String Token Token | TC_Unary Id ConditionType String Token | TC_Noary Id ConditionType Token | TA_Binary Id String Token Token | TA_Unary Id String Token | TA_Sequence Id [Token] | TA_Variable Id String | TA_Trinary Id Token Token Token | TA_Expansion Id Token | TA_Literal Id String | T_Backticked Id String | T_ProcSub Id String [Token]
data Token = T_AND_IF Id | T_OR_IF Id | T_DSEMI Id | T_Semi Id | T_DLESS Id | T_DGREAT Id | T_LESSAND Id | T_GREATAND Id | T_LESSGREAT Id | T_DLESSDASH Id | T_CLOBBER Id | T_If Id | T_Then Id | T_Else Id | T_Elif Id | T_Fi Id | T_Do Id | T_Done Id | T_Case Id | T_Esac Id | T_While Id | T_Until Id | T_For Id | T_Lbrace Id | T_Rbrace Id | T_Lparen Id | T_Rparen Id | T_Bang Id | T_In Id | T_NEWLINE Id | T_EOF Id | T_Less Id | T_Greater Id | T_SingleQuoted Id String | T_Literal Id String | T_NormalWord Id [Token] | T_DoubleQuoted Id [Token] | T_DollarExpansion Id [Token] | T_DollarBraced Id Token | T_DollarArithmetic Id Token | T_BraceExpansion Id String | T_IoFile Id Token Token | T_HereDoc Id Bool Bool String | T_HereString Id Token | T_FdRedirect Id String Token | T_Assignment Id String Token | T_Array Id [Token] | T_Redirecting Id [Token] Token | T_SimpleCommand Id [Token] [Token] | T_Pipeline Id [Token] | T_Banged Id Token | T_AndIf Id (Token) (Token) | T_OrIf Id (Token) (Token) | T_Backgrounded Id Token | T_IfExpression Id [([Token],[Token])] [Token] | T_Subshell Id [Token] | T_BraceGroup Id [Token] | T_WhileExpression Id [Token] [Token] | T_UntilExpression Id [Token] [Token] | T_ForIn Id String [Token] [Token] | T_CaseExpression Id Token [([Token],[Token])] | T_Function Id String Token | T_Arithmetic Id Token | T_Script Id [Token] | T_Condition Id ConditionType Token | T_Extglob Id String [Token] | TC_And Id ConditionType String Token Token | TC_Or Id ConditionType String Token Token | TC_Group Id ConditionType Token | TC_Binary Id ConditionType String Token Token | TC_Unary Id ConditionType String Token | TC_Noary Id ConditionType Token | TA_Binary Id String Token Token | TA_Unary Id String Token | TA_Sequence Id [Token] | TA_Variable Id String | TA_Trinary Id Token Token Token | TA_Expansion Id Token | TA_Literal Id String | T_Backticked Id String | T_ProcSub Id String [Token] | T_Glob Id String
deriving (Show)
@ -210,6 +210,7 @@ getId t = case t of
TA_Expansion id _ -> id
TA_Literal id _ -> id
T_ProcSub id _ _ -> id
T_Glob id _ -> id
blank :: Monad m => Token -> m ()
blank = const $ return ()

View File

@ -89,15 +89,13 @@ willSplit x =
T_DollarExpansion _ _ -> True
T_Backticked _ _ -> True
T_BraceExpansion _ s -> True
T_Glob _ _ -> True
T_Extglob _ _ _ -> True
T_NormalWord _ l -> any willSplit l
T_Literal _ s -> isGlobLiteral s
_ -> False
isGlobLiteral str = any (`elem` str) "*?"
isGlob (T_Extglob _ _ _) = True
isGlob (T_Literal _ s) = isGlobLiteral s
isGlob (T_Glob _ _) = True
isGlob (T_NormalWord _ l) = any isGlob l
isGlob _ = False
@ -571,10 +569,13 @@ checkUuoe = checkCommand "echo" f where
prop_checkTrAZ1 = verify checkTrAZ "tr [a-f] [A-F]"
prop_checkTrAZ2 = verify checkTrAZ "tr 'a-z' 'A-Z'"
prop_checkTrAZ2a= verify checkTrAZ "tr '[a-z]' '[A-Z]'"
prop_checkTrAZ3 = verifyNot checkTrAZ "tr -d '[:lower:]'"
prop_checkTrAZ4 = verifyNot checkTrAZ "ls [a-z]"
checkTrAZ = checkCommand "tr" (mapM_ f)
where
f w | isGlob w = do -- The user will go [ab] -> '[ab]' -> 'ab'. Fixme?
warn (getId w) $ "Quote the parameter to tr to prevent glob expansion."
f word = case getLiteralString word of
Just "a-z" -> info (getId word) "Use '[:lower:]' to support accents and foreign alphabets."
Just "A-Z" -> info (getId word) "Use '[:upper:]' to support accents and foreign alphabets."
@ -603,6 +604,7 @@ prop_checkGrepRe2 = verify checkGrepRe "grep -Ev cow*test *.mp3"
prop_checkGrepRe3 = verify checkGrepRe "grep --regex=*.mp3 file"
prop_checkGrepRe4 = verifyNot checkGrepRe "grep foo *.mp3"
prop_checkGrepRe5 = verifyNot checkGrepRe "grep-v --regex=moo *"
prop_checkGrepRe6 = verifyNot checkGrepRe "grep foo \\*.mp3"
checkGrepRe = checkCommand "grep" f where
-- --regex=*(extglob) doesn't work. Fixme?
skippable (Just s) = not ("--regex=" `isPrefixOf` s) && "-" `isPrefixOf` s
@ -675,6 +677,7 @@ isSpaceful :: (String -> Bool) -> Token -> Bool
isSpaceful spacefulF x =
case x of
T_DollarExpansion _ _ -> True
T_Glob _ _ -> True
T_Extglob _ _ _ -> True
T_Literal _ s -> s `containsAny` globspace
T_SingleQuoted _ s -> s `containsAny` globspace
@ -683,7 +686,7 @@ isSpaceful spacefulF x =
T_DoubleQuoted _ w -> isSpacefulWord spacefulF w
_ -> False
where
globspace = "* \t\n"
globspace = "*? \t\n"
containsAny s chars = any (\c -> c `elem` s) chars
isSpacefulWord :: (String -> Bool) -> [Token] -> Bool

View File

@ -471,7 +471,7 @@ checkPossibleTermination pos [T_Literal _ x] =
checkPossibleTermination _ _ = return ()
readNormalWordPart = readSingleQuoted <|> readDoubleQuoted <|> readExtglob <|> readDollar <|> readBraced <|> readBackTicked <|> readProcSub <|> readNormalLiteral
readNormalWordPart = readSingleQuoted <|> readDoubleQuoted <|> readGlob <|> readDollar <|> readBraced <|> readBackTicked <|> readProcSub <|> readNormalLiteral
readSpacePart = do
id <- getNextId
x <- many1 whitespace
@ -569,8 +569,29 @@ readNormalLiteral = do
s <- many1 readNormalLiteralPart
return $ T_Literal id (concat s)
prop_readGlob1 = isOk readGlob "*"
prop_readGlob2 = isOk readGlob "[^0-9]"
readGlob = readExtglob <|> readSimple <|> readClass <|> readGlobbyLiteral
where
readSimple = do
id <- getNextId
c <- oneOf "*?"
return $ T_Glob id [c]
-- Doesn't handle weird things like [^]a] and [$foo]. fixme?
readClass = try $ do
id <- getNextId
char '['
s <- many1 (letter <|> digit <|> oneOf "^-_:")
char ']'
return $ T_Glob id $ "[" ++ s ++ "]"
readGlobbyLiteral = do
id <- getNextId
c <- extglobStart <|> char '['
return $ T_Literal id [c]
readNormalLiteralPart = do
readNormalEscaped <|> (anyChar `reluctantlyTill1` (quotable <|> extglobStart))
readNormalEscaped <|> (anyChar `reluctantlyTill1` (quotable <|> extglobStart <|> char '['))
readNormalEscaped = do
pos <- getPosition
@ -591,13 +612,13 @@ prop_readExtglob4 = isOk readExtglob "+(foo \\) bar)"
prop_readExtglob5 = isOk readExtglob "+(!(foo *(bar)))"
readExtglob = do
id <- getNextId
c <- extglobStart
( try $ do
char '('
contents <- readExtglobPart `sepBy` (char '|')
char ')'
return $ T_Extglob id [c] contents
) <|> (return $ T_Literal id [c])
c <- try $ do
f <- extglobStart
char '('
return f
contents <- readExtglobPart `sepBy` (char '|')
char ')'
return $ T_Extglob id [c] contents
readExtglobPart = do
id <- getNextId