Parser help with globs, fixed message for grep foo\*
This commit is contained in:
parent
1bc6086aec
commit
b718e5f108
|
@ -23,7 +23,7 @@ import qualified Text.Regex as Re
|
|||
|
||||
-- | Identifier carried as the first field of the 'Token' constructors; a thin wrapper around an 'Int'.
data Id = Id Int deriving (Show, Eq, Ord)
|
||||
|
||||
data Token = T_AND_IF Id | T_OR_IF Id | T_DSEMI Id | T_Semi Id | T_DLESS Id | T_DGREAT Id | T_LESSAND Id | T_GREATAND Id | T_LESSGREAT Id | T_DLESSDASH Id | T_CLOBBER Id | T_If Id | T_Then Id | T_Else Id | T_Elif Id | T_Fi Id | T_Do Id | T_Done Id | T_Case Id | T_Esac Id | T_While Id | T_Until Id | T_For Id | T_Lbrace Id | T_Rbrace Id | T_Lparen Id | T_Rparen Id | T_Bang Id | T_In Id | T_NEWLINE Id | T_EOF Id | T_Less Id | T_Greater Id | T_SingleQuoted Id String | T_Literal Id String | T_NormalWord Id [Token] | T_DoubleQuoted Id [Token] | T_DollarExpansion Id [Token] | T_DollarBraced Id Token | T_DollarArithmetic Id Token | T_BraceExpansion Id String | T_IoFile Id Token Token | T_HereDoc Id Bool Bool String | T_HereString Id Token | T_FdRedirect Id String Token | T_Assignment Id String Token | T_Array Id [Token] | T_Redirecting Id [Token] Token | T_SimpleCommand Id [Token] [Token] | T_Pipeline Id [Token] | T_Banged Id Token | T_AndIf Id (Token) (Token) | T_OrIf Id (Token) (Token) | T_Backgrounded Id Token | T_IfExpression Id [([Token],[Token])] [Token] | T_Subshell Id [Token] | T_BraceGroup Id [Token] | T_WhileExpression Id [Token] [Token] | T_UntilExpression Id [Token] [Token] | T_ForIn Id String [Token] [Token] | T_CaseExpression Id Token [([Token],[Token])] | T_Function Id String Token | T_Arithmetic Id Token | T_Script Id [Token] | T_Condition Id ConditionType Token | T_Extglob Id String [Token] | TC_And Id ConditionType String Token Token | TC_Or Id ConditionType String Token Token | TC_Group Id ConditionType Token | TC_Binary Id ConditionType String Token Token | TC_Unary Id ConditionType String Token | TC_Noary Id ConditionType Token | TA_Binary Id String Token Token | TA_Unary Id String Token | TA_Sequence Id [Token] | TA_Variable Id String | TA_Trinary Id Token Token Token | TA_Expansion Id Token | TA_Literal Id String | T_Backticked Id String | T_ProcSub Id String [Token]
|
||||
data Token = T_AND_IF Id | T_OR_IF Id | T_DSEMI Id | T_Semi Id | T_DLESS Id | T_DGREAT Id | T_LESSAND Id | T_GREATAND Id | T_LESSGREAT Id | T_DLESSDASH Id | T_CLOBBER Id | T_If Id | T_Then Id | T_Else Id | T_Elif Id | T_Fi Id | T_Do Id | T_Done Id | T_Case Id | T_Esac Id | T_While Id | T_Until Id | T_For Id | T_Lbrace Id | T_Rbrace Id | T_Lparen Id | T_Rparen Id | T_Bang Id | T_In Id | T_NEWLINE Id | T_EOF Id | T_Less Id | T_Greater Id | T_SingleQuoted Id String | T_Literal Id String | T_NormalWord Id [Token] | T_DoubleQuoted Id [Token] | T_DollarExpansion Id [Token] | T_DollarBraced Id Token | T_DollarArithmetic Id Token | T_BraceExpansion Id String | T_IoFile Id Token Token | T_HereDoc Id Bool Bool String | T_HereString Id Token | T_FdRedirect Id String Token | T_Assignment Id String Token | T_Array Id [Token] | T_Redirecting Id [Token] Token | T_SimpleCommand Id [Token] [Token] | T_Pipeline Id [Token] | T_Banged Id Token | T_AndIf Id (Token) (Token) | T_OrIf Id (Token) (Token) | T_Backgrounded Id Token | T_IfExpression Id [([Token],[Token])] [Token] | T_Subshell Id [Token] | T_BraceGroup Id [Token] | T_WhileExpression Id [Token] [Token] | T_UntilExpression Id [Token] [Token] | T_ForIn Id String [Token] [Token] | T_CaseExpression Id Token [([Token],[Token])] | T_Function Id String Token | T_Arithmetic Id Token | T_Script Id [Token] | T_Condition Id ConditionType Token | T_Extglob Id String [Token] | TC_And Id ConditionType String Token Token | TC_Or Id ConditionType String Token Token | TC_Group Id ConditionType Token | TC_Binary Id ConditionType String Token Token | TC_Unary Id ConditionType String Token | TC_Noary Id ConditionType Token | TA_Binary Id String Token Token | TA_Unary Id String Token | TA_Sequence Id [Token] | TA_Variable Id String | TA_Trinary Id Token Token Token | TA_Expansion Id Token | TA_Literal Id String | T_Backticked Id String | T_ProcSub Id String [Token] | T_Glob Id String
|
||||
|
||||
deriving (Show)
|
||||
|
||||
|
@ -210,6 +210,7 @@ getId t = case t of
|
|||
TA_Expansion id _ -> id
|
||||
TA_Literal id _ -> id
|
||||
T_ProcSub id _ _ -> id
|
||||
T_Glob id _ -> id
|
||||
|
||||
-- | Ignore the given token and do nothing, in whatever monad the caller uses.
blank :: Monad m => Token -> m ()
blank _ = return ()
|
||||
|
|
|
@ -89,15 +89,13 @@ willSplit x =
|
|||
T_DollarExpansion _ _ -> True
|
||||
T_Backticked _ _ -> True
|
||||
T_BraceExpansion _ s -> True
|
||||
T_Glob _ _ -> True
|
||||
T_Extglob _ _ _ -> True
|
||||
T_NormalWord _ l -> any willSplit l
|
||||
T_Literal _ s -> isGlobLiteral s
|
||||
_ -> False
|
||||
|
||||
-- | True when the string contains at least one @*@ or @?@ character.
isGlobLiteral str = '*' `elem` str || '?' `elem` str
|
||||
|
||||
-- | Decide whether a token is (or contains) a glob pattern.
--
-- Extglobs and 'T_Glob' tokens always count; a literal counts when
-- 'isGlobLiteral' accepts its text; a 'T_NormalWord' counts when any of its
-- parts does. Every other token is not a glob.
isGlob token = case token of
    T_Extglob _ _ _      -> True
    T_Literal _ text     -> isGlobLiteral text
    T_Glob _ _           -> True
    T_NormalWord _ parts -> any isGlob parts
    _                    -> False
|
||||
|
||||
|
@ -571,10 +569,13 @@ checkUuoe = checkCommand "echo" f where
|
|||
|
||||
prop_checkTrAZ1 = verify checkTrAZ "tr [a-f] [A-F]"
|
||||
prop_checkTrAZ2 = verify checkTrAZ "tr 'a-z' 'A-Z'"
|
||||
prop_checkTrAZ2a= verify checkTrAZ "tr '[a-z]' '[A-Z]'"
|
||||
prop_checkTrAZ3 = verifyNot checkTrAZ "tr -d '[:lower:]'"
|
||||
prop_checkTrAZ4 = verifyNot checkTrAZ "ls [a-z]"
|
||||
checkTrAZ = checkCommand "tr" (mapM_ f)
|
||||
where
|
||||
f w | isGlob w = do -- The user will go [ab] -> '[ab]' -> 'ab'. Fixme?
|
||||
warn (getId w) $ "Quote the parameter to tr to prevent glob expansion."
|
||||
f word = case getLiteralString word of
|
||||
Just "a-z" -> info (getId word) "Use '[:lower:]' to support accents and foreign alphabets."
|
||||
Just "A-Z" -> info (getId word) "Use '[:upper:]' to support accents and foreign alphabets."
|
||||
|
@ -603,6 +604,7 @@ prop_checkGrepRe2 = verify checkGrepRe "grep -Ev cow*test *.mp3"
|
|||
prop_checkGrepRe3 = verify checkGrepRe "grep --regex=*.mp3 file"
|
||||
prop_checkGrepRe4 = verifyNot checkGrepRe "grep foo *.mp3"
|
||||
prop_checkGrepRe5 = verifyNot checkGrepRe "grep-v --regex=moo *"
|
||||
prop_checkGrepRe6 = verifyNot checkGrepRe "grep foo \\*.mp3"
|
||||
checkGrepRe = checkCommand "grep" f where
|
||||
-- --regex=*(extglob) doesn't work. Fixme?
|
||||
skippable (Just s) = not ("--regex=" `isPrefixOf` s) && "-" `isPrefixOf` s
|
||||
|
@ -675,6 +677,7 @@ isSpaceful :: (String -> Bool) -> Token -> Bool
|
|||
isSpaceful spacefulF x =
|
||||
case x of
|
||||
T_DollarExpansion _ _ -> True
|
||||
T_Glob _ _ -> True
|
||||
T_Extglob _ _ _ -> True
|
||||
T_Literal _ s -> s `containsAny` globspace
|
||||
T_SingleQuoted _ s -> s `containsAny` globspace
|
||||
|
@ -683,7 +686,7 @@ isSpaceful spacefulF x =
|
|||
T_DoubleQuoted _ w -> isSpacefulWord spacefulF w
|
||||
_ -> False
|
||||
where
|
||||
globspace = "* \t\n"
|
||||
globspace = "*? \t\n"
|
||||
containsAny s chars = any (\c -> c `elem` s) chars
|
||||
|
||||
isSpacefulWord :: (String -> Bool) -> [Token] -> Bool
|
||||
|
|
|
@ -471,7 +471,7 @@ checkPossibleTermination pos [T_Literal _ x] =
|
|||
checkPossibleTermination _ _ = return ()
|
||||
|
||||
|
||||
readNormalWordPart = readSingleQuoted <|> readDoubleQuoted <|> readExtglob <|> readDollar <|> readBraced <|> readBackTicked <|> readProcSub <|> readNormalLiteral
|
||||
readNormalWordPart = readSingleQuoted <|> readDoubleQuoted <|> readGlob <|> readDollar <|> readBraced <|> readBackTicked <|> readProcSub <|> readNormalLiteral
|
||||
readSpacePart = do
|
||||
id <- getNextId
|
||||
x <- many1 whitespace
|
||||
|
@ -569,8 +569,29 @@ readNormalLiteral = do
|
|||
s <- many1 readNormalLiteralPart
|
||||
return $ T_Literal id (concat s)
|
||||
|
||||
prop_readGlob1 = isOk readGlob "*"
prop_readGlob2 = isOk readGlob "[^0-9]"
prop_readGlob3 = isOk readGlob "[!0-9]"

-- | Parse one glob-like part of a word.
--
-- The alternatives are tried in this order:
--
--   1. an extglob (via 'readExtglob'),
--   2. a lone @*@ or @?@, returned as a one-character 'T_Glob',
--   3. a bracket class such as @[a-z]@, @[^0-9]@ or @[!0-9]@, returned as a
--      'T_Glob' holding the full bracketed text,
--   4. a single extglob-start character or @[@ that did not form any of the
--      above, returned as a one-character 'T_Literal'.
readGlob = readExtglob <|> readSimple <|> readClass <|> readGlobbyLiteral
    where
        -- A single wildcard character.
        readSimple = do
            id <- getNextId
            c <- oneOf "*?"
            return $ T_Glob id [c]

        -- Doesn't handle weird things like [^]a] and [$foo]. fixme?
        -- Both negation markers are accepted: '!' is the POSIX spelling and
        -- '^' is the common shell extension. The 'try' hands the '[' back on
        -- failure so that readGlobbyLiteral can still consume it.
        readClass = try $ do
            id <- getNextId
            char '['
            s <- many1 (letter <|> digit <|> oneOf "!^-_:")
            char ']'
            return $ T_Glob id $ "[" ++ s ++ "]"

        -- Fallback: keep a stray extglob-start character or '[' as a literal.
        readGlobbyLiteral = do
            id <- getNextId
            c <- extglobStart <|> char '['
            return $ T_Literal id [c]
|
||||
|
||||
readNormalLiteralPart = do
|
||||
readNormalEscaped <|> (anyChar `reluctantlyTill1` (quotable <|> extglobStart))
|
||||
readNormalEscaped <|> (anyChar `reluctantlyTill1` (quotable <|> extglobStart <|> char '['))
|
||||
|
||||
readNormalEscaped = do
|
||||
pos <- getPosition
|
||||
|
@ -591,13 +612,13 @@ prop_readExtglob4 = isOk readExtglob "+(foo \\) bar)"
|
|||
prop_readExtglob5 = isOk readExtglob "+(!(foo *(bar)))"
|
||||
readExtglob = do
|
||||
id <- getNextId
|
||||
c <- extglobStart
|
||||
( try $ do
|
||||
char '('
|
||||
contents <- readExtglobPart `sepBy` (char '|')
|
||||
char ')'
|
||||
return $ T_Extglob id [c] contents
|
||||
) <|> (return $ T_Literal id [c])
|
||||
c <- try $ do
|
||||
f <- extglobStart
|
||||
char '('
|
||||
return f
|
||||
contents <- readExtglobPart `sepBy` (char '|')
|
||||
char ')'
|
||||
return $ T_Extglob id [c] contents
|
||||
|
||||
readExtglobPart = do
|
||||
id <- getNextId
|
||||
|
|
Loading…
Reference in New Issue