Improved regex parsing. To think [[ f =~ f( ]] )* ]] is valid..

This commit is contained in:
Vidar Holen 2013-05-28 21:32:15 -07:00
parent 6b89f33d0c
commit 1dbbc51f86
2 changed files with 33 additions and 5 deletions

View File

@ -155,6 +155,7 @@ isGlob _ = False
isConfusedGlobRegex ('*':_) = True isConfusedGlobRegex ('*':_) = True
isConfusedGlobRegex [x,'*'] | x /= '\\' = True
isConfusedGlobRegex _ = False isConfusedGlobRegex _ = False
isPotentiallyConfusedGlobRegex = isPotentiallyConfusedGlobRegex =
@ -588,10 +589,10 @@ prop_checkGlobbedRegex2a = verify checkGlobbedRegex "[[ $foo =~ \\#* ]]"
prop_checkGlobbedRegex3 = verifyNot checkGlobbedRegex "[[ $foo =~ $foo ]]" prop_checkGlobbedRegex3 = verifyNot checkGlobbedRegex "[[ $foo =~ $foo ]]"
prop_checkGlobbedRegex4 = verifyNot checkGlobbedRegex "[[ $foo =~ ^c.* ]]" prop_checkGlobbedRegex4 = verifyNot checkGlobbedRegex "[[ $foo =~ ^c.* ]]"
checkGlobbedRegex (TC_Binary _ DoubleBracket "=~" _ rhs) = checkGlobbedRegex (TC_Binary _ DoubleBracket "=~" _ rhs) =
case rhs of let s = concat $ deadSimple rhs in
T_NormalWord id ((T_Glob _ "*"):_) -> warn id $ "=~ is for regex. Use == for globs." if isConfusedGlobRegex s
T_NormalWord id ([(T_Literal _ [c]), (T_Glob _ "*")]) -> warn id $ "=~ is for regex. Either ^anchor$ this, or treat it as a glob with == ." then warn (getId rhs) $ "=~ is for regex. Use == for globs."
_ -> return () else return ()
checkGlobbedRegex _ = return () checkGlobbedRegex _ = return ()

View File

@ -291,8 +291,11 @@ readConditionContents single = do
) )
(do (do
pos <- getPosition pos <- getPosition
isRegex <- regexOperatorAhead
op <- readCondBinaryOp op <- readCondBinaryOp
y <- readCondWord <|> ( (parseProblemAt pos ErrorC $ "Expected another argument for this operator.") >> mzero) y <- if isRegex
then readRegex
else readCondWord <|> ( (parseProblemAt pos ErrorC $ "Expected another argument for this operator.") >> mzero)
return (x `op` y) return (x `op` y)
) <|> (return $ TC_Noary id typ x) ) <|> (return $ TC_Noary id typ x)
@ -316,6 +319,28 @@ readConditionContents single = do
isEscaped _ = False isEscaped _ = False
xor x y = x && not y || not x && y xor x y = x && not y || not x && y
-- Currently a bit of a hack since parsing rules are obscure
regexOperatorAhead = (lookAhead $ do
try (string "=~") <|> try (string "~=")
return True)
<|> return False
readRegex = called "regex" $ do
id <- getNextId
parts <- many1 (readGroup <|> readSingleQuoted <|> readDoubleQuoted <|> readDollar <|> readNormalLiteral "( " <|> readGlobLiteral)
disregard spacing
return $ T_NormalWord id parts
where
readGlobLiteral = do
id <- getNextId
s <- many1 extglobStart
return $ T_Literal id s
readGroup = do -- Fixme: account for vars and quotes in groups
id <- getNextId
char '('
s <- readGenericLiteral (char ')')
char ')'
return $ T_Literal id $ "(" ++ s ++ ")"
readCondTerm = readCondNot <|> readCondExpr readCondTerm = readCondNot <|> readCondExpr
readCondNot = do readCondNot = do
id <- getNextId id <- getNextId
@ -505,6 +530,8 @@ readArithmeticContents =
prop_readCondition = isOk readCondition "[ \\( a = b \\) -a \\( c = d \\) ]" prop_readCondition = isOk readCondition "[ \\( a = b \\) -a \\( c = d \\) ]"
prop_readCondition2 = isOk readCondition "[[ (a = b) || (c = d) ]]" prop_readCondition2 = isOk readCondition "[[ (a = b) || (c = d) ]]"
prop_readCondition3 = isOk readCondition "[[ $c = [[:alpha:].~-] ]]" prop_readCondition3 = isOk readCondition "[[ $c = [[:alpha:].~-] ]]"
prop_readCondition4 = isOk readCondition "[[ $c =~ *foo* ]]"
prop_readCondition5 = isOk readCondition "[[ $c =~ f( ]] )* ]]"
readCondition = called "test expression" $ do readCondition = called "test expression" $ do
opos <- getPosition opos <- getPosition
id <- getNextId id <- getNextId