Don't parse unicode quotes as real quotes.

This commit is contained in:
Vidar Holen 2017-02-25 15:14:52 -08:00
parent 35c74e4747
commit 2154583fd3
1 changed file with 50 additions and 25 deletions

View File

@@ -58,18 +58,18 @@ linefeed = do
c <- char '\n' c <- char '\n'
readPendingHereDocs readPendingHereDocs
return c return c
singleQuote = char '\'' <|> unicodeSingleQuote singleQuote = char '\''
doubleQuote = char '"' <|> unicodeDoubleQuote doubleQuote = char '"'
variableStart = upper <|> lower <|> oneOf "_" variableStart = upper <|> lower <|> oneOf "_"
variableChars = upper <|> lower <|> digit <|> oneOf "_" variableChars = upper <|> lower <|> digit <|> oneOf "_"
functionChars = variableChars <|> oneOf ":+-.?" functionChars = variableChars <|> oneOf ":+-.?"
specialVariable = oneOf "@*#?-$!" specialVariable = oneOf "@*#?-$!"
paramSubSpecialChars = oneOf "/:+-=%" paramSubSpecialChars = oneOf "/:+-=%"
quotableChars = "|&;<>()\\ '\t\n\r\xA0" ++ doubleQuotableChars quotableChars = "|&;<>()\\ '\t\n\r\xA0" ++ doubleQuotableChars
quotable = almostSpace <|> unicodeDoubleQuote <|> oneOf quotableChars quotable = almostSpace <|> oneOf quotableChars
bracedQuotable = oneOf "}\"$`'" bracedQuotable = oneOf "}\"$`'"
doubleQuotableChars = "\"$`" ++ unicodeDoubleQuoteChars doubleQuotableChars = "\"$`"
doubleQuotable = unicodeDoubleQuote <|> oneOf doubleQuotableChars doubleQuotable = oneOf doubleQuotableChars
whitespace = oneOf " \t" <|> carriageReturn <|> almostSpace <|> linefeed whitespace = oneOf " \t" <|> carriageReturn <|> almostSpace <|> linefeed
linewhitespace = oneOf " \t" <|> almostSpace linewhitespace = oneOf " \t" <|> almostSpace
@@ -78,7 +78,8 @@ suspectCharAfterQuotes = variableChars <|> char '%'
extglobStartChars = "?*@!+" extglobStartChars = "?*@!+"
extglobStart = oneOf extglobStartChars extglobStart = oneOf extglobStartChars
unicodeDoubleQuoteChars = "\x201C\x201D\x2033\x2036" unicodeDoubleQuotes = "\x201C\x201D\x2033\x2036"
unicodeSingleQuotes = "\x2018\x2019"
prop_spacing = isOk spacing " \\\n # Comment" prop_spacing = isOk spacing " \\\n # Comment"
spacing = do spacing = do
@@ -107,17 +108,12 @@ allspacingOrFail = do
s <- allspacing s <- allspacing
when (null s) $ fail "Expected whitespace" when (null s) $ fail "Expected whitespace"
unicodeDoubleQuote = do readUnicodeQuote = do
pos <- getPosition pos <- getPosition
oneOf unicodeDoubleQuoteChars c <- oneOf (unicodeSingleQuotes ++ unicodeDoubleQuotes)
parseProblemAt pos WarningC 1015 "This is a unicode double quote. Delete and retype it." parseProblemAt pos WarningC 1110 "This is a unicode quote. Delete and retype it (or quote to make literal)."
return '"' id <- getNextIdAt pos
return $ T_Literal id [c]
unicodeSingleQuote = do
pos <- getPosition
char '\x2018' <|> char '\x2019'
parseProblemAt pos WarningC 1016 "This is a unicode single quote. Delete and retype it."
return '"'
carriageReturn = do carriageReturn = do
parseNote ErrorC 1017 "Literal carriage return. Run script through tr -d '\\r' ." parseNote ErrorC 1017 "Literal carriage return. Run script through tr -d '\\r' ."
@@ -336,7 +332,7 @@ parseProblemAt pos = parseProblemAtWithEnd pos pos
parseProblemAtId :: Monad m => Id -> Severity -> Integer -> String -> SCParser m () parseProblemAtId :: Monad m => Id -> Severity -> Integer -> String -> SCParser m ()
parseProblemAtId id level code msg = do parseProblemAtId id level code msg = do
map <- getMap map <- getMap
let pos = Map.findWithDefault let pos = Map.findWithDefault
(error "Internal error (no position for id). Please report.") id map (error "Internal error (no position for id). Please report.") id map
parseProblemAt pos level code msg parseProblemAt pos level code msg
@@ -947,6 +943,9 @@ prop_readNormalWord6 = isOk readNormalWord "foo/{}"
prop_readNormalWord7 = isOk readNormalWord "foo\\\nbar" prop_readNormalWord7 = isOk readNormalWord "foo\\\nbar"
prop_readNormalWord8 = isWarning readSubshell "(foo\\ \nbar)" prop_readNormalWord8 = isWarning readSubshell "(foo\\ \nbar)"
prop_readNormalWord9 = isOk readSubshell "(foo\\ ;\nbar)" prop_readNormalWord9 = isOk readSubshell "(foo\\ ;\nbar)"
prop_readNormalWord10 = isWarning readNormalWord "\x201Chello\x201D"
prop_readNormalWord11 = isWarning readNormalWord "\x2018hello\x2019"
prop_readNormalWord12 = isWarning readNormalWord "hello\x2018"
readNormalWord = readNormalishWord "" readNormalWord = readNormalishWord ""
readNormalishWord end = do readNormalishWord end = do
@@ -986,6 +985,7 @@ readNormalWordPart end = do
readBraced, readBraced,
readUnquotedBackTicked, readUnquotedBackTicked,
readProcSub, readProcSub,
readUnicodeQuote,
readNormalLiteral end, readNormalLiteral end,
readLiteralCurlyBraces readLiteralCurlyBraces
] ]
@@ -1049,15 +1049,16 @@ readProcSub = called "process substitution" $ do
prop_readSingleQuoted = isOk readSingleQuoted "'foo bar'" prop_readSingleQuoted = isOk readSingleQuoted "'foo bar'"
prop_readSingleQuoted2 = isWarning readSingleQuoted "'foo bar\\'" prop_readSingleQuoted2 = isWarning readSingleQuoted "'foo bar\\'"
prop_readsingleQuoted3 = isWarning readSingleQuoted "\x2018hello\x2019"
prop_readSingleQuoted4 = isWarning readNormalWord "'it's" prop_readSingleQuoted4 = isWarning readNormalWord "'it's"
prop_readSingleQuoted5 = isWarning readSimpleCommand "foo='bar\ncow 'arg" prop_readSingleQuoted5 = isWarning readSimpleCommand "foo='bar\ncow 'arg"
prop_readSingleQuoted6 = isOk readSimpleCommand "foo='bar cow 'arg" prop_readSingleQuoted6 = isOk readSimpleCommand "foo='bar cow 'arg"
prop_readSingleQuoted7 = isOk readSingleQuoted "'foo\x201C\&bar'"
prop_readSingleQuoted8 = isWarning readSingleQuoted "'foo\x2018\&bar'"
readSingleQuoted = called "single quoted string" $ do readSingleQuoted = called "single quoted string" $ do
id <- getNextId id <- getNextId
startPos <- getPosition startPos <- getPosition
singleQuote singleQuote
s <- readSingleQuotedPart `reluctantlyTill` singleQuote s <- many readSingleQuotedPart
let string = concat s let string = concat s
endPos <- getPosition endPos <- getPosition
singleQuote <|> fail "Expected end of single quoted string" singleQuote <|> fail "Expected end of single quoted string"
@@ -1082,7 +1083,15 @@ readSingleQuotedLiteral = do
readSingleQuotedPart = readSingleQuotedPart =
readSingleEscaped readSingleEscaped
<|> many1 (noneOf "'\\\x2018\x2019") <|> many1 (noneOf $ "'\\" ++ unicodeSingleQuotes)
<|> readUnicodeQuote
where
readUnicodeQuote = do
pos <- getPosition
x <- oneOf unicodeSingleQuotes
parseProblemAt pos WarningC 1112
"This is a unicode quote. Delete and retype it (or ignore/doublequote for literal)."
return [x]
prop_readBackTicked = isOk (readBackTicked False) "`ls *.mp3`" prop_readBackTicked = isOk (readBackTicked False) "`ls *.mp3`"
@@ -1158,11 +1167,12 @@ parseForgettingContext alsoOnSuccess parser = do
prop_readDoubleQuoted = isOk readDoubleQuoted "\"Hello $FOO\"" prop_readDoubleQuoted = isOk readDoubleQuoted "\"Hello $FOO\""
prop_readDoubleQuoted2 = isOk readDoubleQuoted "\"$'\"" prop_readDoubleQuoted2 = isOk readDoubleQuoted "\"$'\""
prop_readDoubleQuoted3 = isWarning readDoubleQuoted "\x201Chello\x201D" prop_readDoubleQuoted3 = isOk readDoubleQuoted "\"\x2018hello\x2019\""
prop_readDoubleQuoted4 = isWarning readSimpleCommand "\"foo\nbar\"foo" prop_readDoubleQuoted4 = isWarning readSimpleCommand "\"foo\nbar\"foo"
prop_readDoubleQuoted5 = isOk readSimpleCommand "lol \"foo\nbar\" etc" prop_readDoubleQuoted5 = isOk readSimpleCommand "lol \"foo\nbar\" etc"
prop_readDoubleQuoted6 = isOk readSimpleCommand "echo \"${ ls; }\"" prop_readDoubleQuoted6 = isOk readSimpleCommand "echo \"${ ls; }\""
prop_readDoubleQuoted7 = isOk readSimpleCommand "echo \"${ ls;}bar\"" prop_readDoubleQuoted7 = isOk readSimpleCommand "echo \"${ ls;}bar\""
prop_readDoubleQuoted8 = isWarning readDoubleQuoted "\"\x201Chello\x201D\""
readDoubleQuoted = called "double quoted string" $ do readDoubleQuoted = called "double quoted string" $ do
id <- getNextId id <- getNextId
startPos <- getPosition startPos <- getPosition
@@ -1187,7 +1197,15 @@ suggestForgotClosingQuote startPos endPos name = do
parseProblemAt endPos InfoC 1079 parseProblemAt endPos InfoC 1079
"This is actually an end quote, but due to next char it looks suspect." "This is actually an end quote, but due to next char it looks suspect."
doubleQuotedPart = readDoubleLiteral <|> readDoubleQuotedDollar <|> readQuotedBackTicked doubleQuotedPart = readDoubleLiteral <|> readDoubleQuotedDollar <|> readQuotedBackTicked <|> readUnicodeQuote
where
readUnicodeQuote = do
pos <- getPosition
id <- getNextId
c <- oneOf unicodeDoubleQuotes
parseProblemAt pos WarningC 1111
"This is a unicode quote. Delete and retype it (or ignore/singlequote for literal)."
return $ T_Literal id [c]
readDoubleQuotedLiteral = do readDoubleQuotedLiteral = do
doubleQuote doubleQuote
@@ -1201,7 +1219,7 @@ readDoubleLiteral = do
return $ T_Literal id (concat s) return $ T_Literal id (concat s)
readDoubleLiteralPart = do readDoubleLiteralPart = do
x <- many1 (readDoubleEscaped <|> many1 (noneOf ('\\':doubleQuotableChars))) x <- many1 (readDoubleEscaped <|> many1 (noneOf ('\\':doubleQuotableChars ++ unicodeDoubleQuotes)))
return $ concat x return $ concat x
readNormalLiteral end = do readNormalLiteral end = do
@@ -1243,8 +1261,15 @@ readGlob = readExtglob <|> readSimple <|> readClass <|> readGlobbyLiteral
c <- extglobStart <|> char '[' c <- extglobStart <|> char '['
return $ T_Literal id [c] return $ T_Literal id [c]
readNormalLiteralPart end = readNormalLiteralPart customEnd =
readNormalEscaped <|> many1 (noneOf (end ++ quotableChars ++ extglobStartChars ++ "[{}")) readNormalEscaped <|>
many1 (noneOf (customEnd ++ standardEnd))
where
standardEnd = "[{}"
++ quotableChars
++ extglobStartChars
++ unicodeDoubleQuotes
++ unicodeSingleQuotes
readNormalEscaped = called "escaped char" $ do readNormalEscaped = called "escaped char" $ do
pos <- getPosition pos <- getPosition