Don't parse unicode quotes as real quotes.

2017-02-25 15:14:52 -08:00 · 2017-02-25 15:14:52 -08:00 · 2154583fd3
parent 35c74e4747
commit 2154583fd3
1 changed files with 50 additions and 25 deletions
--- a/ShellCheck/Parser.hs
+++ b/ShellCheck/Parser.hs
@ -58,18 +58,18 @@ linefeed = do
    c <- char '\n'
    readPendingHereDocs
    return c
-singleQuote = char '\'' <|> unicodeSingleQuote
+singleQuote = char '\''
-doubleQuote = char '"' <|> unicodeDoubleQuote
+doubleQuote = char '"'
 variableStart = upper <|> lower <|> oneOf "_"
 variableChars = upper <|> lower <|> digit <|> oneOf "_"
 functionChars = variableChars <|> oneOf ":+-.?"
 specialVariable = oneOf "@*#?-$!"
 paramSubSpecialChars = oneOf "/:+-=%"
 quotableChars = "|&;<>()\\ '\t\n\r\xA0" ++ doubleQuotableChars
-quotable = almostSpace <|> unicodeDoubleQuote <|> oneOf quotableChars
+quotable = almostSpace <|> oneOf quotableChars
 bracedQuotable = oneOf "}\"$`'"
-doubleQuotableChars = "\"$`" ++ unicodeDoubleQuoteChars
+doubleQuotableChars = "\"$`"
-doubleQuotable = unicodeDoubleQuote <|> oneOf doubleQuotableChars
+doubleQuotable = oneOf doubleQuotableChars
 whitespace = oneOf " \t" <|> carriageReturn <|> almostSpace <|> linefeed
 linewhitespace = oneOf " \t" <|> almostSpace
@ -78,7 +78,8 @@ suspectCharAfterQuotes = variableChars <|> char '%'
 extglobStartChars = "?*@!+"
 extglobStart = oneOf extglobStartChars
-unicodeDoubleQuoteChars = "\x201C\x201D\x2033\x2036"
+unicodeDoubleQuotes = "\x201C\x201D\x2033\x2036"
 unicodeSingleQuotes = "\x2018\x2019"
 prop_spacing = isOk spacing "  \\\n # Comment"
 spacing = do
@ -107,17 +108,12 @@ allspacingOrFail = do
    s <- allspacing
    when (null s) $ fail "Expected whitespace"
-unicodeDoubleQuote = do
+readUnicodeQuote = do
    pos <- getPosition
-    oneOf unicodeDoubleQuoteChars
+    c <- oneOf (unicodeSingleQuotes ++ unicodeDoubleQuotes)
-    parseProblemAt pos WarningC 1015 "This is a unicode double quote. Delete and retype it."
+    parseProblemAt pos WarningC 1110 "This is a unicode quote. Delete and retype it (or quote to make literal)."
-    return '"'
+    id <- getNextIdAt pos
-
+    return $ T_Literal id [c]
 unicodeSingleQuote = do
    pos <- getPosition
    char '\x2018' <|> char '\x2019'
    parseProblemAt pos WarningC 1016 "This is a unicode single quote. Delete and retype it."
    return '"'
 carriageReturn = do
    parseNote ErrorC 1017 "Literal carriage return. Run script through tr -d '\\r' ."
@ -336,7 +332,7 @@ parseProblemAt pos = parseProblemAtWithEnd pos pos
 parseProblemAtId :: Monad m => Id -> Severity -> Integer -> String -> SCParser m ()
 parseProblemAtId id level code msg = do
    map <- getMap
-    let pos = Map.findWithDefault 
+    let pos = Map.findWithDefault
                (error "Internal error (no position for id). Please report.") id map
    parseProblemAt pos level code msg
@ -947,6 +943,9 @@ prop_readNormalWord6 = isOk readNormalWord "foo/{}"
 prop_readNormalWord7 = isOk readNormalWord "foo\\\nbar"
 prop_readNormalWord8 = isWarning readSubshell "(foo\\ \nbar)"
 prop_readNormalWord9 = isOk readSubshell "(foo\\ ;\nbar)"
 prop_readNormalWord10 = isWarning readNormalWord "\x201Chello\x201D"
 prop_readNormalWord11 = isWarning readNormalWord "\x2018hello\x2019"
 prop_readNormalWord12 = isWarning readNormalWord "hello\x2018"
 readNormalWord = readNormalishWord ""
 readNormalishWord end = do
@ -986,6 +985,7 @@ readNormalWordPart end = do
        readBraced,
        readUnquotedBackTicked,
        readProcSub,
        readUnicodeQuote,
        readNormalLiteral end,
        readLiteralCurlyBraces
      ]
@ -1049,15 +1049,16 @@ readProcSub = called "process substitution" $ do
 prop_readSingleQuoted = isOk readSingleQuoted "'foo bar'"
 prop_readSingleQuoted2 = isWarning readSingleQuoted "'foo bar\\'"
 prop_readsingleQuoted3 = isWarning readSingleQuoted "\x2018hello\x2019"
 prop_readSingleQuoted4 = isWarning readNormalWord "'it's"
 prop_readSingleQuoted5 = isWarning readSimpleCommand "foo='bar\ncow 'arg"
 prop_readSingleQuoted6 = isOk readSimpleCommand "foo='bar cow 'arg"
 prop_readSingleQuoted7 = isOk readSingleQuoted "'foo\x201C\&bar'"
 prop_readSingleQuoted8 = isWarning readSingleQuoted "'foo\x2018\&bar'"
 readSingleQuoted = called "single quoted string" $ do
    id <- getNextId
    startPos <- getPosition
    singleQuote
-    s <- readSingleQuotedPart `reluctantlyTill` singleQuote
+    s <- many readSingleQuotedPart
    let string = concat s
    endPos <- getPosition
    singleQuote <|> fail "Expected end of single quoted string"
@ -1082,7 +1083,15 @@ readSingleQuotedLiteral = do
 readSingleQuotedPart =
    readSingleEscaped
-    <|> many1 (noneOf "'\\\x2018\x2019")
+    <|> many1 (noneOf $ "'\\" ++ unicodeSingleQuotes)
    <|> readUnicodeQuote
   where
    readUnicodeQuote = do
        pos <- getPosition
        x <- oneOf unicodeSingleQuotes
        parseProblemAt pos WarningC 1112
            "This is a unicode quote. Delete and retype it (or ignore/doublequote for literal)."
        return [x]
 prop_readBackTicked = isOk (readBackTicked False) "`ls *.mp3`"
@ -1158,11 +1167,12 @@ parseForgettingContext alsoOnSuccess parser = do
 prop_readDoubleQuoted = isOk readDoubleQuoted "\"Hello $FOO\""
 prop_readDoubleQuoted2 = isOk readDoubleQuoted "\"$'\""
-prop_readDoubleQuoted3 = isWarning readDoubleQuoted "\x201Chello\x201D"
+prop_readDoubleQuoted3 = isOk readDoubleQuoted "\"\x2018hello\x2019\""
 prop_readDoubleQuoted4 = isWarning readSimpleCommand "\"foo\nbar\"foo"
 prop_readDoubleQuoted5 = isOk readSimpleCommand "lol \"foo\nbar\" etc"
 prop_readDoubleQuoted6 = isOk readSimpleCommand "echo \"${ ls; }\""
 prop_readDoubleQuoted7 = isOk readSimpleCommand "echo \"${ ls;}bar\""
 prop_readDoubleQuoted8 = isWarning readDoubleQuoted "\"\x201Chello\x201D\""
 readDoubleQuoted = called "double quoted string" $ do
    id <- getNextId
    startPos <- getPosition
@ -1187,7 +1197,15 @@ suggestForgotClosingQuote startPos endPos name = do
    parseProblemAt endPos InfoC 1079
        "This is actually an end quote, but due to next char it looks suspect."
-doubleQuotedPart = readDoubleLiteral <|> readDoubleQuotedDollar <|> readQuotedBackTicked
+doubleQuotedPart = readDoubleLiteral <|> readDoubleQuotedDollar <|> readQuotedBackTicked <|> readUnicodeQuote
  where
    readUnicodeQuote = do
        pos <- getPosition
        id <- getNextId
        c <- oneOf unicodeDoubleQuotes
        parseProblemAt pos WarningC 1111
            "This is a unicode quote. Delete and retype it (or ignore/singlequote for literal)."
        return $ T_Literal id [c]
 readDoubleQuotedLiteral = do
    doubleQuote
@ -1201,7 +1219,7 @@ readDoubleLiteral = do
    return $ T_Literal id (concat s)
 readDoubleLiteralPart = do
-    x <- many1 (readDoubleEscaped <|> many1 (noneOf ('\\':doubleQuotableChars)))
+    x <- many1 (readDoubleEscaped <|> many1 (noneOf ('\\':doubleQuotableChars ++ unicodeDoubleQuotes)))
    return $ concat x
 readNormalLiteral end = do
@ -1243,8 +1261,15 @@ readGlob = readExtglob <|> readSimple <|> readClass <|> readGlobbyLiteral
            c <- extglobStart <|> char '['
            return $ T_Literal id [c]
-readNormalLiteralPart end =
+readNormalLiteralPart customEnd =
-    readNormalEscaped <|> many1 (noneOf (end ++ quotableChars ++ extglobStartChars ++ "[{}"))
+    readNormalEscaped <|>
        many1 (noneOf (customEnd ++ standardEnd))
  where
    standardEnd = "[{}"
        ++ quotableChars
        ++ extglobStartChars
        ++ unicodeDoubleQuotes
        ++ unicodeSingleQuotes
 readNormalEscaped = called "escaped char" $ do
    pos <- getPosition