Don't parse unicode quotes as real quotes.

2025-11-07 04:46:15 +08:00 · 2017-02-25 15:14:52 -08:00
parent 35c74e4747
commit 2154583fd3
1 changed file with 50 additions and 25 deletions
--- a/ShellCheck/Parser.hs
+++ b/ShellCheck/Parser.hs
@@ -58,18 +58,18 @@ linefeed = do
    c <- char '\n'
    readPendingHereDocs
    return c
-singleQuote = char '\'' <|> unicodeSingleQuote
-doubleQuote = char '"' <|> unicodeDoubleQuote
+singleQuote = char '\''
+doubleQuote = char '"'
 variableStart = upper <|> lower <|> oneOf "_"
 variableChars = upper <|> lower <|> digit <|> oneOf "_"
 functionChars = variableChars <|> oneOf ":+-.?"
 specialVariable = oneOf "@*#?-$!"
 paramSubSpecialChars = oneOf "/:+-=%"
 quotableChars = "|&;<>()\\ '\t\n\r\xA0" ++ doubleQuotableChars
-quotable = almostSpace <|> unicodeDoubleQuote <|> oneOf quotableChars
+quotable = almostSpace <|> oneOf quotableChars
 bracedQuotable = oneOf "}\"$`'"
-doubleQuotableChars = "\"$`" ++ unicodeDoubleQuoteChars
-doubleQuotable = unicodeDoubleQuote <|> oneOf doubleQuotableChars
+doubleQuotableChars = "\"$`"
+doubleQuotable = oneOf doubleQuotableChars
 whitespace = oneOf " \t" <|> carriageReturn <|> almostSpace <|> linefeed
 linewhitespace = oneOf " \t" <|> almostSpace

@@ -78,7 +78,8 @@ suspectCharAfterQuotes = variableChars <|> char '%'
 extglobStartChars = "?*@!+"
 extglobStart = oneOf extglobStartChars

-unicodeDoubleQuoteChars = "\x201C\x201D\x2033\x2036"
+unicodeDoubleQuotes = "\x201C\x201D\x2033\x2036"
+unicodeSingleQuotes = "\x2018\x2019"

 prop_spacing = isOk spacing "  \\\n # Comment"
 spacing = do
@@ -107,17 +108,12 @@ allspacingOrFail = do
    s <- allspacing
    when (null s) $ fail "Expected whitespace"

-unicodeDoubleQuote = do
+readUnicodeQuote = do
    pos <- getPosition
-    oneOf unicodeDoubleQuoteChars
-    parseProblemAt pos WarningC 1015 "This is a unicode double quote. Delete and retype it."
-    return '"'
-
-unicodeSingleQuote = do
-    pos <- getPosition
-    char '\x2018' <|> char '\x2019'
-    parseProblemAt pos WarningC 1016 "This is a unicode single quote. Delete and retype it."
-    return '"'
+    c <- oneOf (unicodeSingleQuotes ++ unicodeDoubleQuotes)
+    parseProblemAt pos WarningC 1110 "This is a unicode quote. Delete and retype it (or quote to make literal)."
+    id <- getNextIdAt pos
+    return $ T_Literal id [c]

 carriageReturn = do
    parseNote ErrorC 1017 "Literal carriage return. Run script through tr -d '\\r' ."
@@ -336,7 +332,7 @@ parseProblemAt pos = parseProblemAtWithEnd pos pos
 parseProblemAtId :: Monad m => Id -> Severity -> Integer -> String -> SCParser m ()
 parseProblemAtId id level code msg = do
    map <- getMap
-    let pos = Map.findWithDefault 
+    let pos = Map.findWithDefault
                (error "Internal error (no position for id). Please report.") id map
    parseProblemAt pos level code msg

@@ -947,6 +943,9 @@ prop_readNormalWord6 = isOk readNormalWord "foo/{}"
 prop_readNormalWord7 = isOk readNormalWord "foo\\\nbar"
 prop_readNormalWord8 = isWarning readSubshell "(foo\\ \nbar)"
 prop_readNormalWord9 = isOk readSubshell "(foo\\ ;\nbar)"
+prop_readNormalWord10 = isWarning readNormalWord "\x201Chello\x201D"
+prop_readNormalWord11 = isWarning readNormalWord "\x2018hello\x2019"
+prop_readNormalWord12 = isWarning readNormalWord "hello\x2018"
 readNormalWord = readNormalishWord ""

 readNormalishWord end = do
@@ -986,6 +985,7 @@ readNormalWordPart end = do
        readBraced,
        readUnquotedBackTicked,
        readProcSub,
+        readUnicodeQuote,
        readNormalLiteral end,
        readLiteralCurlyBraces
      ]
@@ -1049,15 +1049,16 @@ readProcSub = called "process substitution" $ do

 prop_readSingleQuoted = isOk readSingleQuoted "'foo bar'"
 prop_readSingleQuoted2 = isWarning readSingleQuoted "'foo bar\\'"
-prop_readsingleQuoted3 = isWarning readSingleQuoted "\x2018hello\x2019"
 prop_readSingleQuoted4 = isWarning readNormalWord "'it's"
 prop_readSingleQuoted5 = isWarning readSimpleCommand "foo='bar\ncow 'arg"
 prop_readSingleQuoted6 = isOk readSimpleCommand "foo='bar cow 'arg"
+prop_readSingleQuoted7 = isOk readSingleQuoted "'foo\x201C\&bar'"
+prop_readSingleQuoted8 = isWarning readSingleQuoted "'foo\x2018\&bar'"
 readSingleQuoted = called "single quoted string" $ do
    id <- getNextId
    startPos <- getPosition
    singleQuote
-    s <- readSingleQuotedPart `reluctantlyTill` singleQuote
+    s <- many readSingleQuotedPart
    let string = concat s
    endPos <- getPosition
    singleQuote <|> fail "Expected end of single quoted string"
@@ -1082,7 +1083,15 @@ readSingleQuotedLiteral = do

 readSingleQuotedPart =
    readSingleEscaped
-    <|> many1 (noneOf "'\\\x2018\x2019")
+    <|> many1 (noneOf $ "'\\" ++ unicodeSingleQuotes)
+    <|> readUnicodeQuote
+   where
+    readUnicodeQuote = do
+        pos <- getPosition
+        x <- oneOf unicodeSingleQuotes
+        parseProblemAt pos WarningC 1112
+            "This is a unicode quote. Delete and retype it (or ignore/doublequote for literal)."
+        return [x]


 prop_readBackTicked = isOk (readBackTicked False) "`ls *.mp3`"
@@ -1158,11 +1167,12 @@ parseForgettingContext alsoOnSuccess parser = do

 prop_readDoubleQuoted = isOk readDoubleQuoted "\"Hello $FOO\""
 prop_readDoubleQuoted2 = isOk readDoubleQuoted "\"$'\""
-prop_readDoubleQuoted3 = isWarning readDoubleQuoted "\x201Chello\x201D"
+prop_readDoubleQuoted3 = isOk readDoubleQuoted "\"\x2018hello\x2019\""
 prop_readDoubleQuoted4 = isWarning readSimpleCommand "\"foo\nbar\"foo"
 prop_readDoubleQuoted5 = isOk readSimpleCommand "lol \"foo\nbar\" etc"
 prop_readDoubleQuoted6 = isOk readSimpleCommand "echo \"${ ls; }\""
 prop_readDoubleQuoted7 = isOk readSimpleCommand "echo \"${ ls;}bar\""
+prop_readDoubleQuoted8 = isWarning readDoubleQuoted "\"\x201Chello\x201D\""
 readDoubleQuoted = called "double quoted string" $ do
    id <- getNextId
    startPos <- getPosition
@@ -1187,7 +1197,15 @@ suggestForgotClosingQuote startPos endPos name = do
    parseProblemAt endPos InfoC 1079
        "This is actually an end quote, but due to next char it looks suspect."

-doubleQuotedPart = readDoubleLiteral <|> readDoubleQuotedDollar <|> readQuotedBackTicked
+doubleQuotedPart = readDoubleLiteral <|> readDoubleQuotedDollar <|> readQuotedBackTicked <|> readUnicodeQuote
+  where
+    readUnicodeQuote = do
+        pos <- getPosition
+        id <- getNextId
+        c <- oneOf unicodeDoubleQuotes
+        parseProblemAt pos WarningC 1111
+            "This is a unicode quote. Delete and retype it (or ignore/singlequote for literal)."
+        return $ T_Literal id [c]

 readDoubleQuotedLiteral = do
    doubleQuote
@@ -1201,7 +1219,7 @@ readDoubleLiteral = do
    return $ T_Literal id (concat s)

 readDoubleLiteralPart = do
-    x <- many1 (readDoubleEscaped <|> many1 (noneOf ('\\':doubleQuotableChars)))
+    x <- many1 (readDoubleEscaped <|> many1 (noneOf ('\\':doubleQuotableChars ++ unicodeDoubleQuotes)))
    return $ concat x

 readNormalLiteral end = do
@@ -1243,8 +1261,15 @@ readGlob = readExtglob <|> readSimple <|> readClass <|> readGlobbyLiteral
            c <- extglobStart <|> char '['
            return $ T_Literal id [c]

-readNormalLiteralPart end =
-    readNormalEscaped <|> many1 (noneOf (end ++ quotableChars ++ extglobStartChars ++ "[{}"))
+readNormalLiteralPart customEnd =
+    readNormalEscaped <|>
+        many1 (noneOf (customEnd ++ standardEnd))
+  where
+    standardEnd = "[{}"
+        ++ quotableChars
+        ++ extglobStartChars
+        ++ unicodeDoubleQuotes
+        ++ unicodeSingleQuotes

 readNormalEscaped = called "escaped char" $ do
    pos <- getPosition