Manually decode input files as lenient UTF-8.
This commit is contained in:
parent
128d5d6013
commit
46fb91b44d
|
@ -175,6 +175,15 @@ ShellCheck uses the follow exit codes:
|
||||||
+ 3: ShellCheck was invoked with bad syntax (e.g. unknown flag).
|
+ 3: ShellCheck was invoked with bad syntax (e.g. unknown flag).
|
||||||
+ 4: ShellCheck was invoked with bad options (e.g. unknown formatter).
|
+ 4: ShellCheck was invoked with bad options (e.g. unknown formatter).
|
||||||
|
|
||||||
|
# LOCALE
|
||||||
|
This version of ShellCheck is only available in English. All files are
|
||||||
|
leniently decoded as UTF-8, with a fallback of ISO-8859-1 for invalid
|
||||||
|
sequences. `LC_CTYPE` is respected for output, and defaults to UTF-8 for
|
||||||
|
locales where encoding is unspecified (such as the `C` locale).
|
||||||
|
|
||||||
|
Windows users seeing `commitBuffer: invalid argument (invalid character)`
|
||||||
|
should set their terminal to use UTF-8 with `chcp 65001`.
|
||||||
|
|
||||||
# AUTHOR
|
# AUTHOR
|
||||||
ShellCheck is written and maintained by Vidar Holen.
|
ShellCheck is written and maintained by Vidar Holen.
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ import qualified ShellCheck.Formatter.TTY
|
||||||
import Control.Exception
|
import Control.Exception
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
import Control.Monad.Except
|
import Control.Monad.Except
|
||||||
|
import Data.Bits
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.Functor
|
import Data.Functor
|
||||||
import Data.Either
|
import Data.Either
|
||||||
|
@ -288,14 +289,48 @@ ioInterface options files = do
|
||||||
fallback path _ = return path
|
fallback path _ = return path
|
||||||
|
|
||||||
inputFile file = do
|
inputFile file = do
|
||||||
contents <-
|
handle <-
|
||||||
if file == "-"
|
if file == "-"
|
||||||
then getContents
|
then return stdin
|
||||||
else readFile file
|
else openBinaryFile file ReadMode
|
||||||
|
|
||||||
|
hSetBinaryMode handle True
|
||||||
|
contents <- decodeString <$> hGetContents handle -- closes handle
|
||||||
|
|
||||||
seq (length contents) $
|
seq (length contents) $
|
||||||
return contents
|
return contents
|
||||||
|
|
||||||
|
-- Decode a char8 string into a utf8 string, with fallback on
|
||||||
|
-- ISO-8859-1. This avoids depending on additional libraries.
|
||||||
|
decodeString = decode
|
||||||
|
where
|
||||||
|
decode [] = []
|
||||||
|
decode (c:rest) | isAscii c = c : decode rest
|
||||||
|
decode (c:rest) =
|
||||||
|
let num = (fromIntegral $ ord c) :: Int
|
||||||
|
next = case num of
|
||||||
|
_ | num >= 0xFF -> Nothing
|
||||||
|
| num >= 0xFE -> construct (num .&. 0x00) 6 rest
|
||||||
|
| num >= 0xFC -> construct (num .&. 0x01) 5 rest
|
||||||
|
| num >= 0xF8 -> construct (num .&. 0x03) 4 rest
|
||||||
|
| num >= 0xF0 -> construct (num .&. 0x07) 3 rest
|
||||||
|
| num >= 0xE0 -> construct (num .&. 0x0F) 2 rest
|
||||||
|
| num >= 0xC0 -> construct (num .&. 0x1F) 1 rest
|
||||||
|
| True -> Nothing
|
||||||
|
in
|
||||||
|
case next of
|
||||||
|
Just (n, remainder) -> chr n : decode remainder
|
||||||
|
Nothing -> c : decode rest
|
||||||
|
|
||||||
|
construct x 0 rest = return (x, rest)
|
||||||
|
construct x n (c:rest) =
|
||||||
|
let num = (fromIntegral $ ord c) :: Int in
|
||||||
|
if num >= 0x80 && num <= 0xBF
|
||||||
|
then construct ((x `shiftL` 6) .|. (num .&. 0x3f)) (n-1) rest
|
||||||
|
else Nothing
|
||||||
|
construct _ _ _ = Nothing
|
||||||
|
|
||||||
|
|
||||||
verifyFiles files =
|
verifyFiles files =
|
||||||
when (null files) $ do
|
when (null files) $ do
|
||||||
printErr "No files specified.\n"
|
printErr "No files specified.\n"
|
||||||
|
|
Loading…
Reference in New Issue