From 46fb91b44d2784829a5949e1faeaedaea59e76a8 Mon Sep 17 00:00:00 2001 From: Vidar Holen Date: Sun, 22 Jan 2017 15:24:21 -0800 Subject: [PATCH] Manually decode input files as lenient UTF-8. --- shellcheck.1.md | 9 +++++++++ shellcheck.hs | 41 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/shellcheck.1.md b/shellcheck.1.md index 8e76e3c..00ef455 100644 --- a/shellcheck.1.md +++ b/shellcheck.1.md @@ -175,6 +175,15 @@ ShellCheck uses the follow exit codes: + 3: ShellCheck was invoked with bad syntax (e.g. unknown flag). + 4: ShellCheck was invoked with bad options (e.g. unknown formatter). +# LOCALE +This version of ShellCheck is only available in English. All files are +leniently decoded as UTF-8, with a fallback of ISO-8859-1 for invalid +sequences. `LC_CTYPE` is respected for output, and defaults to UTF-8 for +locales where encoding is unspecified (such as the `C` locale). + +Windows users seeing `commitBuffer: invalid argument (invalid character)` +should set their terminal to use UTF-8 with `chcp 65001`. + # AUTHOR ShellCheck is written and maintained by Vidar Holen. diff --git a/shellcheck.hs b/shellcheck.hs index 392d271..4a40ba2 100644 --- a/shellcheck.hs +++ b/shellcheck.hs @@ -31,6 +31,7 @@ import qualified ShellCheck.Formatter.TTY import Control.Exception import Control.Monad import Control.Monad.Except +import Data.Bits import Data.Char import Data.Functor import Data.Either @@ -288,14 +289,48 @@ ioInterface options files = do fallback path _ = return path inputFile file = do - contents <- + handle <- if file == "-" - then getContents - else readFile file + then return stdin + else openBinaryFile file ReadMode + + hSetBinaryMode handle True + contents <- decodeString <$> hGetContents handle -- closes handle seq (length contents) $ return contents +-- Decode a char8 string into a utf8 string, with fallback on +-- ISO-8859-1. This avoids depending on additional libraries. +decodeString = decode + where + decode [] = [] + decode (c:rest) | isAscii c = c : decode rest + decode (c:rest) = + let num = (fromIntegral $ ord c) :: Int + next = case num of + _ | num >= 0xFF -> Nothing + | num >= 0xFE -> construct (num .&. 0x00) 6 rest + | num >= 0xFC -> construct (num .&. 0x01) 5 rest + | num >= 0xF8 -> construct (num .&. 0x03) 4 rest + | num >= 0xF0 -> construct (num .&. 0x07) 3 rest + | num >= 0xE0 -> construct (num .&. 0x0F) 2 rest + | num >= 0xC0 -> construct (num .&. 0x1F) 1 rest + | True -> Nothing + in + case next of + Just (n, remainder) -> chr n : decode remainder + Nothing -> c : decode rest + + construct x 0 rest = return (x, rest) + construct x n (c:rest) = + let num = (fromIntegral $ ord c) :: Int in + if num >= 0x80 && num <= 0xBF + then construct ((x `shiftL` 6) .|. (num .&. 0x3f)) (n-1) rest + else Nothing + construct _ _ _ = Nothing + + verifyFiles files = when (null files) $ do printErr "No files specified.\n"