Decode UTF-8 sequences over 0x10FFFF as latin1

This commit is contained in:
Vidar Holen 2017-03-10 10:09:09 -08:00
parent a3a4873190
commit 22c86256ac
1 changed file with 4 additions and 5 deletions

View File

@@ -309,10 +309,7 @@ decodeString = decode
decode (c:rest) = decode (c:rest) =
let num = (fromIntegral $ ord c) :: Int let num = (fromIntegral $ ord c) :: Int
next = case num of next = case num of
_ | num >= 0xFF -> Nothing _ | num >= 0xF8 -> Nothing
| num >= 0xFE -> construct (num .&. 0x00) 6 rest
| num >= 0xFC -> construct (num .&. 0x01) 5 rest
| num >= 0xF8 -> construct (num .&. 0x03) 4 rest
| num >= 0xF0 -> construct (num .&. 0x07) 3 rest | num >= 0xF0 -> construct (num .&. 0x07) 3 rest
| num >= 0xE0 -> construct (num .&. 0x0F) 2 rest | num >= 0xE0 -> construct (num .&. 0x0F) 2 rest
| num >= 0xC0 -> construct (num .&. 0x1F) 1 rest | num >= 0xC0 -> construct (num .&. 0x1F) 1 rest
@@ -322,7 +319,9 @@ decodeString = decode
Just (n, remainder) -> chr n : decode remainder Just (n, remainder) -> chr n : decode remainder
Nothing -> c : decode rest Nothing -> c : decode rest
construct x 0 rest = return (x, rest) construct x 0 rest = do
guard $ x <= 0x10FFFF
return (x, rest)
construct x n (c:rest) = construct x n (c:rest) =
let num = (fromIntegral $ ord c) :: Int in let num = (fromIntegral $ ord c) :: Int in
if num >= 0x80 && num <= 0xBF if num >= 0x80 && num <= 0xBF