Initial version

2025-10-26 18:49:26 +08:00 · 2012-11-03 09:32:09 -07:00
commit dabc445202
1 changed files with 934 additions and 0 deletions
--- a/shpell.hs
+++ b/shpell.hs
@@ -0,0 +1,934 @@
+{-# LANGUAGE NoMonomorphismRestriction #-}
+
+-- Shpell Check, by Vidar 'koala_man' Holen
+-- Sorry about the code. It was a week's worth of hacking.
+
+import Text.Parsec
+import Text.Parsec.Pos (initialPos)
+import Debug.Trace
+import Control.Monad
+import Data.Char
+import Data.List (isInfixOf, partition, sortBy, intercalate)
+import qualified Control.Monad.State as Ms
+import Data.Maybe
+import Prelude hiding (readList)
+import System.IO
+
+
+
+backslash = char '\\'
+linefeed = char '\n'
+singleQuote = char '\''
+doubleQuote = char '"'
+variableStart = upper <|> lower <|> oneOf "_"
+variableChars = upper <|> lower <|> digit <|> oneOf "_"
+specialVariable = oneOf "@*#?-$!"
+tokenDelimiter = oneOf "&|;<> \t\n"
+quotable = oneOf "#|&;<>()$`\\ \"'\t\n"
+doubleQuotable = oneOf "\"$`"
+whitespace = oneOf " \t\n"
+linewhitespace = oneOf " \t"
+
+spacing = do
+    x <- many (many1 linewhitespace <|> (try $ string "\\\n"))
+    optional readComment
+    return $ concat x
+
+allspacing = do
+    spacing
+    x <- option False ((linefeed <|> carriageReturn) >> return True)
+    when x allspacing
+
+carriageReturn = do
+    parseNote ErrorC "Literal carriage return. Run script through tr -d '\\r' "
+    char '\r'
+
+
+--------- Message/position annotation on top of user state
+data Annotated a = Annotated SourcePos [Note] a deriving (Show, Eq)
+data Note = ParseNote SourcePos Severity String | Note Severity String deriving (Show, Eq)
+data MessageStack = StackNode Note MessageStack | StackMark String SourcePos MessageStack | StackEmpty
+data ParseProblem = ParseProblem SourcePos Severity String deriving (Show, Eq)
+data OutputNote = OutputNote SourcePos Severity String deriving (Show, Eq)
+data Severity = ErrorC | WarningC | InfoC | StyleC deriving (Show, Eq, Ord)
+
+instance Functor Annotated where
+    fmap f (Annotated p n a) = Annotated p n (f a)
+
+markStack msg = do
+    pos <- getPosition
+    modifyState (StackMark msg pos)
+
+getMessages r (StackMark _ _ s) = (r, s)
+getMessages r (StackNode n s) = getMessages (n:r) s
+popStack = do
+    f <- getState
+    let (notes, stack) = getMessages [] f
+    putState stack
+    return notes
+
+-- Store potential parse problems outside of parsec
+parseProblem level msg = do
+    pos <- getPosition
+    parseProblemAt pos level msg
+
+parseProblemAt pos level msg = do
+    Ms.modify ((ParseProblem pos level msg):)
+
+pushNote n = modifyState (StackNode n)
+
+parseNote l a = do
+    pos <- getPosition
+    parseNoteAt pos l a
+
+parseNoteAt pos l a = pushNote $ ParseNote pos l a
+
+
+annotated msg parser = do
+    pos <- getPosition
+    markStack msg
+    result <- parser
+    messages <- popStack
+    return $ Annotated pos messages result
+
+dropAnnotation (Annotated _ _ s) = s
+blankAnnotation pos t = Annotated pos [] t
+
+merge (Annotated pos messages result) = do
+    mapM pushNote messages
+    return result
+
+merging p = p >>= merge
+
+getOutputNotes (Annotated p notes _) = map (makeOutputNote p) notes
+
+makeOutputNote _ (ParseNote p l s) = OutputNote p l s
+makeOutputNote p (Note l s) = OutputNote p l s
+
+--------- Convenient combinators
+
+thenSkip main follow = do
+    r <- main
+    optional follow
+    return r
+
+disregard x = x >> return ()
+
+reluctantlyTill p end = do -- parse p until end <|> eof matches ahead
+    (lookAhead ((disregard $ try end) <|> eof) >> return []) <|> do
+        x <- p
+        more <- reluctantlyTill p end
+        return $ x:more
+      <|> return []
+
+reluctantlyTill1 p end = do
+    notFollowedBy end
+    x <- p
+    more <- reluctantlyTill p end
+    return $ x:more
+
+attempting rest branch = do
+    ((try branch) >> rest) <|> rest
+
+wasIncluded p = option False (p >> return True)
+
+-- Horrifying AST
+data Token = T_AND_IF | T_OR_IF | T_DSEMI | T_Semi | T_DLESS | T_DGREAT | T_LESSAND | T_GREATAND | T_LESSGREAT | T_DLESSDASH | T_CLOBBER | T_If | T_Then | T_Else | T_Elif | T_Fi | T_Do | T_Done | T_Case | T_Esac | T_While | T_Until | T_For | T_Lbrace | T_Rbrace | T_Lparen | T_Rparen | T_Bang | T_In  | T_NEWLINE | T_EOF | T_Less | T_Greater | T_SingleQuoted String | T_Literal String | T_NormalWord [Annotated Token] | T_DoubleQuoted [Annotated Token] | T_DollarExpansion [Token] | T_DollarBraced String | T_DollarVariable String | T_DollarArithmetic String | T_BraceExpansion String | T_IoFile Token Token | T_HereDoc Bool Bool String | T_HereString Token | T_FdRedirect String Token | T_Assignment String Token | T_Redirecting [Annotated Token] Token | T_SimpleCommand [Annotated Token] [Annotated Token] | T_Pipeline [Annotated Token] | T_Banged Token | T_AndIf (Annotated Token) (Annotated Token) | T_OrIf (Annotated Token) (Annotated Token) | T_Backgrounded Token | T_IfExpression [([Token],[Token])] [Token] | T_Subshell [Token] | T_BraceGroup [Token] | T_WhileExpression [Token] [Token] | T_UntilExpression [Token] [Token] | T_ForIn String [Token] [Token] | T_CaseExpression Token [([Token],[Token])] |T_Function String Token | T_Command (Annotated Token) | T_Script [Token]
+    deriving (Show)
+
+extractNotes' list = modifyFlag ((++) $ concatMap getOutputNotes list) >> return ()
+extractNotes (T_NormalWord list) = extractNotes' list
+extractNotes (T_DoubleQuoted list) = extractNotes' list
+extractNotes (T_Redirecting list f) = extractNotes' list
+extractNotes (T_Pipeline list) = extractNotes' list
+extractNotes (T_Command list) = extractNotes' [list]
+extractNotes (T_SimpleCommand list1 list2) = do
+    extractNotes' list1
+    extractNotes' list2
+extractNotes t = return ()
+
+
+postMessage level s = Ms.modify $ \(x, l) -> (x, Note level s : l)
+warn s = postMessage WarningC s
+inform s = postMessage InfoC s
+style s = postMessage StyleC s
+
+
+putFlag v = modifyFlag (const v) >> return ()
+getFlag = modifyFlag id
+modifyFlag f = do
+    Ms.modify $ \(x, l) -> (f x, l)
+    v <- Ms.get
+    return $ fst v
+
+
+analyzeScopes f i = mapM (analyzeScope f i)
+analyzeScope f i (Annotated pos notes t) = do
+    v <- getFlag
+    let (ret, (flag, list)) = Ms.runState (analyze f i t) (v, [])
+    putFlag flag
+    return $ Annotated pos (notes++list) ret
+
+analyze f i s@(T_NormalWord list) = do
+    f s
+    a <- analyzeScopes f i list
+    return . i $ T_NormalWord a
+
+analyze f i s@(T_DoubleQuoted list) = do
+    f s
+    a <- analyzeScopes f i list
+    return . i $ T_DoubleQuoted a
+
+analyze f i s@(T_DollarExpansion l) = do
+    f s
+    nl <- mapM (analyze f i) l
+    return . i $ T_DollarExpansion nl
+
+analyze f i s@(T_IoFile op file) = do
+    f s
+    a <- analyze f i op
+    b <- analyze f i file
+    return . i $ T_IoFile a b
+
+analyze f i s@(T_HereString word) = do
+    f s
+    a <- analyze f i word
+    return . i $ T_HereString a
+
+analyze f i s@(T_FdRedirect v t) = do
+    f s
+    a <- analyze f i t
+    return . i $ T_FdRedirect v a
+
+analyze f i s@(T_Assignment v t) = do
+    f s
+    a <- analyze f i t
+    return . i $ T_Assignment v a
+
+analyze f i s@(T_Redirecting redirs cmd) = do
+    f s
+    newRedirs <- analyzeScopes f i redirs
+    newCmd <- analyze f i $ cmd
+    return . i $ (T_Redirecting newRedirs newCmd)
+
+analyze f i s@(T_SimpleCommand vars cmds) = do
+    f s
+    a <- analyzeScopes f i vars
+    b <- analyzeScopes f i cmds
+    return . i $ T_SimpleCommand a b
+
+analyze f i s@(T_Pipeline l) = do
+    f s
+    a <- analyzeScopes f i l
+    return . i $ T_Pipeline a
+
+analyze f i s@(T_Banged l) = do
+    f s
+    a <- analyze f i l
+    return . i $ T_Banged a
+
+analyze f i s@(T_AndIf t u) = do
+    f s
+    a <- analyzeScope f i t
+    b <- analyzeScope f i u
+    return . i $ T_AndIf a b
+
+analyze f i s@(T_OrIf t u) = do
+    f s
+    a <- analyzeScope f i t
+    b <- analyzeScope f i u
+    return . i $ T_OrIf a b
+
+analyze f i s@(T_Backgrounded l) = do
+    f s
+    a <- analyze f i l
+    return . i $ T_Backgrounded a
+
+analyze f i s@(T_IfExpression conditions elses) = do
+    f s
+    newConds <- mapM (\(c, t) -> do
+                x <- mapM (analyze f i) c
+                y <- mapM (analyze f i) t
+                return (x, y)
+             ) conditions
+    newElses <- mapM (analyze f i) elses
+    return . i $ T_IfExpression newConds newElses
+
+analyze f i s@(T_Subshell l) = do
+    f s
+    a <- mapM (analyze f i) l
+    return . i $ T_Subshell a
+
+analyze f i s@(T_BraceGroup l) = do
+    f s
+    a <- mapM (analyze f i) l
+    return . i $ T_BraceGroup a
+
+analyze f i s@(T_WhileExpression c l) = do
+    f s
+    a <- mapM (analyze f i) c
+    b <- mapM (analyze f i) l
+    return . i $ T_WhileExpression a b
+
+analyze f i s@(T_UntilExpression c l) = do
+    f s
+    a <- mapM (analyze f i) c
+    b <- mapM (analyze f i) l
+    return . i $ T_UntilExpression a b
+
+analyze f i s@(T_ForIn v w l) = do
+    f s
+    a <- mapM (analyze f i) w
+    b <- mapM (analyze f i) l
+    return . i $ T_ForIn v a b
+
+analyze f i s@(T_CaseExpression word cases) = do
+    f s
+    newWord <- analyze f i word
+    newCases <- mapM (\(c, t) -> do
+                x <- mapM (analyze f i) c
+                y <- mapM (analyze f i) t
+                return (x, y)
+             ) cases
+    return . i $ T_CaseExpression newWord newCases
+
+analyze f i s@(T_Script l) = do
+    f s
+    a <- mapM (analyze f i) l
+    return . i $ T_Script a
+
+analyze f i s@(T_Function name body) = do
+    f s
+    a <- analyze f i body
+    return . i $ T_Function name a
+
+analyze f i s@(T_Command c) = do
+    f s
+    a <- analyzeScope f i c
+    return . i $ T_Command a
+
+analyze f i t = do
+    f t
+    return . i $ t
+
+doAnalysis f t = fst $ Ms.runState (analyze f id t) ((), [])
+explore f d t = fst . snd $ Ms.runState (analyze f id t) (d, [])
+transform i t = fst $ Ms.runState (analyze (const $ return ()) i t) ((), [])
+
+findNotes t = explore extractNotes [] t
+sortNotes l = sortBy compareNotes l
+compareNotes (OutputNote pos1 level1 _) (OutputNote pos2 level2 _) = compare (pos1, level1) (pos2, level2)
+findParseNotes l = map (\(ParseProblem p level s) -> OutputNote p level s) l
+--   T_UntilExpression [Token] [Token] | T_ForIn String [Token] [Token]
+
+getNotes s =
+    case rp readScript s of
+        (Right x, p) -> sortNotes $ (findNotes $ doAllAnalysis x) ++ (findParseNotes p)
+        (Left _, p) -> sortNotes $ (OutputNote (initialPos "-") ErrorC "Parsing failed"):(findParseNotes p)
+
+readComment = do
+    char '#'
+    anyChar `reluctantlyTill` linefeed
+
+readNormalWord = do
+    x <- many1 readNormalWordPart
+    return $ T_NormalWord x
+
+readNormalWordPart = readSingleQuoted <|> readDoubleQuoted <|> readDollar <|> readBraced <|> readBackTicked <|> (annotated "normal literal" $ readNormalLiteral)
+
+readSingleQuoted = annotated "single quoted string" $ do
+    singleQuote
+    s <- readSingleQuotedPart `reluctantlyTill` singleQuote
+    singleQuote <?> "End single quoted string"
+
+    let string = concat s
+    return (T_SingleQuoted string) `attempting` do
+        x <- lookAhead anyChar
+        when (isAlpha x && isAlpha (last string)) $ parseProblem WarningC "This apostrophe terminated the single quoted string."
+
+readSingleQuotedLiteral = do
+    singleQuote
+    strs <- many1 readSingleQuotedPart
+    singleQuote
+    return $ concat strs
+
+readSingleQuotedPart =
+    readSingleEscaped
+    <|> anyChar `reluctantlyTill1` (singleQuote <|> backslash)
+
+readBackTicked = annotated "backtick expansion" $ do
+    parseNote StyleC "`..` style expansion is deprecated, use $(..) instead if you want my help"
+    pos <- getPosition
+    char '`'
+    f <- readGenericLiteral (char '`')
+    char '`' `attempting` (eof >> parseProblemAt pos ErrorC "Can't find terminating backtick for this one")
+    return $ T_Literal f
+
+
+readDoubleQuoted = annotated "double quoted string" $ do
+    doubleQuote
+    x <- many doubleQuotedPart
+    doubleQuote <?> "End double quoted"
+    return $ T_DoubleQuoted x
+
+doubleQuotedPart = readDoubleLiteral <|> readDollar <|> readBackTicked
+
+readDoubleQuotedLiteral = do
+    doubleQuote
+    x <- readDoubleLiteral
+    doubleQuote
+    return $ dropAnnotation x
+
+readDoubleLiteral = annotated "double literal" $ do
+    s <- many1 readDoubleLiteralPart
+    return $ T_Literal (concat s)
+
+readDoubleLiteralPart = do
+    x <- (readDoubleEscaped <|> (anyChar >>= \x -> return [x])) `reluctantlyTill1` doubleQuotable
+    return $ concat x
+
+readNormalLiteral = do
+    s <- many1 readNormalLiteralPart
+    return $ T_Literal (concat s)
+
+readNormalLiteralPart = do
+    readNormalEscaped <|> (anyChar `reluctantlyTill1` quotable)
+
+readNormalEscaped = do
+    backslash
+    pos <- getPosition
+    do
+        next <- (quotable <|> oneOf "?*[]")
+        return $ if next == '\n' then "" else [next]
+      <|>
+        do
+            next <- anyChar <?> "No character after \\"
+            parseNoteAt pos WarningC $ "This character doesn't need escaping here, the \\ is ignored"
+            return [next]
+
+readSingleEscaped = do
+    s <- backslash
+    let attempt level p msg = do { try $ parseNote level msg; x <- p; return [s,x]; }
+
+    do {
+        x <- singleQuote;
+        parseProblem InfoC "Are you trying to escape a single quote? echo 'You'\\''re doing it wrong'.";
+        return [s,x];
+    }
+        <|> attempt InfoC linefeed "You don't break lines with \\ in single quotes, it results in literal backslash-linefeed."
+        <|> do
+            x <- anyChar
+            return [s,x]
+
+
+readDoubleEscaped = do
+    bs <- backslash
+    (linefeed >> return "")
+        <|> (doubleQuotable >>= return . return)
+        <|> (anyChar >>= (return . \x -> [bs, x]))
+
+
+readGenericLiteral endExp = do
+    strings <- many (readGenericEscaped <|> anyChar `reluctantlyTill1` endExp)
+    return $ concat strings
+
+readGenericLiteral1 endExp = do
+    strings <- many1 (readGenericEscaped <|> anyChar `reluctantlyTill1` endExp)
+    return $ concat strings
+
+readGenericEscaped = do
+    backslash
+    x <- anyChar
+    return $ if x == '\n' then [] else [x]
+
+readBraced = annotated "{1,2..3} expression" $ try $ do
+    let strip (T_Literal s) = return ("\"" ++ s ++ "\"")
+    char '{'
+    str <- many1 ((readDoubleQuotedLiteral >>= (strip )) <|> readGenericLiteral1 (oneOf "}" <|> whitespace))
+    char '}'
+    return $ T_BraceExpansion $ concat str
+
+readDollar = readDollarArithmetic <|> readDollarBraced <|> readDollarExpansion <|> readDollarVariable <|> readDollarLonely
+
+
+readParenLiteralHack = do
+    strs <- ((anyChar >>= \x -> return [x]) <|> readParenHack) `reluctantlyTill1` (string "))")
+    return $ concat strs
+
+readParenHack = do
+    char '('
+    x <- many anyChar
+    char ')'
+    return $ "(" ++ x ++ ")"
+
+readDollarArithmetic = annotated "$(( )) expression" $ do
+    try (string "$((")
+    -- TODO
+    str <- readParenLiteralHack
+    string "))"
+    return (T_DollarArithmetic str)
+
+readDollarBraced = annotated "${ } expression" $ do
+    try (string "${")
+    -- TODO
+    str <- readGenericLiteral (char '}')
+    char '}' <?> "matching }"
+    return $ (T_DollarBraced str)
+
+readDollarExpansion = annotated "$( )" $ do
+    try (string "$(")
+    cmds <- readCompoundList
+    char ')'
+    return $ (T_DollarExpansion cmds)
+
+readDollarVariable = annotated "$variable" $ do
+    let singleCharred p = do
+        n <- p
+        return (T_DollarVariable [n]) `attempting` do
+            pos <- getPosition
+            num <- lookAhead $ many1 p
+            parseNoteAt pos ErrorC $ "$" ++ (n:num) ++ " is equivalent to ${" ++ [n] ++ "}"++ num
+
+    let positional = singleCharred digit
+    let special = singleCharred specialVariable
+
+    let regular = do
+        name <- readVariableName
+        return $ T_DollarVariable (name)
+
+    char '$'
+    positional <|> special <|> regular
+
+readVariableName = do
+    f <- variableStart
+    rest <- many variableChars
+    return (f:rest)
+
+readDollarLonely = annotated "lonely $" $ do
+    parseNote ErrorC "$ is not used specially and should therefore be escaped"
+    char '$'
+    return $ T_Literal "$"
+
+readHereDoc = annotated "here document" $ do
+    let stripLiteral (T_Literal x) = x
+        stripLiteral (T_SingleQuoted x) = x
+    try $ string "<<"
+    dashed <- (char '-' >> return True) <|> return False
+    tokenPosition <- getPosition
+    spacing
+    (quoted, endToken) <- (readNormalLiteral >>= (\x -> return (False, stripLiteral x)) )
+                            <|> (readDoubleQuotedLiteral >>= return . (\x -> (True, stripLiteral x)))
+                            <|> (readSingleQuotedLiteral >>= return . (\x -> (True, x)))
+    spacing
+
+    hereInfo <- anyChar `reluctantlyTill` (linefeed >> spacing >> (string endToken) >> (disregard whitespace <|> eof))
+
+    do
+        linefeed
+        spaces <- spacing
+        verifyHereDoc dashed quoted spaces hereInfo
+        token <- string endToken
+        return $ T_FdRedirect "" $ T_HereDoc dashed quoted hereInfo
+     `attempting` (eof >> debugHereDoc tokenPosition endToken hereInfo)
+
+verifyHereDoc dashed quoted spacing hereInfo = do
+    when (not dashed && spacing /= "") $ parseNote ErrorC "When using << instead of <<-, the end tokens can't be indented"
+    when (dashed && filter (/= '\t') spacing /= "" ) $ parseNote ErrorC "When using <<-, you can only indent with tabs"
+    return ()
+
+debugHereDoc pos endToken doc =
+    if endToken `isInfixOf` doc
+        then parseProblemAt pos ErrorC (endToken ++ " was part of the here document, but not by itself at the start of the line")
+        else if (map toLower endToken) `isInfixOf` (map toLower doc)
+            then parseProblemAt pos ErrorC (endToken ++ " appears in the here document, but with different case")
+            else parseProblemAt pos ErrorC ("Couldn't find end token `" ++ endToken ++ "' in the here document ")
+
+
+readFilename = readNormalWord
+readIoFileOp = choice [g_LESSAND, g_GREATAND, g_DGREAT, g_LESSGREAT, g_CLOBBER, string "<" >> return T_Less, string ">" >> return T_Greater ]
+readIoFile = do
+    op <- readIoFileOp
+    spacing
+    file <- readFilename
+    return $ T_FdRedirect "" $ T_IoFile op file
+readIoNumber = try $ do
+    x <- many1 digit
+    lookAhead readIoFileOp
+    return x
+readIoNumberRedirect = annotated "fd io redirect" $ do
+    n <- readIoNumber
+    op <- merging readHereString <|> merging readHereDoc <|> readIoFile
+    let actualOp = case op of T_FdRedirect "" x -> x
+    spacing
+    return $ T_FdRedirect n actualOp
+
+readIoRedirect = annotated "io redirect" $ choice [ merging readIoNumberRedirect, merging readHereString, merging readHereDoc, readIoFile ] `thenSkip` spacing
+
+readRedirectList = many1 readIoRedirect
+
+readHereString = annotated "here string" $ do
+    try $ string "<<<"
+    spacing
+    word <- readNormalWord
+    return $ T_FdRedirect "" $ T_HereString word
+
+readNewlineList = many1 ((newline <|> carriageReturn) `thenSkip` spacing)
+readLineBreak = optional readNewlineList
+
+readSeparatorOp = do
+    notFollowedBy (g_AND_IF <|> g_DSEMI)
+    f <- char ';' <|> char '&'
+    spacing
+    return f
+
+readSequentialSep = (disregard $ g_Semi >> readLineBreak) <|> (disregard readNewlineList)
+readSeparator =
+    do
+        separator <- readSeparatorOp
+        readLineBreak
+        return separator
+     <|>
+        do
+            readNewlineList
+            return '\n'
+
+makeSimpleCommand tokens =
+    let (assignment, rest) = partition (\x -> case dropAnnotation x of T_Assignment _ _ -> True; _ -> False) tokens
+    in let (redirections, rest2) = partition (\x -> case dropAnnotation x of T_FdRedirect _ _ -> True; _ -> False) rest
+       in T_Redirecting redirections $ T_SimpleCommand assignment rest2
+
+readSimpleCommand = annotated "simple command" $ do
+    prefix <- option [] readCmdPrefix
+    cmd <- option [] $ do { f <- annotated "command name" readCmdName; return [f]; }
+    when (null prefix && null cmd) $ fail "No command"
+    if null cmd
+        then return $ makeSimpleCommand prefix
+        else do
+            suffix <- option [] readCmdSuffix
+            return $ makeSimpleCommand (prefix ++ cmd ++ suffix)
+
+readPipeline = annotated "Pipeline" $ do
+    notFollowedBy $ try readKeyword
+    do
+        g_Bang `thenSkip` spacing
+        pipe <- readPipeSequence
+        return $ T_Banged pipe
+      <|> do
+        readPipeSequence
+
+readAndOr = (flip (>>=)) (return . T_Command) $ chainr1 readPipeline $ do
+    pos <- getPosition
+    op <- g_AND_IF <|> g_OR_IF
+    readLineBreak
+    return $ \a b ->
+                blankAnnotation pos $
+                     case op of T_AND_IF -> T_AndIf a b
+                                T_OR_IF  -> T_OrIf a b
+
+readTerm = do
+    m <- readAndOr
+    readTerm' m
+
+readTerm' current =
+    do
+        sep <- readSeparator
+        more <- (option T_EOF $ readAndOr)
+        case more of T_EOF -> return [transformWithSeparator sep current]
+                     _     -> do
+                                list <- readTerm' more
+                                return $ (transformWithSeparator sep current : list)
+      <|>
+        return [current]
+
+transformWithSeparator '&' = T_Backgrounded
+transformWithSeparator _  = id
+
+
+readPipeSequence = do
+    list <- readCommand `sepBy1` (readPipe `thenSkip` (spacing >> readLineBreak))
+    spacing
+    return $ T_Pipeline list
+
+readPipe = do
+    notFollowedBy g_OR_IF
+    char '|' `thenSkip` spacing
+
+readCommand = (readCompoundCommand <|> readSimpleCommand)
+
+readCmdName = do
+    f <- readNormalWord
+    spacing
+    return f
+
+readCmdWord = do
+    f <- readNormalWord
+    spacing
+    return f
+
+readIfClause = annotated "if statement" $ do
+    (condition, action) <- readIfPart
+    elifs <- many readElifPart
+    elses <- option [] readElsePart
+    g_Fi
+    return $ T_IfExpression ((condition, action):elifs) elses
+
+readIfPart = do
+    g_If
+    allspacing
+    condition <- readTerm
+    g_Then
+    allspacing
+    action <- readTerm
+    return (condition, action)
+
+readElifPart = do
+    g_Elif
+    allspacing
+    condition <- readTerm
+    g_Then
+    allspacing
+    action <- readTerm
+    return (condition, action)
+
+readElsePart = do
+    g_Else
+    allspacing
+    readTerm
+
+readSubshell = annotated "subshell group" $ do
+    char '('
+    allspacing
+    list <- readCompoundList
+    allspacing
+    char ')'
+    return $ T_Subshell list
+
+readBraceGroup = annotated "brace group" $ do
+    char '{'
+    allspacing
+    list <- readTerm
+    allspacing
+    char '}'
+    return $ T_BraceGroup list
+
+readWhileClause = annotated "while loop" $ do
+    g_While
+    condition <- readTerm
+    statements <- readDoGroup
+    return $ T_WhileExpression condition statements
+
+readUntilClause = annotated "until loop" $ do
+    g_Until
+    condition <- readTerm
+    statements <- readDoGroup
+    return $ T_UntilExpression condition statements
+
+readDoGroup = do
+    pos <- getPosition
+    g_Do
+    allspacing
+    (eof >> return []) <|>
+        do
+            commands <- readCompoundList
+            disregard g_Done <|> eof -- stunted support
+            return commands
+          <|> do
+            parseProblemAt pos ErrorC "Can't find the 'done' for this 'do'"
+            fail "No done"
+
+readForClause = annotated "for loop" $ do
+    g_For
+    spacing
+    name <- readVariableName
+    allspacing
+    values <- readInClause <|> (readSequentialSep >> return [])
+    group <- readDoGroup <|> (allspacing >> eof >> return []) -- stunted support
+    return $ T_ForIn name values group
+
+readInClause = do
+    g_In
+    things <- (readCmdWord) `reluctantlyTill`
+                (disregard (g_Semi) <|> disregard linefeed <|> disregard g_Do)
+
+    do {
+        lookAhead (g_Do);
+        parseNote ErrorC "You need a line feed or semicolon before the 'do' (in Bash)";
+    } <|> do {
+        optional $ g_Semi;
+        disregard allspacing;
+    }
+
+    return things
+
+readCaseClause = annotated "case statement" $ do
+    g_Case
+    word <- readNormalWord
+    spacing
+    g_In
+    readLineBreak
+    list <- readCaseList
+    g_Esac
+    return $ T_CaseExpression word list
+
+readCaseList = many readCaseItem
+
+readCaseItem = do
+    notFollowedBy g_Esac
+    optional g_Lparen
+    spacing
+    pattern <- readPattern
+    g_Rparen
+    readLineBreak
+    list <- ((lookAhead g_DSEMI >> return []) <|> readCompoundList)
+    (g_DSEMI <|> lookAhead (readLineBreak >> g_Esac))
+    readLineBreak
+    return (pattern, list)
+
+readFunctionDefinition = annotated "function definition" $ do
+    name <- try readFunctionSignature
+    allspacing
+    (disregard (lookAhead g_Lbrace) <|> parseProblem ErrorC "Expected a { to open the function definition")
+    group <- merging readBraceGroup
+    return $ T_Function name group
+
+
+readFunctionSignature = do
+    (optional $ try (string "function " >> parseNote StyleC "Don't use 'function' in front of function definitions"))
+    name <- readVariableName
+    spacing
+    g_Lparen
+    g_Rparen
+    return name
+
+
+readPattern = (readNormalWord `thenSkip` spacing) `sepBy1` (char '|' `thenSkip` spacing)
+
+
+readCompoundCommand = annotated "compound command" $ do
+    cmd <- merging $ choice [ readBraceGroup, readSubshell, readWhileClause, readUntilClause, readIfClause, readForClause, readCaseClause, readFunctionDefinition]
+    spacing
+    redirs <- many readIoRedirect
+    return $ T_Redirecting redirs $ cmd
+
+
+readCompoundList = readTerm
+
+readCmdPrefix = many1 (readIoRedirect <|> readAssignmentWord)
+readCmdSuffix = many1 (readIoRedirect <|> annotated "normal word" readCmdWord)
+
+readAssignmentWord = annotated "assignment" $ try $ do
+    optional (char '$' >> parseNote ErrorC "Don't use $ on the left side of assignments")
+    variable <- readVariableName
+    space <- spacing
+    pos <- getPosition
+    char '='
+    space2 <- spacing
+    value <- readNormalWord
+    spacing
+    when (space ++ space2 /= "") $ parseNoteAt pos ErrorC "Don't put spaces around the = in assignments"
+    return $ T_Assignment variable value
+
+
+tryToken s t = try (string s >> spacing >> return t)
+tryWordToken s t = tryParseWordToken (string s) t `thenSkip` spacing
+tryParseWordToken parser t = try (parser >> (lookAhead (eof <|> disregard whitespace))) >> return t
+
+g_AND_IF = tryToken "&&" T_AND_IF
+g_OR_IF = tryToken "||" T_OR_IF
+g_DSEMI = tryToken ";;" T_DSEMI
+g_DLESS = tryToken "<<" T_DLESS
+g_DGREAT = tryToken ">>" T_DGREAT
+g_LESSAND = tryToken "<&" T_LESSAND
+g_GREATAND = tryToken ">&" T_GREATAND
+g_LESSGREAT = tryToken "<>" T_LESSGREAT
+g_DLESSDASH = tryToken "<<-" T_DLESSDASH
+g_CLOBBER = tryToken ">|" T_CLOBBER
+g_OPERATOR = g_AND_IF <|> g_OR_IF <|> g_DSEMI <|> g_DLESSDASH <|> g_DLESS <|> g_DGREAT <|> g_LESSAND <|> g_GREATAND <|> g_LESSGREAT
+
+g_If = tryWordToken "if" T_If
+g_Then = tryWordToken "then" T_Then
+g_Else = tryWordToken "else" T_Else
+g_Elif = tryWordToken "elif" T_Elif
+g_Fi = tryWordToken "fi" T_Fi
+g_Do = tryWordToken "do" T_Do
+g_Done = tryWordToken "done" T_Done
+g_Case = tryWordToken "case" T_Case
+g_Esac = tryWordToken "esac" T_Esac
+g_While = tryWordToken "while" T_While
+g_Until = tryWordToken "until" T_Until
+g_For = tryWordToken "for" T_For
+g_In = tryWordToken "in" T_In
+g_Lbrace = tryWordToken "{" T_Lbrace
+g_Rbrace = tryWordToken "}" T_Rbrace
+
+g_Lparen = tryToken "(" T_Lparen
+g_Rparen = tryToken ")" T_Rparen
+g_Bang = tryToken "!" T_Bang
+
+g_Semi = do
+    notFollowedBy g_DSEMI
+    tryToken ";" T_Semi
+
+readKeyword = choice [ g_Then, g_Else, g_Elif, g_Fi, g_Do, g_Done, g_Esac, g_Rbrace, g_Rparen, g_DSEMI ]
+
+ifParse p t f = do
+    (lookAhead (try p) >> t) <|> f
+
+wtf = do
+    x <- many anyChar
+    parseProblem ErrorC x
+
+readScript = do
+    do {
+        allspacing;
+        commands <- readTerm;
+        eof <|> (parseProblem WarningC "Stopping here, because I can't parse this command");
+        return $ T_Script commands;
+    } <|> do {
+        parseProblem WarningC "Couldn't read any commands";
+        wtf;
+        return T_EOF;
+    }
+
+shpell s = rp readScript s
+rp p s = Ms.runState (runParserT p StackEmpty "-" s) []
+
+-------- Destructively simplify AST
+
+simplify (T_Redirecting [] t) = t
+simplify (T_Pipeline [x]) = dropAnnotation x
+simplify (T_NormalWord [x]) = dropAnnotation x
+simplify t = t
+
+-------- Analytics
+doAllAnalysis t = foldl (\v f -> doAnalysis f v) t checks
+
+getAst s = case rp readScript s of (Right parsed, _) -> parsed
+getAst2 s = case rp readScript s of (Right parsed, _) -> transform simplify parsed
+lol (Right x, _) = x
+
+deadSimple (T_NormalWord l) = [concat (concatMap (deadSimple . dropAnnotation) l)]
+deadSimple (T_DoubleQuoted l) = ["\"" ++(concat (concatMap (deadSimple . dropAnnotation) l)) ++ "\""]
+deadSimple (T_SingleQuoted s) = [s]
+deadSimple (T_DollarVariable _) = ["${VAR}"]
+deadSimple (T_DollarBraced _) = ["${VAR}"]
+deadSimple (T_DollarArithmetic _) = ["${VAR}"]
+deadSimple (T_DollarExpansion _) = ["${VAR}"]
+deadSimple (T_Literal x) = [x]
+deadSimple (T_SimpleCommand vars words) = concatMap (deadSimple . dropAnnotation) words
+deadSimple (T_Redirecting _ foo) = deadSimple foo
+deadSimple _ = []
+
+
+checks = [checkUuoc]
+checkUuoc (T_Pipeline ((Annotated _ _ x):_:_)) = case (deadSimple x) of ["cat", _] -> style "UUOC: Instead of 'cat a | b', use 'b < a'"
+                                                                        _ -> return ()
+checkUuoc _ = return ()
+
+
+main = do
+    s <- getContents
+--    case rp readScript s of (Right parsed, _) -> putStrLn . show $ transform simplify parsed
+--                            (Left x, y) -> putStrLn $ "Can't parse: " ++ (show (x,y))
+
+    mapM (putStrLn . show) $ getNotes s