From dabc4452028cfa3ff0dc5685766dffe2d0a342cd Mon Sep 17 00:00:00 2001 From: Vidar Holen Date: Sat, 3 Nov 2012 09:32:09 -0700 Subject: [PATCH] Initial version --- shpell.hs | 934 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 934 insertions(+) create mode 100644 shpell.hs diff --git a/shpell.hs b/shpell.hs new file mode 100644 index 0000000..dec310c --- /dev/null +++ b/shpell.hs @@ -0,0 +1,934 @@ +{-# LANGUAGE NoMonomorphismRestriction #-} + +-- Shpell Check, by Vidar 'koala_man' Holen +-- Sorry about the code. It was a week's worth of hacking. + +import Text.Parsec +import Text.Parsec.Pos (initialPos) +import Debug.Trace +import Control.Monad +import Data.Char +import Data.List (isInfixOf, partition, sortBy, intercalate) +import qualified Control.Monad.State as Ms +import Data.Maybe +import Prelude hiding (readList) +import System.IO + + + +backslash = char '\\' +linefeed = char '\n' +singleQuote = char '\'' +doubleQuote = char '"' +variableStart = upper <|> lower <|> oneOf "_" +variableChars = upper <|> lower <|> digit <|> oneOf "_" +specialVariable = oneOf "@*#?-$!" +tokenDelimiter = oneOf "&|;<> \t\n" +quotable = oneOf "#|&;<>()$`\\ \"'\t\n" +doubleQuotable = oneOf "\"$`" +whitespace = oneOf " \t\n" +linewhitespace = oneOf " \t" + +spacing = do + x <- many (many1 linewhitespace <|> (try $ string "\\\n")) + optional readComment + return $ concat x + +allspacing = do + spacing + x <- option False ((linefeed <|> carriageReturn) >> return True) + when x allspacing + +carriageReturn = do + parseNote ErrorC "Literal carriage return. Run script through tr -d '\\r' " + char '\r' + + +--------- Message/position annotation on top of user state +data Annotated a = Annotated SourcePos [Note] a deriving (Show, Eq) +data Note = ParseNote SourcePos Severity String | Note Severity String deriving (Show, Eq) +data MessageStack = StackNode Note MessageStack | StackMark String SourcePos MessageStack | StackEmpty +data ParseProblem = ParseProblem SourcePos Severity String deriving (Show, Eq) +data OutputNote = OutputNote SourcePos Severity String deriving (Show, Eq) +data Severity = ErrorC | WarningC | InfoC | StyleC deriving (Show, Eq, Ord) + +instance Functor Annotated where + fmap f (Annotated p n a) = Annotated p n (f a) + +markStack msg = do + pos <- getPosition + modifyState (StackMark msg pos) + +getMessages r (StackMark _ _ s) = (r, s) +getMessages r (StackNode n s) = getMessages (n:r) s +popStack = do + f <- getState + let (notes, stack) = getMessages [] f + putState stack + return notes + +-- Store potential parse problems outside of parsec +parseProblem level msg = do + pos <- getPosition + parseProblemAt pos level msg + +parseProblemAt pos level msg = do + Ms.modify ((ParseProblem pos level msg):) + +pushNote n = modifyState (StackNode n) + +parseNote l a = do + pos <- getPosition + parseNoteAt pos l a + +parseNoteAt pos l a = pushNote $ ParseNote pos l a + + +annotated msg parser = do + pos <- getPosition + markStack msg + result <- parser + messages <- popStack + return $ Annotated pos messages result + +dropAnnotation (Annotated _ _ s) = s +blankAnnotation pos t = Annotated pos [] t + +merge (Annotated pos messages result) = do + mapM pushNote messages + return result + +merging p = p >>= merge + +getOutputNotes (Annotated p notes _) = map (makeOutputNote p) notes + +makeOutputNote _ (ParseNote p l s) = OutputNote p l s +makeOutputNote p (Note l s) = OutputNote p l s + +--------- Convenient combinators + +thenSkip main follow = do + r <- main + optional follow + return r + +disregard x = x >> return () + +reluctantlyTill p end = do -- parse p until end <|> eof matches ahead + (lookAhead ((disregard $ try end) <|> eof) >> return []) <|> do + x <- p + more <- reluctantlyTill p end + return $ x:more + <|> return [] + +reluctantlyTill1 p end = do + notFollowedBy end + x <- p + more <- reluctantlyTill p end + return $ x:more + +attempting rest branch = do + ((try branch) >> rest) <|> rest + +wasIncluded p = option False (p >> return True) + +-- Horrifying AST +data Token = T_AND_IF | T_OR_IF | T_DSEMI | T_Semi | T_DLESS | T_DGREAT | T_LESSAND | T_GREATAND | T_LESSGREAT | T_DLESSDASH | T_CLOBBER | T_If | T_Then | T_Else | T_Elif | T_Fi | T_Do | T_Done | T_Case | T_Esac | T_While | T_Until | T_For | T_Lbrace | T_Rbrace | T_Lparen | T_Rparen | T_Bang | T_In | T_NEWLINE | T_EOF | T_Less | T_Greater | T_SingleQuoted String | T_Literal String | T_NormalWord [Annotated Token] | T_DoubleQuoted [Annotated Token] | T_DollarExpansion [Token] | T_DollarBraced String | T_DollarVariable String | T_DollarArithmetic String | T_BraceExpansion String | T_IoFile Token Token | T_HereDoc Bool Bool String | T_HereString Token | T_FdRedirect String Token | T_Assignment String Token | T_Redirecting [Annotated Token] Token | T_SimpleCommand [Annotated Token] [Annotated Token] | T_Pipeline [Annotated Token] | T_Banged Token | T_AndIf (Annotated Token) (Annotated Token) | T_OrIf (Annotated Token) (Annotated Token) | T_Backgrounded Token | T_IfExpression [([Token],[Token])] [Token] | T_Subshell [Token] | T_BraceGroup [Token] | T_WhileExpression [Token] [Token] | T_UntilExpression [Token] [Token] | T_ForIn String [Token] [Token] | T_CaseExpression Token [([Token],[Token])] |T_Function String Token | T_Command (Annotated Token) | T_Script [Token] + deriving (Show) + +extractNotes' list = modifyFlag ((++) $ concatMap getOutputNotes list) >> return () +extractNotes (T_NormalWord list) = extractNotes' list +extractNotes (T_DoubleQuoted list) = extractNotes' list +extractNotes (T_Redirecting list f) = extractNotes' list +extractNotes (T_Pipeline list) = extractNotes' list +extractNotes (T_Command list) = extractNotes' [list] +extractNotes (T_SimpleCommand list1 list2) = do + extractNotes' list1 + extractNotes' list2 +extractNotes t = return () + + +postMessage level s = Ms.modify $ \(x, l) -> (x, Note level s : l) +warn s = postMessage WarningC s +inform s = postMessage InfoC s +style s = postMessage StyleC s + + +putFlag v = modifyFlag (const v) >> return () +getFlag = modifyFlag id +modifyFlag f = do + Ms.modify $ \(x, l) -> (f x, l) + v <- Ms.get + return $ fst v + + +analyzeScopes f i = mapM (analyzeScope f i) +analyzeScope f i (Annotated pos notes t) = do + v <- getFlag + let (ret, (flag, list)) = Ms.runState (analyze f i t) (v, []) + putFlag flag + return $ Annotated pos (notes++list) ret + +analyze f i s@(T_NormalWord list) = do + f s + a <- analyzeScopes f i list + return . i $ T_NormalWord a + +analyze f i s@(T_DoubleQuoted list) = do + f s + a <- analyzeScopes f i list + return . i $ T_DoubleQuoted a + +analyze f i s@(T_DollarExpansion l) = do + f s + nl <- mapM (analyze f i) l + return . i $ T_DollarExpansion nl + +analyze f i s@(T_IoFile op file) = do + f s + a <- analyze f i op + b <- analyze f i file + return . i $ T_IoFile a b + +analyze f i s@(T_HereString word) = do + f s + a <- analyze f i word + return . i $ T_HereString a + +analyze f i s@(T_FdRedirect v t) = do + f s + a <- analyze f i t + return . i $ T_FdRedirect v a + +analyze f i s@(T_Assignment v t) = do + f s + a <- analyze f i t + return . i $ T_Assignment v a + +analyze f i s@(T_Redirecting redirs cmd) = do + f s + newRedirs <- analyzeScopes f i redirs + newCmd <- analyze f i $ cmd + return . i $ (T_Redirecting newRedirs newCmd) + +analyze f i s@(T_SimpleCommand vars cmds) = do + f s + a <- analyzeScopes f i vars + b <- analyzeScopes f i cmds + return . i $ T_SimpleCommand a b + +analyze f i s@(T_Pipeline l) = do + f s + a <- analyzeScopes f i l + return . i $ T_Pipeline a + +analyze f i s@(T_Banged l) = do + f s + a <- analyze f i l + return . i $ T_Banged a + +analyze f i s@(T_AndIf t u) = do + f s + a <- analyzeScope f i t + b <- analyzeScope f i u + return . i $ T_AndIf a b + +analyze f i s@(T_OrIf t u) = do + f s + a <- analyzeScope f i t + b <- analyzeScope f i u + return . i $ T_OrIf a b + +analyze f i s@(T_Backgrounded l) = do + f s + a <- analyze f i l + return . i $ T_Backgrounded a + +analyze f i s@(T_IfExpression conditions elses) = do + f s + newConds <- mapM (\(c, t) -> do + x <- mapM (analyze f i) c + y <- mapM (analyze f i) t + return (x, y) + ) conditions + newElses <- mapM (analyze f i) elses + return . i $ T_IfExpression newConds newElses + +analyze f i s@(T_Subshell l) = do + f s + a <- mapM (analyze f i) l + return . i $ T_Subshell a + +analyze f i s@(T_BraceGroup l) = do + f s + a <- mapM (analyze f i) l + return . i $ T_BraceGroup a + +analyze f i s@(T_WhileExpression c l) = do + f s + a <- mapM (analyze f i) c + b <- mapM (analyze f i) l + return . i $ T_WhileExpression a b + +analyze f i s@(T_UntilExpression c l) = do + f s + a <- mapM (analyze f i) c + b <- mapM (analyze f i) l + return . i $ T_UntilExpression a b + +analyze f i s@(T_ForIn v w l) = do + f s + a <- mapM (analyze f i) w + b <- mapM (analyze f i) l + return . i $ T_ForIn v a b + +analyze f i s@(T_CaseExpression word cases) = do + f s + newWord <- analyze f i word + newCases <- mapM (\(c, t) -> do + x <- mapM (analyze f i) c + y <- mapM (analyze f i) t + return (x, y) + ) cases + return . i $ T_CaseExpression newWord newCases + +analyze f i s@(T_Script l) = do + f s + a <- mapM (analyze f i) l + return . i $ T_Script a + +analyze f i s@(T_Function name body) = do + f s + a <- analyze f i body + return . i $ T_Function name a + +analyze f i s@(T_Command c) = do + f s + a <- analyzeScope f i c + return . i $ T_Command a + +analyze f i t = do + f t + return . i $ t + +doAnalysis f t = fst $ Ms.runState (analyze f id t) ((), []) +explore f d t = fst . snd $ Ms.runState (analyze f id t) (d, []) +transform i t = fst $ Ms.runState (analyze (const $ return ()) i t) ((), []) + +findNotes t = explore extractNotes [] t +sortNotes l = sortBy compareNotes l +compareNotes (OutputNote pos1 level1 _) (OutputNote pos2 level2 _) = compare (pos1, level1) (pos2, level2) +findParseNotes l = map (\(ParseProblem p level s) -> OutputNote p level s) l +-- T_UntilExpression [Token] [Token] | T_ForIn String [Token] [Token] + +getNotes s = + case rp readScript s of + (Right x, p) -> sortNotes $ (findNotes $ doAllAnalysis x) ++ (findParseNotes p) + (Left _, p) -> sortNotes $ (OutputNote (initialPos "-") ErrorC "Parsing failed"):(findParseNotes p) + +readComment = do + char '#' + anyChar `reluctantlyTill` linefeed + +readNormalWord = do + x <- many1 readNormalWordPart + return $ T_NormalWord x + +readNormalWordPart = readSingleQuoted <|> readDoubleQuoted <|> readDollar <|> readBraced <|> readBackTicked <|> (annotated "normal literal" $ readNormalLiteral) + +readSingleQuoted = annotated "single quoted string" $ do + singleQuote + s <- readSingleQuotedPart `reluctantlyTill` singleQuote + singleQuote "End single quoted string" + + let string = concat s + return (T_SingleQuoted string) `attempting` do + x <- lookAhead anyChar + when (isAlpha x && isAlpha (last string)) $ parseProblem WarningC "This apostrophe terminated the single quoted string." + +readSingleQuotedLiteral = do + singleQuote + strs <- many1 readSingleQuotedPart + singleQuote + return $ concat strs + +readSingleQuotedPart = + readSingleEscaped + <|> anyChar `reluctantlyTill1` (singleQuote <|> backslash) + +readBackTicked = annotated "backtick expansion" $ do + parseNote StyleC "`..` style expansion is deprecated, use $(..) instead if you want my help" + pos <- getPosition + char '`' + f <- readGenericLiteral (char '`') + char '`' `attempting` (eof >> parseProblemAt pos ErrorC "Can't find terminating backtick for this one") + return $ T_Literal f + + +readDoubleQuoted = annotated "double quoted string" $ do + doubleQuote + x <- many doubleQuotedPart + doubleQuote "End double quoted" + return $ T_DoubleQuoted x + +doubleQuotedPart = readDoubleLiteral <|> readDollar <|> readBackTicked + +readDoubleQuotedLiteral = do + doubleQuote + x <- readDoubleLiteral + doubleQuote + return $ dropAnnotation x + +readDoubleLiteral = annotated "double literal" $ do + s <- many1 readDoubleLiteralPart + return $ T_Literal (concat s) + +readDoubleLiteralPart = do + x <- (readDoubleEscaped <|> (anyChar >>= \x -> return [x])) `reluctantlyTill1` doubleQuotable + return $ concat x + +readNormalLiteral = do + s <- many1 readNormalLiteralPart + return $ T_Literal (concat s) + +readNormalLiteralPart = do + readNormalEscaped <|> (anyChar `reluctantlyTill1` quotable) + +readNormalEscaped = do + backslash + pos <- getPosition + do + next <- (quotable <|> oneOf "?*[]") + return $ if next == '\n' then "" else [next] + <|> + do + next <- anyChar "No character after \\" + parseNoteAt pos WarningC $ "This character doesn't need escaping here, the \\ is ignored" + return [next] + +readSingleEscaped = do + s <- backslash + let attempt level p msg = do { try $ parseNote level msg; x <- p; return [s,x]; } + + do { + x <- singleQuote; + parseProblem InfoC "Are you trying to escape a single quote? echo 'You'\\''re doing it wrong'."; + return [s,x]; + } + <|> attempt InfoC linefeed "You don't break lines with \\ in single quotes, it results in literal backslash-linefeed." + <|> do + x <- anyChar + return [s,x] + + +readDoubleEscaped = do + bs <- backslash + (linefeed >> return "") + <|> (doubleQuotable >>= return . return) + <|> (anyChar >>= (return . \x -> [bs, x])) + + +readGenericLiteral endExp = do + strings <- many (readGenericEscaped <|> anyChar `reluctantlyTill1` endExp) + return $ concat strings + +readGenericLiteral1 endExp = do + strings <- many1 (readGenericEscaped <|> anyChar `reluctantlyTill1` endExp) + return $ concat strings + +readGenericEscaped = do + backslash + x <- anyChar + return $ if x == '\n' then [] else [x] + +readBraced = annotated "{1,2..3} expression" $ try $ do + let strip (T_Literal s) = return ("\"" ++ s ++ "\"") + char '{' + str <- many1 ((readDoubleQuotedLiteral >>= (strip )) <|> readGenericLiteral1 (oneOf "}" <|> whitespace)) + char '}' + return $ T_BraceExpansion $ concat str + +readDollar = readDollarArithmetic <|> readDollarBraced <|> readDollarExpansion <|> readDollarVariable <|> readDollarLonely + + +readParenLiteralHack = do + strs <- ((anyChar >>= \x -> return [x]) <|> readParenHack) `reluctantlyTill1` (string "))") + return $ concat strs + +readParenHack = do + char '(' + x <- many anyChar + char ')' + return $ "(" ++ x ++ ")" + +readDollarArithmetic = annotated "$(( )) expression" $ do + try (string "$((") + -- TODO + str <- readParenLiteralHack + string "))" + return (T_DollarArithmetic str) + +readDollarBraced = annotated "${ } expression" $ do + try (string "${") + -- TODO + str <- readGenericLiteral (char '}') + char '}' "matching }" + return $ (T_DollarBraced str) + +readDollarExpansion = annotated "$( )" $ do + try (string "$(") + cmds <- readCompoundList + char ')' + return $ (T_DollarExpansion cmds) + +readDollarVariable = annotated "$variable" $ do + let singleCharred p = do + n <- p + return (T_DollarVariable [n]) `attempting` do + pos <- getPosition + num <- lookAhead $ many1 p + parseNoteAt pos ErrorC $ "$" ++ (n:num) ++ " is equivalent to ${" ++ [n] ++ "}"++ num + + let positional = singleCharred digit + let special = singleCharred specialVariable + + let regular = do + name <- readVariableName + return $ T_DollarVariable (name) + + char '$' + positional <|> special <|> regular + +readVariableName = do + f <- variableStart + rest <- many variableChars + return (f:rest) + +readDollarLonely = annotated "lonely $" $ do + parseNote ErrorC "$ is not used specially and should therefore be escaped" + char '$' + return $ T_Literal "$" + +readHereDoc = annotated "here document" $ do + let stripLiteral (T_Literal x) = x + stripLiteral (T_SingleQuoted x) = x + try $ string "<<" + dashed <- (char '-' >> return True) <|> return False + tokenPosition <- getPosition + spacing + (quoted, endToken) <- (readNormalLiteral >>= (\x -> return (False, stripLiteral x)) ) + <|> (readDoubleQuotedLiteral >>= return . (\x -> (True, stripLiteral x))) + <|> (readSingleQuotedLiteral >>= return . (\x -> (True, x))) + spacing + + hereInfo <- anyChar `reluctantlyTill` (linefeed >> spacing >> (string endToken) >> (disregard whitespace <|> eof)) + + do + linefeed + spaces <- spacing + verifyHereDoc dashed quoted spaces hereInfo + token <- string endToken + return $ T_FdRedirect "" $ T_HereDoc dashed quoted hereInfo + `attempting` (eof >> debugHereDoc tokenPosition endToken hereInfo) + +verifyHereDoc dashed quoted spacing hereInfo = do + when (not dashed && spacing /= "") $ parseNote ErrorC "When using << instead of <<-, the end tokens can't be indented" + when (dashed && filter (/= '\t') spacing /= "" ) $ parseNote ErrorC "When using <<-, you can only indent with tabs" + return () + +debugHereDoc pos endToken doc = + if endToken `isInfixOf` doc + then parseProblemAt pos ErrorC (endToken ++ " was part of the here document, but not by itself at the start of the line") + else if (map toLower endToken) `isInfixOf` (map toLower doc) + then parseProblemAt pos ErrorC (endToken ++ " appears in the here document, but with different case") + else parseProblemAt pos ErrorC ("Couldn't find end token `" ++ endToken ++ "' in the here document ") + + +readFilename = readNormalWord +readIoFileOp = choice [g_LESSAND, g_GREATAND, g_DGREAT, g_LESSGREAT, g_CLOBBER, string "<" >> return T_Less, string ">" >> return T_Greater ] +readIoFile = do + op <- readIoFileOp + spacing + file <- readFilename + return $ T_FdRedirect "" $ T_IoFile op file +readIoNumber = try $ do + x <- many1 digit + lookAhead readIoFileOp + return x +readIoNumberRedirect = annotated "fd io redirect" $ do + n <- readIoNumber + op <- merging readHereString <|> merging readHereDoc <|> readIoFile + let actualOp = case op of T_FdRedirect "" x -> x + spacing + return $ T_FdRedirect n actualOp + +readIoRedirect = annotated "io redirect" $ choice [ merging readIoNumberRedirect, merging readHereString, merging readHereDoc, readIoFile ] `thenSkip` spacing + +readRedirectList = many1 readIoRedirect + +readHereString = annotated "here string" $ do + try $ string "<<<" + spacing + word <- readNormalWord + return $ T_FdRedirect "" $ T_HereString word + +readNewlineList = many1 ((newline <|> carriageReturn) `thenSkip` spacing) +readLineBreak = optional readNewlineList + +readSeparatorOp = do + notFollowedBy (g_AND_IF <|> g_DSEMI) + f <- char ';' <|> char '&' + spacing + return f + +readSequentialSep = (disregard $ g_Semi >> readLineBreak) <|> (disregard readNewlineList) +readSeparator = + do + separator <- readSeparatorOp + readLineBreak + return separator + <|> + do + readNewlineList + return '\n' + +makeSimpleCommand tokens = + let (assignment, rest) = partition (\x -> case dropAnnotation x of T_Assignment _ _ -> True; _ -> False) tokens + in let (redirections, rest2) = partition (\x -> case dropAnnotation x of T_FdRedirect _ _ -> True; _ -> False) rest + in T_Redirecting redirections $ T_SimpleCommand assignment rest2 + +readSimpleCommand = annotated "simple command" $ do + prefix <- option [] readCmdPrefix + cmd <- option [] $ do { f <- annotated "command name" readCmdName; return [f]; } + when (null prefix && null cmd) $ fail "No command" + if null cmd + then return $ makeSimpleCommand prefix + else do + suffix <- option [] readCmdSuffix + return $ makeSimpleCommand (prefix ++ cmd ++ suffix) + +readPipeline = annotated "Pipeline" $ do + notFollowedBy $ try readKeyword + do + g_Bang `thenSkip` spacing + pipe <- readPipeSequence + return $ T_Banged pipe + <|> do + readPipeSequence + +readAndOr = (flip (>>=)) (return . T_Command) $ chainr1 readPipeline $ do + pos <- getPosition + op <- g_AND_IF <|> g_OR_IF + readLineBreak + return $ \a b -> + blankAnnotation pos $ + case op of T_AND_IF -> T_AndIf a b + T_OR_IF -> T_OrIf a b + +readTerm = do + m <- readAndOr + readTerm' m + +readTerm' current = + do + sep <- readSeparator + more <- (option T_EOF $ readAndOr) + case more of T_EOF -> return [transformWithSeparator sep current] + _ -> do + list <- readTerm' more + return $ (transformWithSeparator sep current : list) + <|> + return [current] + +transformWithSeparator '&' = T_Backgrounded +transformWithSeparator _ = id + + +readPipeSequence = do + list <- readCommand `sepBy1` (readPipe `thenSkip` (spacing >> readLineBreak)) + spacing + return $ T_Pipeline list + +readPipe = do + notFollowedBy g_OR_IF + char '|' `thenSkip` spacing + +readCommand = (readCompoundCommand <|> readSimpleCommand) + +readCmdName = do + f <- readNormalWord + spacing + return f + +readCmdWord = do + f <- readNormalWord + spacing + return f + +readIfClause = annotated "if statement" $ do + (condition, action) <- readIfPart + elifs <- many readElifPart + elses <- option [] readElsePart + g_Fi + return $ T_IfExpression ((condition, action):elifs) elses + +readIfPart = do + g_If + allspacing + condition <- readTerm + g_Then + allspacing + action <- readTerm + return (condition, action) + +readElifPart = do + g_Elif + allspacing + condition <- readTerm + g_Then + allspacing + action <- readTerm + return (condition, action) + +readElsePart = do + g_Else + allspacing + readTerm + +readSubshell = annotated "subshell group" $ do + char '(' + allspacing + list <- readCompoundList + allspacing + char ')' + return $ T_Subshell list + +readBraceGroup = annotated "brace group" $ do + char '{' + allspacing + list <- readTerm + allspacing + char '}' + return $ T_BraceGroup list + +readWhileClause = annotated "while loop" $ do + g_While + condition <- readTerm + statements <- readDoGroup + return $ T_WhileExpression condition statements + +readUntilClause = annotated "until loop" $ do + g_Until + condition <- readTerm + statements <- readDoGroup + return $ T_UntilExpression condition statements + +readDoGroup = do + pos <- getPosition + g_Do + allspacing + (eof >> return []) <|> + do + commands <- readCompoundList + disregard g_Done <|> eof -- stunted support + return commands + <|> do + parseProblemAt pos ErrorC "Can't find the 'done' for this 'do'" + fail "No done" + +readForClause = annotated "for loop" $ do + g_For + spacing + name <- readVariableName + allspacing + values <- readInClause <|> (readSequentialSep >> return []) + group <- readDoGroup <|> (allspacing >> eof >> return []) -- stunted support + return $ T_ForIn name values group + +readInClause = do + g_In + things <- (readCmdWord) `reluctantlyTill` + (disregard (g_Semi) <|> disregard linefeed <|> disregard g_Do) + + do { + lookAhead (g_Do); + parseNote ErrorC "You need a line feed or semicolon before the 'do' (in Bash)"; + } <|> do { + optional $ g_Semi; + disregard allspacing; + } + + return things + +readCaseClause = annotated "case statement" $ do + g_Case + word <- readNormalWord + spacing + g_In + readLineBreak + list <- readCaseList + g_Esac + return $ T_CaseExpression word list + +readCaseList = many readCaseItem + +readCaseItem = do + notFollowedBy g_Esac + optional g_Lparen + spacing + pattern <- readPattern + g_Rparen + readLineBreak + list <- ((lookAhead g_DSEMI >> return []) <|> readCompoundList) + (g_DSEMI <|> lookAhead (readLineBreak >> g_Esac)) + readLineBreak + return (pattern, list) + +readFunctionDefinition = annotated "function definition" $ do + name <- try readFunctionSignature + allspacing + (disregard (lookAhead g_Lbrace) <|> parseProblem ErrorC "Expected a { to open the function definition") + group <- merging readBraceGroup + return $ T_Function name group + + +readFunctionSignature = do + (optional $ try (string "function " >> parseNote StyleC "Don't use 'function' in front of function definitions")) + name <- readVariableName + spacing + g_Lparen + g_Rparen + return name + + +readPattern = (readNormalWord `thenSkip` spacing) `sepBy1` (char '|' `thenSkip` spacing) + + +readCompoundCommand = annotated "compound command" $ do + cmd <- merging $ choice [ readBraceGroup, readSubshell, readWhileClause, readUntilClause, readIfClause, readForClause, readCaseClause, readFunctionDefinition] + spacing + redirs <- many readIoRedirect + return $ T_Redirecting redirs $ cmd + + +readCompoundList = readTerm + +readCmdPrefix = many1 (readIoRedirect <|> readAssignmentWord) +readCmdSuffix = many1 (readIoRedirect <|> annotated "normal word" readCmdWord) + +readAssignmentWord = annotated "assignment" $ try $ do + optional (char '$' >> parseNote ErrorC "Don't use $ on the left side of assignments") + variable <- readVariableName + space <- spacing + pos <- getPosition + char '=' + space2 <- spacing + value <- readNormalWord + spacing + when (space ++ space2 /= "") $ parseNoteAt pos ErrorC "Don't put spaces around the = in assignments" + return $ T_Assignment variable value + + +tryToken s t = try (string s >> spacing >> return t) +tryWordToken s t = tryParseWordToken (string s) t `thenSkip` spacing +tryParseWordToken parser t = try (parser >> (lookAhead (eof <|> disregard whitespace))) >> return t + +g_AND_IF = tryToken "&&" T_AND_IF +g_OR_IF = tryToken "||" T_OR_IF +g_DSEMI = tryToken ";;" T_DSEMI +g_DLESS = tryToken "<<" T_DLESS +g_DGREAT = tryToken ">>" T_DGREAT +g_LESSAND = tryToken "<&" T_LESSAND +g_GREATAND = tryToken ">&" T_GREATAND +g_LESSGREAT = tryToken "<>" T_LESSGREAT +g_DLESSDASH = tryToken "<<-" T_DLESSDASH +g_CLOBBER = tryToken ">|" T_CLOBBER +g_OPERATOR = g_AND_IF <|> g_OR_IF <|> g_DSEMI <|> g_DLESSDASH <|> g_DLESS <|> g_DGREAT <|> g_LESSAND <|> g_GREATAND <|> g_LESSGREAT + +g_If = tryWordToken "if" T_If +g_Then = tryWordToken "then" T_Then +g_Else = tryWordToken "else" T_Else +g_Elif = tryWordToken "elif" T_Elif +g_Fi = tryWordToken "fi" T_Fi +g_Do = tryWordToken "do" T_Do +g_Done = tryWordToken "done" T_Done +g_Case = tryWordToken "case" T_Case +g_Esac = tryWordToken "esac" T_Esac +g_While = tryWordToken "while" T_While +g_Until = tryWordToken "until" T_Until +g_For = tryWordToken "for" T_For +g_In = tryWordToken "in" T_In +g_Lbrace = tryWordToken "{" T_Lbrace +g_Rbrace = tryWordToken "}" T_Rbrace + +g_Lparen = tryToken "(" T_Lparen +g_Rparen = tryToken ")" T_Rparen +g_Bang = tryToken "!" T_Bang + +g_Semi = do + notFollowedBy g_DSEMI + tryToken ";" T_Semi + +readKeyword = choice [ g_Then, g_Else, g_Elif, g_Fi, g_Do, g_Done, g_Esac, g_Rbrace, g_Rparen, g_DSEMI ] + +ifParse p t f = do + (lookAhead (try p) >> t) <|> f + +wtf = do + x <- many anyChar + parseProblem ErrorC x + +readScript = do + do { + allspacing; + commands <- readTerm; + eof <|> (parseProblem WarningC "Stopping here, because I can't parse this command"); + return $ T_Script commands; + } <|> do { + parseProblem WarningC "Couldn't read any commands"; + wtf; + return T_EOF; + } + +shpell s = rp readScript s +rp p s = Ms.runState (runParserT p StackEmpty "-" s) [] + +-------- Destructively simplify AST + +simplify (T_Redirecting [] t) = t +simplify (T_Pipeline [x]) = dropAnnotation x +simplify (T_NormalWord [x]) = dropAnnotation x +simplify t = t + +-------- Analytics +doAllAnalysis t = foldl (\v f -> doAnalysis f v) t checks + +getAst s = case rp readScript s of (Right parsed, _) -> parsed +getAst2 s = case rp readScript s of (Right parsed, _) -> transform simplify parsed +lol (Right x, _) = x + +deadSimple (T_NormalWord l) = [concat (concatMap (deadSimple . dropAnnotation) l)] +deadSimple (T_DoubleQuoted l) = ["\"" ++(concat (concatMap (deadSimple . dropAnnotation) l)) ++ "\""] +deadSimple (T_SingleQuoted s) = [s] +deadSimple (T_DollarVariable _) = ["${VAR}"] +deadSimple (T_DollarBraced _) = ["${VAR}"] +deadSimple (T_DollarArithmetic _) = ["${VAR}"] +deadSimple (T_DollarExpansion _) = ["${VAR}"] +deadSimple (T_Literal x) = [x] +deadSimple (T_SimpleCommand vars words) = concatMap (deadSimple . dropAnnotation) words +deadSimple (T_Redirecting _ foo) = deadSimple foo +deadSimple _ = [] + + +checks = [checkUuoc] +checkUuoc (T_Pipeline ((Annotated _ _ x):_:_)) = case (deadSimple x) of ["cat", _] -> style "UUOC: Instead of 'cat a | b', use 'b < a'" + _ -> return () +checkUuoc _ = return () + + +main = do + s <- getContents +-- case rp readScript s of (Right parsed, _) -> putStrLn . show $ transform simplify parsed +-- (Left x, y) -> putStrLn $ "Can't parse: " ++ (show (x,y)) + + mapM (putStrLn . show) $ getNotes s