Switch from regex-compat to regex-tdfa

This commit is contained in:
Vidar Holen 2015-04-04 16:26:28 -07:00
parent 93debd3556
commit 9f1f00cdd1
4 changed files with 103 additions and 42 deletions

View File

@ -43,7 +43,7 @@ library
json, json,
mtl, mtl,
parsec, parsec,
regex-compat, regex-tdfa,
QuickCheck >= 2.7.4 QuickCheck >= 2.7.4
exposed-modules: exposed-modules:
ShellCheck.Analytics ShellCheck.Analytics
@ -51,6 +51,7 @@ library
ShellCheck.Data ShellCheck.Data
ShellCheck.Options ShellCheck.Options
ShellCheck.Parser ShellCheck.Parser
ShellCheck.Regex
ShellCheck.Simple ShellCheck.Simple
other-modules: other-modules:
Paths_ShellCheck Paths_ShellCheck
@ -64,7 +65,7 @@ executable shellcheck
json, json,
mtl, mtl,
parsec, parsec,
regex-compat, regex-tdfa,
transformers, transformers,
QuickCheck >= 2.7.4 QuickCheck >= 2.7.4
main-is: shellcheck.hs main-is: shellcheck.hs
@ -79,7 +80,7 @@ test-suite test-shellcheck
json, json,
mtl, mtl,
parsec, parsec,
regex-compat, regex-tdfa,
transformers, transformers,
QuickCheck >= 2.7.4 QuickCheck >= 2.7.4
main-is: test/shellcheck.hs main-is: test/shellcheck.hs

View File

@ -19,7 +19,7 @@ module ShellCheck.AST where
import Control.Monad import Control.Monad
import Control.Monad.Identity import Control.Monad.Identity
import qualified Text.Regex as Re import qualified ShellCheck.Regex as Re
data Id = Id Int deriving (Show, Eq, Ord) data Id = Id Int deriving (Show, Eq, Ord)
@ -128,11 +128,13 @@ data Token =
data Annotation = DisableComment Integer deriving (Show, Eq) data Annotation = DisableComment Integer deriving (Show, Eq)
data ConditionType = DoubleBracket | SingleBracket deriving (Show, Eq) data ConditionType = DoubleBracket | SingleBracket deriving (Show, Eq)
-- I apologize for nothing! -- This is an abomination.
lolHax s = Re.subRegex (Re.mkRegex "(Id [0-9]+)") (show s) "(Id 0)" tokenEquals :: Token -> Token -> Bool
instance Eq Token where tokenEquals a b = kludge a == kludge b
(==) a b = lolHax a == lolHax b where kludge s = Re.subRegex (Re.mkRegex "\\(Id [0-9]+\\)") (show s) "(Id 0)"
instance Eq Token where
(==) = tokenEquals
analyze :: Monad m => (Token -> m ()) -> (Token -> m ()) -> (Token -> Token) -> Token -> m Token analyze :: Monad m => (Token -> m ()) -> (Token -> m ()) -> (Token -> Token) -> Token -> m Token
analyze f g i = analyze f g i =

View File

@ -33,7 +33,7 @@ import ShellCheck.AST
import ShellCheck.Options import ShellCheck.Options
import ShellCheck.Data import ShellCheck.Data
import ShellCheck.Parser hiding (runTests) import ShellCheck.Parser hiding (runTests)
import Text.Regex import ShellCheck.Regex
import qualified Data.Map as Map import qualified Data.Map as Map
import Test.QuickCheck.All (forAllProperties) import Test.QuickCheck.All (forAllProperties)
import Test.QuickCheck.Test (quickCheckWithResult, stdArgs, maxSuccess) import Test.QuickCheck.Test (quickCheckWithResult, stdArgs, maxSuccess)
@ -240,12 +240,6 @@ isVariableName _ = False
potentially = fromMaybe (return ()) potentially = fromMaybe (return ())
matchAll re = unfoldr f
where
f str = do
(_, match, rest, _) <- matchRegexAll re str
return (match, rest)
willSplit x = willSplit x =
case x of case x of
T_DollarBraced {} -> True T_DollarBraced {} -> True
@ -280,20 +274,9 @@ getSuspiciousRegexWildcard str =
suspicious = mkRegex "([A-Za-z1-9])\\*" suspicious = mkRegex "([A-Za-z1-9])\\*"
contra = mkRegex "[^a-zA-Z1-9]\\*|[][^$+\\\\]" contra = mkRegex "[^a-zA-Z1-9]\\*|[][^$+\\\\]"
matches string regex = isJust $ matchRegex regex string
headOrDefault _ (a:_) = a headOrDefault _ (a:_) = a
headOrDefault def _ = def headOrDefault def _ = def
getAllMatches :: Regex -> String -> [[String]]
getAllMatches regex str = fromJust $ f str
where
f str = do
(_, _, rest, groups) <- matchRegexAll regex str
more <- f rest
return $ groups : more
`mappend` return []
isConstant token = isConstant token =
case token of case token of
T_NormalWord _ l -> all isConstant l T_NormalWord _ l -> all isConstant l
@ -413,14 +396,19 @@ checkEchoSed _ (T_Pipeline id _ [a, b]) =
["sed", "-e", v] -> checkIn v ["sed", "-e", v] -> checkIn v
_ -> return () _ -> return ()
where where
sedRe = mkRegex "^s(.)(.*)\\1(.*)\\1g?$" -- This should have used backreferences, but TDFA doesn't support them
sedRe = mkRegex "^s(.)([^\n]*)g?$"
isSimpleSed s = fromMaybe False $ do
[first,rest] <- matchRegex sedRe s
let delimiters = filter (== (first !! 0)) rest
guard $ length delimiters == 2
return True
acmd = deadSimple a acmd = deadSimple a
bcmd = deadSimple b bcmd = deadSimple b
checkIn s = checkIn s =
case matchRegex sedRe s of when (isSimpleSed s) $
Just _ -> style id 2001 style id 2001 "See if you can use ${variable//search/replace} instead."
"See if you can use ${variable//search/replace} instead."
_ -> return ()
checkEchoSed _ _ = return () checkEchoSed _ _ = return ()
prop_checkPipedAssignment1 = verify checkPipedAssignment "A=ls | grep foo" prop_checkPipedAssignment1 = verify checkPipedAssignment "A=ls | grep foo"
@ -2017,15 +2005,15 @@ data StackData =
-- (Base expression, specific position, var name, assigned values) -- (Base expression, specific position, var name, assigned values)
| Assignment (Token, Token, String, DataType) | Assignment (Token, Token, String, DataType)
| Reference (Token, Token, String) | Reference (Token, Token, String)
deriving (Show, Eq) deriving (Show)
data DataType = DataString DataSource | DataArray DataSource data DataType = DataString DataSource | DataArray DataSource
deriving (Show, Eq) deriving (Show)
data DataSource = SourceFrom [Token] | SourceExternal | SourceDeclaration data DataSource = SourceFrom [Token] | SourceExternal | SourceDeclaration
deriving (Show, Eq) deriving (Show)
data VariableState = Dead Token String | Alive deriving (Show, Eq) data VariableState = Dead Token String | Alive deriving (Show)
dataTypeFrom defaultType v = (case v of T_Array {} -> DataArray; _ -> defaultType) $ SourceFrom [v] dataTypeFrom defaultType v = (case v of T_Array {} -> DataArray; _ -> defaultType) $ SourceFrom [v]
@ -2220,10 +2208,11 @@ getBracedReference s = fromMaybe s $
getSpecial _ = fail "empty" getSpecial _ = fail "empty"
getIndexReferences s = fromMaybe [] $ do getIndexReferences s = fromMaybe [] $ do
(_, index, _, _) <- matchRegexAll re s match <- matchRegex re s
return $ matchAll variableNameRegex index index <- match !!! 0
return $ matchAllStrings variableNameRegex index
where where
re = mkRegex "\\[.*\\]" re = mkRegex "(\\[.*\\])"
getReferencedVariables t = getReferencedVariables t =
case t of case t of
@ -2263,7 +2252,7 @@ getReferencedVariables t =
prop_getVariablesFromLiteral1 = prop_getVariablesFromLiteral1 =
getVariablesFromLiteral "$foo${bar//a/b}$BAZ" == ["foo", "bar", "BAZ"] getVariablesFromLiteral "$foo${bar//a/b}$BAZ" == ["foo", "bar", "BAZ"]
getVariablesFromLiteral string = getVariablesFromLiteral string =
map (!! 0) $ getAllMatches variableRegex string map (!! 0) $ matchAllSubgroups variableRegex string
where where
variableRegex = mkRegex "\\$\\{?([A-Za-z0-9_]+)" variableRegex = mkRegex "\\$\\{?([A-Za-z0-9_]+)"
@ -3107,9 +3096,7 @@ checkMultipleAppends params t =
mapM_ checkList $ getCommandSequences t mapM_ checkList $ getCommandSequences t
where where
checkList list = checkList list =
mapM_ checkGroup groups mapM_ checkGroup (groupWith (liftM fst) $ map getTarget list)
where
groups = groupWith (liftM fst) $ map getTarget list
checkGroup (f:_:_:_) | isJust f = checkGroup (f:_:_:_) | isJust f =
style (snd $ fromJust f) 2129 style (snd $ fromJust f) 2129
"Consider using { cmd1; cmd2; } >> file instead of individual redirects." "Consider using { cmd1; cmd2; } >> file instead of individual redirects."

71
ShellCheck/Regex.hs Normal file
View File

@ -0,0 +1,71 @@
{-
This file is part of ShellCheck.
http://www.vidarholen.net/contents/shellcheck
ShellCheck is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
ShellCheck is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
-}
{-# LANGUAGE FlexibleContexts #-}
-- Basically Text.Regex based on regex-tdfa instead of the buggy regex-posix.
module ShellCheck.Regex where
import Data.List
import Data.Maybe
import Control.Monad
import Text.Regex.TDFA
-- Precompile the regex
mkRegex :: String -> Regex
mkRegex str =
let make :: RegexMaker Regex CompOption ExecOption String => String -> Regex
make = makeRegex
in
make str
-- Does the regex match?
matches :: String -> Regex -> Bool
matches = flip match
-- Get all subgroups of the first match
matchRegex :: Regex -> String -> Maybe [String]
matchRegex re str = do
(_, _, _, groups) <- matchM re str :: Maybe (String,String,String,[String])
return groups
-- Get all full matches
matchAllStrings :: Regex -> String -> [String]
matchAllStrings re = unfoldr f
where
f :: String -> Maybe (String, String)
f str = do
(_, match, rest, _) <- matchM re str :: Maybe (String, String, String, [String])
return (match, rest)
-- Get all subgroups from all matches
matchAllSubgroups :: Regex -> String -> [[String]]
matchAllSubgroups re = unfoldr f
where
f :: String -> Maybe ([String], String)
f str = do
(_, _, rest, groups) <- matchM re str :: Maybe (String, String, String, [String])
return (groups, rest)
-- Replace regex in input with string
subRegex :: Regex -> String -> String -> String
subRegex re input replacement = f input
where
f str = fromMaybe str $ do
(before, match, after) <- matchM re str :: Maybe (String, String, String)
when (null match) $ error ("Internal error: substituted empty in " ++ str)
return $ before ++ replacement ++ f after