Add substitution operation

This commit is contained in:
Filip Gralinski 2020-01-11 17:02:49 +01:00
parent 8f87b881b8
commit e170c37864
5 changed files with 58 additions and 6 deletions

View File

@ -27,6 +27,7 @@ data PreprocessingOperation = RegexpMatch Regex
| Sorting
| SetName Text
| SetPriority Int
| RegexpSubstition Regex Text
deriving (Eq)
leftParameterBracket :: Char
@ -53,15 +54,33 @@ readOps ('S':theRest) = (Sorting:ops, theRest')
where (ops, theRest') = readOps theRest
readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
readOps ('P':theRest) = handleParametrizedOp (SetPriority . read) theRest
readOps ('s':theRest) = handleParametrizedBinaryOp (\a b -> RegexpSubstition (fromRight undefined $ compileM (BSU.fromString a) []) (pack b)) theRest
readOps s = ([], s)
handleParametrizedOp :: (String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String)
handleParametrizedOp constructor (leftParameterBracket:theRest) =
handleParametrizedOp constructor theRest =
case parseParameter theRest of
(Nothing, s) -> ([], s)
(Just param, theRest') -> let (ops, theRest'') = readOps theRest'
in ((constructor param):ops, theRest'')
handleParametrizedBinaryOp :: (String -> String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String)
handleParametrizedBinaryOp constructor theRest =
case parseParameter theRest of
(Nothing, s) -> ([], s)
(Just paramA, theRest') ->
case parseParameter theRest' of
(Nothing, s) -> ([], s)
(Just paramB, theRest'') -> let (ops, theRest''') = readOps theRest''
in ((constructor paramA paramB):ops, theRest''')
parseParameter :: String -> (Maybe String, String)
parseParameter (leftParameterBracket:theRest) =
case break (== rightParameterBracket) theRest of
(s, []) -> ([], s)
(param, (_:theRest')) -> let (ops, theRest'') = readOps theRest'
in ((constructor param):ops, theRest'')
handleParametrizedOp _ s = ([], s)
(s, []) -> (Nothing, s)
(param, (_:theRest')) -> (Just param, theRest')
parseParameter s = (Nothing, s)
instance Show EvaluationScheme where
show (EvaluationScheme metric operations) = (show metric) ++ (if null operations
@ -97,9 +116,28 @@ instance Show PreprocessingOperation where
show Sorting = "S"
show (SetName t) = parametrizedOperation "N" (unpack t)
show (SetPriority p) = parametrizedOperation "P" (show p)
show (RegexpSubstition (Regex _ regexp) s) = "s" ++ (formatParameter $ BSU.toString regexp) ++ (formatParameter $ unpack s)
applySubstitution :: Regex -> Text -> Text -> Text
applySubstitution r substitution t =
gsub r (handleRefs substitution) t
handleRefs :: Text -> Text -> [Text] -> Text
handleRefs substitution mainMatch subMatches = gsub refRegexp handleRef substitution
where Right refRegexp = compileM (BSU.fromString "\\\\\\d+") []
indexables = mainMatch : subMatches
handleRef :: Text -> Text
handleRef ref =
let ix = (read $ tail $ unpack ref)
in if ix >= length indexables
then (pack "")
else indexables !! ix
parametrizedOperation :: String -> String -> String
parametrizedOperation opCode opArg = opCode ++ [leftParameterBracket] ++ opArg ++ [rightParameterBracket]
parametrizedOperation opCode opArg = opCode ++ (formatParameter opArg)
formatParameter :: String -> String
formatParameter p = [leftParameterBracket] ++ p ++ [rightParameterBracket]
applyPreprocessingOperations :: EvaluationScheme -> Text -> Text
applyPreprocessingOperations (EvaluationScheme _ operations) t = foldl (flip applyPreprocessingOperation) t operations
@ -111,3 +149,4 @@ applyPreprocessingOperation UpperCasing = toUpper
applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words
applyPreprocessingOperation (SetName _) = id
applyPreprocessingOperation (SetPriority _) = id
applyPreprocessingOperation (RegexpSubstition regex substition) = applySubstitution regex substition

View File

@ -339,6 +339,8 @@ main = hspec $ do
describe "Preprocessing operations" $ do
it "F1 with preprocessing" $ do
runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857
it "Regexp substition" $ do
runGEvalTest "accuracy-with-flags" `shouldReturnAlmost` 0.8
describe "evaluating single lines" $ do
it "RMSE" $ do
(MetricOutput v _) <- gevalCoreOnSingleLines RMSE id RawItemTarget

View File

@ -0,0 +1,5 @@
b88 b901
a100
a93
t34
y23
1 b88 b901
2 a100
3 a93
4 t34
5 y23

View File

@ -0,0 +1 @@
--metric Accuracy:s<[abc](\d+)><!\1>

View File

@ -0,0 +1,5 @@
a88 b901
c100
b93
t34
z23
1 a88 b901
2 c100
3 b93
4 t34
5 z23