From e170c378648ce87e413a6d39fb85ac3e4ab025d1 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 11 Jan 2020 17:02:49 +0100 Subject: [PATCH] Add substitution operation --- src/GEval/EvaluationScheme.hs | 51 ++++++++++++++++--- test/Spec.hs | 2 + .../test-A/out.tsv | 5 ++ .../accuracy-with-flags/config.txt | 1 + .../accuracy-with-flags/test-A/expected.tsv | 5 ++ 5 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 test/accuracy-with-flags/accuracy-with-flags-solution/test-A/out.tsv create mode 100644 test/accuracy-with-flags/accuracy-with-flags/config.txt create mode 100644 test/accuracy-with-flags/accuracy-with-flags/test-A/expected.tsv diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs index 97235d6..d138f06 100644 --- a/src/GEval/EvaluationScheme.hs +++ b/src/GEval/EvaluationScheme.hs @@ -27,6 +27,7 @@ data PreprocessingOperation = RegexpMatch Regex | Sorting | SetName Text | SetPriority Int + | RegexpSubstition Regex Text deriving (Eq) leftParameterBracket :: Char @@ -53,15 +54,33 @@ readOps ('S':theRest) = (Sorting:ops, theRest') where (ops, theRest') = readOps theRest readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest readOps ('P':theRest) = handleParametrizedOp (SetPriority . read) theRest +readOps ('s':theRest) = handleParametrizedBinaryOp (\a b -> RegexpSubstition (fromRight undefined $ compileM (BSU.fromString a) []) (pack b)) theRest readOps s = ([], s) handleParametrizedOp :: (String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String) -handleParametrizedOp constructor (leftParameterBracket:theRest) = +handleParametrizedOp constructor theRest = + case parseParameter theRest of + (Nothing, s) -> ([], s) + (Just param, theRest') -> let (ops, theRest'') = readOps theRest' + in ((constructor param):ops, theRest'') + +handleParametrizedBinaryOp :: (String -> String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String) +handleParametrizedBinaryOp constructor theRest = + case parseParameter theRest of + (Nothing, s) -> ([], s) + (Just paramA, theRest') -> + case parseParameter theRest' of + (Nothing, s) -> ([], s) + (Just paramB, theRest'') -> let (ops, theRest''') = readOps theRest'' + in ((constructor paramA paramB):ops, theRest''') + +parseParameter :: String -> (Maybe String, String) +parseParameter (leftParameterBracket:theRest) = case break (== rightParameterBracket) theRest of - (s, []) -> ([], s) - (param, (_:theRest')) -> let (ops, theRest'') = readOps theRest' - in ((constructor param):ops, theRest'') -handleParametrizedOp _ s = ([], s) + (s, []) -> (Nothing, s) + (param, (_:theRest')) -> (Just param, theRest') +parseParameter s = (Nothing, s) + instance Show EvaluationScheme where show (EvaluationScheme metric operations) = (show metric) ++ (if null operations @@ -97,9 +116,28 @@ instance Show PreprocessingOperation where show Sorting = "S" show (SetName t) = parametrizedOperation "N" (unpack t) show (SetPriority p) = parametrizedOperation "P" (show p) + show (RegexpSubstition (Regex _ regexp) s) = "s" ++ (formatParameter $ BSU.toString regexp) ++ (formatParameter $ unpack s) + +applySubstitution :: Regex -> Text -> Text -> Text +applySubstitution r substitution t = + gsub r (handleRefs substitution) t + +handleRefs :: Text -> Text -> [Text] -> Text +handleRefs substitution mainMatch subMatches = gsub refRegexp handleRef substitution + where Right refRegexp = compileM (BSU.fromString "\\\\\\d+") [] + indexables = mainMatch : subMatches + handleRef :: Text -> Text + handleRef ref = + let ix = (read $ tail $ unpack ref) + in if ix >= length indexables + then (pack "") + else indexables !! ix parametrizedOperation :: String -> String -> String -parametrizedOperation opCode opArg = opCode ++ [leftParameterBracket] ++ opArg ++ [rightParameterBracket] +parametrizedOperation opCode opArg = opCode ++ (formatParameter opArg) + +formatParameter :: String -> String +formatParameter p = [leftParameterBracket] ++ p ++ [rightParameterBracket] applyPreprocessingOperations :: EvaluationScheme -> Text -> Text applyPreprocessingOperations (EvaluationScheme _ operations) t = foldl (flip applyPreprocessingOperation) t operations @@ -111,3 +149,4 @@ applyPreprocessingOperation UpperCasing = toUpper applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words applyPreprocessingOperation (SetName _) = id applyPreprocessingOperation (SetPriority _) = id +applyPreprocessingOperation (RegexpSubstition regex substition) = applySubstitution regex substition diff --git a/test/Spec.hs b/test/Spec.hs index b4d3511..ff8334b 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -339,6 +339,8 @@ main = hspec $ do describe "Preprocessing operations" $ do it "F1 with preprocessing" $ do runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857 + it "Regexp substition" $ do + runGEvalTest "accuracy-with-flags" `shouldReturnAlmost` 0.8 describe "evaluating single lines" $ do it "RMSE" $ do (MetricOutput v _) <- gevalCoreOnSingleLines RMSE id RawItemTarget diff --git a/test/accuracy-with-flags/accuracy-with-flags-solution/test-A/out.tsv b/test/accuracy-with-flags/accuracy-with-flags-solution/test-A/out.tsv new file mode 100644 index 0000000..f4cf94b --- /dev/null +++ b/test/accuracy-with-flags/accuracy-with-flags-solution/test-A/out.tsv @@ -0,0 +1,5 @@ +b88 b901 +a100 +a93 +t34 +y23 diff --git a/test/accuracy-with-flags/accuracy-with-flags/config.txt b/test/accuracy-with-flags/accuracy-with-flags/config.txt new file mode 100644 index 0000000..0013dd6 --- /dev/null +++ b/test/accuracy-with-flags/accuracy-with-flags/config.txt @@ -0,0 +1 @@ +--metric Accuracy:s<[abc](\d+)> diff --git a/test/accuracy-with-flags/accuracy-with-flags/test-A/expected.tsv b/test/accuracy-with-flags/accuracy-with-flags/test-A/expected.tsv new file mode 100644 index 0000000..16a810e --- /dev/null +++ b/test/accuracy-with-flags/accuracy-with-flags/test-A/expected.tsv @@ -0,0 +1,5 @@ +a88 b901 +c100 +b93 +t34 +z23