From 40d5ac602e64f107420ec4f9e790d4cfe8229525 Mon Sep 17 00:00:00 2001
From: Filip Gralinski
Date: Sat, 17 Oct 2020 18:56:30 +0200
Subject: [PATCH] Handle escaping spaces in configuration files

---
 src/GEval/OptionsParser.hs                    | 35 ++++++++++++++++++-
 test/Spec.hs                                  |  3 ++-
 .../test-A/out.tsv                            |  2 ++
 .../cer-space-escaping/config.txt             |  1 +
 .../cer-space-escaping/test-A/expected.tsv    |  2 ++
 5 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 test/cer-space-escaping/cer-space-escaping-solution/test-A/out.tsv
 create mode 100644 test/cer-space-escaping/cer-space-escaping/config.txt
 create mode 100644 test/cer-space-escaping/cer-space-escaping/test-A/expected.tsv

diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index b97e2e6..7c9f164 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -40,6 +40,9 @@ import GEval.Model
 import GEval.ModelTraining
 
 import Data.List (find, intercalate)
+import Data.List.Utils (split)
+
+import Data.Char (isSpace)
 
 import Data.Conduit.SmartSource
 import Data.CartesianStrings
@@ -352,7 +355,37 @@ readOptsFromConfigFile :: [String] -> FilePath -> IO (Either (ParserResult GEval
 readOptsFromConfigFile args configFilePath = do
   configH <- openFile configFilePath ReadMode
   contents <- hGetContents configH
-  getOptions' False ((words contents) ++ args)
+  getOptions' False ((parseConfigFileContents contents) ++ args)
+
+-- | One unit of a configuration file after escape processing: a character
+-- that belongs to an argument, or a boundary between two arguments.
+data ConfigFileSymbol = Literal Char | Separator
+                        deriving (Eq, Show)
+
+-- | Split the contents of a configuration file into command-line arguments.
+--
+-- Arguments are separated by runs of whitespace. A backslash directly
+-- followed by a whitespace character keeps that character inside the
+-- argument. The escaping is deliberately simplistic: a backslash followed by
+-- anything else is kept verbatim together with that character, so two
+-- consecutive backslashes remain two backslashes.
+parseConfigFileContents :: String -> [String]
+parseConfigFileContents contents =
+  filter (not . null)
+  $ map literalChars
+  $ split [Separator]
+  $ parseSymbols contents
+  where
+    -- Total extraction of the characters: the pieces returned by 'split' are
+    -- not expected to contain 'Separator', and none is matched partially.
+    literalChars symbols = [c | Literal c <- symbols]
+    parseSymbols ('\\':c:t)
+      | isSpace c = (Literal c) : parseSymbols t
+      | otherwise = (Literal '\\'): (Literal c) :parseSymbols t
+    parseSymbols (c:t)
+      | isSpace c = Separator : parseSymbols t
+      | otherwise = (Literal c) : parseSymbols t
+    parseSymbols [] = []
 
 attemptToReadOptsFromConfigFile :: [String] -> GEvalOptions -> IO (Either (ParserResult GEvalOptions) GEvalOptions)
 attemptToReadOptsFromConfigFile args opts = do
diff --git a/test/Spec.hs b/test/Spec.hs
index 23f0ba1..c0e7392 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -131,9 +131,10 @@ main = hspec $ do
   describe "CER" $ do
     it "simple example" $
       runGEvalTest "cer-simple" `shouldReturnAlmost` 0.28947368421
-  describe "CER" $ do
     it "simple example (Mean/CER)" $
       runGEvalTest "cer-mean-simple" `shouldReturnAlmost` 0.277777777777778
+    it "space escaping" $
+      runGEvalTest "cer-space-escaping" `shouldReturnAlmost` 0.0555555
   describe "Accuracy" $ do
     it "simple example" $
       runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
diff --git a/test/cer-space-escaping/cer-space-escaping-solution/test-A/out.tsv b/test/cer-space-escaping/cer-space-escaping-solution/test-A/out.tsv
new file mode 100644
index 0000000..8aac42d
--- /dev/null
+++ b/test/cer-space-escaping/cer-space-escaping-solution/test-A/out.tsv
@@ -0,0 +1,2 @@
+foo bar
+aaa bxb ccc
diff --git a/test/cer-space-escaping/cer-space-escaping/config.txt b/test/cer-space-escaping/cer-space-escaping/config.txt
new file mode 100644
index 0000000..03742e9
--- /dev/null
+++ b/test/cer-space-escaping/cer-space-escaping/config.txt
@@ -0,0 +1 @@
+--metric CER:s<\ +><\ >
diff --git a/test/cer-space-escaping/cer-space-escaping/test-A/expected.tsv b/test/cer-space-escaping/cer-space-escaping/test-A/expected.tsv
new file mode 100644
index 0000000..a594d2f
--- /dev/null
+++ b/test/cer-space-escaping/cer-space-escaping/test-A/expected.tsv
@@ -0,0 +1,2 @@
+foo bar
+aaa bbb ccc