From 0f9ab275efbdf493efe05f0f267cced0f0c2b2a0 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Wed, 30 Jun 2021 09:33:07 +0200 Subject: [PATCH] Handle DOS/Windows end-of-lines --- src/GEval/Core.hs | 6 ++++-- test/Spec.hs | 2 ++ .../dos-end-of-line-solution/test-A/out.tsv.gz | Bin 0 -> 43 bytes test/dos-end-of-line/dos-end-of-line/config.txt | 1 + .../dos-end-of-line/test-A/expected.tsv | 4 ++++ 5 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 test/dos-end-of-line/dos-end-of-line-solution/test-A/out.tsv.gz create mode 100644 test/dos-end-of-line/dos-end-of-line/config.txt create mode 100644 test/dos-end-of-line/dos-end-of-line/test-A/expected.tsv diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 194547c..ef2413b 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -5,7 +5,7 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE PackageImports #-} - +{-# LANGUAGE OverloadedStrings #-} module GEval.Core ( geval, @@ -509,10 +509,12 @@ data FileProcessingOptions = FileProcessingOptions { fileProcessingOptionsPreprocess :: (Text -> Text), fileProcessingOptionsHeader :: Maybe TabularHeader } +cleanLine :: Text -> Text +cleanLine = replace "\r" "" fileAsLineSource :: SourceSpec -> FileProcessingOptions -> LineSource (ResourceT IO) fileAsLineSource spec options = - LineSource ((smartSource spec) .| autoDecompress .| CT.decodeUtf8Lenient .| CT.lines .| processHeader mHeader) (select (getDataFormat spec) mSelector) preprocess spec 1 + LineSource ((smartSource spec) .| autoDecompress .| CT.decodeUtf8Lenient .| CT.lines .| CL.map cleanLine .| processHeader mHeader) (select (getDataFormat spec) mSelector) preprocess spec 1 where mSelector = fileProcessingOptionsSelector options preprocess = fileProcessingOptionsPreprocess options mHeader = fileProcessingOptionsHeader options diff --git a/test/Spec.hs b/test/Spec.hs index b80c16b..80f2f33 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -336,6 +336,8 @@ main = hspec $ do ["one", "one"]) `shouldBeAlmost` 0.5 it "simple test" $ do runGEvalTest "map-simple" `shouldReturnAlmost` 0.444444444 + it "dos-end-of-lines" $ + runGEvalTest "dos-end-of-line" `shouldReturnAlmost` 0.75 describe "LogLoss" $ do it "simple" $ do runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824 diff --git a/test/dos-end-of-line/dos-end-of-line-solution/test-A/out.tsv.gz b/test/dos-end-of-line/dos-end-of-line-solution/test-A/out.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b2306c4bc71e1e2612a89ed76b6f918245ee347 GIT binary patch literal 43 zcmb2|=HSqkzLUVfoL^d^S5jQY;C=f1^QT^3I!uDC3X8g?b2BWx8C)X5z`y_iKgkYt literal 0 HcmV?d00001 diff --git a/test/dos-end-of-line/dos-end-of-line/config.txt b/test/dos-end-of-line/dos-end-of-line/config.txt new file mode 100644 index 0000000..8965324 --- /dev/null +++ b/test/dos-end-of-line/dos-end-of-line/config.txt @@ -0,0 +1 @@ +--metric MAP diff --git a/test/dos-end-of-line/dos-end-of-line/test-A/expected.tsv b/test/dos-end-of-line/dos-end-of-line/test-A/expected.tsv new file mode 100644 index 0000000..b2f9e36 --- /dev/null +++ b/test/dos-end-of-line/dos-end-of-line/test-A/expected.tsv @@ -0,0 +1,4 @@ +foo +bar +baz +baz