Handle DOS/Windows end-of-lines

This commit is contained in:
Filip Gralinski 2021-06-30 09:33:07 +02:00
parent ed1bb47a81
commit 0f9ab275ef
5 changed files with 11 additions and 2 deletions

View File

@ -5,7 +5,7 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
module GEval.Core
( geval,
@ -509,10 +509,12 @@ data FileProcessingOptions = FileProcessingOptions {
fileProcessingOptionsPreprocess :: (Text -> Text),
fileProcessingOptionsHeader :: Maybe TabularHeader }
-- | Strip carriage-return characters from a single line of text.
--
-- Every CR in the line is removed (not only a trailing one), so a line that
-- came from a DOS/Windows file with CRLF endings reads the same as its
-- LF-only counterpart.
cleanLine :: Text -> Text
cleanLine line = replace "\r" "" line
-- | Build a 'LineSource' over the input described by @spec@.
--
-- The underlying conduit is assembled as a pipeline: @smartSource spec@,
-- then @autoDecompress@, lenient UTF-8 decoding, splitting into lines, and
-- finally @processHeader@ applied with the header taken from @options@.
-- The selector, the preprocessing function and the header are all read from
-- the given 'FileProcessingOptions'; the selector is combined with the data
-- format of @spec@ via @select@.
--
-- NOTE(review): two @LineSource ...@ lines appear below; presumably the first
-- is the pre-change version and the second (which additionally maps
-- @cleanLine@ over every line before header processing) is its replacement
-- -- confirm against the actual file.
-- NOTE(review): the trailing @1@ looks like an initial line number -- confirm
-- against the 'LineSource' definition.
fileAsLineSource :: SourceSpec -> FileProcessingOptions -> LineSource (ResourceT IO)
fileAsLineSource spec options =
LineSource ((smartSource spec) .| autoDecompress .| CT.decodeUtf8Lenient .| CT.lines .| processHeader mHeader) (select (getDataFormat spec) mSelector) preprocess spec 1
LineSource ((smartSource spec) .| autoDecompress .| CT.decodeUtf8Lenient .| CT.lines .| CL.map cleanLine .| processHeader mHeader) (select (getDataFormat spec) mSelector) preprocess spec 1
where mSelector = fileProcessingOptionsSelector options
preprocess = fileProcessingOptionsPreprocess options
mHeader = fileProcessingOptionsHeader options

View File

@ -336,6 +336,8 @@ main = hspec $ do
["one", "one"]) `shouldBeAlmost` 0.5
it "simple test" $ do
runGEvalTest "map-simple" `shouldReturnAlmost` 0.444444444
it "dos-end-of-lines" $
runGEvalTest "dos-end-of-line" `shouldReturnAlmost` 0.75
describe "LogLoss" $ do
it "simple" $ do
runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824

View File

@ -0,0 +1 @@
--metric MAP

View File

@ -0,0 +1,4 @@
foo
bar
baz
baz
1 foo
2 bar
3 baz
4 baz