diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 69ebe74..e683a1b 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -40,7 +40,8 @@ module GEval.Core checkMultipleOutsCore, gesMainMetric, gesPreprocess, - getDataDecoder + getDataDecoder, + threeLineSource ) where import Data.Conduit @@ -951,7 +952,11 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithIn checkStepM _ (_, WrappedParsedRecordWithInput Done Done Done) = return Nothing - +threeLineSource :: (MonadUnliftIO m, MonadIO m, MonadThrow m) => WithInput m Text Text Text -> ConduitT () (WrappedParsedRecord (WithInput m Text Text Text)) (ResourceT m) () +threeLineSource (WithInput inputLineSource expectedLineSource outLineSource) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z) + <$> ZipSource (linesAsItems inputLineSource) <*> (ZipSource $ getZipSource $ (,) + <$> ZipSource (linesAsItems expectedLineSource) + <*> ZipSource (linesAsItems outLineSource)) averageC :: MonadResource m => ConduitT Double Void m Double averageC = getZipSink @@ -959,6 +964,8 @@ averageC = getZipSink <$> ZipSink CC.sum <*> ZipSink CC.length +-- | Takes a source of lines and returns a source of lines and returns a conduit of +-- items (using a given preprocessor and parser). items :: MonadResource m => LineSource m -> (ItemTarget -> Either String a) -> ConduitT () (SourceItem a) m () items (LineSource lineSource itemDecoder preprocess _ _) parser = (lineSource .| CL.map (toItem . parser . preprocess' . itemDecoder)) >> yield Done @@ -967,6 +974,12 @@ items (LineSource lineSource itemDecoder preprocess _ _) parser = preprocess' (RawItemTarget t) = RawItemTarget $ preprocess t preprocess' (PartiallyParsedItemTarget ts) = PartiallyParsedItemTarget $ Prelude.map preprocess ts +-- | Takes a source of lines and returns a conduit of lines represented as +-- items (without preprocessing and parsing!) to be used in line-by-line modes. +linesAsItems :: MonadResource m => LineSource m -> ConduitT () (SourceItem Text) m () +linesAsItems (LineSource lineSource _ _ _ _) = + (lineSource .| CL.map Got) >> yield Done + itemAbsoluteError :: (Double, Double) -> Double itemAbsoluteError (exp, out) = abs (exp-out) diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index 60a96e7..ea33b82 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -420,9 +420,9 @@ gevalLineByLineSource :: Metric -> Maybe Selector -> (Text -> Text) -> SourceSpe gevalLineByLineSource metric mSelector preprocess inputSource expectedSource outSource = (getZipSource $ (,) <$> ZipSource (CL.sourceList [1..]) - <*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes - where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id)) - context = (WithInput inputLineSource expectedLineSource outputLineSource) + <*> (ZipSource $ threeLineSource context)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes + where context = (WithInput inputLineSource expectedLineSource outputLineSource) + -- preparing sources, `id` means that no preprocessing is done (to avoid double preprocessing) inputLineSource = fileAsLineSource inputSource mSelector id expectedLineSource = fileAsLineSource expectedSource mSelector id outputLineSource = fileAsLineSource outSource mSelector id diff --git a/test/Spec.hs b/test/Spec.hs index c835830..0f11a95 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -23,6 +23,7 @@ import Text.EditDistance import GEval.Annotation import GEval.BlackBoxDebugging import GEval.FeatureExtractor +import GEval.Selector import Data.Map.Strict @@ -287,8 +288,11 @@ main = hspec $ do runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827 describe "evaluating single lines" $ do it "RMSE" $ do - gevalCoreOnSingleLines RMSE id (LineInFile (FilePathSpec "stub1") 1 "blabla") + gevalCoreOnSingleLines RMSE id RawItemTarget + (LineInFile (FilePathSpec "stub1") 1 "blabla") + RawItemTarget (LineInFile (FilePathSpec "stub2") 1 "3.4") + RawItemTarget (LineInFile (FilePathSpec "stub3") 1 "2.6") `shouldReturnAlmost` 0.8 describe "Annotation format" $ do it "just parse" $ do