add line-by-line mode

This commit is contained in:
Filip Gralinski 2018-01-09 11:17:11 +01:00 committed by Filip Gralinski
parent a7d2ed8c21
commit c70d49c418
4 changed files with 101 additions and 6 deletions

View File

@ -25,6 +25,7 @@ library
, GEval.Common
, GEval.LogLossHashed
, GEval.CharMatch
, GEval.LineByLine
build-depends: base >= 4.7 && < 5
, cond
, conduit

View File

@ -26,7 +26,14 @@ module GEval.Core
defaultInputFile,
defaultMetric,
getExpectedDirectory,
configFileName
configFileName,
ParsedRecord(..),
WithoutInput(..),
WithInput(..),
EvaluationContext(..),
ParserSpec(..),
fileAsLineSource,
checkAndGetFiles
) where
import Data.Conduit
@ -145,7 +152,7 @@ getExpectedDirectory :: GEvalSpecification -> FilePath
getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec
where outDirectory = gesOutDirectory spec
data GEvalSpecialCommand = Init
data GEvalSpecialCommand = Init | LineByLine
data GEvalOptions = GEvalOptions
{ geoSpecialCommand :: Maybe GEvalSpecialCommand,
@ -206,7 +213,7 @@ isEmptyFile path = do
return ((fileSize stat) == 0)
data LineSource m = LineSource (Source m Text) FilePath Int
data LineSource m = LineSource (Source m Text) FilePath Word32
geval :: GEvalSpecification -> IO (MetricValue)
geval gevalSpec = do
@ -221,7 +228,7 @@ checkAndGetFiles gevalSpec = do
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
checkInputFileIfNeeded metric inputFilePath
return (inputFilePath expectedFilePath outFilePath)
return (inputFilePath, expectedFilePath, outFilePath)
where expectedFilePath = expectedTestDirectory </> (gesExpectedFile gevalSpec)
outFilePath = outTestDirectory </> (gesOutFile gevalSpec)
inputFilePath = expectedTestDirectory </> (gesInputFile gevalSpec)
@ -274,7 +281,7 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
gevalCore' metric inputLineSource expectedLineSource outLineSource
data LineInFile = LineInFile FilePath Int Text
data LineInFile = LineInFile FilePath Word32 Text
gevalCore' :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
@ -385,6 +392,7 @@ class EvaluationContext ctxt m where
getExpectedFilePath :: ctxt -> String
getOutFilePath :: ctxt -> String
checkStep :: Proxy m -> ((Word32, ParsedRecord ctxt) -> c) -> (Word32, WrappedParsedRecord ctxt) -> Maybe c
checkStepM :: ((Word32, ParsedRecord ctxt) -> (ResourceT m) c) -> (Word32, WrappedParsedRecord ctxt) -> (ResourceT m) (Maybe c)
data WithoutInput m e o = WithoutInput (LineSource (ResourceT m)) (LineSource (ResourceT m))
@ -402,6 +410,11 @@ instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (
checkStep _ _ (_, WrappedParsedRecordWithoutInput Done (Got _)) = throw TooManyLines
checkStep _ _ (_, WrappedParsedRecordWithoutInput Done Done) = Nothing
checkStepM step (lineNo, WrappedParsedRecordWithoutInput (Got expectedItem) (Got outItem)) = Just <$> step (lineNo, ParsedRecordWithoutInput expectedItem outItem)
checkStepM _ (_, WrappedParsedRecordWithoutInput (Got _) Done) = throwM TooFewLines
checkStepM _ (_, WrappedParsedRecordWithoutInput Done (Got _)) = throwM TooManyLines
checkStepM _ (_, WrappedParsedRecordWithoutInput Done Done) = return Nothing
data WithInput m i e o = WithInput (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
@ -424,6 +437,14 @@ instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (
checkStep _ _ (_, WrappedParsedRecordWithInput (Got _) Done Done) = throw TooManyLinesInInput
checkStep _ _ (_, WrappedParsedRecordWithInput Done Done Done) = Nothing
checkStepM step (lineNo, WrappedParsedRecordWithInput (Got inputItem) (Got expectedItem) (Got outItem)) = Just <$> step (lineNo, ParsedRecordWithInput inputItem expectedItem outItem)
checkStepM _ (_, WrappedParsedRecordWithInput _ (Got _) Done) = throw TooFewLines
checkStepM _ (_, WrappedParsedRecordWithInput _ Done (Got _)) = throw TooManyLines
checkStepM _ (_, WrappedParsedRecordWithInput Done (Got _) (Got _)) = throw TooFewLinesInInput
checkStepM _ (_, WrappedParsedRecordWithInput (Got _) Done Done) = throw TooManyLinesInInput
checkStepM _ (_, WrappedParsedRecordWithInput Done Done Done) = return Nothing
averageC :: MonadResource m => Sink Double m Double

65
src/GEval/LineByLine.hs Normal file
View File

@ -0,0 +1,65 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
module GEval.LineByLine
(runLineByLine
) where
import GEval.Core
import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
import Data.Text
import Data.Text.Encoding
import Control.Monad.IO.Class
import Control.Monad.Trans.Resource
import Data.Word
import Text.Printf
data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show)
runLineByLine :: GEvalSpecification -> IO ()
runLineByLine spec = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum
where metric = gesMetric spec
justScore (LineRecord _ _ _ _ score) = score
consum :: Consumer LineRecord (ResourceT IO) ()
consum = (CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout)
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score,
escapeTabs inp,
escapeTabs exp,
escapeTabs out]
formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f"
escapeTabs = Data.Text.replace "\t" "<tab>"
gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> Sink LineRecord (ResourceT IO) () -> IO ()
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum =
runResourceT $
((getZipSource $ (,)
<$> ZipSource (CL.sourceList [1..])
<*> (ZipSource $ recordSource context parserSpec)) =$= CL.mapM (checkStepM evaluateLine) =$= CL.catMaybes $$ consum)
where parserSpec = (ParserSpecWithInput id id id)
context = (WithInput inputLineSource expectedLineSource outputLineSource)
inputLineSource = fileAsLineSource inputFilePath
expectedLineSource = fileAsLineSource expectedFilePath
outputLineSource = fileAsLineSource outFilePath
justLine (LineInFile _ _ l) = l
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
s <- liftIO $ gevalCoreOnSingleLines metric (LineInFile inputFilePath lineNo inp)
(LineInFile expectedFilePath lineNo exp)
(LineInFile outFilePath lineNo out)
return $ LineRecord inp exp out lineNo s

View File

@ -18,6 +18,7 @@ import Data.Monoid ((<>))
import GEval.Core
import GEval.CreateChallenge
import GEval.LineByLine
fullOptionsParser = info (helper <*> optionsParser)
(fullDesc
@ -26,9 +27,13 @@ fullOptionsParser = info (helper <*> optionsParser)
optionsParser :: Parser GEvalOptions
optionsParser = GEvalOptions
<$> optional (flag' Init
<$> optional ((flag' Init
( long "init"
<> help "Init a sample Gonito challenge rather than run an evaluation" ))
<|>
(flag' LineByLine
( long "line-by-line"
<> help "Give scores for each line rather than the whole test set" )))
<*> optional precisionArgParser
<*> specParser
@ -136,6 +141,9 @@ runGEval''' Nothing spec = do
runGEval''' (Just Init) spec = do
initChallenge spec
return Nothing
runGEval''' (Just LineByLine) spec = do
runLineByLine spec
return Nothing
initChallenge :: GEvalSpecification -> IO ()
initChallenge spec = case gesExpectedDirectory spec of