Add plotting graphs for selected metrics

This commit is contained in:
Filip Graliński 2019-03-15 14:58:16 +01:00 committed by Filip Gralinski
parent bbcb11c498
commit eb10a4c3b4
6 changed files with 152 additions and 68 deletions

View File

@ -88,6 +88,8 @@ library
, aeson
, aeson-pretty
, integration
, Chart
, Chart-cairo
default-language: Haskell2010
executable geval

View File

@ -8,6 +8,16 @@ import Data.Attoparsec.Text
type MetricValue = Double
data GraphSeries = GraphSeries [(Double, Double)]
data MetricOutput = MetricOutput MetricValue (Maybe GraphSeries)
getMetricValue :: MetricOutput -> MetricValue
getMetricValue (MetricOutput v _) = v
getGraphSeries :: MetricOutput -> Maybe GraphSeries
getGraphSeries (MetricOutput _ gs) = gs
-- some operations can be "hard" (on ints) or "soft" (on doubles),
-- introduce a typeclass so that we could generalise easily
class ConvertibleToDouble n where

View File

@ -66,7 +66,7 @@ import Data.Maybe
import Data.Either (rights)
import Data.Tuple
import qualified Data.List.Split as DLS
import Data.List (sortBy, isSuffixOf)
import Data.List (sortBy, isSuffixOf, minimum, maximum)
import Text.NaturalComp
import Control.Monad.IO.Class
@ -98,9 +98,12 @@ import qualified Data.HashMap.Strict as M
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as DVU
import Statistics.Correlation
import Data.Statistics.Calibration(softCalibration)
import Data.Statistics.Loess(loess)
import Data.Proxy
@ -288,7 +291,8 @@ data GEvalSpecification = GEvalSpecification
gesTokenizer :: Maybe Tokenizer,
gesGonitoHost :: Maybe String,
gesToken :: Maybe String,
gesGonitoGitAnnexRemote :: Maybe String }
gesGonitoGitAnnexRemote :: Maybe String}
gesMainMetric :: GEvalSpecification -> Metric
gesMainMetric spec = case gesMetrics spec of
@ -316,7 +320,8 @@ data GEvalOptions = GEvalOptions
geoResultOrdering :: ResultOrdering,
geoFilter :: Maybe String,
geoSpec :: GEvalSpecification,
geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions }
geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions,
geoGraphFile :: Maybe FilePath }
data GEvalException = NoExpectedFile FilePath
| NoOutFile FilePath
@ -388,13 +393,16 @@ extensionsHandled = ["tsv", "jsonl"]
data LineSource m = LineSource (ConduitT () Text m ()) (Text -> ItemTarget) (Text -> Text) SourceSpec Word32
geval :: GEvalSpecification -> IO [(SourceSpec, [MetricValue])]
geval :: GEvalSpecification -> IO [(SourceSpec, [MetricOutput])]
geval gevalSpec = do
(inputSource, expectedSource, outSources) <- checkAndGetFiles False gevalSpec
results <- Prelude.mapM (gevalOnSingleOut gevalSpec inputSource expectedSource) outSources
return $ sortBy (\a b -> (show $ fst a) `naturalComp` (show $ fst b)) results
gevalOnSingleOut :: GEvalSpecification -> SourceSpec -> SourceSpec -> SourceSpec -> IO (SourceSpec, [MetricValue])
noGraph :: d -> Maybe GraphSeries
noGraph = const Nothing
gevalOnSingleOut :: GEvalSpecification -> SourceSpec -> SourceSpec -> SourceSpec -> IO (SourceSpec, [MetricOutput])
gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do
vals <- Prelude.mapM (\metric -> gevalCore metric mSelector preprocess inputSource expectedSource outSource) metrics
return (outSource, vals)
@ -530,7 +538,7 @@ getDataFormatFromFilePath path =
dataDecoder fmt mSelector = CC.map (select fmt mSelector)
gevalCoreOnSingleLines :: Metric -> (Text -> Text) -> (Text -> ItemTarget) -> LineInFile -> (Text -> ItemTarget) -> LineInFile -> (Text -> ItemTarget) -> LineInFile -> IO (MetricValue)
gevalCoreOnSingleLines :: Metric -> (Text -> Text) -> (Text -> ItemTarget) -> LineInFile -> (Text -> ItemTarget) -> LineInFile -> (Text -> ItemTarget) -> LineInFile -> IO (MetricOutput)
gevalCoreOnSingleLines metric preprocess inpDecoder inpLine expDecoder expLine outDecoder outLine =
gevalCoreOnSources metric (singleLineAsLineSource inpLine inpDecoder preprocess)
(singleLineAsLineSource expLine expDecoder outputPreprocess)
@ -553,7 +561,7 @@ gevalCore :: Metric -- ^ evaluation metric
-> SourceSpec -- ^ source specification for the input values
-> SourceSpec -- ^ source specification for the expected output
-> SourceSpec -- ^ source specification for the output
-> IO (MetricValue) -- ^ metric value for the output against the expected output
-> IO (MetricOutput) -- ^ metric value for the output against the expected output
gevalCore metric mSelector preprocess inputSource expectedSource outSource = do
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
gevalCoreOnSources metric
@ -585,22 +593,22 @@ gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
-> LineSource (ResourceT m) -- ^ source of the input values
-> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricValue) -- ^ metric values for the output against the expected output
-> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
return $ mse ** 0.5
MetricOutput mse g <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
return $ MetricOutput (mse ** 0.5) g
gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource = do
logLoss <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss
MetricOutput logLoss g <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource
return $ MetricOutput (logLossToLikehood logLoss) g
gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource = do
logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss
MetricOutput logLoss g <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
return $ MetricOutput (logLossToLikehood logLoss) g
gevalCoreOnSources MultiLabelLikelihood inputLineSource expectedLineSource outLineSource = do
logLoss <- gevalCoreOnSources MultiLabelLogLoss inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss
MetricOutput logLoss g <- gevalCoreOnSources MultiLabelLogLoss inputLineSource expectedLineSource outLineSource
return $ MetricOutput (logLossToLikehood logLoss) g
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
gevalCore' metric inputLineSource expectedLineSource outLineSource
@ -620,24 +628,24 @@ gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
-> LineSource (ResourceT m) -- ^ source of the input values
-> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricValue) -- ^ metric values for the output against the expected output
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemSquaredError averageC id
-> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemSquaredError averageC id noGraph
where outParser = getValue . TR.double
gevalCore' MAE _ = gevalCoreWithoutInput outParser outParser itemAbsoluteError averageC id
gevalCore' MAE _ = gevalCoreWithoutInput outParser outParser itemAbsoluteError averageC id noGraph
where outParser = getValue . TR.double
gevalCore' SMAPE _ = gevalCoreWithoutInput outParser outParser smape averageC (* 100.0)
gevalCore' SMAPE _ = gevalCoreWithoutInput outParser outParser smape averageC (* 100.0) noGraph
where outParser = getValue . TR.double
smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
gevalCore' Pearson _ = gevalCoreByCorrelationMeasure pearson
gevalCore' Spearman _ = gevalCoreByCorrelationMeasure spearman
gevalCore' LogLoss _ = gevalCoreWithoutInput outParser outParser itemLogLossError averageC id
gevalCore' LogLoss _ = gevalCoreWithoutInput outParser outParser itemLogLossError averageC id noGraph
where outParser = getValue . TR.double
gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) bleuCombine bleuAgg bleuFinal
gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) bleuCombine bleuAgg bleuFinal noGraph
where bleuFinal (p1, p2, p3, p4, rl, l1, l2, l3, l4) = ((p1 /. l1) * (p2 /. l2) * (p3 /. l3) * (p4 /. l4)) ** 0.25 * (brevityPenalty l1 rl)
bleuCombine (refs, sen) = bleuStep refs sen
bleuAgg = CC.foldl bleuFuse (0, 0, 0, 0, 0, 0, 0, 0, 0)
@ -647,15 +655,15 @@ gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . D
| c == 0 && r > 0 = 0.0
| otherwise = exp (1.0 - (r /. c))
gevalCore' GLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) gleuCombine gleuAgg gleuFinal
gevalCore' GLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) gleuCombine gleuAgg gleuFinal noGraph
where gleuFinal (m, t) = m /. t
gleuCombine (refs, sen) = gleuStep refs sen
gleuAgg = CC.foldl gleuFuse (0, 0)
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
gevalCore' WER _ = gevalCoreWithoutInput (Right . Prelude.words . unpack) (Right . Prelude.words . unpack) (uncurry werStep) averageC id
gevalCore' WER _ = gevalCoreWithoutInput (Right . Prelude.words . unpack) (Right . Prelude.words . unpack) (uncurry werStep) averageC id noGraph
gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id
gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id noGraph
where hitOrMiss (exp, got) =
-- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric)
@ -680,7 +688,7 @@ gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hi
tryReadingAsFloat :: Text -> Maybe Float
tryReadingAsFloat = readMaybe . unpack
gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCount countAgg (fMeasureOnCounts beta)
gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCount countAgg (fMeasureOnCounts beta) noGraph
where outParser = detected <=< (getValue . TR.double)
expParser = expected <=< (getValue . TR.decimal)
expected 1 = Right True
@ -698,7 +706,7 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun
getCount (False, True) = (0, 0, 1)
getCount (False, False) = (0, 0, 0)
gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip) (Right . predicted . strip) getClassesInvolved gatherClassC macroAverageOnCounts
gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip) (Right . predicted . strip) getClassesInvolved gatherClassC macroAverageOnCounts noGraph
where predicted got =
-- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric)
@ -732,6 +740,7 @@ gevalCore' (SoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnotations
getSoftCounts
countAgg
(fMeasureOnCounts beta)
noGraph
where getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
Prelude.length expected,
Prelude.length got)
@ -741,15 +750,22 @@ gevalCore' (ProbabilisticSoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnot
getProbabilisticSoftCounts
probabilisticSoftAgg
(fMeasureOnProbabilisticCounts beta)
loessGraph
where probabilisticSoftAgg :: Monad m => ConduitM ([Double], [Double], Double, Int) o m ([Double], [Double], Double, Int)
probabilisticSoftAgg = CC.foldl probabilisticSoftFolder ([], [], fromInteger 0, 0)
probabilisticSoftFolder (r1, p1, g1, e1) (r2, p2, g2, e2) = (r1 ++ r2, p1 ++ p2, g1 + g2, e1 + e2)
loessGraph :: ([Double], [Double], Double, Int) -> Maybe GraphSeries
loessGraph (results, probs, _, _) = Just $ GraphSeries $ Prelude.map (\x -> (x, loess results' probs' x)) $ Prelude.filter (\p -> p >= lowest && p <= highest) $ Prelude.map (\d -> 0.01 * (fromIntegral d)) [1..99]
where results' = DVU.fromList results
probs' = DVU.fromList probs
lowest = Data.List.minimum probs
highest = Data.List.maximum probs
fMeasureOnProbabilisticCounts :: Double -> ([Double], [Double], Double, Int) -> Double
fMeasureOnProbabilisticCounts beta (results, probs, got, nbExpected) = weightedHarmonicMean beta calibrationMeasure recall
where calibrationMeasure = softCalibration results probs
recall = got /. nbExpected
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep noGraph
where
parseClippings = controlledParse lineClippingsParser
parseClippingSpecs = controlledParse lineClippingSpecsParser
@ -759,18 +775,19 @@ gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings m
clippeuAgg = CC.foldl countFolder (0, 0, 0)
finalStep counts = f2MeasureOnCounts counts
gevalCore' NMI _ = gevalCoreWithoutInput (Right . id) (Right . id) id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix
gevalCore' NMI _ = gevalCoreWithoutInput (Right . id) (Right . id) id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix noGraph
gevalCore' MAP _ = gevalCoreWithoutInput (Right . DLS.splitOn "\t" . unpack)
(Right . DLS.splitOn "\t" . unpack)
(\(e,g) -> calculateMAPForOneResult e g)
averageC
id
noGraph
gevalCore' (LogLossHashed nbOfBits) _ = helper nbOfBits
-- for LogLossHashed we "salt" each hash with the line number
where helper nbOfBits expectedLineSource outLineSource =
gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC negate (WithoutInput expectedLineSource outLineSource)
gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC negate noGraph (WithoutInput expectedLineSource outLineSource)
-- Unfortunately, we're parsing the distribution twice. We need to
-- tentatively parse the distribution when the line number is unknown
-- (so we just set it to 1)
@ -782,14 +799,14 @@ gevalCore' (LogLossHashed nbOfBits) _ = helper nbOfBits
gevalCore' CharMatch inputLineSource = helper inputLineSource
where
helper inputLineSource expectedLineSource outputLineSource = do
gevalCoreGeneralized (ParserSpecWithInput justUnpack justUnpack justUnpack) step countAgg (fMeasureOnCounts charMatchBeta) (WithInput inputLineSource expectedLineSource outputLineSource)
gevalCoreGeneralized (ParserSpecWithInput justUnpack justUnpack justUnpack) step countAgg (fMeasureOnCounts charMatchBeta) noGraph (WithInput inputLineSource expectedLineSource outputLineSource)
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
justUnpack = liftOp (Right . unpack)
gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph
gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph
where parseBioSequenceIntoEntitiesWithoutNormalization s = do
entities <- parseBioSequenceIntoEntities s
return $ Prelude.map eraseNormalisation entities
@ -799,6 +816,7 @@ gevalCore' TokenAccuracy _ = gevalCoreWithoutInput intoTokens
countHitsAndTotals
hitsAndTotalsAgg
(\(hits, total) -> hits /. total)
noGraph
where intoTokens = Right . Data.Text.words
countHitsAndTotals :: ([Text], [Text]) -> (Int, Int)
countHitsAndTotals (es, os) =
@ -820,6 +838,7 @@ gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Rig
(getCounts (==))
countAgg
(fMeasureOnCounts beta)
noGraph
where
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
@ -831,6 +850,7 @@ gevalCore' MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
(uncurry countLogLossOnProbList)
averageC
id
noGraph
where
intoWords = Right . Data.Text.words
@ -841,9 +861,9 @@ gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
(V.Vector (Double, Double) -> Double) -> -- ^ correlation function
LineSource (ResourceT m) -> -- ^ source to read the expected output
LineSource (ResourceT m) -> -- ^ source to read the output
m (MetricValue) -- ^ metric values for the output against the expected output
m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreByCorrelationMeasure correlationFunction =
gevalCoreWithoutInput outParser outParser id correlationC finalStep
gevalCoreWithoutInput outParser outParser id correlationC finalStep noGraph
where outParser = getValue . TR.double
correlationC = CC.foldl (flip (:)) []
finalStep pairs = correlationFunction $ V.fromList pairs
@ -868,11 +888,12 @@ gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
-> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
-- a "total" value
-> (d -> Double) -- ^ function to transform the "total" value into the final score
-> (d -> Maybe GraphSeries)
-> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricValue) -- ^ metric values for the output against the expected output
gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream =
gevalCoreWithoutInputOnItemTargets (liftOp expParser) (liftOp outParser) itemStep aggregator finalStep expectedLineStream outLineStream
-> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep generateGraph expectedLineStream outLineStream =
gevalCoreWithoutInputOnItemTargets (liftOp expParser) (liftOp outParser) itemStep aggregator finalStep generateGraph expectedLineStream outLineStream
gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
=> (ItemTarget -> Either String a) -- ^ parser for values in the expected output
@ -883,20 +904,21 @@ gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
-> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
-- a "total" value
-> (d -> Double) -- ^ function to transform the "total" value into the final score
-> (d -> Maybe GraphSeries)
-> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricValue) -- ^ metric values for the output against the expected output
gevalCoreWithoutInputOnItemTargets expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream =
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream)
-> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreWithoutInputOnItemTargets expParser outParser itemStep aggregator finalStep generateGraph expectedLineStream outLineStream =
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep generateGraph (WithoutInput expectedLineStream outLineStream)
where
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y)
gevalCore''' :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (ConduitT c Void (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue)
gevalCore''' parserSpec itemStep aggregator finalStep context =
gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep context
gevalCore''' :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (ConduitT c Void (ResourceT m) d) -> (d -> Double) -> (d -> Maybe GraphSeries) -> WithoutInput m a b -> m (MetricOutput)
gevalCore''' parserSpec itemStep aggregator finalStep generateGraph context =
gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep generateGraph context
where
trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c
trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y))
@ -911,18 +933,19 @@ gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m
-- some "local" score calculated for each line (item)
-> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit to aggregate score into a "total"
-> (d -> Double) -- ^ function to transform the "total" value into the final score
-> (d -> Maybe GraphSeries)
-> ctxt -- ^ "context", i.e. 2 or 3 sources needed to operate
-> m (MetricValue)
gevalCoreGeneralized parserSpec itemStep aggregator finalStep context =
gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context
-> m MetricOutput
gevalCoreGeneralized parserSpec itemStep aggregator finalStep generateGraph context =
gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep generateGraph context
gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (ConduitT c Void (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
gevalCoreGeneralized' parserSpec itemStep aggregator finalStep context = do
gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (ConduitT c Void (ResourceT m) d) -> (d -> Double) -> (d -> Maybe GraphSeries) -> ctxt -> m (MetricOutput)
gevalCoreGeneralized' parserSpec itemStep aggregator finalStep generateGraph context = do
v <- runResourceT $ runConduit $
(((getZipSource $ (,)
<$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
<*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| aggregator)
return $ finalStep v
return $ MetricOutput (finalStep v) (generateGraph v)
-- | A type family to handle all the evaluation "context".
--

View File

@ -431,7 +431,7 @@ gevalLineByLineSource metric mSelector preprocess inputSource expectedSource out
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
(getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp)
(getDataDecoder outputLineSource) (LineInFile outSource lineNo out)
return $ LineRecord inp exp out lineNo s
return $ LineRecord inp exp out lineNo (getMetricValue s)
justTokenize :: Maybe Tokenizer -> IO ()
justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option"

View File

@ -9,9 +9,14 @@ module GEval.OptionsParser
precisionArgParser
) where
import Debug.Trace
import Paths_geval (version)
import Data.Version (showVersion)
import Graphics.Rendering.Chart.Easy
import Graphics.Rendering.Chart.Backend.Cairo
import Options.Applicative
import qualified System.Directory as D
import System.FilePath
@ -96,6 +101,10 @@ optionsParser = GEvalOptions
<> help "When in line-by-line or diff mode, show only items with a given feature"))
<*> specParser
<*> blackBoxDebuggingOptionsParser
<*> optional (strOption
( long "plot-graph"
<> metavar "FILE-PATH"
<> help "Plot an extra graph, applicable only for Probabilistic-Soft-F-score (LOESS function for calibration)"))
precisionArgParser :: Parser Int
precisionArgParser = option auto
@ -277,41 +286,80 @@ runGEval'' opts = runGEval''' (geoSpecialCommand opts)
(geoFilter opts)
(geoSpec opts)
(geoBlackBoxDebugginsOptions opts)
(geoGraphFile opts)
runGEval''' :: Maybe GEvalSpecialCommand
-> ResultOrdering
-> Maybe String
-> GEvalSpecification
-> BlackBoxDebuggingOptions
-> Maybe FilePath
-> IO (Maybe [(SourceSpec, [MetricValue])])
runGEval''' Nothing _ _ spec _ = do
vals <- geval spec
runGEval''' Nothing _ _ spec _ mGraphFile = do
vals' <- geval spec
let vals = map (\(s, val) -> (s, map getMetricValue val)) vals'
case mGraphFile of
Just graphFile -> do
let graphsData = groupByMetric (gesMetrics spec) vals'
mapM_ (\(ix, d) -> (plotGraph (getGraphFilename ix graphFile) d)) $ zip [0..] graphsData
Nothing -> return ()
return $ Just vals
runGEval''' (Just Init) _ _ spec _ = do
runGEval''' (Just Init) _ _ spec _ _ = do
initChallenge spec
return Nothing
runGEval''' (Just PrintVersion) _ _ _ _ = do
runGEval''' (Just PrintVersion) _ _ _ _ _ = do
putStrLn ("geval " ++ showVersion version)
return Nothing
runGEval''' (Just LineByLine) ordering featureFilter spec bbdo = do
runGEval''' (Just LineByLine) ordering featureFilter spec bbdo _ = do
runLineByLine ordering featureFilter spec bbdo
return Nothing
runGEval''' (Just WorstFeatures) ordering _ spec bbdo = do
runGEval''' (Just WorstFeatures) ordering _ spec bbdo _ = do
runWorstFeatures ordering spec bbdo
return Nothing
runGEval''' (Just (Diff otherOut)) ordering featureFilter spec bbdo = do
runGEval''' (Just (Diff otherOut)) ordering featureFilter spec bbdo _ = do
runDiff ordering featureFilter otherOut spec bbdo
return Nothing
runGEval''' (Just (MostWorseningFeatures otherOut)) ordering _ spec bbdo = do
runGEval''' (Just (MostWorseningFeatures otherOut)) ordering _ spec bbdo _ = do
runMostWorseningFeatures ordering otherOut spec bbdo
return Nothing
runGEval''' (Just JustTokenize) _ _ spec _ = do
runGEval''' (Just JustTokenize) _ _ spec _ _ = do
justTokenize (gesTokenizer spec)
return Nothing
runGEval''' (Just Submit) _ _ spec _ = do
runGEval''' (Just Submit) _ _ spec _ _ = do
submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec)
return Nothing
getGraphFilename :: Int -> FilePath -> FilePath
getGraphFilename 0 fp = fp
getGraphFilename ix fp = ((dropExtension fp) ++ "-" ++ (show ix)) ++ (takeExtension fp)
groupByMetric :: [Metric]
-> [(SourceSpec, [MetricOutput])]
-> [(Metric, [(SourceSpec, GraphSeries)])]
groupByMetric metrics results = filter (\(_, ss) -> not (null ss))
$ map extractMetric
$ zip [0..] metrics
where extractMetric (ix, metric) =
(metric, map (\(s, Just g) -> (s, g))
$ filter (\(s, mg) -> isJust mg)
$ map (\(s, out) -> (s, getGraphSeries out))
$ map (\(s, outs) -> (s, outs !! ix)) results)
plotGraph :: FilePath -> (Metric, [(SourceSpec, GraphSeries)]) -> IO ()
plotGraph graphFile (metric@(ProbabilisticSoftFMeasure _), seriesSpecs) = do
toFile def graphFile $ do
layoutlr_title .= "GEval Graph / Loess / " ++ (show metric)
let perfectSeries = (FilePathSpec "Perfect",
GraphSeries [(0.0, 0.0), (1.0, 1.0)])
mapM_ plotOneSeries $ (perfectSeries : seriesSpecs)
return ()
where
plotOneSeries :: (SourceSpec, GraphSeries) -> EC (LayoutLR Double Double Double) ()
plotOneSeries (sspec, GraphSeries series) = plotLeft (line (recoverPath sspec) [series])
plotGraph _ _ = error "No graph for this metric!"
initChallenge :: GEvalSpecification -> IO ()
initChallenge spec = case gesExpectedDirectory spec of
Nothing -> showInitInstructions

View File

@ -81,7 +81,7 @@ main :: IO ()
main = hspec $ do
describe "root mean square error" $ do
it "simple test" $ do
[(_, (val:_))] <- geval $ defaultGEvalSpecification {gesExpectedDirectory=Just "test/rmse-simple/rmse-simple", gesOutDirectory="test/rmse-simple/rmse-simple-solution"}
[(_, ((MetricOutput val _):_))] <- geval $ defaultGEvalSpecification {gesExpectedDirectory=Just "test/rmse-simple/rmse-simple", gesOutDirectory="test/rmse-simple/rmse-simple-solution"}
val `shouldBeAlmost` 0.64549722436790
describe "mean square error" $ do
it "simple test with arguments" $
@ -297,12 +297,13 @@ main = hspec $ do
runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827
describe "evaluating single lines" $ do
it "RMSE" $ do
gevalCoreOnSingleLines RMSE id RawItemTarget
(LineInFile (FilePathSpec "stub1") 1 "blabla")
RawItemTarget
(LineInFile (FilePathSpec "stub2") 1 "3.4")
RawItemTarget
(LineInFile (FilePathSpec "stub3") 1 "2.6") `shouldReturnAlmost` 0.8
(MetricOutput v _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
(LineInFile (FilePathSpec "stub1") 1 "blabla")
RawItemTarget
(LineInFile (FilePathSpec "stub2") 1 "3.4")
RawItemTarget
(LineInFile (FilePathSpec "stub3") 1 "2.6")
v `shouldBeAlmost` 0.8
describe "Annotation format" $ do
it "just parse" $ do
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),