1
0

Add plotting graphs for selected metrics

This commit is contained in:
Filip Graliński 2019-03-15 14:58:16 +01:00 committed by Filip Gralinski
parent bbcb11c498
commit eb10a4c3b4
6 changed files with 152 additions and 68 deletions

View File

@ -88,6 +88,8 @@ library
, aeson , aeson
, aeson-pretty , aeson-pretty
, integration , integration
, Chart
, Chart-cairo
default-language: Haskell2010 default-language: Haskell2010
executable geval executable geval

View File

@ -8,6 +8,16 @@ import Data.Attoparsec.Text
type MetricValue = Double type MetricValue = Double
data GraphSeries = GraphSeries [(Double, Double)]
data MetricOutput = MetricOutput MetricValue (Maybe GraphSeries)
getMetricValue :: MetricOutput -> MetricValue
getMetricValue (MetricOutput v _) = v
getGraphSeries :: MetricOutput -> Maybe GraphSeries
getGraphSeries (MetricOutput _ gs) = gs
-- some operations can be "hard" (on ints) or "soft" (on doubles), -- some operations can be "hard" (on ints) or "soft" (on doubles),
-- introduce a typeclass so that we could generalise easily -- introduce a typeclass so that we could generalise easily
class ConvertibleToDouble n where class ConvertibleToDouble n where

View File

@ -66,7 +66,7 @@ import Data.Maybe
import Data.Either (rights) import Data.Either (rights)
import Data.Tuple import Data.Tuple
import qualified Data.List.Split as DLS import qualified Data.List.Split as DLS
import Data.List (sortBy, isSuffixOf) import Data.List (sortBy, isSuffixOf, minimum, maximum)
import Text.NaturalComp import Text.NaturalComp
import Control.Monad.IO.Class import Control.Monad.IO.Class
@ -98,9 +98,12 @@ import qualified Data.HashMap.Strict as M
import qualified Data.Vector as V import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as DVU
import Statistics.Correlation import Statistics.Correlation
import Data.Statistics.Calibration(softCalibration) import Data.Statistics.Calibration(softCalibration)
import Data.Statistics.Loess(loess)
import Data.Proxy import Data.Proxy
@ -288,7 +291,8 @@ data GEvalSpecification = GEvalSpecification
gesTokenizer :: Maybe Tokenizer, gesTokenizer :: Maybe Tokenizer,
gesGonitoHost :: Maybe String, gesGonitoHost :: Maybe String,
gesToken :: Maybe String, gesToken :: Maybe String,
gesGonitoGitAnnexRemote :: Maybe String } gesGonitoGitAnnexRemote :: Maybe String}
gesMainMetric :: GEvalSpecification -> Metric gesMainMetric :: GEvalSpecification -> Metric
gesMainMetric spec = case gesMetrics spec of gesMainMetric spec = case gesMetrics spec of
@ -316,7 +320,8 @@ data GEvalOptions = GEvalOptions
geoResultOrdering :: ResultOrdering, geoResultOrdering :: ResultOrdering,
geoFilter :: Maybe String, geoFilter :: Maybe String,
geoSpec :: GEvalSpecification, geoSpec :: GEvalSpecification,
geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions } geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions,
geoGraphFile :: Maybe FilePath }
data GEvalException = NoExpectedFile FilePath data GEvalException = NoExpectedFile FilePath
| NoOutFile FilePath | NoOutFile FilePath
@ -388,13 +393,16 @@ extensionsHandled = ["tsv", "jsonl"]
data LineSource m = LineSource (ConduitT () Text m ()) (Text -> ItemTarget) (Text -> Text) SourceSpec Word32 data LineSource m = LineSource (ConduitT () Text m ()) (Text -> ItemTarget) (Text -> Text) SourceSpec Word32
geval :: GEvalSpecification -> IO [(SourceSpec, [MetricValue])] geval :: GEvalSpecification -> IO [(SourceSpec, [MetricOutput])]
geval gevalSpec = do geval gevalSpec = do
(inputSource, expectedSource, outSources) <- checkAndGetFiles False gevalSpec (inputSource, expectedSource, outSources) <- checkAndGetFiles False gevalSpec
results <- Prelude.mapM (gevalOnSingleOut gevalSpec inputSource expectedSource) outSources results <- Prelude.mapM (gevalOnSingleOut gevalSpec inputSource expectedSource) outSources
return $ sortBy (\a b -> (show $ fst a) `naturalComp` (show $ fst b)) results return $ sortBy (\a b -> (show $ fst a) `naturalComp` (show $ fst b)) results
gevalOnSingleOut :: GEvalSpecification -> SourceSpec -> SourceSpec -> SourceSpec -> IO (SourceSpec, [MetricValue]) noGraph :: d -> Maybe GraphSeries
noGraph = const Nothing
gevalOnSingleOut :: GEvalSpecification -> SourceSpec -> SourceSpec -> SourceSpec -> IO (SourceSpec, [MetricOutput])
gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do
vals <- Prelude.mapM (\metric -> gevalCore metric mSelector preprocess inputSource expectedSource outSource) metrics vals <- Prelude.mapM (\metric -> gevalCore metric mSelector preprocess inputSource expectedSource outSource) metrics
return (outSource, vals) return (outSource, vals)
@ -530,7 +538,7 @@ getDataFormatFromFilePath path =
dataDecoder fmt mSelector = CC.map (select fmt mSelector) dataDecoder fmt mSelector = CC.map (select fmt mSelector)
gevalCoreOnSingleLines :: Metric -> (Text -> Text) -> (Text -> ItemTarget) -> LineInFile -> (Text -> ItemTarget) -> LineInFile -> (Text -> ItemTarget) -> LineInFile -> IO (MetricValue) gevalCoreOnSingleLines :: Metric -> (Text -> Text) -> (Text -> ItemTarget) -> LineInFile -> (Text -> ItemTarget) -> LineInFile -> (Text -> ItemTarget) -> LineInFile -> IO (MetricOutput)
gevalCoreOnSingleLines metric preprocess inpDecoder inpLine expDecoder expLine outDecoder outLine = gevalCoreOnSingleLines metric preprocess inpDecoder inpLine expDecoder expLine outDecoder outLine =
gevalCoreOnSources metric (singleLineAsLineSource inpLine inpDecoder preprocess) gevalCoreOnSources metric (singleLineAsLineSource inpLine inpDecoder preprocess)
(singleLineAsLineSource expLine expDecoder outputPreprocess) (singleLineAsLineSource expLine expDecoder outputPreprocess)
@ -553,7 +561,7 @@ gevalCore :: Metric -- ^ evaluation metric
-> SourceSpec -- ^ source specification for the input values -> SourceSpec -- ^ source specification for the input values
-> SourceSpec -- ^ source specification for the expected output -> SourceSpec -- ^ source specification for the expected output
-> SourceSpec -- ^ source specification for the output -> SourceSpec -- ^ source specification for the output
-> IO (MetricValue) -- ^ metric value for the output against the expected output -> IO (MetricOutput) -- ^ metric value for the output against the expected output
gevalCore metric mSelector preprocess inputSource expectedSource outSource = do gevalCore metric mSelector preprocess inputSource expectedSource outSource = do
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
gevalCoreOnSources metric gevalCoreOnSources metric
@ -585,22 +593,22 @@ gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
-> LineSource (ResourceT m) -- ^ source of the input values -> LineSource (ResourceT m) -- ^ source of the input values
-> LineSource (ResourceT m) -- ^ source to read the expected output -> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output -> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricValue) -- ^ metric values for the output against the expected output -> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource MetricOutput mse g <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
return $ mse ** 0.5 return $ MetricOutput (mse ** 0.5) g
gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource = do gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource = do
logLoss <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource MetricOutput logLoss g <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss return $ MetricOutput (logLossToLikehood logLoss) g
gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource = do gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource = do
logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource MetricOutput logLoss g <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss return $ MetricOutput (logLossToLikehood logLoss) g
gevalCoreOnSources MultiLabelLikelihood inputLineSource expectedLineSource outLineSource = do gevalCoreOnSources MultiLabelLikelihood inputLineSource expectedLineSource outLineSource = do
logLoss <- gevalCoreOnSources MultiLabelLogLoss inputLineSource expectedLineSource outLineSource MetricOutput logLoss g <- gevalCoreOnSources MultiLabelLogLoss inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss return $ MetricOutput (logLossToLikehood logLoss) g
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
gevalCore' metric inputLineSource expectedLineSource outLineSource gevalCore' metric inputLineSource expectedLineSource outLineSource
@ -620,24 +628,24 @@ gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
-> LineSource (ResourceT m) -- ^ source of the input values -> LineSource (ResourceT m) -- ^ source of the input values
-> LineSource (ResourceT m) -- ^ source to read the expected output -> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output -> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricValue) -- ^ metric values for the output against the expected output -> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemSquaredError averageC id gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemSquaredError averageC id noGraph
where outParser = getValue . TR.double where outParser = getValue . TR.double
gevalCore' MAE _ = gevalCoreWithoutInput outParser outParser itemAbsoluteError averageC id gevalCore' MAE _ = gevalCoreWithoutInput outParser outParser itemAbsoluteError averageC id noGraph
where outParser = getValue . TR.double where outParser = getValue . TR.double
gevalCore' SMAPE _ = gevalCoreWithoutInput outParser outParser smape averageC (* 100.0) gevalCore' SMAPE _ = gevalCoreWithoutInput outParser outParser smape averageC (* 100.0) noGraph
where outParser = getValue . TR.double where outParser = getValue . TR.double
smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out)) smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
gevalCore' Pearson _ = gevalCoreByCorrelationMeasure pearson gevalCore' Pearson _ = gevalCoreByCorrelationMeasure pearson
gevalCore' Spearman _ = gevalCoreByCorrelationMeasure spearman gevalCore' Spearman _ = gevalCoreByCorrelationMeasure spearman
gevalCore' LogLoss _ = gevalCoreWithoutInput outParser outParser itemLogLossError averageC id gevalCore' LogLoss _ = gevalCoreWithoutInput outParser outParser itemLogLossError averageC id noGraph
where outParser = getValue . TR.double where outParser = getValue . TR.double
gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) bleuCombine bleuAgg bleuFinal gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) bleuCombine bleuAgg bleuFinal noGraph
where bleuFinal (p1, p2, p3, p4, rl, l1, l2, l3, l4) = ((p1 /. l1) * (p2 /. l2) * (p3 /. l3) * (p4 /. l4)) ** 0.25 * (brevityPenalty l1 rl) where bleuFinal (p1, p2, p3, p4, rl, l1, l2, l3, l4) = ((p1 /. l1) * (p2 /. l2) * (p3 /. l3) * (p4 /. l4)) ** 0.25 * (brevityPenalty l1 rl)
bleuCombine (refs, sen) = bleuStep refs sen bleuCombine (refs, sen) = bleuStep refs sen
bleuAgg = CC.foldl bleuFuse (0, 0, 0, 0, 0, 0, 0, 0, 0) bleuAgg = CC.foldl bleuFuse (0, 0, 0, 0, 0, 0, 0, 0, 0)
@ -647,15 +655,15 @@ gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . D
| c == 0 && r > 0 = 0.0 | c == 0 && r > 0 = 0.0
| otherwise = exp (1.0 - (r /. c)) | otherwise = exp (1.0 - (r /. c))
gevalCore' GLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) gleuCombine gleuAgg gleuFinal gevalCore' GLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) gleuCombine gleuAgg gleuFinal noGraph
where gleuFinal (m, t) = m /. t where gleuFinal (m, t) = m /. t
gleuCombine (refs, sen) = gleuStep refs sen gleuCombine (refs, sen) = gleuStep refs sen
gleuAgg = CC.foldl gleuFuse (0, 0) gleuAgg = CC.foldl gleuFuse (0, 0)
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2) gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
gevalCore' WER _ = gevalCoreWithoutInput (Right . Prelude.words . unpack) (Right . Prelude.words . unpack) (uncurry werStep) averageC id gevalCore' WER _ = gevalCoreWithoutInput (Right . Prelude.words . unpack) (Right . Prelude.words . unpack) (uncurry werStep) averageC id noGraph
gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id noGraph
where hitOrMiss (exp, got) = where hitOrMiss (exp, got) =
-- first try to parse what we got as a probability distribution -- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric) -- (like the one used for Likelikehood/LogLossHashed metric)
@ -680,7 +688,7 @@ gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hi
tryReadingAsFloat :: Text -> Maybe Float tryReadingAsFloat :: Text -> Maybe Float
tryReadingAsFloat = readMaybe . unpack tryReadingAsFloat = readMaybe . unpack
gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCount countAgg (fMeasureOnCounts beta) gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCount countAgg (fMeasureOnCounts beta) noGraph
where outParser = detected <=< (getValue . TR.double) where outParser = detected <=< (getValue . TR.double)
expParser = expected <=< (getValue . TR.decimal) expParser = expected <=< (getValue . TR.decimal)
expected 1 = Right True expected 1 = Right True
@ -698,7 +706,7 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun
getCount (False, True) = (0, 0, 1) getCount (False, True) = (0, 0, 1)
getCount (False, False) = (0, 0, 0) getCount (False, False) = (0, 0, 0)
gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip) (Right . predicted . strip) getClassesInvolved gatherClassC macroAverageOnCounts gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip) (Right . predicted . strip) getClassesInvolved gatherClassC macroAverageOnCounts noGraph
where predicted got = where predicted got =
-- first try to parse what we got as a probability distribution -- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric) -- (like the one used for Likelikehood/LogLossHashed metric)
@ -732,6 +740,7 @@ gevalCore' (SoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnotations
getSoftCounts getSoftCounts
countAgg countAgg
(fMeasureOnCounts beta) (fMeasureOnCounts beta)
noGraph
where getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got, where getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
Prelude.length expected, Prelude.length expected,
Prelude.length got) Prelude.length got)
@ -741,15 +750,22 @@ gevalCore' (ProbabilisticSoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnot
getProbabilisticSoftCounts getProbabilisticSoftCounts
probabilisticSoftAgg probabilisticSoftAgg
(fMeasureOnProbabilisticCounts beta) (fMeasureOnProbabilisticCounts beta)
loessGraph
where probabilisticSoftAgg :: Monad m => ConduitM ([Double], [Double], Double, Int) o m ([Double], [Double], Double, Int) where probabilisticSoftAgg :: Monad m => ConduitM ([Double], [Double], Double, Int) o m ([Double], [Double], Double, Int)
probabilisticSoftAgg = CC.foldl probabilisticSoftFolder ([], [], fromInteger 0, 0) probabilisticSoftAgg = CC.foldl probabilisticSoftFolder ([], [], fromInteger 0, 0)
probabilisticSoftFolder (r1, p1, g1, e1) (r2, p2, g2, e2) = (r1 ++ r2, p1 ++ p2, g1 + g2, e1 + e2) probabilisticSoftFolder (r1, p1, g1, e1) (r2, p2, g2, e2) = (r1 ++ r2, p1 ++ p2, g1 + g2, e1 + e2)
loessGraph :: ([Double], [Double], Double, Int) -> Maybe GraphSeries
loessGraph (results, probs, _, _) = Just $ GraphSeries $ Prelude.map (\x -> (x, loess results' probs' x)) $ Prelude.filter (\p -> p >= lowest && p <= highest) $ Prelude.map (\d -> 0.01 * (fromIntegral d)) [1..99]
where results' = DVU.fromList results
probs' = DVU.fromList probs
lowest = Data.List.minimum probs
highest = Data.List.maximum probs
fMeasureOnProbabilisticCounts :: Double -> ([Double], [Double], Double, Int) -> Double fMeasureOnProbabilisticCounts :: Double -> ([Double], [Double], Double, Int) -> Double
fMeasureOnProbabilisticCounts beta (results, probs, got, nbExpected) = weightedHarmonicMean beta calibrationMeasure recall fMeasureOnProbabilisticCounts beta (results, probs, got, nbExpected) = weightedHarmonicMean beta calibrationMeasure recall
where calibrationMeasure = softCalibration results probs where calibrationMeasure = softCalibration results probs
recall = got /. nbExpected recall = got /. nbExpected
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep noGraph
where where
parseClippings = controlledParse lineClippingsParser parseClippings = controlledParse lineClippingsParser
parseClippingSpecs = controlledParse lineClippingSpecsParser parseClippingSpecs = controlledParse lineClippingSpecsParser
@ -759,18 +775,19 @@ gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings m
clippeuAgg = CC.foldl countFolder (0, 0, 0) clippeuAgg = CC.foldl countFolder (0, 0, 0)
finalStep counts = f2MeasureOnCounts counts finalStep counts = f2MeasureOnCounts counts
gevalCore' NMI _ = gevalCoreWithoutInput (Right . id) (Right . id) id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix gevalCore' NMI _ = gevalCoreWithoutInput (Right . id) (Right . id) id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix noGraph
gevalCore' MAP _ = gevalCoreWithoutInput (Right . DLS.splitOn "\t" . unpack) gevalCore' MAP _ = gevalCoreWithoutInput (Right . DLS.splitOn "\t" . unpack)
(Right . DLS.splitOn "\t" . unpack) (Right . DLS.splitOn "\t" . unpack)
(\(e,g) -> calculateMAPForOneResult e g) (\(e,g) -> calculateMAPForOneResult e g)
averageC averageC
id id
noGraph
gevalCore' (LogLossHashed nbOfBits) _ = helper nbOfBits gevalCore' (LogLossHashed nbOfBits) _ = helper nbOfBits
-- for LogLossHashed we "salt" each hash with the line number -- for LogLossHashed we "salt" each hash with the line number
where helper nbOfBits expectedLineSource outLineSource = where helper nbOfBits expectedLineSource outLineSource =
gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC negate (WithoutInput expectedLineSource outLineSource) gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC negate noGraph (WithoutInput expectedLineSource outLineSource)
-- Unfortunately, we're parsing the distribution twice. We need to -- Unfortunately, we're parsing the distribution twice. We need to
-- tentatively parse the distribution when the line number is unknown -- tentatively parse the distribution when the line number is unknown
-- (so we just set it to 1) -- (so we just set it to 1)
@ -782,14 +799,14 @@ gevalCore' (LogLossHashed nbOfBits) _ = helper nbOfBits
gevalCore' CharMatch inputLineSource = helper inputLineSource gevalCore' CharMatch inputLineSource = helper inputLineSource
where where
helper inputLineSource expectedLineSource outputLineSource = do helper inputLineSource expectedLineSource outputLineSource = do
gevalCoreGeneralized (ParserSpecWithInput justUnpack justUnpack justUnpack) step countAgg (fMeasureOnCounts charMatchBeta) (WithInput inputLineSource expectedLineSource outputLineSource) gevalCoreGeneralized (ParserSpecWithInput justUnpack justUnpack justUnpack) step countAgg (fMeasureOnCounts charMatchBeta) noGraph (WithInput inputLineSource expectedLineSource outputLineSource)
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
justUnpack = liftOp (Right . unpack) justUnpack = liftOp (Right . unpack)
gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph
gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph
where parseBioSequenceIntoEntitiesWithoutNormalization s = do where parseBioSequenceIntoEntitiesWithoutNormalization s = do
entities <- parseBioSequenceIntoEntities s entities <- parseBioSequenceIntoEntities s
return $ Prelude.map eraseNormalisation entities return $ Prelude.map eraseNormalisation entities
@ -799,6 +816,7 @@ gevalCore' TokenAccuracy _ = gevalCoreWithoutInput intoTokens
countHitsAndTotals countHitsAndTotals
hitsAndTotalsAgg hitsAndTotalsAgg
(\(hits, total) -> hits /. total) (\(hits, total) -> hits /. total)
noGraph
where intoTokens = Right . Data.Text.words where intoTokens = Right . Data.Text.words
countHitsAndTotals :: ([Text], [Text]) -> (Int, Int) countHitsAndTotals :: ([Text], [Text]) -> (Int, Int)
countHitsAndTotals (es, os) = countHitsAndTotals (es, os) =
@ -820,6 +838,7 @@ gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Rig
(getCounts (==)) (getCounts (==))
countAgg countAgg
(fMeasureOnCounts beta) (fMeasureOnCounts beta)
noGraph
where where
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
@ -831,6 +850,7 @@ gevalCore' MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
(uncurry countLogLossOnProbList) (uncurry countLogLossOnProbList)
averageC averageC
id id
noGraph
where where
intoWords = Right . Data.Text.words intoWords = Right . Data.Text.words
@ -841,9 +861,9 @@ gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
(V.Vector (Double, Double) -> Double) -> -- ^ correlation function (V.Vector (Double, Double) -> Double) -> -- ^ correlation function
LineSource (ResourceT m) -> -- ^ source to read the expected output LineSource (ResourceT m) -> -- ^ source to read the expected output
LineSource (ResourceT m) -> -- ^ source to read the output LineSource (ResourceT m) -> -- ^ source to read the output
m (MetricValue) -- ^ metric values for the output against the expected output m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreByCorrelationMeasure correlationFunction = gevalCoreByCorrelationMeasure correlationFunction =
gevalCoreWithoutInput outParser outParser id correlationC finalStep gevalCoreWithoutInput outParser outParser id correlationC finalStep noGraph
where outParser = getValue . TR.double where outParser = getValue . TR.double
correlationC = CC.foldl (flip (:)) [] correlationC = CC.foldl (flip (:)) []
finalStep pairs = correlationFunction $ V.fromList pairs finalStep pairs = correlationFunction $ V.fromList pairs
@ -868,11 +888,12 @@ gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
-> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into -> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
-- a "total" value -- a "total" value
-> (d -> Double) -- ^ function to transform the "total" value into the final score -> (d -> Double) -- ^ function to transform the "total" value into the final score
-> (d -> Maybe GraphSeries)
-> LineSource (ResourceT m) -- ^ source to read the expected output -> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output -> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricValue) -- ^ metric values for the output against the expected output -> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream = gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep generateGraph expectedLineStream outLineStream =
gevalCoreWithoutInputOnItemTargets (liftOp expParser) (liftOp outParser) itemStep aggregator finalStep expectedLineStream outLineStream gevalCoreWithoutInputOnItemTargets (liftOp expParser) (liftOp outParser) itemStep aggregator finalStep generateGraph expectedLineStream outLineStream
gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m) gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
=> (ItemTarget -> Either String a) -- ^ parser for values in the expected output => (ItemTarget -> Either String a) -- ^ parser for values in the expected output
@ -883,20 +904,21 @@ gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
-> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into -> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
-- a "total" value -- a "total" value
-> (d -> Double) -- ^ function to transform the "total" value into the final score -> (d -> Double) -- ^ function to transform the "total" value into the final score
-> (d -> Maybe GraphSeries)
-> LineSource (ResourceT m) -- ^ source to read the expected output -> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output -> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricValue) -- ^ metric values for the output against the expected output -> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreWithoutInputOnItemTargets expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream = gevalCoreWithoutInputOnItemTargets expParser outParser itemStep aggregator finalStep generateGraph expectedLineStream outLineStream =
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream) gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep generateGraph (WithoutInput expectedLineStream outLineStream)
where where
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y) trans step (ParsedRecordWithoutInput x y) = step (x, y)
gevalCore''' :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (ConduitT c Void (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue) gevalCore''' :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (ConduitT c Void (ResourceT m) d) -> (d -> Double) -> (d -> Maybe GraphSeries) -> WithoutInput m a b -> m (MetricOutput)
gevalCore''' parserSpec itemStep aggregator finalStep context = gevalCore''' parserSpec itemStep aggregator finalStep generateGraph context =
gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep context gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep generateGraph context
where where
trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c
trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y)) trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y))
@ -911,18 +933,19 @@ gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m
-- some "local" score calculated for each line (item) -- some "local" score calculated for each line (item)
-> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit to aggregate score into a "total" -> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit to aggregate score into a "total"
-> (d -> Double) -- ^ function to transform the "total" value into the final score -> (d -> Double) -- ^ function to transform the "total" value into the final score
-> (d -> Maybe GraphSeries)
-> ctxt -- ^ "context", i.e. 2 or 3 sources needed to operate -> ctxt -- ^ "context", i.e. 2 or 3 sources needed to operate
-> m (MetricValue) -> m MetricOutput
gevalCoreGeneralized parserSpec itemStep aggregator finalStep context = gevalCoreGeneralized parserSpec itemStep aggregator finalStep generateGraph context =
gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep generateGraph context
gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (ConduitT c Void (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue) gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (ConduitT c Void (ResourceT m) d) -> (d -> Double) -> (d -> Maybe GraphSeries) -> ctxt -> m (MetricOutput)
gevalCoreGeneralized' parserSpec itemStep aggregator finalStep context = do gevalCoreGeneralized' parserSpec itemStep aggregator finalStep generateGraph context = do
v <- runResourceT $ runConduit $ v <- runResourceT $ runConduit $
(((getZipSource $ (,) (((getZipSource $ (,)
<$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..]) <$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
<*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| aggregator) <*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| aggregator)
return $ finalStep v return $ MetricOutput (finalStep v) (generateGraph v)
-- | A type family to handle all the evaluation "context". -- | A type family to handle all the evaluation "context".
-- --

View File

@ -431,7 +431,7 @@ gevalLineByLineSource metric mSelector preprocess inputSource expectedSource out
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp) s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
(getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp)
(getDataDecoder outputLineSource) (LineInFile outSource lineNo out) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out)
return $ LineRecord inp exp out lineNo s return $ LineRecord inp exp out lineNo (getMetricValue s)
justTokenize :: Maybe Tokenizer -> IO () justTokenize :: Maybe Tokenizer -> IO ()
justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option" justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option"

View File

@ -9,9 +9,14 @@ module GEval.OptionsParser
precisionArgParser precisionArgParser
) where ) where
import Debug.Trace
import Paths_geval (version) import Paths_geval (version)
import Data.Version (showVersion) import Data.Version (showVersion)
import Graphics.Rendering.Chart.Easy
import Graphics.Rendering.Chart.Backend.Cairo
import Options.Applicative import Options.Applicative
import qualified System.Directory as D import qualified System.Directory as D
import System.FilePath import System.FilePath
@ -96,6 +101,10 @@ optionsParser = GEvalOptions
<> help "When in line-by-line or diff mode, show only items with a given feature")) <> help "When in line-by-line or diff mode, show only items with a given feature"))
<*> specParser <*> specParser
<*> blackBoxDebuggingOptionsParser <*> blackBoxDebuggingOptionsParser
<*> optional (strOption
( long "plot-graph"
<> metavar "FILE-PATH"
<> help "Plot an extra graph, applicable only for Probabilistic-Soft-F-score (LOESS function for calibration)"))
precisionArgParser :: Parser Int precisionArgParser :: Parser Int
precisionArgParser = option auto precisionArgParser = option auto
@ -277,41 +286,80 @@ runGEval'' opts = runGEval''' (geoSpecialCommand opts)
(geoFilter opts) (geoFilter opts)
(geoSpec opts) (geoSpec opts)
(geoBlackBoxDebugginsOptions opts) (geoBlackBoxDebugginsOptions opts)
(geoGraphFile opts)
runGEval''' :: Maybe GEvalSpecialCommand runGEval''' :: Maybe GEvalSpecialCommand
-> ResultOrdering -> ResultOrdering
-> Maybe String -> Maybe String
-> GEvalSpecification -> GEvalSpecification
-> BlackBoxDebuggingOptions -> BlackBoxDebuggingOptions
-> Maybe FilePath
-> IO (Maybe [(SourceSpec, [MetricValue])]) -> IO (Maybe [(SourceSpec, [MetricValue])])
runGEval''' Nothing _ _ spec _ = do runGEval''' Nothing _ _ spec _ mGraphFile = do
vals <- geval spec vals' <- geval spec
let vals = map (\(s, val) -> (s, map getMetricValue val)) vals'
case mGraphFile of
Just graphFile -> do
let graphsData = groupByMetric (gesMetrics spec) vals'
mapM_ (\(ix, d) -> (plotGraph (getGraphFilename ix graphFile) d)) $ zip [0..] graphsData
Nothing -> return ()
return $ Just vals return $ Just vals
runGEval''' (Just Init) _ _ spec _ = do runGEval''' (Just Init) _ _ spec _ _ = do
initChallenge spec initChallenge spec
return Nothing return Nothing
runGEval''' (Just PrintVersion) _ _ _ _ = do runGEval''' (Just PrintVersion) _ _ _ _ _ = do
putStrLn ("geval " ++ showVersion version) putStrLn ("geval " ++ showVersion version)
return Nothing return Nothing
runGEval''' (Just LineByLine) ordering featureFilter spec bbdo = do runGEval''' (Just LineByLine) ordering featureFilter spec bbdo _ = do
runLineByLine ordering featureFilter spec bbdo runLineByLine ordering featureFilter spec bbdo
return Nothing return Nothing
runGEval''' (Just WorstFeatures) ordering _ spec bbdo = do runGEval''' (Just WorstFeatures) ordering _ spec bbdo _ = do
runWorstFeatures ordering spec bbdo runWorstFeatures ordering spec bbdo
return Nothing return Nothing
runGEval''' (Just (Diff otherOut)) ordering featureFilter spec bbdo = do runGEval''' (Just (Diff otherOut)) ordering featureFilter spec bbdo _ = do
runDiff ordering featureFilter otherOut spec bbdo runDiff ordering featureFilter otherOut spec bbdo
return Nothing return Nothing
runGEval''' (Just (MostWorseningFeatures otherOut)) ordering _ spec bbdo = do runGEval''' (Just (MostWorseningFeatures otherOut)) ordering _ spec bbdo _ = do
runMostWorseningFeatures ordering otherOut spec bbdo runMostWorseningFeatures ordering otherOut spec bbdo
return Nothing return Nothing
runGEval''' (Just JustTokenize) _ _ spec _ = do runGEval''' (Just JustTokenize) _ _ spec _ _ = do
justTokenize (gesTokenizer spec) justTokenize (gesTokenizer spec)
return Nothing return Nothing
runGEval''' (Just Submit) _ _ spec _ = do runGEval''' (Just Submit) _ _ spec _ _ = do
submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec) submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec)
return Nothing return Nothing
getGraphFilename :: Int -> FilePath -> FilePath
getGraphFilename 0 fp = fp
getGraphFilename ix fp = ((dropExtension fp) ++ "-" ++ (show ix)) ++ (takeExtension fp)
groupByMetric :: [Metric]
-> [(SourceSpec, [MetricOutput])]
-> [(Metric, [(SourceSpec, GraphSeries)])]
groupByMetric metrics results = filter (\(_, ss) -> not (null ss))
$ map extractMetric
$ zip [0..] metrics
where extractMetric (ix, metric) =
(metric, map (\(s, Just g) -> (s, g))
$ filter (\(s, mg) -> isJust mg)
$ map (\(s, out) -> (s, getGraphSeries out))
$ map (\(s, outs) -> (s, outs !! ix)) results)
plotGraph :: FilePath -> (Metric, [(SourceSpec, GraphSeries)]) -> IO ()
plotGraph graphFile (metric@(ProbabilisticSoftFMeasure _), seriesSpecs) = do
toFile def graphFile $ do
layoutlr_title .= "GEval Graph / Loess / " ++ (show metric)
let perfectSeries = (FilePathSpec "Perfect",
GraphSeries [(0.0, 0.0), (1.0, 1.0)])
mapM_ plotOneSeries $ (perfectSeries : seriesSpecs)
return ()
where
plotOneSeries :: (SourceSpec, GraphSeries) -> EC (LayoutLR Double Double Double) ()
plotOneSeries (sspec, GraphSeries series) = plotLeft (line (recoverPath sspec) [series])
plotGraph _ _ = error "No graph for this metric!"
initChallenge :: GEvalSpecification -> IO () initChallenge :: GEvalSpecification -> IO ()
initChallenge spec = case gesExpectedDirectory spec of initChallenge spec = case gesExpectedDirectory spec of
Nothing -> showInitInstructions Nothing -> showInitInstructions

View File

@ -81,7 +81,7 @@ main :: IO ()
main = hspec $ do main = hspec $ do
describe "root mean square error" $ do describe "root mean square error" $ do
it "simple test" $ do it "simple test" $ do
[(_, (val:_))] <- geval $ defaultGEvalSpecification {gesExpectedDirectory=Just "test/rmse-simple/rmse-simple", gesOutDirectory="test/rmse-simple/rmse-simple-solution"} [(_, ((MetricOutput val _):_))] <- geval $ defaultGEvalSpecification {gesExpectedDirectory=Just "test/rmse-simple/rmse-simple", gesOutDirectory="test/rmse-simple/rmse-simple-solution"}
val `shouldBeAlmost` 0.64549722436790 val `shouldBeAlmost` 0.64549722436790
describe "mean square error" $ do describe "mean square error" $ do
it "simple test with arguments" $ it "simple test with arguments" $
@ -297,12 +297,13 @@ main = hspec $ do
runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827 runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827
describe "evaluating single lines" $ do describe "evaluating single lines" $ do
it "RMSE" $ do it "RMSE" $ do
gevalCoreOnSingleLines RMSE id RawItemTarget (MetricOutput v _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
(LineInFile (FilePathSpec "stub1") 1 "blabla") (LineInFile (FilePathSpec "stub1") 1 "blabla")
RawItemTarget RawItemTarget
(LineInFile (FilePathSpec "stub2") 1 "3.4") (LineInFile (FilePathSpec "stub2") 1 "3.4")
RawItemTarget RawItemTarget
(LineInFile (FilePathSpec "stub3") 1 "2.6") `shouldReturnAlmost` 0.8 (LineInFile (FilePathSpec "stub3") 1 "2.6")
v `shouldBeAlmost` 0.8
describe "Annotation format" $ do describe "Annotation format" $ do
it "just parse" $ do it "just parse" $ do
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]), parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),