Refactor code

This commit is contained in:
Filip Gralinski 2019-09-20 20:30:12 +02:00
parent f68bd8df74
commit 00addc5620

View File

@ -478,10 +478,54 @@ gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
-> LineSource (ResourceT m) -- ^ source to read the expected output -> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output -> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricOutput) -- ^ metric values for the output against the expected output -> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreOnSources Likelihood _ = gevalCoreWithoutInput doubleParser doubleParser itemLogLossError averageC logLossToLikehood noGraph
-- first not-standard metrics
-- CharMatch is the only metric needing input lines (so far)
gevalCoreOnSources CharMatch inputLineSource = helper inputLineSource
where
helper inputLineSource expectedLineSource outputLineSource = do
gevalCoreGeneralized (ParserSpecWithInput justUnpack justUnpack justUnpack) step countAgg (fMeasureOnCounts charMatchBeta) noGraph (WithInput inputLineSource expectedLineSource outputLineSource)
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
justUnpack = liftOp (Right . unpack)
gevalCoreOnSources (LogLossHashed nbOfBits) _ = helperLogLossHashed nbOfBits id
gevalCoreOnSources (LikelihoodHashed nbOfBits) _ = helperLogLossHashed nbOfBits logLossToLikehood gevalCoreOnSources (LikelihoodHashed nbOfBits) _ = helperLogLossHashed nbOfBits logLossToLikehood
-- only MultiLabel-F1 handled for JSONs for the time being...
gevalCoreOnSources (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
(Right . getWords)
(getCounts (==))
countAgg
(fMeasureOnCounts beta)
noGraph
where
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
gevalCoreOnSources Pearson _ = gevalCoreByCorrelationMeasure pearson
gevalCoreOnSources Spearman _ = gevalCoreByCorrelationMeasure spearman
gevalCoreOnSources (ProbabilisticMultiLabelFMeasure beta) _ = generalizedProbabilisticFMeasure beta
intoWords
(Right . (\(ProbList es) -> es) . parseIntoProbList)
where intoWords = Right . Data.Text.words
gevalCoreOnSources (ProbabilisticSoftFMeasure beta) _ = generalizedProbabilisticFMeasure beta
parseAnnotations
parseObtainedAnnotations
-- and now more typical metrics, which:
-- 1) parse the expected output
-- 2) parse the actual output
-- 3) compare the actual output and the expected output (for each record/line separately)
-- 4) aggregate the results
-- 5) apply some final funtion on the aggregate
-- 6) create a graph using the aggregate (applicable only to some metrics)
gevalCoreOnSources Likelihood _ = gevalCoreWithoutInput doubleParser doubleParser itemLogLossError averageC logLossToLikehood noGraph
gevalCoreOnSources MultiLabelLikelihood _ = gevalCoreWithoutInput intoWords gevalCoreOnSources MultiLabelLikelihood _ = gevalCoreWithoutInput intoWords
(Right . parseIntoProbList) (Right . parseIntoProbList)
(uncurry countLogLossOnProbList) (uncurry countLogLossOnProbList)
@ -500,9 +544,6 @@ gevalCoreOnSources MAE _ = gevalCoreWithoutInput doubleParser doubleParser itemA
gevalCoreOnSources SMAPE _ = gevalCoreWithoutInput doubleParser doubleParser smape averageC (* 100.0) noGraph gevalCoreOnSources SMAPE _ = gevalCoreWithoutInput doubleParser doubleParser smape averageC (* 100.0) noGraph
where smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out)) where smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
gevalCoreOnSources Pearson _ = gevalCoreByCorrelationMeasure pearson
gevalCoreOnSources Spearman _ = gevalCoreByCorrelationMeasure spearman
gevalCoreOnSources LogLoss _ = gevalCoreWithoutInput doubleParser doubleParser itemLogLossError averageC id noGraph gevalCoreOnSources LogLoss _ = gevalCoreWithoutInput doubleParser doubleParser itemLogLossError averageC id noGraph
gevalCoreOnSources BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) bleuCombine bleuAgg bleuFinal noGraph gevalCoreOnSources BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) bleuCombine bleuAgg bleuFinal noGraph
@ -605,15 +646,6 @@ gevalCoreOnSources (SoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnotation
Prelude.length expected, Prelude.length expected,
Prelude.length got) Prelude.length got)
gevalCoreOnSources (ProbabilisticMultiLabelFMeasure beta) _ = generalizedProbabilisticFMeasure beta
intoWords
(Right . (\(ProbList es) -> es) . parseIntoProbList)
where intoWords = Right . Data.Text.words
gevalCoreOnSources (ProbabilisticSoftFMeasure beta) _ = generalizedProbabilisticFMeasure beta
parseAnnotations
parseObtainedAnnotations
gevalCoreOnSources (Soft2DFMeasure beta) _ = gevalCoreWithoutInput parseLabeledClippings gevalCoreOnSources (Soft2DFMeasure beta) _ = gevalCoreWithoutInput parseLabeledClippings
parseLabeledClippings parseLabeledClippings
count2DFScore count2DFScore
@ -646,16 +678,6 @@ gevalCoreOnSources MAP _ = gevalCoreWithoutInput (Right . DLS.splitOn "\t" . unp
id id
noGraph noGraph
gevalCoreOnSources (LogLossHashed nbOfBits) _ = helperLogLossHashed nbOfBits id
gevalCoreOnSources CharMatch inputLineSource = helper inputLineSource
where
helper inputLineSource expectedLineSource outputLineSource = do
gevalCoreGeneralized (ParserSpecWithInput justUnpack justUnpack justUnpack) step countAgg (fMeasureOnCounts charMatchBeta) noGraph (WithInput inputLineSource expectedLineSource outputLineSource)
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
justUnpack = liftOp (Right . unpack)
gevalCoreOnSources BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph gevalCoreOnSources BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph
gevalCoreOnSources BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph gevalCoreOnSources BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph
@ -684,19 +706,6 @@ gevalCoreOnSources TokenAccuracy _ = gevalCoreWithoutInput intoTokens
| otherwise = (h, t + 1) | otherwise = (h, t + 1)
hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0) hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0)
-- only MultiLabel-F1 handled for JSONs for the time being...
gevalCoreOnSources (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
(Right . getWords)
(getCounts (==))
countAgg
(fMeasureOnCounts beta)
noGraph
where
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
(Right . parseIntoProbList) (Right . parseIntoProbList)
(uncurry countLogLossOnProbList) (uncurry countLogLossOnProbList)