Hande itemStep via dependent types

This commit is contained in:
Filip Graliński 2019-11-02 11:11:53 +01:00
parent 32969bb56a
commit d67061c230
6 changed files with 226 additions and 173 deletions

View File

@ -14,6 +14,7 @@ import Data.Maybe (catMaybes)
import Debug.Trace import Debug.Trace
import GEval.Common import GEval.Common
import GEval.PrecisionRecall (maxMatch)
newtype PageNumber = PageNumber Int newtype PageNumber = PageNumber Int
deriving (Eq, Show) deriving (Eq, Show)
@ -160,3 +161,7 @@ getLeftovers (Rectangle (Point x0 y0) (Point x1 y1))
Rectangle (Point x0' y0') (Point (x0 - 1) y1'), Rectangle (Point x0' y0') (Point (x0 - 1) y1'),
Rectangle (Point (x1 + 1) y0') (Point x1' y1')] Rectangle (Point (x1 + 1) y0') (Point x1' y1')]
where validRectangle (Rectangle (Point x0 y0) (Point x1 y1)) = x0 <= x1 && y0 <= y1 where validRectangle (Rectangle (Point x0 y0) (Point x1 y1)) = x0 <= x1 && y0 <= y1
clippEUMatchStep (clippingSpecs, clippings) = (maxMatch matchClippingToSpec clippingSpecs clippings,
Prelude.length clippingSpecs,
Prelude.length clippings)

View File

@ -4,6 +4,9 @@ module GEval.Common
import qualified Data.Text as T import qualified Data.Text as T
import Data.Text.Read as TR import Data.Text.Read as TR
import Data.Word
import Control.Exception
import Data.Attoparsec.Text import Data.Attoparsec.Text
type MetricValue = Double type MetricValue = Double
@ -79,3 +82,62 @@ class AEq a where
instance AEq Double where instance AEq Double where
x =~ y = abs ( x - y ) < (1.0e-4 :: Double) x =~ y = abs ( x - y ) < (1.0e-4 :: Double)
itemAbsoluteError :: (Double, Double) -> Double
itemAbsoluteError (exp, out) = abs (exp-out)
itemSquaredError :: (Double, Double) -> Double
itemSquaredError (exp, out) = (exp-out)**2
itemLogLossError :: (Double, Double) -> Double
itemLogLossError (exp, out)
| exp' > 0.5 = - (log out')
| otherwise = - (log (1 - out'))
where exp' = normalizeAsProb exp
out' = normalizeAsProb out
normalizeAsProb v
| v >= 1.0 = 1.0
| v <= 0.0 = 0.0
| otherwise = v
data GEvalException = NoExpectedFile FilePath
| NoOutFile FilePath
| NoExpectedDirectory FilePath
| NoOutDirectory FilePath
| NoExpectedTestDirectory FilePath
| NoOutTestDirectory FilePath
| NoInputFile FilePath
| FileAlreadyThere FilePath
| TooFewLines
| TooManyLines
| TooFewLinesInInput
| TooManyLinesInInput
| EmptyOutput
| UnexpectedData Word32 String
| UnexpectedMultipleOutputs
| OtherException String
deriving (Eq)
instance Exception GEvalException
instance Show GEvalException where
show (NoExpectedFile filePath) = somethingWrongWithFilesMessage "No file with the expected results" filePath
show (NoOutFile filePath) = somethingWrongWithFilesMessage "No file with the test results" filePath
show (NoExpectedDirectory filePath) = somethingWrongWithFilesMessage "No directory with the expected results" filePath
show (NoOutDirectory filePath) = somethingWrongWithFilesMessage "No directory with the test results" filePath
show (NoExpectedTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the expected results" filePath
show (NoOutTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the results obtained" filePath
show (NoInputFile filePath) = somethingWrongWithFilesMessage "No file with the input" filePath
show (FileAlreadyThere filePath) = somethingWrongWithFilesMessage "File already there" filePath
show TooFewLines = "Too few lines in the output file"
show TooManyLines = "Too many lines in the output file"
show TooFewLinesInInput = "Too few lines in the input file"
show TooManyLinesInInput = "Too many lines in the input file"
show EmptyOutput = "The output file is empty"
show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]"
show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file"
show (OtherException message) = message
somethingWrongWithFilesMessage :: String -> FilePath -> String
somethingWrongWithFilesMessage msg filePath = Prelude.concat
[ msg, ": `", filePath, "`" ]

View File

@ -19,7 +19,6 @@ module GEval.Core
GEvalSpecification(..), GEvalSpecification(..),
ResultOrdering(..), ResultOrdering(..),
GEvalOptions(..), GEvalOptions(..),
GEvalException(..),
defaultGEvalSpecification, defaultGEvalSpecification,
defaultOutDirectory, defaultOutDirectory,
defaultTestName, defaultTestName,
@ -44,8 +43,7 @@ module GEval.Core
getDataDecoder, getDataDecoder,
threeLineSource, threeLineSource,
extensionsHandled, extensionsHandled,
isEmptyFile, isEmptyFile
somethingWrongWithFilesMessage
) where ) where
import Debug.Trace import Debug.Trace
@ -66,7 +64,6 @@ import Data.Text
import Data.Text.Read as TR import Data.Text.Read as TR
import Control.Applicative import Control.Applicative
import Control.Exception import Control.Exception
import Text.Read (readMaybe)
import Control.Conditional (unlessM, whenM) import Control.Conditional (unlessM, whenM)
import qualified System.Directory as D import qualified System.Directory as D
import System.Posix import System.Posix
@ -204,47 +201,6 @@ data GEvalOptions = GEvalOptions
geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions, geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions,
geoGraphFile :: Maybe FilePath } geoGraphFile :: Maybe FilePath }
data GEvalException = NoExpectedFile FilePath
| NoOutFile FilePath
| NoExpectedDirectory FilePath
| NoOutDirectory FilePath
| NoExpectedTestDirectory FilePath
| NoOutTestDirectory FilePath
| NoInputFile FilePath
| FileAlreadyThere FilePath
| TooFewLines
| TooManyLines
| TooFewLinesInInput
| TooManyLinesInInput
| EmptyOutput
| UnexpectedData Word32 String
| UnexpectedMultipleOutputs
| OtherException String
deriving (Eq)
instance Exception GEvalException
instance Show GEvalException where
show (NoExpectedFile filePath) = somethingWrongWithFilesMessage "No file with the expected results" filePath
show (NoOutFile filePath) = somethingWrongWithFilesMessage "No file with the test results" filePath
show (NoExpectedDirectory filePath) = somethingWrongWithFilesMessage "No directory with the expected results" filePath
show (NoOutDirectory filePath) = somethingWrongWithFilesMessage "No directory with the test results" filePath
show (NoExpectedTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the expected results" filePath
show (NoOutTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the results obtained" filePath
show (NoInputFile filePath) = somethingWrongWithFilesMessage "No file with the input" filePath
show (FileAlreadyThere filePath) = somethingWrongWithFilesMessage "File already there" filePath
show TooFewLines = "Too few lines in the output file"
show TooManyLines = "Too many lines in the output file"
show TooFewLinesInInput = "Too few lines in the input file"
show TooManyLinesInInput = "Too many lines in the input file"
show EmptyOutput = "The output file is empty"
show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]"
show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file"
show (OtherException message) = message
somethingWrongWithFilesMessage :: String -> FilePath -> String
somethingWrongWithFilesMessage msg filePath = Prelude.concat
[ msg, ": `", filePath, "`" ]
defaultGEvalSpecification = GEvalSpecification { defaultGEvalSpecification = GEvalSpecification {
gesOutDirectory = defaultOutDirectory, gesOutDirectory = defaultOutDirectory,
@ -513,12 +469,10 @@ gevalCoreOnSources Spearman _ = gevalCoreByCorrelationMeasure spearman
gevalCoreOnSources (ProbabilisticMultiLabelFMeasure beta) _ = generalizedProbabilisticFMeasure beta gevalCoreOnSources (ProbabilisticMultiLabelFMeasure beta) _ = generalizedProbabilisticFMeasure beta
SAProbabilisticMultiLabelFMeasure SAProbabilisticMultiLabelFMeasure
(Right . (\(ProbList es) -> es) . parseIntoProbList)
where intoWords = Right . Data.Text.words where intoWords = Right . Data.Text.words
gevalCoreOnSources (ProbabilisticSoftFMeasure beta) _ = generalizedProbabilisticFMeasure beta gevalCoreOnSources (ProbabilisticSoftFMeasure beta) _ = generalizedProbabilisticFMeasure beta
SAProbabilisticSoftFMeasure SAProbabilisticSoftFMeasure
parseObtainedAnnotations
-- and now more typical metrics, which: -- and now more typical metrics, which:
-- 1) parse the expected output -- 1) parse the expected output
@ -527,30 +481,27 @@ gevalCoreOnSources (ProbabilisticSoftFMeasure beta) _ = generalizedProbabilistic
-- 4) aggregate the results -- 4) aggregate the results
-- 5) apply some final funtion on the aggregate -- 5) apply some final funtion on the aggregate
-- 6) create a graph using the aggregate (applicable only to some metrics) -- 6) create a graph using the aggregate (applicable only to some metrics)
gevalCoreOnSources Likelihood _ = gevalCoreWithoutInput SALikelihood itemLogLossError averageC logLossToLikehood noGraph gevalCoreOnSources Likelihood _ = gevalCoreWithoutInput SALikelihood averageC logLossToLikehood noGraph
gevalCoreOnSources MultiLabelLikelihood _ = gevalCoreWithoutInput SAMultiLabelLikelihood gevalCoreOnSources MultiLabelLikelihood _ = gevalCoreWithoutInput SAMultiLabelLikelihood
(uncurry countLogLossOnProbList)
averageC averageC
logLossToLikehood logLossToLikehood
noGraph noGraph
where where
intoWords = Right . Data.Text.words intoWords = Right . Data.Text.words
gevalCoreOnSources MSE _ = gevalCoreWithoutInput SAMSE itemSquaredError averageC id noGraph gevalCoreOnSources MSE _ = gevalCoreWithoutInput SAMSE averageC id noGraph
gevalCoreOnSources RMSE _ = gevalCoreWithoutInput SARMSE itemSquaredError averageC (** 0.5) noGraph gevalCoreOnSources RMSE _ = gevalCoreWithoutInput SARMSE averageC (** 0.5) noGraph
gevalCoreOnSources MAE _ = gevalCoreWithoutInput SAMAE itemAbsoluteError averageC id noGraph gevalCoreOnSources MAE _ = gevalCoreWithoutInput SAMAE averageC id noGraph
gevalCoreOnSources SMAPE _ = gevalCoreWithoutInput SASMAPE smape averageC (* 100.0) noGraph gevalCoreOnSources SMAPE _ = gevalCoreWithoutInput SASMAPE averageC (* 100.0) noGraph
where smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
gevalCoreOnSources LogLoss _ = gevalCoreWithoutInput SALogLoss itemLogLossError averageC id noGraph gevalCoreOnSources LogLoss _ = gevalCoreWithoutInput SALogLoss averageC id noGraph
gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuCombine bleuAgg bleuFinal noGraph gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuAgg bleuFinal noGraph
where bleuFinal (p1, p2, p3, p4, rl, l1, l2, l3, l4) = ((p1 /. l1) * (p2 /. l2) * (p3 /. l3) * (p4 /. l4)) ** 0.25 * (brevityPenalty l1 rl) where bleuFinal (p1, p2, p3, p4, rl, l1, l2, l3, l4) = ((p1 /. l1) * (p2 /. l2) * (p3 /. l3) * (p4 /. l4)) ** 0.25 * (brevityPenalty l1 rl)
bleuCombine (refs, sen) = bleuStep refs sen
bleuAgg = CC.foldl bleuFuse (0, 0, 0, 0, 0, 0, 0, 0, 0) bleuAgg = CC.foldl bleuFuse (0, 0, 0, 0, 0, 0, 0, 0, 0)
bleuFuse (a1, a2, a3, a4, a5, a6, a7, a8, a9) (b1, b2, b3, b4, b5, b6, b7, b8, b9) = (a1+b1, a2+b2, a3+b3, a4+b4, a5+b5, a6+b6, a7+b7, a8+b8, a9+b9) bleuFuse (a1, a2, a3, a4, a5, a6, a7, a8, a9) (b1, b2, b3, b4, b5, b6, b7, b8, b9) = (a1+b1, a2+b2, a3+b3, a4+b4, a5+b5, a6+b6, a7+b7, a8+b8, a9+b9)
brevityPenalty c r brevityPenalty c r
@ -558,63 +509,19 @@ gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuCombine bleuAgg ble
| c == 0 && r > 0 = 0.0 | c == 0 && r > 0 = 0.0
| otherwise = exp (1.0 - (r /. c)) | otherwise = exp (1.0 - (r /. c))
gevalCoreOnSources GLEU _ = gevalCoreWithoutInput SAGLEU gleuCombine gleuAgg gleuFinal noGraph gevalCoreOnSources GLEU _ = gevalCoreWithoutInput SAGLEU gleuAgg gleuFinal noGraph
where gleuFinal (m, t) = m /. t where gleuFinal (m, t) = m /. t
gleuCombine (refs, sen) = gleuStep refs sen
gleuAgg = CC.foldl gleuFuse (0, 0) gleuAgg = CC.foldl gleuFuse (0, 0)
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2) gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
gevalCoreOnSources WER _ = gevalCoreWithoutInput SAWER (uncurry werStep) averageC id noGraph gevalCoreOnSources WER _ = gevalCoreWithoutInput SAWER averageC id noGraph
gevalCoreOnSources Accuracy _ = gevalCoreWithoutInput SAAccuracy hitOrMiss averageC id noGraph gevalCoreOnSources Accuracy _ = gevalCoreWithoutInput SAAccuracy averageC id noGraph
where hitOrMiss (exp, got) =
-- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric)
case parseWordSpecs got of
Right wordSpecs -> if Prelude.null pairs
then 0.0
else indicator (exp == (snd $ Prelude.maximum pairs))
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
-- if the expected value is 0 or 1 treat values
-- between 0.0 and 1.0 as probabilities
-- for the positive outcome
normalizeProbForAccuracy :: Text -> Text -> Text
normalizeProbForAccuracy exp got
| exp == (pack "1") = case tryReadingAsFloat got of
Just p -> if p >= 0.5 && p <= 1.0 then exp else got
Nothing -> got
| exp == (pack "0") = case tryReadingAsFloat got of
Just p -> if p < 0.5 && p >= 0.0 then exp else got
Nothing -> got
| otherwise = got
tryReadingAsFloat :: Text -> Maybe Float
tryReadingAsFloat = readMaybe . unpack
gevalCoreOnSources (FMeasure beta) _ = gevalCoreWithoutInput SAFMeasure getCount countAgg (fMeasureOnCounts beta) noGraph gevalCoreOnSources (FMeasure beta) _ = gevalCoreWithoutInput SAFMeasure countAgg (fMeasureOnCounts beta) noGraph
where -- output value could be a probability (for compatibility with other measures)
getCount :: (Bool, Bool) -> (Int, Int, Int)
getCount (True, True) = (1, 1, 1)
getCount (True, False) = (0, 1, 0)
getCount (False, True) = (0, 0, 1)
getCount (False, False) = (0, 0, 0)
gevalCoreOnSources (MacroFMeasure beta) _ = gevalCoreWithoutInput SAMacroFMeasure getClassesInvolved gatherClassC macroAverageOnCounts noGraph gevalCoreOnSources (MacroFMeasure beta) _ = gevalCoreWithoutInput SAMacroFMeasure gatherClassC macroAverageOnCounts noGraph
where predicted got = where gatherClassC = CC.foldl gatherClassCombiner (M.empty, M.empty, M.empty)
-- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric)
case parseWordSpecs got of
Right wordSpecs -> if Prelude.null pairs
then Nothing
else Just $ snd $ Prelude.maximum pairs
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
Left _ -> Just got
getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
getClassesInvolved (Just a, Just b) = if a == b
then (Just a, Just a, Just a)
else (Nothing, Just a, Just b)
gatherClassC = CC.foldl gatherClassCombiner (M.empty, M.empty, M.empty)
gatherClassCombiner (tpMap, expectedMap, gotMap) (tp, expected, got) = gatherClassCombiner (tpMap, expectedMap, gotMap) (tp, expected, got) =
(insertMaybeToMap tp tpMap, (insertMaybeToMap tp tpMap,
insertMaybeToMap expected expectedMap, insertMaybeToMap expected expectedMap,
@ -629,66 +536,39 @@ gevalCoreOnSources (MacroFMeasure beta) _ = gevalCoreWithoutInput SAMacroFMeasur
$ M.keys expectedMap) / (fromIntegral $ Prelude.length $ M.keys expectedMap) $ M.keys expectedMap) / (fromIntegral $ Prelude.length $ M.keys expectedMap)
gevalCoreOnSources (SoftFMeasure beta) _ = gevalCoreWithoutInput SASoftFMeasure gevalCoreOnSources (SoftFMeasure beta) _ = gevalCoreWithoutInput SASoftFMeasure
getSoftCounts
countAgg countAgg
(fMeasureOnCounts beta) (fMeasureOnCounts beta)
noGraph noGraph
where getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
Prelude.length expected,
Prelude.length got)
gevalCoreOnSources (Soft2DFMeasure beta) _ = gevalCoreWithoutInput SASoft2DFMeasure gevalCoreOnSources (Soft2DFMeasure beta) _ = gevalCoreWithoutInput SASoft2DFMeasure
count2DFScore (CC.map (fMeasureOnCounts beta) .| averageC)
averageC
id id
noGraph noGraph
where
count2DFScore (expected, got) = fMeasureOnCounts beta (tpArea, expArea, gotArea)
where tpArea = coveredBy expected got
expArea = totalArea expected
gotArea = totalArea got
gevalCoreOnSources ClippEU _ = gevalCoreWithoutInput SAClippEU matchStep clippeuAgg finalStep noGraph gevalCoreOnSources ClippEU _ = gevalCoreWithoutInput SAClippEU clippeuAgg finalStep noGraph
where where
matchStep (clippingSpecs, clippings) = (maxMatch matchClippingToSpec clippingSpecs clippings,
Prelude.length clippingSpecs,
Prelude.length clippings)
clippeuAgg = CC.foldl countFolder (0, 0, 0) clippeuAgg = CC.foldl countFolder (0, 0, 0)
finalStep counts = f2MeasureOnCounts counts finalStep counts = f2MeasureOnCounts counts
gevalCoreOnSources NMI _ = gevalCoreWithoutInput SANMI id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix noGraph gevalCoreOnSources NMI _ = gevalCoreWithoutInput SANMI (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix noGraph
gevalCoreOnSources MAP _ = gevalCoreWithoutInput SAMAP gevalCoreOnSources MAP _ = gevalCoreWithoutInput SAMAP
(\(e,g) -> calculateMAPForOneResult e g)
averageC averageC
id id
noGraph noGraph
gevalCoreOnSources BIOF1 _ = gevalCoreWithoutInput SABIOF1 (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph gevalCoreOnSources BIOF1 _ = gevalCoreWithoutInput SABIOF1 countAgg f1MeasureOnCounts noGraph
gevalCoreOnSources BIOF1Labels _ = gevalCoreWithoutInput SABIOF1Labels (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph gevalCoreOnSources BIOF1Labels _ = gevalCoreWithoutInput SABIOF1Labels countAgg f1MeasureOnCounts noGraph
gevalCoreOnSources TokenAccuracy _ = gevalCoreWithoutInput SATokenAccuracy gevalCoreOnSources TokenAccuracy _ = gevalCoreWithoutInput SATokenAccuracy
countHitsAndTotals
hitsAndTotalsAgg hitsAndTotalsAgg
(\(hits, total) -> hits /. total) (\(hits, total) -> hits /. total)
noGraph noGraph
where countHitsAndTotals :: ([Text], [Text]) -> (Int, Int) where
countHitsAndTotals (es, os) =
if Prelude.length os /= Prelude.length es
then throw $ OtherException "wrong number of tokens"
else Prelude.foldl matchFun
(0, 0)
(Prelude.zip es os)
matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int)
matchFun (h, t) (e, o)
| e == (pack "*") = (h, t)
| o `Prelude.elem` (splitOn (pack ";") e) = (h + 1, t + 1)
| otherwise = (h, t + 1)
hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0) hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0)
gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput SAMultiLabelLogLoss gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput SAMultiLabelLogLoss
(uncurry countLogLossOnProbList)
averageC averageC
id id
noGraph noGraph
@ -703,8 +583,7 @@ helperLogLossHashed nbOfBits finalStep expectedLineSource outLineSource =
Right _ -> Right t Right _ -> Right t
Left m -> Left m Left m -> Left m
generalizedProbabilisticFMeasure beta metric parseEntities = gevalCoreWithoutInput metric generalizedProbabilisticFMeasure beta metric = gevalCoreWithoutInput metric
getProbabilisticCounts
probabilisticSoftAgg probabilisticSoftAgg
(fMeasureOnProbabilisticCounts beta) (fMeasureOnProbabilisticCounts beta)
loessGraph loessGraph
@ -732,7 +611,7 @@ gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
LineSource (ResourceT m) -> -- ^ source to read the output LineSource (ResourceT m) -> -- ^ source to read the output
m (MetricOutput) -- ^ metric values for the output against the expected output m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreByCorrelationMeasure correlationFunction = gevalCoreByCorrelationMeasure correlationFunction =
gevalCoreWithoutInput SAPearson id correlationC finalStep noGraph gevalCoreWithoutInput SAPearson correlationC finalStep noGraph
where correlationC = CC.foldl (flip (:)) [] where correlationC = CC.foldl (flip (:)) []
finalStep pairs = correlationFunction $ V.fromList pairs finalStep pairs = correlationFunction $ V.fromList pairs
@ -749,20 +628,18 @@ skipLineNumber fun = fun . snd
-- | A helper function to run evaluation when the input is not needed to calculate the metric value. -- | A helper function to run evaluation when the input is not needed to calculate the metric value.
gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
=> SAMetric t => SAMetric t
-> ((ParsedExpectedType t, ParsedOutputType t) -> c) -- ^ function which combines parsed values into a single value -> (ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
-- (will be launched for each item, e.g. an error/cost function
-- could be calculated here)
-> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
-- a "total" value -- a "total" value
-> (d -> Double) -- ^ function to transform the "total" value into the final score -> (d -> Double) -- ^ function to transform the "total" value into the final score
-> (d -> Maybe GraphSeries) -> (d -> Maybe GraphSeries)
-> LineSource (ResourceT m) -- ^ source to read the expected output -> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output -> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricOutput) -- ^ metric values for the output against the expected output -> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreWithoutInput smetric itemStep aggregator finalStep generateGraph expectedLineStream outLineStream = gevalCoreWithoutInput smetric aggregator finalStep generateGraph expectedLineStream outLineStream =
gevalCoreWithoutInputOnItemTargets (liftOp expParser) (liftOp outParser) itemStep aggregator finalStep generateGraph expectedLineStream outLineStream gevalCoreWithoutInputOnItemTargets (liftOp expParser) (liftOp outParser) iStep aggregator finalStep generateGraph expectedLineStream outLineStream
where expParser = expectedParser smetric where expParser = expectedParser smetric
outParser = outputParser smetric outParser = outputParser smetric
iStep = itemStep smetric
gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m) gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
=> (ItemTarget -> Either String a) -- ^ parser for values in the expected output => (ItemTarget -> Either String a) -- ^ parser for values in the expected output
@ -922,21 +799,3 @@ items (LineSource lineSource itemDecoder preprocess _ _) parser =
linesAsItems :: MonadResource m => LineSource m -> ConduitT () (SourceItem Text) m () linesAsItems :: MonadResource m => LineSource m -> ConduitT () (SourceItem Text) m ()
linesAsItems (LineSource lineSource _ _ _ _) = linesAsItems (LineSource lineSource _ _ _ _) =
(lineSource .| CL.map Got) >> yield Done (lineSource .| CL.map Got) >> yield Done
itemAbsoluteError :: (Double, Double) -> Double
itemAbsoluteError (exp, out) = abs (exp-out)
itemSquaredError :: (Double, Double) -> Double
itemSquaredError (exp, out) = (exp-out)**2
itemLogLossError :: (Double, Double) -> Double
itemLogLossError (exp, out)
| exp' > 0.5 = - (log out')
| otherwise = - (log (1 - out'))
where exp' = normalizeAsProb exp
out' = normalizeAsProb out
normalizeAsProb v
| v >= 1.0 = 1.0
| v <= 0.0 = 0.0
| otherwise = v

View File

@ -7,7 +7,8 @@ module GEval.CreateChallenge
import GEval.Metric import GEval.Metric
import GEval.EvaluationScheme import GEval.EvaluationScheme
import GEval.Core (GEvalSpecification(..), GEvalException(..), configFileName, gesMainMetric, defaultTestName) import GEval.Common (GEvalException(..))
import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName)
import GEval.Submit (tokenFileName) import GEval.Submit (tokenFileName)
import qualified System.Directory as D import qualified System.Directory as D
import Control.Conditional (whenM) import Control.Conditional (whenM)

View File

@ -11,7 +11,19 @@ module GEval.MetricsMechanics
import Data.Singletons.TH import Data.Singletons.TH
import Text.Read (readMaybe)
import GEval.Metric import GEval.Metric
import GEval.Common
import GEval.BLEU (bleuStep, gleuStep)
import GEval.WER (werStep)
import GEval.Clippings (totalArea, coveredBy, clippEUMatchStep)
import GEval.BIO (gatherCountsForBIO)
import GEval.Probability
import GEval.PrecisionRecall (weightedMaxMatch, fMeasureOnCounts, calculateMAPForOneResult, getProbabilisticCounts, getCounts)
import Control.Exception
import Data.Text import Data.Text
import Data.Text.Read as TR import Data.Text.Read as TR
@ -25,7 +37,7 @@ import GEval.Annotation (Annotation, ObtainedAnnotation, parseAnnotations, parse
import GEval.Clippings (Clipping, ClippingSpec, LabeledClipping, lineClippingsParser, lineClippingSpecsParser, lineLabeledClippingsParser) import GEval.Clippings (Clipping, ClippingSpec, LabeledClipping, lineClippingsParser, lineClippingSpecsParser, lineLabeledClippingsParser)
import GEval.BIO (TaggedEntity, parseBioSequenceIntoEntities, parseBioSequenceIntoEntitiesWithoutNormalization) import GEval.BIO (TaggedEntity, parseBioSequenceIntoEntities, parseBioSequenceIntoEntitiesWithoutNormalization)
import GEval.LogLossHashed (parseWordSpecs, wordSpecToPair) import GEval.LogLossHashed (parseWordSpecs, wordSpecToPair)
import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..)) import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..), countLogLossOnProbList)
-- | Helper type so that singleton can be used. -- | Helper type so that singleton can be used.
-- | (The problem is that some metrics are parametrized by Double -- | (The problem is that some metrics are parametrized by Double
@ -184,6 +196,60 @@ outputParser SAMultiLabelFMeasure = intoWords
outputParser SAMultiLabelLogLoss = Right . parseIntoProbList outputParser SAMultiLabelLogLoss = Right . parseIntoProbList
outputParser SAMultiLabelLikelihood = Right . parseIntoProbList outputParser SAMultiLabelLikelihood = Right . parseIntoProbList
type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
ItemIntermediateRepresentationType ABLEU = (Int, Int, Int, Int, Int, Int, Int, Int, Int)
ItemIntermediateRepresentationType AGLEU = (Int, Int)
ItemIntermediateRepresentationType AFMeasure = (Int, Int, Int)
ItemIntermediateRepresentationType AMacroFMeasure = (Maybe Text, Maybe Text, Maybe Text)
ItemIntermediateRepresentationType ASoftFMeasure = (Double, Int, Int)
ItemIntermediateRepresentationType ASoft2DFMeasure = (Integer, Integer, Integer)
ItemIntermediateRepresentationType AClippEU = (Int, Int, Int)
ItemIntermediateRepresentationType ANMI = (Text, Text)
ItemIntermediateRepresentationType ABIOF1 = (Int, Int, Int)
ItemIntermediateRepresentationType ABIOF1Labels = (Int, Int, Int)
ItemIntermediateRepresentationType ATokenAccuracy = (Int, Int)
ItemIntermediateRepresentationType AProbabilisticMultiLabelFMeasure = ([Double], [Double], Double, Int)
ItemIntermediateRepresentationType AProbabilisticSoftFMeasure = ([Double], [Double], Double, Int)
ItemIntermediateRepresentationType APearson = (Double, Double)
ItemIntermediateRepresentationType ASpearman = (Double, Double)
ItemIntermediateRepresentationType AMultiLabelFMeasure = (Int, Int, Int)
ItemIntermediateRepresentationType ALogLossHashed = (Text, Text)
ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
ItemIntermediateRepresentationType t = Double
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
itemStep SARMSE = itemSquaredError
itemStep SAMSE = itemSquaredError
itemStep SAPearson = id
itemStep SASpearman = id
itemStep SABLEU = uncurry bleuStep
itemStep SAGLEU = uncurry gleuStep
itemStep SAWER = uncurry werStep
itemStep SAAccuracy = hitOrMiss
itemStep SAClippEU = clippEUMatchStep
itemStep SAFMeasure = getCount
itemStep SAMacroFMeasure = getClassesInvolved
itemStep SASoftFMeasure = getSoftCounts
itemStep SAProbabilisticMultiLabelFMeasure = getProbabilisticCounts
itemStep SAProbabilisticSoftFMeasure = getProbabilisticCounts
itemStep SASoft2DFMeasure = getSoft2DCounts
itemStep SANMI = id
itemStep SALogLossHashed = id
itemStep SALikelihoodHashed = id
itemStep SACharMatch = id
itemStep SAMAP = uncurry calculateMAPForOneResult
itemStep SALogLoss = itemLogLossError
itemStep SALikelihood = itemLogLossError
itemStep SABIOF1 = uncurry gatherCountsForBIO
itemStep SABIOF1Labels = uncurry gatherCountsForBIO
itemStep SATokenAccuracy = countHitsAndTotals
itemStep SAMAE = itemAbsoluteError
itemStep SASMAPE = smape
itemStep SAMultiLabelFMeasure = getCounts (==)
itemStep SAMultiLabelLogLoss = uncurry countLogLossOnProbList
itemStep SAMultiLabelLikelihood = uncurry countLogLossOnProbList
doubleParser = getValue . TR.double doubleParser = getValue . TR.double
@ -233,3 +299,63 @@ controlledParse parser t =
case parseOnly parser t of case parseOnly parser t of
(Right v) -> Right v (Right v) -> Right v
(Left _) -> Left "cannot parse line" (Left _) -> Left "cannot parse line"
smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
hitOrMiss (exp, got) =
-- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric)
case parseWordSpecs got of
Right wordSpecs -> if Prelude.null pairs
then 0.0
else indicator (exp == (snd $ Prelude.maximum pairs))
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
-- if the expected value is 0 or 1 treat values
-- between 0.0 and 1.0 as probabilities
-- for the positive outcome
where normalizeProbForAccuracy :: Text -> Text -> Text
normalizeProbForAccuracy exp got
| exp == (pack "1") = case tryReadingAsFloat got of
Just p -> if p >= 0.5 && p <= 1.0 then exp else got
Nothing -> got
| exp == (pack "0") = case tryReadingAsFloat got of
Just p -> if p < 0.5 && p >= 0.0 then exp else got
Nothing -> got
| otherwise = got
tryReadingAsFloat :: Text -> Maybe Float
tryReadingAsFloat = readMaybe . unpack
getCount :: (Bool, Bool) -> (Int, Int, Int)
getCount (True, True) = (1, 1, 1)
getCount (True, False) = (0, 1, 0)
getCount (False, True) = (0, 0, 1)
getCount (False, False) = (0, 0, 0)
getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
getClassesInvolved (Just a, Just b) = if a == b
then (Just a, Just a, Just a)
else (Nothing, Just a, Just b)
getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
Prelude.length expected,
Prelude.length got)
getSoft2DCounts (expected, got) = (tpArea, expArea, gotArea)
where tpArea = coveredBy expected got
expArea = totalArea expected
gotArea = totalArea got
countHitsAndTotals :: ([Text], [Text]) -> (Int, Int)
countHitsAndTotals (es, os) =
if Prelude.length os /= Prelude.length es
then throw $ OtherException "wrong number of tokens"
else Prelude.foldl matchFun
(0, 0)
(Prelude.zip es os)
where matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int)
matchFun (h, t) (e, o)
| e == (pack "*") = (h, t)
| o `Prelude.elem` (splitOn (pack ";") e) = (h + 1, t + 1)
| otherwise = (h, t + 1)

View File

@ -6,7 +6,7 @@ module GEval.Validation
import GEval.Metric import GEval.Metric
import GEval.EvaluationScheme import GEval.EvaluationScheme
import GEval.Core (GEvalSpecification(..), GEvalException(..), somethingWrongWithFilesMessage, isEmptyFile, geval, defaultInputFile, defaultExpectedFile, defaultOutFile) import GEval.Core (GEvalSpecification(..), isEmptyFile, geval, defaultInputFile, defaultExpectedFile, defaultOutFile)
import GEval.Common import GEval.Common
import qualified System.Directory as D import qualified System.Directory as D