Hande itemStep via dependent types

This commit is contained in:
Filip Graliński 2019-11-02 11:11:53 +01:00
parent 32969bb56a
commit d67061c230
6 changed files with 226 additions and 173 deletions

View File

@ -14,6 +14,7 @@ import Data.Maybe (catMaybes)
import Debug.Trace
import GEval.Common
import GEval.PrecisionRecall (maxMatch)
newtype PageNumber = PageNumber Int
deriving (Eq, Show)
@ -160,3 +161,7 @@ getLeftovers (Rectangle (Point x0 y0) (Point x1 y1))
Rectangle (Point x0' y0') (Point (x0 - 1) y1'),
Rectangle (Point (x1 + 1) y0') (Point x1' y1')]
where validRectangle (Rectangle (Point x0 y0) (Point x1 y1)) = x0 <= x1 && y0 <= y1
clippEUMatchStep (clippingSpecs, clippings) = (maxMatch matchClippingToSpec clippingSpecs clippings,
Prelude.length clippingSpecs,
Prelude.length clippings)

View File

@ -4,6 +4,9 @@ module GEval.Common
import qualified Data.Text as T
import Data.Text.Read as TR
import Data.Word
import Control.Exception
import Data.Attoparsec.Text
type MetricValue = Double
@ -79,3 +82,62 @@ class AEq a where
instance AEq Double where
x =~ y = abs ( x - y ) < (1.0e-4 :: Double)
itemAbsoluteError :: (Double, Double) -> Double
itemAbsoluteError (exp, out) = abs (exp-out)
itemSquaredError :: (Double, Double) -> Double
itemSquaredError (exp, out) = (exp-out)**2
itemLogLossError :: (Double, Double) -> Double
itemLogLossError (exp, out)
| exp' > 0.5 = - (log out')
| otherwise = - (log (1 - out'))
where exp' = normalizeAsProb exp
out' = normalizeAsProb out
normalizeAsProb v
| v >= 1.0 = 1.0
| v <= 0.0 = 0.0
| otherwise = v
data GEvalException = NoExpectedFile FilePath
| NoOutFile FilePath
| NoExpectedDirectory FilePath
| NoOutDirectory FilePath
| NoExpectedTestDirectory FilePath
| NoOutTestDirectory FilePath
| NoInputFile FilePath
| FileAlreadyThere FilePath
| TooFewLines
| TooManyLines
| TooFewLinesInInput
| TooManyLinesInInput
| EmptyOutput
| UnexpectedData Word32 String
| UnexpectedMultipleOutputs
| OtherException String
deriving (Eq)
instance Exception GEvalException
instance Show GEvalException where
show (NoExpectedFile filePath) = somethingWrongWithFilesMessage "No file with the expected results" filePath
show (NoOutFile filePath) = somethingWrongWithFilesMessage "No file with the test results" filePath
show (NoExpectedDirectory filePath) = somethingWrongWithFilesMessage "No directory with the expected results" filePath
show (NoOutDirectory filePath) = somethingWrongWithFilesMessage "No directory with the test results" filePath
show (NoExpectedTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the expected results" filePath
show (NoOutTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the results obtained" filePath
show (NoInputFile filePath) = somethingWrongWithFilesMessage "No file with the input" filePath
show (FileAlreadyThere filePath) = somethingWrongWithFilesMessage "File already there" filePath
show TooFewLines = "Too few lines in the output file"
show TooManyLines = "Too many lines in the output file"
show TooFewLinesInInput = "Too few lines in the input file"
show TooManyLinesInInput = "Too many lines in the input file"
show EmptyOutput = "The output file is empty"
show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]"
show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file"
show (OtherException message) = message
somethingWrongWithFilesMessage :: String -> FilePath -> String
somethingWrongWithFilesMessage msg filePath = Prelude.concat
[ msg, ": `", filePath, "`" ]

View File

@ -19,7 +19,6 @@ module GEval.Core
GEvalSpecification(..),
ResultOrdering(..),
GEvalOptions(..),
GEvalException(..),
defaultGEvalSpecification,
defaultOutDirectory,
defaultTestName,
@ -44,8 +43,7 @@ module GEval.Core
getDataDecoder,
threeLineSource,
extensionsHandled,
isEmptyFile,
somethingWrongWithFilesMessage
isEmptyFile
) where
import Debug.Trace
@ -66,7 +64,6 @@ import Data.Text
import Data.Text.Read as TR
import Control.Applicative
import Control.Exception
import Text.Read (readMaybe)
import Control.Conditional (unlessM, whenM)
import qualified System.Directory as D
import System.Posix
@ -204,47 +201,6 @@ data GEvalOptions = GEvalOptions
geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions,
geoGraphFile :: Maybe FilePath }
data GEvalException = NoExpectedFile FilePath
| NoOutFile FilePath
| NoExpectedDirectory FilePath
| NoOutDirectory FilePath
| NoExpectedTestDirectory FilePath
| NoOutTestDirectory FilePath
| NoInputFile FilePath
| FileAlreadyThere FilePath
| TooFewLines
| TooManyLines
| TooFewLinesInInput
| TooManyLinesInInput
| EmptyOutput
| UnexpectedData Word32 String
| UnexpectedMultipleOutputs
| OtherException String
deriving (Eq)
instance Exception GEvalException
instance Show GEvalException where
show (NoExpectedFile filePath) = somethingWrongWithFilesMessage "No file with the expected results" filePath
show (NoOutFile filePath) = somethingWrongWithFilesMessage "No file with the test results" filePath
show (NoExpectedDirectory filePath) = somethingWrongWithFilesMessage "No directory with the expected results" filePath
show (NoOutDirectory filePath) = somethingWrongWithFilesMessage "No directory with the test results" filePath
show (NoExpectedTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the expected results" filePath
show (NoOutTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the results obtained" filePath
show (NoInputFile filePath) = somethingWrongWithFilesMessage "No file with the input" filePath
show (FileAlreadyThere filePath) = somethingWrongWithFilesMessage "File already there" filePath
show TooFewLines = "Too few lines in the output file"
show TooManyLines = "Too many lines in the output file"
show TooFewLinesInInput = "Too few lines in the input file"
show TooManyLinesInInput = "Too many lines in the input file"
show EmptyOutput = "The output file is empty"
show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]"
show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file"
show (OtherException message) = message
somethingWrongWithFilesMessage :: String -> FilePath -> String
somethingWrongWithFilesMessage msg filePath = Prelude.concat
[ msg, ": `", filePath, "`" ]
defaultGEvalSpecification = GEvalSpecification {
gesOutDirectory = defaultOutDirectory,
@ -513,12 +469,10 @@ gevalCoreOnSources Spearman _ = gevalCoreByCorrelationMeasure spearman
gevalCoreOnSources (ProbabilisticMultiLabelFMeasure beta) _ = generalizedProbabilisticFMeasure beta
SAProbabilisticMultiLabelFMeasure
(Right . (\(ProbList es) -> es) . parseIntoProbList)
where intoWords = Right . Data.Text.words
gevalCoreOnSources (ProbabilisticSoftFMeasure beta) _ = generalizedProbabilisticFMeasure beta
SAProbabilisticSoftFMeasure
parseObtainedAnnotations
-- and now more typical metrics, which:
-- 1) parse the expected output
@ -527,30 +481,27 @@ gevalCoreOnSources (ProbabilisticSoftFMeasure beta) _ = generalizedProbabilistic
-- 4) aggregate the results
-- 5) apply some final funtion on the aggregate
-- 6) create a graph using the aggregate (applicable only to some metrics)
gevalCoreOnSources Likelihood _ = gevalCoreWithoutInput SALikelihood itemLogLossError averageC logLossToLikehood noGraph
gevalCoreOnSources Likelihood _ = gevalCoreWithoutInput SALikelihood averageC logLossToLikehood noGraph
gevalCoreOnSources MultiLabelLikelihood _ = gevalCoreWithoutInput SAMultiLabelLikelihood
(uncurry countLogLossOnProbList)
averageC
logLossToLikehood
noGraph
where
intoWords = Right . Data.Text.words
gevalCoreOnSources MSE _ = gevalCoreWithoutInput SAMSE itemSquaredError averageC id noGraph
gevalCoreOnSources MSE _ = gevalCoreWithoutInput SAMSE averageC id noGraph
gevalCoreOnSources RMSE _ = gevalCoreWithoutInput SARMSE itemSquaredError averageC (** 0.5) noGraph
gevalCoreOnSources RMSE _ = gevalCoreWithoutInput SARMSE averageC (** 0.5) noGraph
gevalCoreOnSources MAE _ = gevalCoreWithoutInput SAMAE itemAbsoluteError averageC id noGraph
gevalCoreOnSources MAE _ = gevalCoreWithoutInput SAMAE averageC id noGraph
gevalCoreOnSources SMAPE _ = gevalCoreWithoutInput SASMAPE smape averageC (* 100.0) noGraph
where smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
gevalCoreOnSources SMAPE _ = gevalCoreWithoutInput SASMAPE averageC (* 100.0) noGraph
gevalCoreOnSources LogLoss _ = gevalCoreWithoutInput SALogLoss itemLogLossError averageC id noGraph
gevalCoreOnSources LogLoss _ = gevalCoreWithoutInput SALogLoss averageC id noGraph
gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuCombine bleuAgg bleuFinal noGraph
gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuAgg bleuFinal noGraph
where bleuFinal (p1, p2, p3, p4, rl, l1, l2, l3, l4) = ((p1 /. l1) * (p2 /. l2) * (p3 /. l3) * (p4 /. l4)) ** 0.25 * (brevityPenalty l1 rl)
bleuCombine (refs, sen) = bleuStep refs sen
bleuAgg = CC.foldl bleuFuse (0, 0, 0, 0, 0, 0, 0, 0, 0)
bleuFuse (a1, a2, a3, a4, a5, a6, a7, a8, a9) (b1, b2, b3, b4, b5, b6, b7, b8, b9) = (a1+b1, a2+b2, a3+b3, a4+b4, a5+b5, a6+b6, a7+b7, a8+b8, a9+b9)
brevityPenalty c r
@ -558,63 +509,19 @@ gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuCombine bleuAgg ble
| c == 0 && r > 0 = 0.0
| otherwise = exp (1.0 - (r /. c))
gevalCoreOnSources GLEU _ = gevalCoreWithoutInput SAGLEU gleuCombine gleuAgg gleuFinal noGraph
gevalCoreOnSources GLEU _ = gevalCoreWithoutInput SAGLEU gleuAgg gleuFinal noGraph
where gleuFinal (m, t) = m /. t
gleuCombine (refs, sen) = gleuStep refs sen
gleuAgg = CC.foldl gleuFuse (0, 0)
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
gevalCoreOnSources WER _ = gevalCoreWithoutInput SAWER (uncurry werStep) averageC id noGraph
gevalCoreOnSources WER _ = gevalCoreWithoutInput SAWER averageC id noGraph
gevalCoreOnSources Accuracy _ = gevalCoreWithoutInput SAAccuracy hitOrMiss averageC id noGraph
where hitOrMiss (exp, got) =
-- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric)
case parseWordSpecs got of
Right wordSpecs -> if Prelude.null pairs
then 0.0
else indicator (exp == (snd $ Prelude.maximum pairs))
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
-- if the expected value is 0 or 1 treat values
-- between 0.0 and 1.0 as probabilities
-- for the positive outcome
normalizeProbForAccuracy :: Text -> Text -> Text
normalizeProbForAccuracy exp got
| exp == (pack "1") = case tryReadingAsFloat got of
Just p -> if p >= 0.5 && p <= 1.0 then exp else got
Nothing -> got
| exp == (pack "0") = case tryReadingAsFloat got of
Just p -> if p < 0.5 && p >= 0.0 then exp else got
Nothing -> got
| otherwise = got
tryReadingAsFloat :: Text -> Maybe Float
tryReadingAsFloat = readMaybe . unpack
gevalCoreOnSources Accuracy _ = gevalCoreWithoutInput SAAccuracy averageC id noGraph
gevalCoreOnSources (FMeasure beta) _ = gevalCoreWithoutInput SAFMeasure getCount countAgg (fMeasureOnCounts beta) noGraph
where -- output value could be a probability (for compatibility with other measures)
getCount :: (Bool, Bool) -> (Int, Int, Int)
getCount (True, True) = (1, 1, 1)
getCount (True, False) = (0, 1, 0)
getCount (False, True) = (0, 0, 1)
getCount (False, False) = (0, 0, 0)
gevalCoreOnSources (FMeasure beta) _ = gevalCoreWithoutInput SAFMeasure countAgg (fMeasureOnCounts beta) noGraph
gevalCoreOnSources (MacroFMeasure beta) _ = gevalCoreWithoutInput SAMacroFMeasure getClassesInvolved gatherClassC macroAverageOnCounts noGraph
where predicted got =
-- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric)
case parseWordSpecs got of
Right wordSpecs -> if Prelude.null pairs
then Nothing
else Just $ snd $ Prelude.maximum pairs
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
Left _ -> Just got
getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
getClassesInvolved (Just a, Just b) = if a == b
then (Just a, Just a, Just a)
else (Nothing, Just a, Just b)
gatherClassC = CC.foldl gatherClassCombiner (M.empty, M.empty, M.empty)
gevalCoreOnSources (MacroFMeasure beta) _ = gevalCoreWithoutInput SAMacroFMeasure gatherClassC macroAverageOnCounts noGraph
where gatherClassC = CC.foldl gatherClassCombiner (M.empty, M.empty, M.empty)
gatherClassCombiner (tpMap, expectedMap, gotMap) (tp, expected, got) =
(insertMaybeToMap tp tpMap,
insertMaybeToMap expected expectedMap,
@ -629,66 +536,39 @@ gevalCoreOnSources (MacroFMeasure beta) _ = gevalCoreWithoutInput SAMacroFMeasur
$ M.keys expectedMap) / (fromIntegral $ Prelude.length $ M.keys expectedMap)
gevalCoreOnSources (SoftFMeasure beta) _ = gevalCoreWithoutInput SASoftFMeasure
getSoftCounts
countAgg
(fMeasureOnCounts beta)
noGraph
where getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
Prelude.length expected,
Prelude.length got)
gevalCoreOnSources (Soft2DFMeasure beta) _ = gevalCoreWithoutInput SASoft2DFMeasure
count2DFScore
averageC
(CC.map (fMeasureOnCounts beta) .| averageC)
id
noGraph
where
count2DFScore (expected, got) = fMeasureOnCounts beta (tpArea, expArea, gotArea)
where tpArea = coveredBy expected got
expArea = totalArea expected
gotArea = totalArea got
gevalCoreOnSources ClippEU _ = gevalCoreWithoutInput SAClippEU matchStep clippeuAgg finalStep noGraph
gevalCoreOnSources ClippEU _ = gevalCoreWithoutInput SAClippEU clippeuAgg finalStep noGraph
where
matchStep (clippingSpecs, clippings) = (maxMatch matchClippingToSpec clippingSpecs clippings,
Prelude.length clippingSpecs,
Prelude.length clippings)
clippeuAgg = CC.foldl countFolder (0, 0, 0)
finalStep counts = f2MeasureOnCounts counts
gevalCoreOnSources NMI _ = gevalCoreWithoutInput SANMI id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix noGraph
gevalCoreOnSources NMI _ = gevalCoreWithoutInput SANMI (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix noGraph
gevalCoreOnSources MAP _ = gevalCoreWithoutInput SAMAP
(\(e,g) -> calculateMAPForOneResult e g)
averageC
id
noGraph
gevalCoreOnSources BIOF1 _ = gevalCoreWithoutInput SABIOF1 (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph
gevalCoreOnSources BIOF1 _ = gevalCoreWithoutInput SABIOF1 countAgg f1MeasureOnCounts noGraph
gevalCoreOnSources BIOF1Labels _ = gevalCoreWithoutInput SABIOF1Labels (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph
gevalCoreOnSources BIOF1Labels _ = gevalCoreWithoutInput SABIOF1Labels countAgg f1MeasureOnCounts noGraph
gevalCoreOnSources TokenAccuracy _ = gevalCoreWithoutInput SATokenAccuracy
countHitsAndTotals
hitsAndTotalsAgg
(\(hits, total) -> hits /. total)
noGraph
where countHitsAndTotals :: ([Text], [Text]) -> (Int, Int)
countHitsAndTotals (es, os) =
if Prelude.length os /= Prelude.length es
then throw $ OtherException "wrong number of tokens"
else Prelude.foldl matchFun
(0, 0)
(Prelude.zip es os)
matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int)
matchFun (h, t) (e, o)
| e == (pack "*") = (h, t)
| o `Prelude.elem` (splitOn (pack ";") e) = (h + 1, t + 1)
| otherwise = (h, t + 1)
where
hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0)
gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput SAMultiLabelLogLoss
(uncurry countLogLossOnProbList)
averageC
id
noGraph
@ -703,8 +583,7 @@ helperLogLossHashed nbOfBits finalStep expectedLineSource outLineSource =
Right _ -> Right t
Left m -> Left m
generalizedProbabilisticFMeasure beta metric parseEntities = gevalCoreWithoutInput metric
getProbabilisticCounts
generalizedProbabilisticFMeasure beta metric = gevalCoreWithoutInput metric
probabilisticSoftAgg
(fMeasureOnProbabilisticCounts beta)
loessGraph
@ -732,7 +611,7 @@ gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
LineSource (ResourceT m) -> -- ^ source to read the output
m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreByCorrelationMeasure correlationFunction =
gevalCoreWithoutInput SAPearson id correlationC finalStep noGraph
gevalCoreWithoutInput SAPearson correlationC finalStep noGraph
where correlationC = CC.foldl (flip (:)) []
finalStep pairs = correlationFunction $ V.fromList pairs
@ -749,20 +628,18 @@ skipLineNumber fun = fun . snd
-- | A helper function to run evaluation when the input is not needed to calculate the metric value.
gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
=> SAMetric t
-> ((ParsedExpectedType t, ParsedOutputType t) -> c) -- ^ function which combines parsed values into a single value
-- (will be launched for each item, e.g. an error/cost function
-- could be calculated here)
-> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
-> (ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
-- a "total" value
-> (d -> Double) -- ^ function to transform the "total" value into the final score
-> (d -> Maybe GraphSeries)
-> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreWithoutInput smetric itemStep aggregator finalStep generateGraph expectedLineStream outLineStream =
gevalCoreWithoutInputOnItemTargets (liftOp expParser) (liftOp outParser) itemStep aggregator finalStep generateGraph expectedLineStream outLineStream
gevalCoreWithoutInput smetric aggregator finalStep generateGraph expectedLineStream outLineStream =
gevalCoreWithoutInputOnItemTargets (liftOp expParser) (liftOp outParser) iStep aggregator finalStep generateGraph expectedLineStream outLineStream
where expParser = expectedParser smetric
outParser = outputParser smetric
iStep = itemStep smetric
gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
=> (ItemTarget -> Either String a) -- ^ parser for values in the expected output
@ -922,21 +799,3 @@ items (LineSource lineSource itemDecoder preprocess _ _) parser =
linesAsItems :: MonadResource m => LineSource m -> ConduitT () (SourceItem Text) m ()
linesAsItems (LineSource lineSource _ _ _ _) =
(lineSource .| CL.map Got) >> yield Done
itemAbsoluteError :: (Double, Double) -> Double
itemAbsoluteError (exp, out) = abs (exp-out)
itemSquaredError :: (Double, Double) -> Double
itemSquaredError (exp, out) = (exp-out)**2
itemLogLossError :: (Double, Double) -> Double
itemLogLossError (exp, out)
| exp' > 0.5 = - (log out')
| otherwise = - (log (1 - out'))
where exp' = normalizeAsProb exp
out' = normalizeAsProb out
normalizeAsProb v
| v >= 1.0 = 1.0
| v <= 0.0 = 0.0
| otherwise = v

View File

@ -7,7 +7,8 @@ module GEval.CreateChallenge
import GEval.Metric
import GEval.EvaluationScheme
import GEval.Core (GEvalSpecification(..), GEvalException(..), configFileName, gesMainMetric, defaultTestName)
import GEval.Common (GEvalException(..))
import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName)
import GEval.Submit (tokenFileName)
import qualified System.Directory as D
import Control.Conditional (whenM)

View File

@ -11,7 +11,19 @@ module GEval.MetricsMechanics
import Data.Singletons.TH
import Text.Read (readMaybe)
import GEval.Metric
import GEval.Common
import GEval.BLEU (bleuStep, gleuStep)
import GEval.WER (werStep)
import GEval.Clippings (totalArea, coveredBy, clippEUMatchStep)
import GEval.BIO (gatherCountsForBIO)
import GEval.Probability
import GEval.PrecisionRecall (weightedMaxMatch, fMeasureOnCounts, calculateMAPForOneResult, getProbabilisticCounts, getCounts)
import Control.Exception
import Data.Text
import Data.Text.Read as TR
@ -25,7 +37,7 @@ import GEval.Annotation (Annotation, ObtainedAnnotation, parseAnnotations, parse
import GEval.Clippings (Clipping, ClippingSpec, LabeledClipping, lineClippingsParser, lineClippingSpecsParser, lineLabeledClippingsParser)
import GEval.BIO (TaggedEntity, parseBioSequenceIntoEntities, parseBioSequenceIntoEntitiesWithoutNormalization)
import GEval.LogLossHashed (parseWordSpecs, wordSpecToPair)
import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..))
import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..), countLogLossOnProbList)
-- | Helper type so that singleton can be used.
-- | (The problem is that some metrics are parametrized by Double
@ -184,6 +196,60 @@ outputParser SAMultiLabelFMeasure = intoWords
outputParser SAMultiLabelLogLoss = Right . parseIntoProbList
outputParser SAMultiLabelLikelihood = Right . parseIntoProbList
type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
ItemIntermediateRepresentationType ABLEU = (Int, Int, Int, Int, Int, Int, Int, Int, Int)
ItemIntermediateRepresentationType AGLEU = (Int, Int)
ItemIntermediateRepresentationType AFMeasure = (Int, Int, Int)
ItemIntermediateRepresentationType AMacroFMeasure = (Maybe Text, Maybe Text, Maybe Text)
ItemIntermediateRepresentationType ASoftFMeasure = (Double, Int, Int)
ItemIntermediateRepresentationType ASoft2DFMeasure = (Integer, Integer, Integer)
ItemIntermediateRepresentationType AClippEU = (Int, Int, Int)
ItemIntermediateRepresentationType ANMI = (Text, Text)
ItemIntermediateRepresentationType ABIOF1 = (Int, Int, Int)
ItemIntermediateRepresentationType ABIOF1Labels = (Int, Int, Int)
ItemIntermediateRepresentationType ATokenAccuracy = (Int, Int)
ItemIntermediateRepresentationType AProbabilisticMultiLabelFMeasure = ([Double], [Double], Double, Int)
ItemIntermediateRepresentationType AProbabilisticSoftFMeasure = ([Double], [Double], Double, Int)
ItemIntermediateRepresentationType APearson = (Double, Double)
ItemIntermediateRepresentationType ASpearman = (Double, Double)
ItemIntermediateRepresentationType AMultiLabelFMeasure = (Int, Int, Int)
ItemIntermediateRepresentationType ALogLossHashed = (Text, Text)
ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
ItemIntermediateRepresentationType t = Double
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
itemStep SARMSE = itemSquaredError
itemStep SAMSE = itemSquaredError
itemStep SAPearson = id
itemStep SASpearman = id
itemStep SABLEU = uncurry bleuStep
itemStep SAGLEU = uncurry gleuStep
itemStep SAWER = uncurry werStep
itemStep SAAccuracy = hitOrMiss
itemStep SAClippEU = clippEUMatchStep
itemStep SAFMeasure = getCount
itemStep SAMacroFMeasure = getClassesInvolved
itemStep SASoftFMeasure = getSoftCounts
itemStep SAProbabilisticMultiLabelFMeasure = getProbabilisticCounts
itemStep SAProbabilisticSoftFMeasure = getProbabilisticCounts
itemStep SASoft2DFMeasure = getSoft2DCounts
itemStep SANMI = id
itemStep SALogLossHashed = id
itemStep SALikelihoodHashed = id
itemStep SACharMatch = id
itemStep SAMAP = uncurry calculateMAPForOneResult
itemStep SALogLoss = itemLogLossError
itemStep SALikelihood = itemLogLossError
itemStep SABIOF1 = uncurry gatherCountsForBIO
itemStep SABIOF1Labels = uncurry gatherCountsForBIO
itemStep SATokenAccuracy = countHitsAndTotals
itemStep SAMAE = itemAbsoluteError
itemStep SASMAPE = smape
itemStep SAMultiLabelFMeasure = getCounts (==)
itemStep SAMultiLabelLogLoss = uncurry countLogLossOnProbList
itemStep SAMultiLabelLikelihood = uncurry countLogLossOnProbList
doubleParser = getValue . TR.double
@ -233,3 +299,63 @@ controlledParse parser t =
case parseOnly parser t of
(Right v) -> Right v
(Left _) -> Left "cannot parse line"
smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
hitOrMiss (exp, got) =
-- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric)
case parseWordSpecs got of
Right wordSpecs -> if Prelude.null pairs
then 0.0
else indicator (exp == (snd $ Prelude.maximum pairs))
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
-- if the expected value is 0 or 1 treat values
-- between 0.0 and 1.0 as probabilities
-- for the positive outcome
where normalizeProbForAccuracy :: Text -> Text -> Text
normalizeProbForAccuracy exp got
| exp == (pack "1") = case tryReadingAsFloat got of
Just p -> if p >= 0.5 && p <= 1.0 then exp else got
Nothing -> got
| exp == (pack "0") = case tryReadingAsFloat got of
Just p -> if p < 0.5 && p >= 0.0 then exp else got
Nothing -> got
| otherwise = got
tryReadingAsFloat :: Text -> Maybe Float
tryReadingAsFloat = readMaybe . unpack
getCount :: (Bool, Bool) -> (Int, Int, Int)
getCount (True, True) = (1, 1, 1)
getCount (True, False) = (0, 1, 0)
getCount (False, True) = (0, 0, 1)
getCount (False, False) = (0, 0, 0)
getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
getClassesInvolved (Just a, Just b) = if a == b
then (Just a, Just a, Just a)
else (Nothing, Just a, Just b)
getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
Prelude.length expected,
Prelude.length got)
getSoft2DCounts (expected, got) = (tpArea, expArea, gotArea)
where tpArea = coveredBy expected got
expArea = totalArea expected
gotArea = totalArea got
countHitsAndTotals :: ([Text], [Text]) -> (Int, Int)
countHitsAndTotals (es, os) =
if Prelude.length os /= Prelude.length es
then throw $ OtherException "wrong number of tokens"
else Prelude.foldl matchFun
(0, 0)
(Prelude.zip es os)
where matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int)
matchFun (h, t) (e, o)
| e == (pack "*") = (h, t)
| o `Prelude.elem` (splitOn (pack ";") e) = (h + 1, t + 1)
| otherwise = (h, t + 1)

View File

@ -6,7 +6,7 @@ module GEval.Validation
import GEval.Metric
import GEval.EvaluationScheme
import GEval.Core (GEvalSpecification(..), GEvalException(..), somethingWrongWithFilesMessage, isEmptyFile, geval, defaultInputFile, defaultExpectedFile, defaultOutFile)
import GEval.Core (GEvalSpecification(..), isEmptyFile, geval, defaultInputFile, defaultExpectedFile, defaultOutFile)
import GEval.Common
import qualified System.Directory as D