Hande itemStep via dependent types
This commit is contained in:
parent
32969bb56a
commit
d67061c230
@ -14,6 +14,7 @@ import Data.Maybe (catMaybes)
|
||||
import Debug.Trace
|
||||
|
||||
import GEval.Common
|
||||
import GEval.PrecisionRecall (maxMatch)
|
||||
|
||||
newtype PageNumber = PageNumber Int
|
||||
deriving (Eq, Show)
|
||||
@ -160,3 +161,7 @@ getLeftovers (Rectangle (Point x0 y0) (Point x1 y1))
|
||||
Rectangle (Point x0' y0') (Point (x0 - 1) y1'),
|
||||
Rectangle (Point (x1 + 1) y0') (Point x1' y1')]
|
||||
where validRectangle (Rectangle (Point x0 y0) (Point x1 y1)) = x0 <= x1 && y0 <= y1
|
||||
|
||||
clippEUMatchStep (clippingSpecs, clippings) = (maxMatch matchClippingToSpec clippingSpecs clippings,
|
||||
Prelude.length clippingSpecs,
|
||||
Prelude.length clippings)
|
||||
|
@ -4,6 +4,9 @@ module GEval.Common
|
||||
import qualified Data.Text as T
|
||||
import Data.Text.Read as TR
|
||||
|
||||
import Data.Word
|
||||
import Control.Exception
|
||||
|
||||
import Data.Attoparsec.Text
|
||||
|
||||
type MetricValue = Double
|
||||
@ -79,3 +82,62 @@ class AEq a where
|
||||
|
||||
instance AEq Double where
|
||||
x =~ y = abs ( x - y ) < (1.0e-4 :: Double)
|
||||
|
||||
itemAbsoluteError :: (Double, Double) -> Double
|
||||
itemAbsoluteError (exp, out) = abs (exp-out)
|
||||
|
||||
itemSquaredError :: (Double, Double) -> Double
|
||||
itemSquaredError (exp, out) = (exp-out)**2
|
||||
|
||||
itemLogLossError :: (Double, Double) -> Double
|
||||
itemLogLossError (exp, out)
|
||||
| exp' > 0.5 = - (log out')
|
||||
| otherwise = - (log (1 - out'))
|
||||
where exp' = normalizeAsProb exp
|
||||
out' = normalizeAsProb out
|
||||
normalizeAsProb v
|
||||
| v >= 1.0 = 1.0
|
||||
| v <= 0.0 = 0.0
|
||||
| otherwise = v
|
||||
|
||||
data GEvalException = NoExpectedFile FilePath
|
||||
| NoOutFile FilePath
|
||||
| NoExpectedDirectory FilePath
|
||||
| NoOutDirectory FilePath
|
||||
| NoExpectedTestDirectory FilePath
|
||||
| NoOutTestDirectory FilePath
|
||||
| NoInputFile FilePath
|
||||
| FileAlreadyThere FilePath
|
||||
| TooFewLines
|
||||
| TooManyLines
|
||||
| TooFewLinesInInput
|
||||
| TooManyLinesInInput
|
||||
| EmptyOutput
|
||||
| UnexpectedData Word32 String
|
||||
| UnexpectedMultipleOutputs
|
||||
| OtherException String
|
||||
deriving (Eq)
|
||||
|
||||
instance Exception GEvalException
|
||||
|
||||
instance Show GEvalException where
|
||||
show (NoExpectedFile filePath) = somethingWrongWithFilesMessage "No file with the expected results" filePath
|
||||
show (NoOutFile filePath) = somethingWrongWithFilesMessage "No file with the test results" filePath
|
||||
show (NoExpectedDirectory filePath) = somethingWrongWithFilesMessage "No directory with the expected results" filePath
|
||||
show (NoOutDirectory filePath) = somethingWrongWithFilesMessage "No directory with the test results" filePath
|
||||
show (NoExpectedTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the expected results" filePath
|
||||
show (NoOutTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the results obtained" filePath
|
||||
show (NoInputFile filePath) = somethingWrongWithFilesMessage "No file with the input" filePath
|
||||
show (FileAlreadyThere filePath) = somethingWrongWithFilesMessage "File already there" filePath
|
||||
show TooFewLines = "Too few lines in the output file"
|
||||
show TooManyLines = "Too many lines in the output file"
|
||||
show TooFewLinesInInput = "Too few lines in the input file"
|
||||
show TooManyLinesInInput = "Too many lines in the input file"
|
||||
show EmptyOutput = "The output file is empty"
|
||||
show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]"
|
||||
show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file"
|
||||
show (OtherException message) = message
|
||||
|
||||
somethingWrongWithFilesMessage :: String -> FilePath -> String
|
||||
somethingWrongWithFilesMessage msg filePath = Prelude.concat
|
||||
[ msg, ": `", filePath, "`" ]
|
||||
|
@ -19,7 +19,6 @@ module GEval.Core
|
||||
GEvalSpecification(..),
|
||||
ResultOrdering(..),
|
||||
GEvalOptions(..),
|
||||
GEvalException(..),
|
||||
defaultGEvalSpecification,
|
||||
defaultOutDirectory,
|
||||
defaultTestName,
|
||||
@ -44,8 +43,7 @@ module GEval.Core
|
||||
getDataDecoder,
|
||||
threeLineSource,
|
||||
extensionsHandled,
|
||||
isEmptyFile,
|
||||
somethingWrongWithFilesMessage
|
||||
isEmptyFile
|
||||
) where
|
||||
|
||||
import Debug.Trace
|
||||
@ -66,7 +64,6 @@ import Data.Text
|
||||
import Data.Text.Read as TR
|
||||
import Control.Applicative
|
||||
import Control.Exception
|
||||
import Text.Read (readMaybe)
|
||||
import Control.Conditional (unlessM, whenM)
|
||||
import qualified System.Directory as D
|
||||
import System.Posix
|
||||
@ -204,47 +201,6 @@ data GEvalOptions = GEvalOptions
|
||||
geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions,
|
||||
geoGraphFile :: Maybe FilePath }
|
||||
|
||||
data GEvalException = NoExpectedFile FilePath
|
||||
| NoOutFile FilePath
|
||||
| NoExpectedDirectory FilePath
|
||||
| NoOutDirectory FilePath
|
||||
| NoExpectedTestDirectory FilePath
|
||||
| NoOutTestDirectory FilePath
|
||||
| NoInputFile FilePath
|
||||
| FileAlreadyThere FilePath
|
||||
| TooFewLines
|
||||
| TooManyLines
|
||||
| TooFewLinesInInput
|
||||
| TooManyLinesInInput
|
||||
| EmptyOutput
|
||||
| UnexpectedData Word32 String
|
||||
| UnexpectedMultipleOutputs
|
||||
| OtherException String
|
||||
deriving (Eq)
|
||||
|
||||
instance Exception GEvalException
|
||||
|
||||
instance Show GEvalException where
|
||||
show (NoExpectedFile filePath) = somethingWrongWithFilesMessage "No file with the expected results" filePath
|
||||
show (NoOutFile filePath) = somethingWrongWithFilesMessage "No file with the test results" filePath
|
||||
show (NoExpectedDirectory filePath) = somethingWrongWithFilesMessage "No directory with the expected results" filePath
|
||||
show (NoOutDirectory filePath) = somethingWrongWithFilesMessage "No directory with the test results" filePath
|
||||
show (NoExpectedTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the expected results" filePath
|
||||
show (NoOutTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the results obtained" filePath
|
||||
show (NoInputFile filePath) = somethingWrongWithFilesMessage "No file with the input" filePath
|
||||
show (FileAlreadyThere filePath) = somethingWrongWithFilesMessage "File already there" filePath
|
||||
show TooFewLines = "Too few lines in the output file"
|
||||
show TooManyLines = "Too many lines in the output file"
|
||||
show TooFewLinesInInput = "Too few lines in the input file"
|
||||
show TooManyLinesInInput = "Too many lines in the input file"
|
||||
show EmptyOutput = "The output file is empty"
|
||||
show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]"
|
||||
show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file"
|
||||
show (OtherException message) = message
|
||||
|
||||
somethingWrongWithFilesMessage :: String -> FilePath -> String
|
||||
somethingWrongWithFilesMessage msg filePath = Prelude.concat
|
||||
[ msg, ": `", filePath, "`" ]
|
||||
|
||||
defaultGEvalSpecification = GEvalSpecification {
|
||||
gesOutDirectory = defaultOutDirectory,
|
||||
@ -513,12 +469,10 @@ gevalCoreOnSources Spearman _ = gevalCoreByCorrelationMeasure spearman
|
||||
|
||||
gevalCoreOnSources (ProbabilisticMultiLabelFMeasure beta) _ = generalizedProbabilisticFMeasure beta
|
||||
SAProbabilisticMultiLabelFMeasure
|
||||
(Right . (\(ProbList es) -> es) . parseIntoProbList)
|
||||
where intoWords = Right . Data.Text.words
|
||||
|
||||
gevalCoreOnSources (ProbabilisticSoftFMeasure beta) _ = generalizedProbabilisticFMeasure beta
|
||||
SAProbabilisticSoftFMeasure
|
||||
parseObtainedAnnotations
|
||||
|
||||
-- and now more typical metrics, which:
|
||||
-- 1) parse the expected output
|
||||
@ -527,30 +481,27 @@ gevalCoreOnSources (ProbabilisticSoftFMeasure beta) _ = generalizedProbabilistic
|
||||
-- 4) aggregate the results
|
||||
-- 5) apply some final funtion on the aggregate
|
||||
-- 6) create a graph using the aggregate (applicable only to some metrics)
|
||||
gevalCoreOnSources Likelihood _ = gevalCoreWithoutInput SALikelihood itemLogLossError averageC logLossToLikehood noGraph
|
||||
gevalCoreOnSources Likelihood _ = gevalCoreWithoutInput SALikelihood averageC logLossToLikehood noGraph
|
||||
|
||||
gevalCoreOnSources MultiLabelLikelihood _ = gevalCoreWithoutInput SAMultiLabelLikelihood
|
||||
(uncurry countLogLossOnProbList)
|
||||
averageC
|
||||
logLossToLikehood
|
||||
noGraph
|
||||
where
|
||||
intoWords = Right . Data.Text.words
|
||||
|
||||
gevalCoreOnSources MSE _ = gevalCoreWithoutInput SAMSE itemSquaredError averageC id noGraph
|
||||
gevalCoreOnSources MSE _ = gevalCoreWithoutInput SAMSE averageC id noGraph
|
||||
|
||||
gevalCoreOnSources RMSE _ = gevalCoreWithoutInput SARMSE itemSquaredError averageC (** 0.5) noGraph
|
||||
gevalCoreOnSources RMSE _ = gevalCoreWithoutInput SARMSE averageC (** 0.5) noGraph
|
||||
|
||||
gevalCoreOnSources MAE _ = gevalCoreWithoutInput SAMAE itemAbsoluteError averageC id noGraph
|
||||
gevalCoreOnSources MAE _ = gevalCoreWithoutInput SAMAE averageC id noGraph
|
||||
|
||||
gevalCoreOnSources SMAPE _ = gevalCoreWithoutInput SASMAPE smape averageC (* 100.0) noGraph
|
||||
where smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
|
||||
gevalCoreOnSources SMAPE _ = gevalCoreWithoutInput SASMAPE averageC (* 100.0) noGraph
|
||||
|
||||
gevalCoreOnSources LogLoss _ = gevalCoreWithoutInput SALogLoss itemLogLossError averageC id noGraph
|
||||
gevalCoreOnSources LogLoss _ = gevalCoreWithoutInput SALogLoss averageC id noGraph
|
||||
|
||||
gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuCombine bleuAgg bleuFinal noGraph
|
||||
gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuAgg bleuFinal noGraph
|
||||
where bleuFinal (p1, p2, p3, p4, rl, l1, l2, l3, l4) = ((p1 /. l1) * (p2 /. l2) * (p3 /. l3) * (p4 /. l4)) ** 0.25 * (brevityPenalty l1 rl)
|
||||
bleuCombine (refs, sen) = bleuStep refs sen
|
||||
bleuAgg = CC.foldl bleuFuse (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
bleuFuse (a1, a2, a3, a4, a5, a6, a7, a8, a9) (b1, b2, b3, b4, b5, b6, b7, b8, b9) = (a1+b1, a2+b2, a3+b3, a4+b4, a5+b5, a6+b6, a7+b7, a8+b8, a9+b9)
|
||||
brevityPenalty c r
|
||||
@ -558,63 +509,19 @@ gevalCoreOnSources BLEU _ = gevalCoreWithoutInput SABLEU bleuCombine bleuAgg ble
|
||||
| c == 0 && r > 0 = 0.0
|
||||
| otherwise = exp (1.0 - (r /. c))
|
||||
|
||||
gevalCoreOnSources GLEU _ = gevalCoreWithoutInput SAGLEU gleuCombine gleuAgg gleuFinal noGraph
|
||||
gevalCoreOnSources GLEU _ = gevalCoreWithoutInput SAGLEU gleuAgg gleuFinal noGraph
|
||||
where gleuFinal (m, t) = m /. t
|
||||
gleuCombine (refs, sen) = gleuStep refs sen
|
||||
gleuAgg = CC.foldl gleuFuse (0, 0)
|
||||
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
|
||||
|
||||
gevalCoreOnSources WER _ = gevalCoreWithoutInput SAWER (uncurry werStep) averageC id noGraph
|
||||
gevalCoreOnSources WER _ = gevalCoreWithoutInput SAWER averageC id noGraph
|
||||
|
||||
gevalCoreOnSources Accuracy _ = gevalCoreWithoutInput SAAccuracy hitOrMiss averageC id noGraph
|
||||
where hitOrMiss (exp, got) =
|
||||
-- first try to parse what we got as a probability distribution
|
||||
-- (like the one used for Likelikehood/LogLossHashed metric)
|
||||
case parseWordSpecs got of
|
||||
Right wordSpecs -> if Prelude.null pairs
|
||||
then 0.0
|
||||
else indicator (exp == (snd $ Prelude.maximum pairs))
|
||||
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
|
||||
Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
|
||||
-- if the expected value is 0 or 1 treat values
|
||||
-- between 0.0 and 1.0 as probabilities
|
||||
-- for the positive outcome
|
||||
normalizeProbForAccuracy :: Text -> Text -> Text
|
||||
normalizeProbForAccuracy exp got
|
||||
| exp == (pack "1") = case tryReadingAsFloat got of
|
||||
Just p -> if p >= 0.5 && p <= 1.0 then exp else got
|
||||
Nothing -> got
|
||||
| exp == (pack "0") = case tryReadingAsFloat got of
|
||||
Just p -> if p < 0.5 && p >= 0.0 then exp else got
|
||||
Nothing -> got
|
||||
| otherwise = got
|
||||
tryReadingAsFloat :: Text -> Maybe Float
|
||||
tryReadingAsFloat = readMaybe . unpack
|
||||
gevalCoreOnSources Accuracy _ = gevalCoreWithoutInput SAAccuracy averageC id noGraph
|
||||
|
||||
gevalCoreOnSources (FMeasure beta) _ = gevalCoreWithoutInput SAFMeasure getCount countAgg (fMeasureOnCounts beta) noGraph
|
||||
where -- output value could be a probability (for compatibility with other measures)
|
||||
getCount :: (Bool, Bool) -> (Int, Int, Int)
|
||||
getCount (True, True) = (1, 1, 1)
|
||||
getCount (True, False) = (0, 1, 0)
|
||||
getCount (False, True) = (0, 0, 1)
|
||||
getCount (False, False) = (0, 0, 0)
|
||||
gevalCoreOnSources (FMeasure beta) _ = gevalCoreWithoutInput SAFMeasure countAgg (fMeasureOnCounts beta) noGraph
|
||||
|
||||
gevalCoreOnSources (MacroFMeasure beta) _ = gevalCoreWithoutInput SAMacroFMeasure getClassesInvolved gatherClassC macroAverageOnCounts noGraph
|
||||
where predicted got =
|
||||
-- first try to parse what we got as a probability distribution
|
||||
-- (like the one used for Likelikehood/LogLossHashed metric)
|
||||
case parseWordSpecs got of
|
||||
Right wordSpecs -> if Prelude.null pairs
|
||||
then Nothing
|
||||
else Just $ snd $ Prelude.maximum pairs
|
||||
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
|
||||
Left _ -> Just got
|
||||
getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
|
||||
getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
|
||||
getClassesInvolved (Just a, Just b) = if a == b
|
||||
then (Just a, Just a, Just a)
|
||||
else (Nothing, Just a, Just b)
|
||||
gatherClassC = CC.foldl gatherClassCombiner (M.empty, M.empty, M.empty)
|
||||
gevalCoreOnSources (MacroFMeasure beta) _ = gevalCoreWithoutInput SAMacroFMeasure gatherClassC macroAverageOnCounts noGraph
|
||||
where gatherClassC = CC.foldl gatherClassCombiner (M.empty, M.empty, M.empty)
|
||||
gatherClassCombiner (tpMap, expectedMap, gotMap) (tp, expected, got) =
|
||||
(insertMaybeToMap tp tpMap,
|
||||
insertMaybeToMap expected expectedMap,
|
||||
@ -629,66 +536,39 @@ gevalCoreOnSources (MacroFMeasure beta) _ = gevalCoreWithoutInput SAMacroFMeasur
|
||||
$ M.keys expectedMap) / (fromIntegral $ Prelude.length $ M.keys expectedMap)
|
||||
|
||||
gevalCoreOnSources (SoftFMeasure beta) _ = gevalCoreWithoutInput SASoftFMeasure
|
||||
getSoftCounts
|
||||
countAgg
|
||||
(fMeasureOnCounts beta)
|
||||
noGraph
|
||||
where getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
|
||||
Prelude.length expected,
|
||||
Prelude.length got)
|
||||
|
||||
gevalCoreOnSources (Soft2DFMeasure beta) _ = gevalCoreWithoutInput SASoft2DFMeasure
|
||||
count2DFScore
|
||||
averageC
|
||||
(CC.map (fMeasureOnCounts beta) .| averageC)
|
||||
id
|
||||
noGraph
|
||||
where
|
||||
count2DFScore (expected, got) = fMeasureOnCounts beta (tpArea, expArea, gotArea)
|
||||
where tpArea = coveredBy expected got
|
||||
expArea = totalArea expected
|
||||
gotArea = totalArea got
|
||||
|
||||
gevalCoreOnSources ClippEU _ = gevalCoreWithoutInput SAClippEU matchStep clippeuAgg finalStep noGraph
|
||||
gevalCoreOnSources ClippEU _ = gevalCoreWithoutInput SAClippEU clippeuAgg finalStep noGraph
|
||||
where
|
||||
matchStep (clippingSpecs, clippings) = (maxMatch matchClippingToSpec clippingSpecs clippings,
|
||||
Prelude.length clippingSpecs,
|
||||
Prelude.length clippings)
|
||||
clippeuAgg = CC.foldl countFolder (0, 0, 0)
|
||||
finalStep counts = f2MeasureOnCounts counts
|
||||
|
||||
gevalCoreOnSources NMI _ = gevalCoreWithoutInput SANMI id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix noGraph
|
||||
gevalCoreOnSources NMI _ = gevalCoreWithoutInput SANMI (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix noGraph
|
||||
|
||||
gevalCoreOnSources MAP _ = gevalCoreWithoutInput SAMAP
|
||||
(\(e,g) -> calculateMAPForOneResult e g)
|
||||
averageC
|
||||
id
|
||||
noGraph
|
||||
|
||||
gevalCoreOnSources BIOF1 _ = gevalCoreWithoutInput SABIOF1 (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph
|
||||
gevalCoreOnSources BIOF1 _ = gevalCoreWithoutInput SABIOF1 countAgg f1MeasureOnCounts noGraph
|
||||
|
||||
gevalCoreOnSources BIOF1Labels _ = gevalCoreWithoutInput SABIOF1Labels (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph
|
||||
gevalCoreOnSources BIOF1Labels _ = gevalCoreWithoutInput SABIOF1Labels countAgg f1MeasureOnCounts noGraph
|
||||
|
||||
gevalCoreOnSources TokenAccuracy _ = gevalCoreWithoutInput SATokenAccuracy
|
||||
countHitsAndTotals
|
||||
hitsAndTotalsAgg
|
||||
(\(hits, total) -> hits /. total)
|
||||
noGraph
|
||||
where countHitsAndTotals :: ([Text], [Text]) -> (Int, Int)
|
||||
countHitsAndTotals (es, os) =
|
||||
if Prelude.length os /= Prelude.length es
|
||||
then throw $ OtherException "wrong number of tokens"
|
||||
else Prelude.foldl matchFun
|
||||
(0, 0)
|
||||
(Prelude.zip es os)
|
||||
matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int)
|
||||
matchFun (h, t) (e, o)
|
||||
| e == (pack "*") = (h, t)
|
||||
| o `Prelude.elem` (splitOn (pack ";") e) = (h + 1, t + 1)
|
||||
| otherwise = (h, t + 1)
|
||||
where
|
||||
hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0)
|
||||
|
||||
gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput SAMultiLabelLogLoss
|
||||
(uncurry countLogLossOnProbList)
|
||||
averageC
|
||||
id
|
||||
noGraph
|
||||
@ -703,11 +583,10 @@ helperLogLossHashed nbOfBits finalStep expectedLineSource outLineSource =
|
||||
Right _ -> Right t
|
||||
Left m -> Left m
|
||||
|
||||
generalizedProbabilisticFMeasure beta metric parseEntities = gevalCoreWithoutInput metric
|
||||
getProbabilisticCounts
|
||||
probabilisticSoftAgg
|
||||
(fMeasureOnProbabilisticCounts beta)
|
||||
loessGraph
|
||||
generalizedProbabilisticFMeasure beta metric = gevalCoreWithoutInput metric
|
||||
probabilisticSoftAgg
|
||||
(fMeasureOnProbabilisticCounts beta)
|
||||
loessGraph
|
||||
where probabilisticSoftAgg :: Monad m => ConduitM ([Double], [Double], Double, Int) o m ([Double], [Double], Double, Int)
|
||||
probabilisticSoftAgg = CC.foldl probabilisticSoftFolder ([], [], fromInteger 0, 0)
|
||||
probabilisticSoftFolder (r1, p1, g1, e1) (r2, p2, g2, e2) = (r1 ++ r2, p1 ++ p2, g1 + g2, e1 + e2)
|
||||
@ -732,7 +611,7 @@ gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
|
||||
LineSource (ResourceT m) -> -- ^ source to read the output
|
||||
m (MetricOutput) -- ^ metric values for the output against the expected output
|
||||
gevalCoreByCorrelationMeasure correlationFunction =
|
||||
gevalCoreWithoutInput SAPearson id correlationC finalStep noGraph
|
||||
gevalCoreWithoutInput SAPearson correlationC finalStep noGraph
|
||||
where correlationC = CC.foldl (flip (:)) []
|
||||
finalStep pairs = correlationFunction $ V.fromList pairs
|
||||
|
||||
@ -749,20 +628,18 @@ skipLineNumber fun = fun . snd
|
||||
-- | A helper function to run evaluation when the input is not needed to calculate the metric value.
|
||||
gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
|
||||
=> SAMetric t
|
||||
-> ((ParsedExpectedType t, ParsedOutputType t) -> c) -- ^ function which combines parsed values into a single value
|
||||
-- (will be launched for each item, e.g. an error/cost function
|
||||
-- could be calculated here)
|
||||
-> (ConduitT c Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
|
||||
-> (ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) d) -- ^ a Conduit which aggregates all the combined values into
|
||||
-- a "total" value
|
||||
-> (d -> Double) -- ^ function to transform the "total" value into the final score
|
||||
-> (d -> Maybe GraphSeries)
|
||||
-> LineSource (ResourceT m) -- ^ source to read the expected output
|
||||
-> LineSource (ResourceT m) -- ^ source to read the output
|
||||
-> m (MetricOutput) -- ^ metric values for the output against the expected output
|
||||
gevalCoreWithoutInput smetric itemStep aggregator finalStep generateGraph expectedLineStream outLineStream =
|
||||
gevalCoreWithoutInputOnItemTargets (liftOp expParser) (liftOp outParser) itemStep aggregator finalStep generateGraph expectedLineStream outLineStream
|
||||
gevalCoreWithoutInput smetric aggregator finalStep generateGraph expectedLineStream outLineStream =
|
||||
gevalCoreWithoutInputOnItemTargets (liftOp expParser) (liftOp outParser) iStep aggregator finalStep generateGraph expectedLineStream outLineStream
|
||||
where expParser = expectedParser smetric
|
||||
outParser = outputParser smetric
|
||||
iStep = itemStep smetric
|
||||
|
||||
gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
|
||||
=> (ItemTarget -> Either String a) -- ^ parser for values in the expected output
|
||||
@ -922,21 +799,3 @@ items (LineSource lineSource itemDecoder preprocess _ _) parser =
|
||||
linesAsItems :: MonadResource m => LineSource m -> ConduitT () (SourceItem Text) m ()
|
||||
linesAsItems (LineSource lineSource _ _ _ _) =
|
||||
(lineSource .| CL.map Got) >> yield Done
|
||||
|
||||
itemAbsoluteError :: (Double, Double) -> Double
|
||||
itemAbsoluteError (exp, out) = abs (exp-out)
|
||||
|
||||
itemSquaredError :: (Double, Double) -> Double
|
||||
itemSquaredError (exp, out) = (exp-out)**2
|
||||
|
||||
|
||||
itemLogLossError :: (Double, Double) -> Double
|
||||
itemLogLossError (exp, out)
|
||||
| exp' > 0.5 = - (log out')
|
||||
| otherwise = - (log (1 - out'))
|
||||
where exp' = normalizeAsProb exp
|
||||
out' = normalizeAsProb out
|
||||
normalizeAsProb v
|
||||
| v >= 1.0 = 1.0
|
||||
| v <= 0.0 = 0.0
|
||||
| otherwise = v
|
||||
|
@ -7,7 +7,8 @@ module GEval.CreateChallenge
|
||||
|
||||
import GEval.Metric
|
||||
import GEval.EvaluationScheme
|
||||
import GEval.Core (GEvalSpecification(..), GEvalException(..), configFileName, gesMainMetric, defaultTestName)
|
||||
import GEval.Common (GEvalException(..))
|
||||
import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName)
|
||||
import GEval.Submit (tokenFileName)
|
||||
import qualified System.Directory as D
|
||||
import Control.Conditional (whenM)
|
||||
|
@ -11,7 +11,19 @@ module GEval.MetricsMechanics
|
||||
|
||||
import Data.Singletons.TH
|
||||
|
||||
import Text.Read (readMaybe)
|
||||
|
||||
import GEval.Metric
|
||||
import GEval.Common
|
||||
import GEval.BLEU (bleuStep, gleuStep)
|
||||
import GEval.WER (werStep)
|
||||
import GEval.Clippings (totalArea, coveredBy, clippEUMatchStep)
|
||||
import GEval.BIO (gatherCountsForBIO)
|
||||
|
||||
import GEval.Probability
|
||||
import GEval.PrecisionRecall (weightedMaxMatch, fMeasureOnCounts, calculateMAPForOneResult, getProbabilisticCounts, getCounts)
|
||||
|
||||
import Control.Exception
|
||||
|
||||
import Data.Text
|
||||
import Data.Text.Read as TR
|
||||
@ -25,7 +37,7 @@ import GEval.Annotation (Annotation, ObtainedAnnotation, parseAnnotations, parse
|
||||
import GEval.Clippings (Clipping, ClippingSpec, LabeledClipping, lineClippingsParser, lineClippingSpecsParser, lineLabeledClippingsParser)
|
||||
import GEval.BIO (TaggedEntity, parseBioSequenceIntoEntities, parseBioSequenceIntoEntitiesWithoutNormalization)
|
||||
import GEval.LogLossHashed (parseWordSpecs, wordSpecToPair)
|
||||
import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..))
|
||||
import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..), countLogLossOnProbList)
|
||||
|
||||
-- | Helper type so that singleton can be used.
|
||||
-- | (The problem is that some metrics are parametrized by Double
|
||||
@ -184,6 +196,60 @@ outputParser SAMultiLabelFMeasure = intoWords
|
||||
outputParser SAMultiLabelLogLoss = Right . parseIntoProbList
|
||||
outputParser SAMultiLabelLikelihood = Right . parseIntoProbList
|
||||
|
||||
type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
|
||||
ItemIntermediateRepresentationType ABLEU = (Int, Int, Int, Int, Int, Int, Int, Int, Int)
|
||||
ItemIntermediateRepresentationType AGLEU = (Int, Int)
|
||||
ItemIntermediateRepresentationType AFMeasure = (Int, Int, Int)
|
||||
ItemIntermediateRepresentationType AMacroFMeasure = (Maybe Text, Maybe Text, Maybe Text)
|
||||
ItemIntermediateRepresentationType ASoftFMeasure = (Double, Int, Int)
|
||||
ItemIntermediateRepresentationType ASoft2DFMeasure = (Integer, Integer, Integer)
|
||||
ItemIntermediateRepresentationType AClippEU = (Int, Int, Int)
|
||||
ItemIntermediateRepresentationType ANMI = (Text, Text)
|
||||
ItemIntermediateRepresentationType ABIOF1 = (Int, Int, Int)
|
||||
ItemIntermediateRepresentationType ABIOF1Labels = (Int, Int, Int)
|
||||
ItemIntermediateRepresentationType ATokenAccuracy = (Int, Int)
|
||||
ItemIntermediateRepresentationType AProbabilisticMultiLabelFMeasure = ([Double], [Double], Double, Int)
|
||||
ItemIntermediateRepresentationType AProbabilisticSoftFMeasure = ([Double], [Double], Double, Int)
|
||||
ItemIntermediateRepresentationType APearson = (Double, Double)
|
||||
ItemIntermediateRepresentationType ASpearman = (Double, Double)
|
||||
ItemIntermediateRepresentationType AMultiLabelFMeasure = (Int, Int, Int)
|
||||
ItemIntermediateRepresentationType ALogLossHashed = (Text, Text)
|
||||
ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
|
||||
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
|
||||
ItemIntermediateRepresentationType t = Double
|
||||
|
||||
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
|
||||
itemStep SARMSE = itemSquaredError
|
||||
itemStep SAMSE = itemSquaredError
|
||||
itemStep SAPearson = id
|
||||
itemStep SASpearman = id
|
||||
itemStep SABLEU = uncurry bleuStep
|
||||
itemStep SAGLEU = uncurry gleuStep
|
||||
itemStep SAWER = uncurry werStep
|
||||
itemStep SAAccuracy = hitOrMiss
|
||||
itemStep SAClippEU = clippEUMatchStep
|
||||
itemStep SAFMeasure = getCount
|
||||
itemStep SAMacroFMeasure = getClassesInvolved
|
||||
itemStep SASoftFMeasure = getSoftCounts
|
||||
itemStep SAProbabilisticMultiLabelFMeasure = getProbabilisticCounts
|
||||
itemStep SAProbabilisticSoftFMeasure = getProbabilisticCounts
|
||||
itemStep SASoft2DFMeasure = getSoft2DCounts
|
||||
itemStep SANMI = id
|
||||
itemStep SALogLossHashed = id
|
||||
itemStep SALikelihoodHashed = id
|
||||
itemStep SACharMatch = id
|
||||
itemStep SAMAP = uncurry calculateMAPForOneResult
|
||||
itemStep SALogLoss = itemLogLossError
|
||||
itemStep SALikelihood = itemLogLossError
|
||||
itemStep SABIOF1 = uncurry gatherCountsForBIO
|
||||
itemStep SABIOF1Labels = uncurry gatherCountsForBIO
|
||||
itemStep SATokenAccuracy = countHitsAndTotals
|
||||
itemStep SAMAE = itemAbsoluteError
|
||||
itemStep SASMAPE = smape
|
||||
itemStep SAMultiLabelFMeasure = getCounts (==)
|
||||
itemStep SAMultiLabelLogLoss = uncurry countLogLossOnProbList
|
||||
itemStep SAMultiLabelLikelihood = uncurry countLogLossOnProbList
|
||||
|
||||
|
||||
doubleParser = getValue . TR.double
|
||||
|
||||
@ -233,3 +299,63 @@ controlledParse parser t =
|
||||
case parseOnly parser t of
|
||||
(Right v) -> Right v
|
||||
(Left _) -> Left "cannot parse line"
|
||||
|
||||
smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
|
||||
|
||||
hitOrMiss (exp, got) =
|
||||
-- first try to parse what we got as a probability distribution
|
||||
-- (like the one used for Likelikehood/LogLossHashed metric)
|
||||
case parseWordSpecs got of
|
||||
Right wordSpecs -> if Prelude.null pairs
|
||||
then 0.0
|
||||
else indicator (exp == (snd $ Prelude.maximum pairs))
|
||||
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
|
||||
Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
|
||||
-- if the expected value is 0 or 1 treat values
|
||||
-- between 0.0 and 1.0 as probabilities
|
||||
-- for the positive outcome
|
||||
where normalizeProbForAccuracy :: Text -> Text -> Text
|
||||
normalizeProbForAccuracy exp got
|
||||
| exp == (pack "1") = case tryReadingAsFloat got of
|
||||
Just p -> if p >= 0.5 && p <= 1.0 then exp else got
|
||||
Nothing -> got
|
||||
| exp == (pack "0") = case tryReadingAsFloat got of
|
||||
Just p -> if p < 0.5 && p >= 0.0 then exp else got
|
||||
Nothing -> got
|
||||
| otherwise = got
|
||||
tryReadingAsFloat :: Text -> Maybe Float
|
||||
tryReadingAsFloat = readMaybe . unpack
|
||||
|
||||
getCount :: (Bool, Bool) -> (Int, Int, Int)
|
||||
getCount (True, True) = (1, 1, 1)
|
||||
getCount (True, False) = (0, 1, 0)
|
||||
getCount (False, True) = (0, 0, 1)
|
||||
getCount (False, False) = (0, 0, 0)
|
||||
|
||||
getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
|
||||
getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
|
||||
getClassesInvolved (Just a, Just b) = if a == b
|
||||
then (Just a, Just a, Just a)
|
||||
else (Nothing, Just a, Just b)
|
||||
|
||||
getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
|
||||
Prelude.length expected,
|
||||
Prelude.length got)
|
||||
|
||||
getSoft2DCounts (expected, got) = (tpArea, expArea, gotArea)
|
||||
where tpArea = coveredBy expected got
|
||||
expArea = totalArea expected
|
||||
gotArea = totalArea got
|
||||
|
||||
countHitsAndTotals :: ([Text], [Text]) -> (Int, Int)
|
||||
countHitsAndTotals (es, os) =
|
||||
if Prelude.length os /= Prelude.length es
|
||||
then throw $ OtherException "wrong number of tokens"
|
||||
else Prelude.foldl matchFun
|
||||
(0, 0)
|
||||
(Prelude.zip es os)
|
||||
where matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int)
|
||||
matchFun (h, t) (e, o)
|
||||
| e == (pack "*") = (h, t)
|
||||
| o `Prelude.elem` (splitOn (pack ";") e) = (h + 1, t + 1)
|
||||
| otherwise = (h, t + 1)
|
||||
|
@ -6,7 +6,7 @@ module GEval.Validation
|
||||
|
||||
import GEval.Metric
|
||||
import GEval.EvaluationScheme
|
||||
import GEval.Core (GEvalSpecification(..), GEvalException(..), somethingWrongWithFilesMessage, isEmptyFile, geval, defaultInputFile, defaultExpectedFile, defaultOutFile)
|
||||
import GEval.Core (GEvalSpecification(..), isEmptyFile, geval, defaultInputFile, defaultExpectedFile, defaultOutFile)
|
||||
import GEval.Common
|
||||
import qualified System.Directory as D
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user