Add Soft2D-F metric

This commit is contained in:
Filip Gralinski 2019-08-22 09:48:49 +02:00 committed by Filip Graliński
parent dab2646798
commit 6b63740c4a
13 changed files with 265 additions and 94 deletions

View File

@ -22,7 +22,7 @@ library
, GEval.CreateChallenge , GEval.CreateChallenge
, GEval.OptionsParser , GEval.OptionsParser
, GEval.BLEU , GEval.BLEU
, GEval.ClippEU , GEval.Clippings
, GEval.PrecisionRecall , GEval.PrecisionRecall
, GEval.ClusteringMetrics , GEval.ClusteringMetrics
, GEval.Common , GEval.Common

View File

@ -1,83 +0,0 @@
module GEval.ClippEU
where
import Data.Attoparsec.Text
import Data.Text
import Control.Applicative
import Control.Exception
import GEval.Common
newtype PageNumber = PageNumber Int
deriving (Eq, Show)
data Point = Point Int Int
deriving (Show, Eq)
data Rectangle = Rectangle Point Point
deriving (Show, Eq)
data Clipping = Clipping PageNumber Rectangle
deriving (Show, Eq)
data ClippingSpec = ClippingSpec PageNumber Rectangle Rectangle
deriving (Show, Eq)
matchClippingToSpec :: ClippingSpec -> Clipping -> Bool
matchClippingToSpec (ClippingSpec pageNo (Rectangle (Point x0' y0') (Point x1' y1'))
(Rectangle (Point x0'' y0'') (Point x1'' y1'')))
(Clipping pageNo' (Rectangle (Point x0 y0) (Point x1 y1))) =
pageNo == pageNo' &&
isInside x0 x0' x0'' && isInside y0 y0' y0'' && isInside x1 x1' x1'' && isInside y1 y1' y1''
where isInside c c' c'' = c >= c' && c <= c'' || c <= c' && c >= c''
lineClippingsParser :: Parser [Clipping]
lineClippingsParser = sepByWhitespaces clippingParser
clippingParser :: Parser Clipping
clippingParser = do
pageNo <- PageNumber <$> decimal
char '/'
rectangle <- rectangleParser
return $ Clipping pageNo rectangle
lineClippingSpecsParser :: Parser [ClippingSpec]
lineClippingSpecsParser = sepByWhitespaces clippingSpecParser
clippingSpecParser :: Parser ClippingSpec
clippingSpecParser = do
pageNo <- PageNumber <$> decimal
char '/'
rectangle <- rectangleParser
char '/'
margin <- decimal
return $ ClippingSpec pageNo (smallerRectangle margin rectangle) (extendedRectangle margin rectangle)
extendedRectangle :: Int -> Rectangle -> Rectangle
extendedRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
Rectangle (Point (x0 `nonNegativeDiff` margin) (y0 `nonNegativeDiff` margin))
(Point (x1 + margin) (y1 + margin))
smallerRectangle :: Int -> Rectangle -> Rectangle
smallerRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
Rectangle (Point (x0 + margin) (y0 + margin))
(Point (x1 `nonNegativeDiff` margin) (y1 `nonNegativeDiff` margin))
nonNegativeDiff x y
| x < y = 0
| otherwise = x - y
rectangleParser :: Parser Rectangle
rectangleParser = do
x0 <- decimal
char ','
y0 <- decimal
char ','
x1 <- decimal
char ','
y1 <- decimal
return $ Rectangle (Point x0 y0) (Point x1 y1)

160
src/GEval/Clippings.hs Normal file
View File

@ -0,0 +1,160 @@
{-# LANGUAGE OverloadedStrings #-}
module GEval.Clippings
where
import Data.Attoparsec.Text
import Data.Text
import Control.Applicative
import Control.Exception
import Data.Char (isSpace)
import Data.List (unfoldr)
import Data.Maybe (catMaybes)
import Debug.Trace
import GEval.Common
newtype PageNumber = PageNumber Int
deriving (Eq, Show)
data Point = Point Int Int
deriving (Show, Eq)
data Rectangle = Rectangle Point Point
deriving (Show, Eq)
data Clipping = Clipping PageNumber Rectangle
deriving (Show, Eq)
data ClippingSpec = ClippingSpec PageNumber Rectangle Rectangle
deriving (Show, Eq)
data LabeledClipping = LabeledClipping (Maybe Text) Clipping
deriving (Show, Eq)
matchClippingToSpec :: ClippingSpec -> Clipping -> Bool
matchClippingToSpec (ClippingSpec pageNo (Rectangle (Point x0' y0') (Point x1' y1'))
(Rectangle (Point x0'' y0'') (Point x1'' y1'')))
(Clipping pageNo' (Rectangle (Point x0 y0) (Point x1 y1))) =
pageNo == pageNo' &&
isInside x0 x0' x0'' && isInside y0 y0' y0'' && isInside x1 x1' x1'' && isInside y1 y1' y1''
where isInside c c' c'' = c >= c' && c <= c'' || c <= c' && c >= c''
lineClippingsParser :: Parser [Clipping]
lineClippingsParser = sepByWhitespaces clippingParser
clippingParser :: Parser Clipping
clippingParser = do
pageNo <- PageNumber <$> decimal
char '/'
rectangle <- rectangleParser
return $ Clipping pageNo rectangle
lineClippingSpecsParser :: Parser [ClippingSpec]
lineClippingSpecsParser = sepByWhitespaces clippingSpecParser
clippingSpecParser :: Parser ClippingSpec
clippingSpecParser = do
pageNo <- PageNumber <$> decimal
char '/'
rectangle <- rectangleParser
char '/'
margin <- decimal
return $ ClippingSpec pageNo (smallerRectangle margin rectangle) (extendedRectangle margin rectangle)
labeledClippingParser :: Parser LabeledClipping
labeledClippingParser =
choice [clippingWithLabelParser,
(LabeledClipping Nothing <$> clippingParser)]
clippingWithLabelParser :: Parser LabeledClipping
clippingWithLabelParser = do
label <- takeWhile1 (\c -> not (isSpace c) && c /= ':')
string ":"
clipping <- clippingParser
return $ LabeledClipping (Just label) clipping
lineLabeledClippingsParser :: Parser [LabeledClipping]
lineLabeledClippingsParser = sepByWhitespaces labeledClippingParser
extendedRectangle :: Int -> Rectangle -> Rectangle
extendedRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
Rectangle (Point (x0 `nonNegativeDiff` margin) (y0 `nonNegativeDiff` margin))
(Point (x1 + margin) (y1 + margin))
smallerRectangle :: Int -> Rectangle -> Rectangle
smallerRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
Rectangle (Point (x0 + margin) (y0 + margin))
(Point (x1 `nonNegativeDiff` margin) (y1 `nonNegativeDiff` margin))
nonNegativeDiff x y
| x < y = 0
| otherwise = x - y
rectangleParser :: Parser Rectangle
rectangleParser = do
x0 <- decimal
char ','
y0 <- decimal
char ','
x1 <- decimal
char ','
y1 <- decimal
return $ Rectangle (Point x0 y0) (Point x1 y1)
rectangleArea :: Rectangle -> Integer
rectangleArea (Rectangle (Point x0 y0) (Point x1 y1)) =
(fromIntegral $ x1 - x0) * (fromIntegral $ y1 - y0)
clippingArea :: LabeledClipping -> Integer
clippingArea (LabeledClipping _ (Clipping _ rect)) = rectangleArea rect
totalArea :: [LabeledClipping] -> Integer
totalArea = sum . Prelude.map clippingArea
coveredBy :: [LabeledClipping] -> [LabeledClipping] -> Integer
coveredBy clippingsA clippingsB = sum
$ Prelude.map rectangleArea
$ catMaybes
$ Data.List.unfoldr step (clippingsA, clippingsB)
where
step ([], _) = Nothing
step (firstA:restA, b) = Just (result, (newA ++ restA, newB))
where (result, newA, newB) = step' firstA b
step' rectA [] = (Nothing, [], [])
step' a (firstB:restB) = case partitionClippings a firstB of
Just (commonRect, leftoversA, leftoversB) -> (Just commonRect, leftoversA, leftoversB ++ restB)
Nothing -> let
(result, leftoversA, leftoversB) = step' a restB
in (result, leftoversA, firstB:leftoversB)
partitionClippings :: LabeledClipping -> LabeledClipping -> Maybe (Rectangle, [LabeledClipping], [LabeledClipping])
partitionClippings (LabeledClipping label (Clipping page rect@(Rectangle (Point x0 y0) (Point x1 y1))))
(LabeledClipping label' (Clipping page' rect'@(Rectangle (Point x0' y0') (Point x1' y1'))))
| label == label' && page == page' && not (areDisjoint rect rect') = Just (commonRect, leftovers, leftovers')
| otherwise = Nothing
where commonRect = Rectangle (Point cx0 cy0) (Point cx1 cy1)
cx0 = max x0 x0'
cx1 = min x1 x1'
cy0 = max y0 y0'
cy1 = min y1 y1'
leftovers = Prelude.map (\r -> LabeledClipping label (Clipping page r)) $ getLeftovers commonRect rect
leftovers' = Prelude.map (\r -> LabeledClipping label (Clipping page r)) $ getLeftovers commonRect rect'
areDisjoint :: Rectangle -> Rectangle -> Bool
areDisjoint (Rectangle (Point x0 y0) (Point x1 y1))
(Rectangle (Point x0' y0') (Point x1' y1')) =
x1 < x0' || x1' < x0 || y1 < y0' || y1' < y0
getLeftovers :: Rectangle -> Rectangle -> [Rectangle]
getLeftovers (Rectangle (Point x0 y0) (Point x1 y1))
(Rectangle (Point x0' y0') (Point x1' y1')) =
Prelude.filter validRectangle [Rectangle (Point x0 y0') (Point x1 (y0 - 1)),
Rectangle (Point x0 (y1 + 1)) (Point x1 y1'),
Rectangle (Point x0' y0') (Point (x0 - 1) y1'),
Rectangle (Point (x1 + 1) y0') (Point x1' y1')]
where validRectangle (Rectangle (Point x0 y0) (Point x1 y1)) = x0 <= x1 && y0 <= y1

View File

@ -29,6 +29,9 @@ instance ConvertibleToDouble Double where
instance ConvertibleToDouble Int where instance ConvertibleToDouble Int where
toDouble = fromIntegral toDouble = fromIntegral
instance ConvertibleToDouble Integer where
toDouble = fromIntegral
(/.) :: (ConvertibleToDouble f, Integral a) => f -> a -> Double (/.) :: (ConvertibleToDouble f, Integral a) => f -> a -> Double
x /. 0 = 1.0 x /. 0 = 1.0
x /. y = (toDouble x) / (fromIntegral y) x /. y = (toDouble x) / (fromIntegral y)

View File

@ -84,7 +84,7 @@ import qualified Data.IntSet as IS
import GEval.BLEU import GEval.BLEU
import GEval.Common import GEval.Common
import GEval.ClippEU import GEval.Clippings
import GEval.PrecisionRecall import GEval.PrecisionRecall
import GEval.ClusteringMetrics import GEval.ClusteringMetrics
import GEval.LogLossHashed import GEval.LogLossHashed
@ -647,6 +647,18 @@ gevalCore' (ProbabilisticSoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnot
where calibrationMeasure = softCalibration results probs where calibrationMeasure = softCalibration results probs
recall = got /. nbExpected recall = got /. nbExpected
gevalCore' (Soft2DFMeasure beta) _ = gevalCoreWithoutInput parseLabeledClippings
parseLabeledClippings
get2DCounts
countAgg
(fMeasureOnCounts beta)
noGraph
where
parseLabeledClippings = controlledParse lineLabeledClippingsParser
get2DCounts (expected, got) = (coveredBy expected got,
totalArea expected,
totalArea got)
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep noGraph gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep noGraph
where where
parseClippings = controlledParse lineClippingsParser parseClippings = controlledParse lineClippingsParser
@ -736,8 +748,8 @@ gevalCore' MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
where where
intoWords = Right . Data.Text.words intoWords = Right . Data.Text.words
countAgg :: (Num n, Monad m) => ConduitM (n, Int, Int) o m (n, Int, Int) countAgg :: (Num n, Num v, Monad m) => ConduitM (n, v, v) o m (n, v, v)
countAgg = CC.foldl countFolder (fromInteger 0, 0, 0) countAgg = CC.foldl countFolder (fromInteger 0, fromInteger 0, fromInteger 0)
gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
(V.Vector (Double, Double) -> Double) -> -- ^ correlation function (V.Vector (Double, Double) -> Double) -> -- ^ correlation function

View File

@ -128,6 +128,23 @@ GEval sample challenge — mark numbers
This is a sample/toy classification challenge for Gonito framework with Probabilistic-Soft-F-measure as the metric. This is a sample/toy classification challenge for Gonito framework with Probabilistic-Soft-F-measure as the metric.
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
readmeMDContents (Soft2DFMeasure _) testName = [i|
Sample challenge for clippings
==============================
The metric is Soft2D-F-score, i.e. F-score for clipping with partial
hits (when two rectangles overlaps) taken into account.
Format
------
Each clipping found in a corresponding PDF/DjVu file. Each clipping should be given as P/X0,Y0,X1,Y1, where:
P DjVu page number (starting from 1)
X0, Y0, X1, Y1 clipping coordinates (in pixels)
|] ++ (commonReadmeMDContents testName)
readmeMDContents NMI testName = [i| readmeMDContents NMI testName = [i|
Cluster proverbs Cluster proverbs
================ ================
@ -466,6 +483,7 @@ Love and hate LOVE HATE
I am sad SADNESS I am sad SADNESS
I am so sad and hateful SADNESS HATE I am so sad and hateful SADNESS HATE
|] |]
trainContents (Soft2DFMeasure _) = trainContents ClippEU
trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu
1/30,40,100,1000 bar.djvu 1/30,40,100,1000 bar.djvu
|] |]
@ -530,6 +548,7 @@ devInContents MultiLabelLikelihood = devInContents MultiLabelLogLoss
devInContents MultiLabelLogLoss = [hereLit|I am in love devInContents MultiLabelLogLoss = [hereLit|I am in love
I am a sad hater I am a sad hater
|] |]
devInContents (Soft2DFMeasure _) = devInContents ClippEU
devInContents ClippEU = [hereLit|file1.djvu devInContents ClippEU = [hereLit|file1.djvu
file2.djvu file2.djvu
|] |]
@ -592,6 +611,9 @@ devExpectedContents MultiLabelLikelihood = devExpectedContents MultiLabelLogLoss
devExpectedContents MultiLabelLogLoss = [hereLit|LOVE devExpectedContents MultiLabelLogLoss = [hereLit|LOVE
SADNESS LOVE SADNESS LOVE
|] |]
devExpectedContents (Soft2DFMeasure _) = [hereLit|
10/10,20,30,100 3/0,50,500,500
|]
devExpectedContents ClippEU = [hereLit| devExpectedContents ClippEU = [hereLit|
10/10,20,30,100/5 3/0,50,500,500/5 10/10,20,30,100/5 3/0,50,500,500/5
|] |]
@ -656,6 +678,7 @@ testInContents MultiLabelLikelihood = testInContents MultiLabelLogLoss
testInContents MultiLabelLogLoss = [hereLit|I am very sad testInContents MultiLabelLogLoss = [hereLit|I am very sad
I hate I hate
|] |]
testInContents (Soft2DFMeasure _) = testInContents ClippEU
testInContents ClippEU = [hereLit|file3.djvu testInContents ClippEU = [hereLit|file3.djvu
file4.djvu file4.djvu
|] |]
@ -720,6 +743,9 @@ testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLo
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS
HATE HATE
|] |]
testExpectedContents (Soft2DFMeasure _) = [hereLit|3/0,0,100,100
1/10,10,1000,1000
|]
testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10 testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10
1/10,10,1000,1000/10 1/10,10,1000,1000/10
|] |]

View File

@ -16,7 +16,7 @@ import Data.Text
import Data.Monoid ((<>)) import Data.Monoid ((<>))
import GEval.Common import GEval.Common
import GEval.ClippEU import GEval.Clippings
import Data.Attoparsec.Text (parseOnly) import Data.Attoparsec.Text (parseOnly)
-- here metrics and their basic properties are listed, -- here metrics and their basic properties are listed,
@ -28,7 +28,7 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
| BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double | BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
| MultiLabelLogLoss | MultiLabelLikelihood | MultiLabelLogLoss | MultiLabelLikelihood
| SoftFMeasure Double | ProbabilisticSoftFMeasure Double | SoftFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
deriving (Eq) deriving (Eq)
instance Show Metric where instance Show Metric where
@ -45,6 +45,7 @@ instance Show Metric where
show (MacroFMeasure beta) = "Macro-F" ++ (show beta) show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
show (SoftFMeasure beta) = "Soft-F" ++ (show beta) show (SoftFMeasure beta) = "Soft-F" ++ (show beta)
show (ProbabilisticSoftFMeasure beta) = "Probabilistic-Soft-F" ++ (show beta) show (ProbabilisticSoftFMeasure beta) = "Probabilistic-Soft-F" ++ (show beta)
show (Soft2DFMeasure beta) = "Soft2D-F" ++ (show beta)
show NMI = "NMI" show NMI = "NMI"
show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
nbOfBits == defaultLogLossHashedSize nbOfBits == defaultLogLossHashedSize
@ -91,6 +92,9 @@ instance Read Metric where
readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
[(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)] [(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
_ -> [] _ -> []
readsPrec p ('S':'o':'f':'t':'2':'D':'-':'F':theRest) = case readsPrec p theRest of
[(beta, theRest)] -> [(Soft2DFMeasure beta, theRest)]
_ -> []
readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
[(beta, theRest)] -> [(SoftFMeasure beta, theRest)] [(beta, theRest)] -> [(SoftFMeasure beta, theRest)]
_ -> [] _ -> []
@ -134,6 +138,7 @@ getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
getMetricOrdering (SoftFMeasure _) = TheHigherTheBetter getMetricOrdering (SoftFMeasure _) = TheHigherTheBetter
getMetricOrdering (ProbabilisticSoftFMeasure _) = TheHigherTheBetter getMetricOrdering (ProbabilisticSoftFMeasure _) = TheHigherTheBetter
getMetricOrdering (Soft2DFMeasure _) = TheHigherTheBetter
getMetricOrdering NMI = TheHigherTheBetter getMetricOrdering NMI = TheHigherTheBetter
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
@ -164,6 +169,7 @@ fixedNumberOfColumnsInExpected _ = True
fixedNumberOfColumnsInInput :: Metric -> Bool fixedNumberOfColumnsInInput :: Metric -> Bool
fixedNumberOfColumnsInInput (SoftFMeasure _) = False fixedNumberOfColumnsInInput (SoftFMeasure _) = False
fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False
fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False
fixedNumberOfColumnsInInput _ = True fixedNumberOfColumnsInInput _ = True
perfectOutLineFromExpectedLine :: Metric -> Text -> Text perfectOutLineFromExpectedLine :: Metric -> Text -> Text

View File

@ -21,6 +21,7 @@ import GEval.PrecisionRecall (weightedHarmonicMean)
import Text.Regex.PCRE.Heavy import Text.Regex.PCRE.Heavy
import Data.Either (fromRight) import Data.Either (fromRight)
import Data.String.Here import Data.String.Here
import Data.Maybe (fromMaybe)
import Data.List (intercalate) import Data.List (intercalate)
import Text.Printf import Text.Printf
@ -64,6 +65,9 @@ listOfAvailableMetrics = [RMSE,
ProbabilisticSoftFMeasure 1.0, ProbabilisticSoftFMeasure 1.0,
ProbabilisticSoftFMeasure 2.0, ProbabilisticSoftFMeasure 2.0,
ProbabilisticSoftFMeasure 0.25, ProbabilisticSoftFMeasure 0.25,
Soft2DFMeasure 1.0,
Soft2DFMeasure 2.0,
Soft2DFMeasure 0.25,
CharMatch] CharMatch]
extraInfo :: EvaluationScheme -> Maybe String extraInfo :: EvaluationScheme -> Maybe String
@ -120,14 +124,30 @@ formatEvaluationSchemeDescription :: EvaluationScheme -> String
formatEvaluationSchemeDescription scheme@(EvaluationScheme metric _) = show scheme ++ "\n" ++ description formatEvaluationSchemeDescription scheme@(EvaluationScheme metric _) = show scheme ++ "\n" ++ description
where description = if isEvaluationSchemeDescribed scheme where description = if isEvaluationSchemeDescribed scheme
then (getEvaluationSchemeDescription scheme) then (getEvaluationSchemeDescription scheme)
++ "\n"
++ (formatDescription metric)
++ "\nExample\n" ++ "\nExample\n"
++ (pasteLines "Expected output" "Sample output") ++ (pasteLines "Expected output" "Sample output")
++ concat (map (\(exp, out) -> pasteLines exp out) $ zip (lines $ testExpectedContents metric) ++ concat (map (\(exp, out) -> pasteLines exp out) $ zip (lines $ testExpectedContents metric)
(lines $ outContents metric)) (lines $ outContents metric))
++ "\nMetric value: " ++ (printf "%.4f" $ expectedScore scheme) ++ "\nMetric value: " ++ (printf "%.4f" $ expectedScore scheme)
++ (case scoreExplanation scheme of
Just expl -> "\n(" ++ expl ++ ")"
Nothing -> "")
else noDescription else noDescription
noDescription = [hereLit|THE METRIC HAS NO DESCRIPTION YET, PLEASE ADD AN ISSUE TO https://gitlab.com/filipg/geval/issues noDescription = [hereLit|THE METRIC HAS NO DESCRIPTION YET, PLEASE ADD AN ISSUE TO https://gitlab.com/filipg/geval/issues
IF YOU WANT TO HAVE IT DESCRIBED|] IF YOU WANT TO HAVE IT DESCRIBED|]
formatDescription :: Metric -> String
formatDescription (SoftFMeasure _) = [hereLit|Each line is a sequence of entities separated by spaces, each entity is of
the form LABEL:SPAN, where LABEL is any label and SPAN is defined using single integers, intervals or such
units separated with commas.
|]
scoreExplanation :: EvaluationScheme -> Maybe String
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
= Just [hereLit|We have a partial (0.75) success for the entity `inwords:1-4`, hence Recall = 0.75/1 = 0.75,
Precision = (0 + 0.75 + 0) / 3 = 0.25, so F-score = 0.375|]
pasteLines :: String -> String -> String pasteLines :: String -> String -> String
pasteLines a b = printf "%-35s %s\n" a b pasteLines a b = printf "%-35s %s\n" a b

View File

@ -55,13 +55,13 @@ f2MeasureOnCounts = fMeasureOnCounts 2.0
f1MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double f1MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double
f1MeasureOnCounts = fMeasureOnCounts 1.0 f1MeasureOnCounts = fMeasureOnCounts 1.0
fMeasureOnCounts :: ConvertibleToDouble n => Double -> (n, Int, Int) -> Double fMeasureOnCounts :: (ConvertibleToDouble n, Integral v) => Double -> (n, v, v) -> Double
fMeasureOnCounts beta (tp, nbExpected, nbGot) = fMeasureOnCounts beta (tp, nbExpected, nbGot) =
(1 + betaSquared) * p * r `safeDoubleDiv` (betaSquared * p + r) (1 + betaSquared) * p * r `safeDoubleDiv` (betaSquared * p + r)
where betaSquared = beta ^ 2 where betaSquared = beta ^ 2
(p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot) (p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)
countFolder :: Num n => (n, Int, Int) -> (n, Int, Int) -> (n, Int, Int) countFolder :: (Num n, Num v) => (n, v, v) -> (n, v, v) -> (n, v, v)
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3) countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int) getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int)
@ -80,7 +80,7 @@ precisionAndRecall matchFun expected got
= precisionAndRecallFromCounts (tp, length expected, length got) = precisionAndRecallFromCounts (tp, length expected, length got)
where tp = maxMatch matchFun expected got where tp = maxMatch matchFun expected got
precisionAndRecallFromCounts :: ConvertibleToDouble n => (n, Int, Int) -> (Double, Double) precisionAndRecallFromCounts :: (ConvertibleToDouble n, Integral v) => (n, v, v) -> (Double, Double)
precisionAndRecallFromCounts (tp, nbExpected, nbGot) = precisionAndRecallFromCounts (tp, nbExpected, nbGot) =
(tp /. nbGot, tp /. nbExpected) (tp /. nbGot, tp /. nbExpected)

View File

@ -10,7 +10,7 @@ import GEval.Common
import GEval.EvaluationScheme import GEval.EvaluationScheme
import GEval.OptionsParser import GEval.OptionsParser
import GEval.BLEU import GEval.BLEU
import GEval.ClippEU import GEval.Clippings
import GEval.PrecisionRecall import GEval.PrecisionRecall
import GEval.ClusteringMetrics import GEval.ClusteringMetrics
import GEval.BIO import GEval.BIO
@ -31,7 +31,6 @@ import GEval.CreateChallenge
import GEval.Validation import GEval.Validation
import Data.Map.Strict import Data.Map.Strict
import Data.Conduit.List (consume) import Data.Conduit.List (consume)
import System.FilePath import System.FilePath
@ -227,6 +226,21 @@ main = hspec $ do
r `shouldBe` [Clipping (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)), r `shouldBe` [Clipping (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)),
Clipping (PageNumber 10) (Rectangle (Point 20 30) (Point 40 50)), Clipping (PageNumber 10) (Rectangle (Point 20 30) (Point 40 50)),
Clipping (PageNumber 18) (Rectangle (Point 0 1) (Point 500 3))] Clipping (PageNumber 18) (Rectangle (Point 0 1) (Point 500 3))]
it "parsing labeled rectangles" $ do
let (Right r) = parseOnly (lineLabeledClippingsParser <* endOfInput) "2/0,0,2,3 foo:5/10,10,20,20 "
r `shouldBe` [LabeledClipping Nothing $ Clipping (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)),
LabeledClipping (Just "foo") $ Clipping (PageNumber 5) (Rectangle (Point 10 10) (Point 20 20))]
it "check partition" $ do
partitionClippings (LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 0 0) (Point 100 50)))
(LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 10 20) (Point 200 300)))
`shouldBe` Just (Rectangle (Point 10 20) (Point 100 50),
[LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 10 0) (Point 100 19)),
LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 0 0) (Point 9 50))],
[LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 10 51) (Point 100 300)),
LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 101 20) (Point 200 300))])
partitionClippings (LabeledClipping (Just "bar") (Clipping (PageNumber 10) (Rectangle (Point 100 100) (Point 200 149))))
(LabeledClipping (Just "bar") (Clipping (PageNumber 10) (Rectangle (Point 100 201) (Point 200 300))))
`shouldBe` Nothing
it "no rectangles" $ do it "no rectangles" $ do
let (Right r) = parseOnly (lineClippingsParser <* endOfInput) "" let (Right r) = parseOnly (lineClippingsParser <* endOfInput) ""
r `shouldBe` [] r `shouldBe` []
@ -260,6 +274,9 @@ main = hspec $ do
runGEvalTest "probabilistic-soft-f1-simple" `shouldReturnAlmost` 0.33333333333333 runGEvalTest "probabilistic-soft-f1-simple" `shouldReturnAlmost` 0.33333333333333
it "simple test with perfect calibration" $ do it "simple test with perfect calibration" $ do
runGEvalTest "probabilistic-soft-f1-calibrated" `shouldReturnAlmost` 0.88888888888 runGEvalTest "probabilistic-soft-f1-calibrated" `shouldReturnAlmost` 0.88888888888
describe "Soft2D-F1" $ do
it "simple test" $ do
runGEvalTest "soft2d-f1-simple" `shouldReturnAlmost` 0.30152621462832535
describe "test edit-distance library" $ do describe "test edit-distance library" $ do
it "for handling UTF8" $ do it "for handling UTF8" $ do
levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1 levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1
@ -318,6 +335,9 @@ main = hspec $ do
it "just parse" $ do it "just parse" $ do
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]), parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
Annotation "baz" (IS.fromList [4,5,6])] Annotation "baz" (IS.fromList [4,5,6])]
it "just parse 2" $ do
parseAnnotations "inwords:1-3 indigits:5" `shouldBe` Right [Annotation "inwords" (IS.fromList [1,2,3]),
Annotation "indigits" (IS.fromList [5])]
it "empty" $ do it "empty" $ do
parseAnnotations "" `shouldBe` Right [] parseAnnotations "" `shouldBe` Right []
it "empty (just spaces)" $ do it "empty (just spaces)" $ do

View File

@ -0,0 +1,3 @@
foo:2/10,20,100,50 bar:10/100,150,200,300
foo:3/4,8,10,1000
3/10,30,100,100
1 foo:2/10,20,100,50 bar:10/100,150,200,300
2 foo:3/4,8,10,1000
3 3/10,30,100,100

View File

@ -0,0 +1 @@
--metric Soft2D-F1

View File

@ -0,0 +1,3 @@
foo:1/10,20,100,50 bar:10/100,100,200,200
3/10,30,200,200
1 foo:1/10,20,100,50 bar:10/100,100,200,200
2 3/10,30,200,200