Add Soft2D-F metric
This commit is contained in:
parent
dab2646798
commit
6b63740c4a
@ -22,7 +22,7 @@ library
|
||||
, GEval.CreateChallenge
|
||||
, GEval.OptionsParser
|
||||
, GEval.BLEU
|
||||
, GEval.ClippEU
|
||||
, GEval.Clippings
|
||||
, GEval.PrecisionRecall
|
||||
, GEval.ClusteringMetrics
|
||||
, GEval.Common
|
||||
|
@ -1,83 +0,0 @@
|
||||
|
||||
module GEval.ClippEU
|
||||
where
|
||||
|
||||
import Data.Attoparsec.Text
|
||||
import Data.Text
|
||||
import Control.Applicative
|
||||
import Control.Exception
|
||||
|
||||
import GEval.Common
|
||||
|
||||
newtype PageNumber = PageNumber Int
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Point = Point Int Int
|
||||
deriving (Show, Eq)
|
||||
|
||||
data Rectangle = Rectangle Point Point
|
||||
deriving (Show, Eq)
|
||||
|
||||
data Clipping = Clipping PageNumber Rectangle
|
||||
deriving (Show, Eq)
|
||||
|
||||
data ClippingSpec = ClippingSpec PageNumber Rectangle Rectangle
|
||||
deriving (Show, Eq)
|
||||
|
||||
matchClippingToSpec :: ClippingSpec -> Clipping -> Bool
|
||||
matchClippingToSpec (ClippingSpec pageNo (Rectangle (Point x0' y0') (Point x1' y1'))
|
||||
(Rectangle (Point x0'' y0'') (Point x1'' y1'')))
|
||||
(Clipping pageNo' (Rectangle (Point x0 y0) (Point x1 y1))) =
|
||||
pageNo == pageNo' &&
|
||||
isInside x0 x0' x0'' && isInside y0 y0' y0'' && isInside x1 x1' x1'' && isInside y1 y1' y1''
|
||||
where isInside c c' c'' = c >= c' && c <= c'' || c <= c' && c >= c''
|
||||
|
||||
|
||||
lineClippingsParser :: Parser [Clipping]
|
||||
lineClippingsParser = sepByWhitespaces clippingParser
|
||||
|
||||
clippingParser :: Parser Clipping
|
||||
clippingParser = do
|
||||
pageNo <- PageNumber <$> decimal
|
||||
char '/'
|
||||
rectangle <- rectangleParser
|
||||
return $ Clipping pageNo rectangle
|
||||
|
||||
lineClippingSpecsParser :: Parser [ClippingSpec]
|
||||
lineClippingSpecsParser = sepByWhitespaces clippingSpecParser
|
||||
|
||||
clippingSpecParser :: Parser ClippingSpec
|
||||
clippingSpecParser = do
|
||||
pageNo <- PageNumber <$> decimal
|
||||
char '/'
|
||||
rectangle <- rectangleParser
|
||||
char '/'
|
||||
margin <- decimal
|
||||
return $ ClippingSpec pageNo (smallerRectangle margin rectangle) (extendedRectangle margin rectangle)
|
||||
|
||||
extendedRectangle :: Int -> Rectangle -> Rectangle
|
||||
extendedRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
|
||||
Rectangle (Point (x0 `nonNegativeDiff` margin) (y0 `nonNegativeDiff` margin))
|
||||
(Point (x1 + margin) (y1 + margin))
|
||||
|
||||
smallerRectangle :: Int -> Rectangle -> Rectangle
|
||||
smallerRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
|
||||
Rectangle (Point (x0 + margin) (y0 + margin))
|
||||
(Point (x1 `nonNegativeDiff` margin) (y1 `nonNegativeDiff` margin))
|
||||
|
||||
|
||||
|
||||
nonNegativeDiff x y
|
||||
| x < y = 0
|
||||
| otherwise = x - y
|
||||
|
||||
rectangleParser :: Parser Rectangle
|
||||
rectangleParser = do
|
||||
x0 <- decimal
|
||||
char ','
|
||||
y0 <- decimal
|
||||
char ','
|
||||
x1 <- decimal
|
||||
char ','
|
||||
y1 <- decimal
|
||||
return $ Rectangle (Point x0 y0) (Point x1 y1)
|
160
src/GEval/Clippings.hs
Normal file
160
src/GEval/Clippings.hs
Normal file
@ -0,0 +1,160 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module GEval.Clippings
|
||||
where
|
||||
|
||||
import Data.Attoparsec.Text
|
||||
import Data.Text
|
||||
import Control.Applicative
|
||||
import Control.Exception
|
||||
import Data.Char (isSpace)
|
||||
import Data.List (unfoldr)
|
||||
import Data.Maybe (catMaybes)
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
import GEval.Common
|
||||
|
||||
newtype PageNumber = PageNumber Int
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Point = Point Int Int
|
||||
deriving (Show, Eq)
|
||||
|
||||
data Rectangle = Rectangle Point Point
|
||||
deriving (Show, Eq)
|
||||
|
||||
data Clipping = Clipping PageNumber Rectangle
|
||||
deriving (Show, Eq)
|
||||
|
||||
data ClippingSpec = ClippingSpec PageNumber Rectangle Rectangle
|
||||
deriving (Show, Eq)
|
||||
|
||||
data LabeledClipping = LabeledClipping (Maybe Text) Clipping
|
||||
deriving (Show, Eq)
|
||||
|
||||
matchClippingToSpec :: ClippingSpec -> Clipping -> Bool
|
||||
matchClippingToSpec (ClippingSpec pageNo (Rectangle (Point x0' y0') (Point x1' y1'))
|
||||
(Rectangle (Point x0'' y0'') (Point x1'' y1'')))
|
||||
(Clipping pageNo' (Rectangle (Point x0 y0) (Point x1 y1))) =
|
||||
pageNo == pageNo' &&
|
||||
isInside x0 x0' x0'' && isInside y0 y0' y0'' && isInside x1 x1' x1'' && isInside y1 y1' y1''
|
||||
where isInside c c' c'' = c >= c' && c <= c'' || c <= c' && c >= c''
|
||||
|
||||
|
||||
lineClippingsParser :: Parser [Clipping]
|
||||
lineClippingsParser = sepByWhitespaces clippingParser
|
||||
|
||||
clippingParser :: Parser Clipping
|
||||
clippingParser = do
|
||||
pageNo <- PageNumber <$> decimal
|
||||
char '/'
|
||||
rectangle <- rectangleParser
|
||||
return $ Clipping pageNo rectangle
|
||||
|
||||
lineClippingSpecsParser :: Parser [ClippingSpec]
|
||||
lineClippingSpecsParser = sepByWhitespaces clippingSpecParser
|
||||
|
||||
clippingSpecParser :: Parser ClippingSpec
|
||||
clippingSpecParser = do
|
||||
pageNo <- PageNumber <$> decimal
|
||||
char '/'
|
||||
rectangle <- rectangleParser
|
||||
char '/'
|
||||
margin <- decimal
|
||||
return $ ClippingSpec pageNo (smallerRectangle margin rectangle) (extendedRectangle margin rectangle)
|
||||
|
||||
labeledClippingParser :: Parser LabeledClipping
|
||||
labeledClippingParser =
|
||||
choice [clippingWithLabelParser,
|
||||
(LabeledClipping Nothing <$> clippingParser)]
|
||||
|
||||
clippingWithLabelParser :: Parser LabeledClipping
|
||||
clippingWithLabelParser = do
|
||||
label <- takeWhile1 (\c -> not (isSpace c) && c /= ':')
|
||||
string ":"
|
||||
clipping <- clippingParser
|
||||
return $ LabeledClipping (Just label) clipping
|
||||
|
||||
lineLabeledClippingsParser :: Parser [LabeledClipping]
|
||||
lineLabeledClippingsParser = sepByWhitespaces labeledClippingParser
|
||||
|
||||
extendedRectangle :: Int -> Rectangle -> Rectangle
|
||||
extendedRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
|
||||
Rectangle (Point (x0 `nonNegativeDiff` margin) (y0 `nonNegativeDiff` margin))
|
||||
(Point (x1 + margin) (y1 + margin))
|
||||
|
||||
smallerRectangle :: Int -> Rectangle -> Rectangle
|
||||
smallerRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
|
||||
Rectangle (Point (x0 + margin) (y0 + margin))
|
||||
(Point (x1 `nonNegativeDiff` margin) (y1 `nonNegativeDiff` margin))
|
||||
|
||||
|
||||
|
||||
nonNegativeDiff x y
|
||||
| x < y = 0
|
||||
| otherwise = x - y
|
||||
|
||||
rectangleParser :: Parser Rectangle
|
||||
rectangleParser = do
|
||||
x0 <- decimal
|
||||
char ','
|
||||
y0 <- decimal
|
||||
char ','
|
||||
x1 <- decimal
|
||||
char ','
|
||||
y1 <- decimal
|
||||
return $ Rectangle (Point x0 y0) (Point x1 y1)
|
||||
|
||||
rectangleArea :: Rectangle -> Integer
|
||||
rectangleArea (Rectangle (Point x0 y0) (Point x1 y1)) =
|
||||
(fromIntegral $ x1 - x0) * (fromIntegral $ y1 - y0)
|
||||
|
||||
clippingArea :: LabeledClipping -> Integer
|
||||
clippingArea (LabeledClipping _ (Clipping _ rect)) = rectangleArea rect
|
||||
|
||||
totalArea :: [LabeledClipping] -> Integer
|
||||
totalArea = sum . Prelude.map clippingArea
|
||||
|
||||
coveredBy :: [LabeledClipping] -> [LabeledClipping] -> Integer
|
||||
coveredBy clippingsA clippingsB = sum
|
||||
$ Prelude.map rectangleArea
|
||||
$ catMaybes
|
||||
$ Data.List.unfoldr step (clippingsA, clippingsB)
|
||||
where
|
||||
step ([], _) = Nothing
|
||||
step (firstA:restA, b) = Just (result, (newA ++ restA, newB))
|
||||
where (result, newA, newB) = step' firstA b
|
||||
step' rectA [] = (Nothing, [], [])
|
||||
step' a (firstB:restB) = case partitionClippings a firstB of
|
||||
Just (commonRect, leftoversA, leftoversB) -> (Just commonRect, leftoversA, leftoversB ++ restB)
|
||||
Nothing -> let
|
||||
(result, leftoversA, leftoversB) = step' a restB
|
||||
in (result, leftoversA, firstB:leftoversB)
|
||||
|
||||
partitionClippings :: LabeledClipping -> LabeledClipping -> Maybe (Rectangle, [LabeledClipping], [LabeledClipping])
|
||||
partitionClippings (LabeledClipping label (Clipping page rect@(Rectangle (Point x0 y0) (Point x1 y1))))
|
||||
(LabeledClipping label' (Clipping page' rect'@(Rectangle (Point x0' y0') (Point x1' y1'))))
|
||||
| label == label' && page == page' && not (areDisjoint rect rect') = Just (commonRect, leftovers, leftovers')
|
||||
| otherwise = Nothing
|
||||
where commonRect = Rectangle (Point cx0 cy0) (Point cx1 cy1)
|
||||
cx0 = max x0 x0'
|
||||
cx1 = min x1 x1'
|
||||
cy0 = max y0 y0'
|
||||
cy1 = min y1 y1'
|
||||
leftovers = Prelude.map (\r -> LabeledClipping label (Clipping page r)) $ getLeftovers commonRect rect
|
||||
leftovers' = Prelude.map (\r -> LabeledClipping label (Clipping page r)) $ getLeftovers commonRect rect'
|
||||
|
||||
areDisjoint :: Rectangle -> Rectangle -> Bool
|
||||
areDisjoint (Rectangle (Point x0 y0) (Point x1 y1))
|
||||
(Rectangle (Point x0' y0') (Point x1' y1')) =
|
||||
x1 < x0' || x1' < x0 || y1 < y0' || y1' < y0
|
||||
|
||||
getLeftovers :: Rectangle -> Rectangle -> [Rectangle]
|
||||
getLeftovers (Rectangle (Point x0 y0) (Point x1 y1))
|
||||
(Rectangle (Point x0' y0') (Point x1' y1')) =
|
||||
Prelude.filter validRectangle [Rectangle (Point x0 y0') (Point x1 (y0 - 1)),
|
||||
Rectangle (Point x0 (y1 + 1)) (Point x1 y1'),
|
||||
Rectangle (Point x0' y0') (Point (x0 - 1) y1'),
|
||||
Rectangle (Point (x1 + 1) y0') (Point x1' y1')]
|
||||
where validRectangle (Rectangle (Point x0 y0) (Point x1 y1)) = x0 <= x1 && y0 <= y1
|
@ -29,6 +29,9 @@ instance ConvertibleToDouble Double where
|
||||
instance ConvertibleToDouble Int where
|
||||
toDouble = fromIntegral
|
||||
|
||||
instance ConvertibleToDouble Integer where
|
||||
toDouble = fromIntegral
|
||||
|
||||
(/.) :: (ConvertibleToDouble f, Integral a) => f -> a -> Double
|
||||
x /. 0 = 1.0
|
||||
x /. y = (toDouble x) / (fromIntegral y)
|
||||
|
@ -84,7 +84,7 @@ import qualified Data.IntSet as IS
|
||||
|
||||
import GEval.BLEU
|
||||
import GEval.Common
|
||||
import GEval.ClippEU
|
||||
import GEval.Clippings
|
||||
import GEval.PrecisionRecall
|
||||
import GEval.ClusteringMetrics
|
||||
import GEval.LogLossHashed
|
||||
@ -647,6 +647,18 @@ gevalCore' (ProbabilisticSoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnot
|
||||
where calibrationMeasure = softCalibration results probs
|
||||
recall = got /. nbExpected
|
||||
|
||||
gevalCore' (Soft2DFMeasure beta) _ = gevalCoreWithoutInput parseLabeledClippings
|
||||
parseLabeledClippings
|
||||
get2DCounts
|
||||
countAgg
|
||||
(fMeasureOnCounts beta)
|
||||
noGraph
|
||||
where
|
||||
parseLabeledClippings = controlledParse lineLabeledClippingsParser
|
||||
get2DCounts (expected, got) = (coveredBy expected got,
|
||||
totalArea expected,
|
||||
totalArea got)
|
||||
|
||||
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep noGraph
|
||||
where
|
||||
parseClippings = controlledParse lineClippingsParser
|
||||
@ -736,8 +748,8 @@ gevalCore' MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
|
||||
where
|
||||
intoWords = Right . Data.Text.words
|
||||
|
||||
countAgg :: (Num n, Monad m) => ConduitM (n, Int, Int) o m (n, Int, Int)
|
||||
countAgg = CC.foldl countFolder (fromInteger 0, 0, 0)
|
||||
countAgg :: (Num n, Num v, Monad m) => ConduitM (n, v, v) o m (n, v, v)
|
||||
countAgg = CC.foldl countFolder (fromInteger 0, fromInteger 0, fromInteger 0)
|
||||
|
||||
gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
|
||||
(V.Vector (Double, Double) -> Double) -> -- ^ correlation function
|
||||
|
@ -128,6 +128,23 @@ GEval sample challenge — mark numbers
|
||||
This is a sample/toy classification challenge for Gonito framework with Probabilistic-Soft-F-measure as the metric.
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents (Soft2DFMeasure _) testName = [i|
|
||||
Sample challenge for clippings
|
||||
==============================
|
||||
|
||||
The metric is Soft2D-F-score, i.e. F-score for clipping with partial
|
||||
hits (when two rectangles overlaps) taken into account.
|
||||
|
||||
Format
|
||||
------
|
||||
|
||||
Each clipping found in a corresponding PDF/DjVu file. Each clipping should be given as P/X0,Y0,X1,Y1, where:
|
||||
|
||||
P — DjVu page number (starting from 1)
|
||||
X0, Y0, X1, Y1 — clipping coordinates (in pixels)
|
||||
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents NMI testName = [i|
|
||||
Cluster proverbs
|
||||
================
|
||||
@ -466,6 +483,7 @@ Love and hate LOVE HATE
|
||||
I am sad SADNESS
|
||||
I am so sad and hateful SADNESS HATE
|
||||
|]
|
||||
trainContents (Soft2DFMeasure _) = trainContents ClippEU
|
||||
trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu
|
||||
1/30,40,100,1000 bar.djvu
|
||||
|]
|
||||
@ -530,6 +548,7 @@ devInContents MultiLabelLikelihood = devInContents MultiLabelLogLoss
|
||||
devInContents MultiLabelLogLoss = [hereLit|I am in love
|
||||
I am a sad hater
|
||||
|]
|
||||
devInContents (Soft2DFMeasure _) = devInContents ClippEU
|
||||
devInContents ClippEU = [hereLit|file1.djvu
|
||||
file2.djvu
|
||||
|]
|
||||
@ -592,6 +611,9 @@ devExpectedContents MultiLabelLikelihood = devExpectedContents MultiLabelLogLoss
|
||||
devExpectedContents MultiLabelLogLoss = [hereLit|LOVE
|
||||
SADNESS LOVE
|
||||
|]
|
||||
devExpectedContents (Soft2DFMeasure _) = [hereLit|
|
||||
10/10,20,30,100 3/0,50,500,500
|
||||
|]
|
||||
devExpectedContents ClippEU = [hereLit|
|
||||
10/10,20,30,100/5 3/0,50,500,500/5
|
||||
|]
|
||||
@ -656,6 +678,7 @@ testInContents MultiLabelLikelihood = testInContents MultiLabelLogLoss
|
||||
testInContents MultiLabelLogLoss = [hereLit|I am very sad
|
||||
I hate
|
||||
|]
|
||||
testInContents (Soft2DFMeasure _) = testInContents ClippEU
|
||||
testInContents ClippEU = [hereLit|file3.djvu
|
||||
file4.djvu
|
||||
|]
|
||||
@ -720,6 +743,9 @@ testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLo
|
||||
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS
|
||||
HATE
|
||||
|]
|
||||
testExpectedContents (Soft2DFMeasure _) = [hereLit|3/0,0,100,100
|
||||
1/10,10,1000,1000
|
||||
|]
|
||||
testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10
|
||||
1/10,10,1000,1000/10
|
||||
|]
|
||||
|
@ -16,7 +16,7 @@ import Data.Text
|
||||
import Data.Monoid ((<>))
|
||||
|
||||
import GEval.Common
|
||||
import GEval.ClippEU
|
||||
import GEval.Clippings
|
||||
import Data.Attoparsec.Text (parseOnly)
|
||||
|
||||
-- here metrics and their basic properties are listed,
|
||||
@ -28,7 +28,7 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
|
||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||
| BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
|
||||
| MultiLabelLogLoss | MultiLabelLikelihood
|
||||
| SoftFMeasure Double | ProbabilisticSoftFMeasure Double
|
||||
| SoftFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
|
||||
deriving (Eq)
|
||||
|
||||
instance Show Metric where
|
||||
@ -45,6 +45,7 @@ instance Show Metric where
|
||||
show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
|
||||
show (SoftFMeasure beta) = "Soft-F" ++ (show beta)
|
||||
show (ProbabilisticSoftFMeasure beta) = "Probabilistic-Soft-F" ++ (show beta)
|
||||
show (Soft2DFMeasure beta) = "Soft2D-F" ++ (show beta)
|
||||
show NMI = "NMI"
|
||||
show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
|
||||
nbOfBits == defaultLogLossHashedSize
|
||||
@ -91,6 +92,9 @@ instance Read Metric where
|
||||
readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
|
||||
[(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
|
||||
_ -> []
|
||||
readsPrec p ('S':'o':'f':'t':'2':'D':'-':'F':theRest) = case readsPrec p theRest of
|
||||
[(beta, theRest)] -> [(Soft2DFMeasure beta, theRest)]
|
||||
_ -> []
|
||||
readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
|
||||
[(beta, theRest)] -> [(SoftFMeasure beta, theRest)]
|
||||
_ -> []
|
||||
@ -134,6 +138,7 @@ getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering (SoftFMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering (ProbabilisticSoftFMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering (Soft2DFMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering NMI = TheHigherTheBetter
|
||||
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
||||
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
|
||||
@ -164,6 +169,7 @@ fixedNumberOfColumnsInExpected _ = True
|
||||
fixedNumberOfColumnsInInput :: Metric -> Bool
|
||||
fixedNumberOfColumnsInInput (SoftFMeasure _) = False
|
||||
fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False
|
||||
fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False
|
||||
fixedNumberOfColumnsInInput _ = True
|
||||
|
||||
perfectOutLineFromExpectedLine :: Metric -> Text -> Text
|
||||
|
@ -21,6 +21,7 @@ import GEval.PrecisionRecall (weightedHarmonicMean)
|
||||
import Text.Regex.PCRE.Heavy
|
||||
import Data.Either (fromRight)
|
||||
import Data.String.Here
|
||||
import Data.Maybe (fromMaybe)
|
||||
|
||||
import Data.List (intercalate)
|
||||
import Text.Printf
|
||||
@ -64,6 +65,9 @@ listOfAvailableMetrics = [RMSE,
|
||||
ProbabilisticSoftFMeasure 1.0,
|
||||
ProbabilisticSoftFMeasure 2.0,
|
||||
ProbabilisticSoftFMeasure 0.25,
|
||||
Soft2DFMeasure 1.0,
|
||||
Soft2DFMeasure 2.0,
|
||||
Soft2DFMeasure 0.25,
|
||||
CharMatch]
|
||||
|
||||
extraInfo :: EvaluationScheme -> Maybe String
|
||||
@ -120,14 +124,30 @@ formatEvaluationSchemeDescription :: EvaluationScheme -> String
|
||||
formatEvaluationSchemeDescription scheme@(EvaluationScheme metric _) = show scheme ++ "\n" ++ description
|
||||
where description = if isEvaluationSchemeDescribed scheme
|
||||
then (getEvaluationSchemeDescription scheme)
|
||||
++ "\n"
|
||||
++ (formatDescription metric)
|
||||
++ "\nExample\n"
|
||||
++ (pasteLines "Expected output" "Sample output")
|
||||
++ concat (map (\(exp, out) -> pasteLines exp out) $ zip (lines $ testExpectedContents metric)
|
||||
(lines $ outContents metric))
|
||||
++ "\nMetric value: " ++ (printf "%.4f" $ expectedScore scheme)
|
||||
++ (case scoreExplanation scheme of
|
||||
Just expl -> "\n(" ++ expl ++ ")"
|
||||
Nothing -> "")
|
||||
else noDescription
|
||||
noDescription = [hereLit|THE METRIC HAS NO DESCRIPTION YET, PLEASE ADD AN ISSUE TO https://gitlab.com/filipg/geval/issues
|
||||
IF YOU WANT TO HAVE IT DESCRIBED|]
|
||||
|
||||
formatDescription :: Metric -> String
|
||||
formatDescription (SoftFMeasure _) = [hereLit|Each line is a sequence of entities separated by spaces, each entity is of
|
||||
the form LABEL:SPAN, where LABEL is any label and SPAN is defined using single integers, intervals or such
|
||||
units separated with commas.
|
||||
|]
|
||||
|
||||
scoreExplanation :: EvaluationScheme -> Maybe String
|
||||
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
|
||||
= Just [hereLit|We have a partial (0.75) success for the entity `inwords:1-4`, hence Recall = 0.75/1 = 0.75,
|
||||
Precision = (0 + 0.75 + 0) / 3 = 0.25, so F-score = 0.375|]
|
||||
|
||||
pasteLines :: String -> String -> String
|
||||
pasteLines a b = printf "%-35s %s\n" a b
|
||||
|
@ -55,13 +55,13 @@ f2MeasureOnCounts = fMeasureOnCounts 2.0
|
||||
f1MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double
|
||||
f1MeasureOnCounts = fMeasureOnCounts 1.0
|
||||
|
||||
fMeasureOnCounts :: ConvertibleToDouble n => Double -> (n, Int, Int) -> Double
|
||||
fMeasureOnCounts :: (ConvertibleToDouble n, Integral v) => Double -> (n, v, v) -> Double
|
||||
fMeasureOnCounts beta (tp, nbExpected, nbGot) =
|
||||
(1 + betaSquared) * p * r `safeDoubleDiv` (betaSquared * p + r)
|
||||
where betaSquared = beta ^ 2
|
||||
(p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)
|
||||
|
||||
countFolder :: Num n => (n, Int, Int) -> (n, Int, Int) -> (n, Int, Int)
|
||||
countFolder :: (Num n, Num v) => (n, v, v) -> (n, v, v) -> (n, v, v)
|
||||
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
|
||||
|
||||
getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int)
|
||||
@ -80,7 +80,7 @@ precisionAndRecall matchFun expected got
|
||||
= precisionAndRecallFromCounts (tp, length expected, length got)
|
||||
where tp = maxMatch matchFun expected got
|
||||
|
||||
precisionAndRecallFromCounts :: ConvertibleToDouble n => (n, Int, Int) -> (Double, Double)
|
||||
precisionAndRecallFromCounts :: (ConvertibleToDouble n, Integral v) => (n, v, v) -> (Double, Double)
|
||||
precisionAndRecallFromCounts (tp, nbExpected, nbGot) =
|
||||
(tp /. nbGot, tp /. nbExpected)
|
||||
|
||||
|
24
test/Spec.hs
24
test/Spec.hs
@ -10,7 +10,7 @@ import GEval.Common
|
||||
import GEval.EvaluationScheme
|
||||
import GEval.OptionsParser
|
||||
import GEval.BLEU
|
||||
import GEval.ClippEU
|
||||
import GEval.Clippings
|
||||
import GEval.PrecisionRecall
|
||||
import GEval.ClusteringMetrics
|
||||
import GEval.BIO
|
||||
@ -31,7 +31,6 @@ import GEval.CreateChallenge
|
||||
import GEval.Validation
|
||||
|
||||
import Data.Map.Strict
|
||||
|
||||
import Data.Conduit.List (consume)
|
||||
|
||||
import System.FilePath
|
||||
@ -227,6 +226,21 @@ main = hspec $ do
|
||||
r `shouldBe` [Clipping (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)),
|
||||
Clipping (PageNumber 10) (Rectangle (Point 20 30) (Point 40 50)),
|
||||
Clipping (PageNumber 18) (Rectangle (Point 0 1) (Point 500 3))]
|
||||
it "parsing labeled rectangles" $ do
|
||||
let (Right r) = parseOnly (lineLabeledClippingsParser <* endOfInput) "2/0,0,2,3 foo:5/10,10,20,20 "
|
||||
r `shouldBe` [LabeledClipping Nothing $ Clipping (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)),
|
||||
LabeledClipping (Just "foo") $ Clipping (PageNumber 5) (Rectangle (Point 10 10) (Point 20 20))]
|
||||
it "check partition" $ do
|
||||
partitionClippings (LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 0 0) (Point 100 50)))
|
||||
(LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 10 20) (Point 200 300)))
|
||||
`shouldBe` Just (Rectangle (Point 10 20) (Point 100 50),
|
||||
[LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 10 0) (Point 100 19)),
|
||||
LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 0 0) (Point 9 50))],
|
||||
[LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 10 51) (Point 100 300)),
|
||||
LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 101 20) (Point 200 300))])
|
||||
partitionClippings (LabeledClipping (Just "bar") (Clipping (PageNumber 10) (Rectangle (Point 100 100) (Point 200 149))))
|
||||
(LabeledClipping (Just "bar") (Clipping (PageNumber 10) (Rectangle (Point 100 201) (Point 200 300))))
|
||||
`shouldBe` Nothing
|
||||
it "no rectangles" $ do
|
||||
let (Right r) = parseOnly (lineClippingsParser <* endOfInput) ""
|
||||
r `shouldBe` []
|
||||
@ -260,6 +274,9 @@ main = hspec $ do
|
||||
runGEvalTest "probabilistic-soft-f1-simple" `shouldReturnAlmost` 0.33333333333333
|
||||
it "simple test with perfect calibration" $ do
|
||||
runGEvalTest "probabilistic-soft-f1-calibrated" `shouldReturnAlmost` 0.88888888888
|
||||
describe "Soft2D-F1" $ do
|
||||
it "simple test" $ do
|
||||
runGEvalTest "soft2d-f1-simple" `shouldReturnAlmost` 0.30152621462832535
|
||||
describe "test edit-distance library" $ do
|
||||
it "for handling UTF8" $ do
|
||||
levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1
|
||||
@ -318,6 +335,9 @@ main = hspec $ do
|
||||
it "just parse" $ do
|
||||
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
|
||||
Annotation "baz" (IS.fromList [4,5,6])]
|
||||
it "just parse 2" $ do
|
||||
parseAnnotations "inwords:1-3 indigits:5" `shouldBe` Right [Annotation "inwords" (IS.fromList [1,2,3]),
|
||||
Annotation "indigits" (IS.fromList [5])]
|
||||
it "empty" $ do
|
||||
parseAnnotations "" `shouldBe` Right []
|
||||
it "empty (just spaces)" $ do
|
||||
|
@ -0,0 +1,3 @@
|
||||
foo:2/10,20,100,50 bar:10/100,150,200,300
|
||||
foo:3/4,8,10,1000
|
||||
3/10,30,100,100
|
|
1
test/soft2d-f1-simple/soft2d-f1-simple/config.txt
Normal file
1
test/soft2d-f1-simple/soft2d-f1-simple/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric Soft2D-F1
|
@ -0,0 +1,3 @@
|
||||
foo:1/10,20,100,50 bar:10/100,100,200,200
|
||||
|
||||
3/10,30,200,200
|
|
Loading…
Reference in New Issue
Block a user