Add Soft2D-F metric
This commit is contained in:
parent
dab2646798
commit
6b63740c4a
@ -22,7 +22,7 @@ library
|
|||||||
, GEval.CreateChallenge
|
, GEval.CreateChallenge
|
||||||
, GEval.OptionsParser
|
, GEval.OptionsParser
|
||||||
, GEval.BLEU
|
, GEval.BLEU
|
||||||
, GEval.ClippEU
|
, GEval.Clippings
|
||||||
, GEval.PrecisionRecall
|
, GEval.PrecisionRecall
|
||||||
, GEval.ClusteringMetrics
|
, GEval.ClusteringMetrics
|
||||||
, GEval.Common
|
, GEval.Common
|
||||||
|
@ -1,83 +0,0 @@
|
|||||||
|
|
||||||
module GEval.ClippEU
|
|
||||||
where
|
|
||||||
|
|
||||||
import Data.Attoparsec.Text
|
|
||||||
import Data.Text
|
|
||||||
import Control.Applicative
|
|
||||||
import Control.Exception
|
|
||||||
|
|
||||||
import GEval.Common
|
|
||||||
|
|
||||||
newtype PageNumber = PageNumber Int
|
|
||||||
deriving (Eq, Show)
|
|
||||||
|
|
||||||
data Point = Point Int Int
|
|
||||||
deriving (Show, Eq)
|
|
||||||
|
|
||||||
data Rectangle = Rectangle Point Point
|
|
||||||
deriving (Show, Eq)
|
|
||||||
|
|
||||||
data Clipping = Clipping PageNumber Rectangle
|
|
||||||
deriving (Show, Eq)
|
|
||||||
|
|
||||||
data ClippingSpec = ClippingSpec PageNumber Rectangle Rectangle
|
|
||||||
deriving (Show, Eq)
|
|
||||||
|
|
||||||
matchClippingToSpec :: ClippingSpec -> Clipping -> Bool
|
|
||||||
matchClippingToSpec (ClippingSpec pageNo (Rectangle (Point x0' y0') (Point x1' y1'))
|
|
||||||
(Rectangle (Point x0'' y0'') (Point x1'' y1'')))
|
|
||||||
(Clipping pageNo' (Rectangle (Point x0 y0) (Point x1 y1))) =
|
|
||||||
pageNo == pageNo' &&
|
|
||||||
isInside x0 x0' x0'' && isInside y0 y0' y0'' && isInside x1 x1' x1'' && isInside y1 y1' y1''
|
|
||||||
where isInside c c' c'' = c >= c' && c <= c'' || c <= c' && c >= c''
|
|
||||||
|
|
||||||
|
|
||||||
lineClippingsParser :: Parser [Clipping]
|
|
||||||
lineClippingsParser = sepByWhitespaces clippingParser
|
|
||||||
|
|
||||||
clippingParser :: Parser Clipping
|
|
||||||
clippingParser = do
|
|
||||||
pageNo <- PageNumber <$> decimal
|
|
||||||
char '/'
|
|
||||||
rectangle <- rectangleParser
|
|
||||||
return $ Clipping pageNo rectangle
|
|
||||||
|
|
||||||
lineClippingSpecsParser :: Parser [ClippingSpec]
|
|
||||||
lineClippingSpecsParser = sepByWhitespaces clippingSpecParser
|
|
||||||
|
|
||||||
clippingSpecParser :: Parser ClippingSpec
|
|
||||||
clippingSpecParser = do
|
|
||||||
pageNo <- PageNumber <$> decimal
|
|
||||||
char '/'
|
|
||||||
rectangle <- rectangleParser
|
|
||||||
char '/'
|
|
||||||
margin <- decimal
|
|
||||||
return $ ClippingSpec pageNo (smallerRectangle margin rectangle) (extendedRectangle margin rectangle)
|
|
||||||
|
|
||||||
extendedRectangle :: Int -> Rectangle -> Rectangle
|
|
||||||
extendedRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
|
|
||||||
Rectangle (Point (x0 `nonNegativeDiff` margin) (y0 `nonNegativeDiff` margin))
|
|
||||||
(Point (x1 + margin) (y1 + margin))
|
|
||||||
|
|
||||||
smallerRectangle :: Int -> Rectangle -> Rectangle
|
|
||||||
smallerRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
|
|
||||||
Rectangle (Point (x0 + margin) (y0 + margin))
|
|
||||||
(Point (x1 `nonNegativeDiff` margin) (y1 `nonNegativeDiff` margin))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
nonNegativeDiff x y
|
|
||||||
| x < y = 0
|
|
||||||
| otherwise = x - y
|
|
||||||
|
|
||||||
rectangleParser :: Parser Rectangle
|
|
||||||
rectangleParser = do
|
|
||||||
x0 <- decimal
|
|
||||||
char ','
|
|
||||||
y0 <- decimal
|
|
||||||
char ','
|
|
||||||
x1 <- decimal
|
|
||||||
char ','
|
|
||||||
y1 <- decimal
|
|
||||||
return $ Rectangle (Point x0 y0) (Point x1 y1)
|
|
160
src/GEval/Clippings.hs
Normal file
160
src/GEval/Clippings.hs
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module GEval.Clippings
|
||||||
|
where
|
||||||
|
|
||||||
|
import Data.Attoparsec.Text
|
||||||
|
import Data.Text
|
||||||
|
import Control.Applicative
|
||||||
|
import Control.Exception
|
||||||
|
import Data.Char (isSpace)
|
||||||
|
import Data.List (unfoldr)
|
||||||
|
import Data.Maybe (catMaybes)
|
||||||
|
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
|
import GEval.Common
|
||||||
|
|
||||||
|
newtype PageNumber = PageNumber Int
|
||||||
|
deriving (Eq, Show)
|
||||||
|
|
||||||
|
data Point = Point Int Int
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
data Rectangle = Rectangle Point Point
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
data Clipping = Clipping PageNumber Rectangle
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
data ClippingSpec = ClippingSpec PageNumber Rectangle Rectangle
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
data LabeledClipping = LabeledClipping (Maybe Text) Clipping
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
matchClippingToSpec :: ClippingSpec -> Clipping -> Bool
|
||||||
|
matchClippingToSpec (ClippingSpec pageNo (Rectangle (Point x0' y0') (Point x1' y1'))
|
||||||
|
(Rectangle (Point x0'' y0'') (Point x1'' y1'')))
|
||||||
|
(Clipping pageNo' (Rectangle (Point x0 y0) (Point x1 y1))) =
|
||||||
|
pageNo == pageNo' &&
|
||||||
|
isInside x0 x0' x0'' && isInside y0 y0' y0'' && isInside x1 x1' x1'' && isInside y1 y1' y1''
|
||||||
|
where isInside c c' c'' = c >= c' && c <= c'' || c <= c' && c >= c''
|
||||||
|
|
||||||
|
|
||||||
|
lineClippingsParser :: Parser [Clipping]
|
||||||
|
lineClippingsParser = sepByWhitespaces clippingParser
|
||||||
|
|
||||||
|
clippingParser :: Parser Clipping
|
||||||
|
clippingParser = do
|
||||||
|
pageNo <- PageNumber <$> decimal
|
||||||
|
char '/'
|
||||||
|
rectangle <- rectangleParser
|
||||||
|
return $ Clipping pageNo rectangle
|
||||||
|
|
||||||
|
lineClippingSpecsParser :: Parser [ClippingSpec]
|
||||||
|
lineClippingSpecsParser = sepByWhitespaces clippingSpecParser
|
||||||
|
|
||||||
|
clippingSpecParser :: Parser ClippingSpec
|
||||||
|
clippingSpecParser = do
|
||||||
|
pageNo <- PageNumber <$> decimal
|
||||||
|
char '/'
|
||||||
|
rectangle <- rectangleParser
|
||||||
|
char '/'
|
||||||
|
margin <- decimal
|
||||||
|
return $ ClippingSpec pageNo (smallerRectangle margin rectangle) (extendedRectangle margin rectangle)
|
||||||
|
|
||||||
|
labeledClippingParser :: Parser LabeledClipping
|
||||||
|
labeledClippingParser =
|
||||||
|
choice [clippingWithLabelParser,
|
||||||
|
(LabeledClipping Nothing <$> clippingParser)]
|
||||||
|
|
||||||
|
clippingWithLabelParser :: Parser LabeledClipping
|
||||||
|
clippingWithLabelParser = do
|
||||||
|
label <- takeWhile1 (\c -> not (isSpace c) && c /= ':')
|
||||||
|
string ":"
|
||||||
|
clipping <- clippingParser
|
||||||
|
return $ LabeledClipping (Just label) clipping
|
||||||
|
|
||||||
|
lineLabeledClippingsParser :: Parser [LabeledClipping]
|
||||||
|
lineLabeledClippingsParser = sepByWhitespaces labeledClippingParser
|
||||||
|
|
||||||
|
extendedRectangle :: Int -> Rectangle -> Rectangle
|
||||||
|
extendedRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
|
||||||
|
Rectangle (Point (x0 `nonNegativeDiff` margin) (y0 `nonNegativeDiff` margin))
|
||||||
|
(Point (x1 + margin) (y1 + margin))
|
||||||
|
|
||||||
|
smallerRectangle :: Int -> Rectangle -> Rectangle
|
||||||
|
smallerRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
|
||||||
|
Rectangle (Point (x0 + margin) (y0 + margin))
|
||||||
|
(Point (x1 `nonNegativeDiff` margin) (y1 `nonNegativeDiff` margin))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
nonNegativeDiff x y
|
||||||
|
| x < y = 0
|
||||||
|
| otherwise = x - y
|
||||||
|
|
||||||
|
rectangleParser :: Parser Rectangle
|
||||||
|
rectangleParser = do
|
||||||
|
x0 <- decimal
|
||||||
|
char ','
|
||||||
|
y0 <- decimal
|
||||||
|
char ','
|
||||||
|
x1 <- decimal
|
||||||
|
char ','
|
||||||
|
y1 <- decimal
|
||||||
|
return $ Rectangle (Point x0 y0) (Point x1 y1)
|
||||||
|
|
||||||
|
rectangleArea :: Rectangle -> Integer
|
||||||
|
rectangleArea (Rectangle (Point x0 y0) (Point x1 y1)) =
|
||||||
|
(fromIntegral $ x1 - x0) * (fromIntegral $ y1 - y0)
|
||||||
|
|
||||||
|
clippingArea :: LabeledClipping -> Integer
|
||||||
|
clippingArea (LabeledClipping _ (Clipping _ rect)) = rectangleArea rect
|
||||||
|
|
||||||
|
totalArea :: [LabeledClipping] -> Integer
|
||||||
|
totalArea = sum . Prelude.map clippingArea
|
||||||
|
|
||||||
|
coveredBy :: [LabeledClipping] -> [LabeledClipping] -> Integer
|
||||||
|
coveredBy clippingsA clippingsB = sum
|
||||||
|
$ Prelude.map rectangleArea
|
||||||
|
$ catMaybes
|
||||||
|
$ Data.List.unfoldr step (clippingsA, clippingsB)
|
||||||
|
where
|
||||||
|
step ([], _) = Nothing
|
||||||
|
step (firstA:restA, b) = Just (result, (newA ++ restA, newB))
|
||||||
|
where (result, newA, newB) = step' firstA b
|
||||||
|
step' rectA [] = (Nothing, [], [])
|
||||||
|
step' a (firstB:restB) = case partitionClippings a firstB of
|
||||||
|
Just (commonRect, leftoversA, leftoversB) -> (Just commonRect, leftoversA, leftoversB ++ restB)
|
||||||
|
Nothing -> let
|
||||||
|
(result, leftoversA, leftoversB) = step' a restB
|
||||||
|
in (result, leftoversA, firstB:leftoversB)
|
||||||
|
|
||||||
|
partitionClippings :: LabeledClipping -> LabeledClipping -> Maybe (Rectangle, [LabeledClipping], [LabeledClipping])
|
||||||
|
partitionClippings (LabeledClipping label (Clipping page rect@(Rectangle (Point x0 y0) (Point x1 y1))))
|
||||||
|
(LabeledClipping label' (Clipping page' rect'@(Rectangle (Point x0' y0') (Point x1' y1'))))
|
||||||
|
| label == label' && page == page' && not (areDisjoint rect rect') = Just (commonRect, leftovers, leftovers')
|
||||||
|
| otherwise = Nothing
|
||||||
|
where commonRect = Rectangle (Point cx0 cy0) (Point cx1 cy1)
|
||||||
|
cx0 = max x0 x0'
|
||||||
|
cx1 = min x1 x1'
|
||||||
|
cy0 = max y0 y0'
|
||||||
|
cy1 = min y1 y1'
|
||||||
|
leftovers = Prelude.map (\r -> LabeledClipping label (Clipping page r)) $ getLeftovers commonRect rect
|
||||||
|
leftovers' = Prelude.map (\r -> LabeledClipping label (Clipping page r)) $ getLeftovers commonRect rect'
|
||||||
|
|
||||||
|
areDisjoint :: Rectangle -> Rectangle -> Bool
|
||||||
|
areDisjoint (Rectangle (Point x0 y0) (Point x1 y1))
|
||||||
|
(Rectangle (Point x0' y0') (Point x1' y1')) =
|
||||||
|
x1 < x0' || x1' < x0 || y1 < y0' || y1' < y0
|
||||||
|
|
||||||
|
getLeftovers :: Rectangle -> Rectangle -> [Rectangle]
|
||||||
|
getLeftovers (Rectangle (Point x0 y0) (Point x1 y1))
|
||||||
|
(Rectangle (Point x0' y0') (Point x1' y1')) =
|
||||||
|
Prelude.filter validRectangle [Rectangle (Point x0 y0') (Point x1 (y0 - 1)),
|
||||||
|
Rectangle (Point x0 (y1 + 1)) (Point x1 y1'),
|
||||||
|
Rectangle (Point x0' y0') (Point (x0 - 1) y1'),
|
||||||
|
Rectangle (Point (x1 + 1) y0') (Point x1' y1')]
|
||||||
|
where validRectangle (Rectangle (Point x0 y0) (Point x1 y1)) = x0 <= x1 && y0 <= y1
|
@ -29,6 +29,9 @@ instance ConvertibleToDouble Double where
|
|||||||
instance ConvertibleToDouble Int where
|
instance ConvertibleToDouble Int where
|
||||||
toDouble = fromIntegral
|
toDouble = fromIntegral
|
||||||
|
|
||||||
|
instance ConvertibleToDouble Integer where
|
||||||
|
toDouble = fromIntegral
|
||||||
|
|
||||||
(/.) :: (ConvertibleToDouble f, Integral a) => f -> a -> Double
|
(/.) :: (ConvertibleToDouble f, Integral a) => f -> a -> Double
|
||||||
x /. 0 = 1.0
|
x /. 0 = 1.0
|
||||||
x /. y = (toDouble x) / (fromIntegral y)
|
x /. y = (toDouble x) / (fromIntegral y)
|
||||||
|
@ -84,7 +84,7 @@ import qualified Data.IntSet as IS
|
|||||||
|
|
||||||
import GEval.BLEU
|
import GEval.BLEU
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
import GEval.ClippEU
|
import GEval.Clippings
|
||||||
import GEval.PrecisionRecall
|
import GEval.PrecisionRecall
|
||||||
import GEval.ClusteringMetrics
|
import GEval.ClusteringMetrics
|
||||||
import GEval.LogLossHashed
|
import GEval.LogLossHashed
|
||||||
@ -647,6 +647,18 @@ gevalCore' (ProbabilisticSoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnot
|
|||||||
where calibrationMeasure = softCalibration results probs
|
where calibrationMeasure = softCalibration results probs
|
||||||
recall = got /. nbExpected
|
recall = got /. nbExpected
|
||||||
|
|
||||||
|
gevalCore' (Soft2DFMeasure beta) _ = gevalCoreWithoutInput parseLabeledClippings
|
||||||
|
parseLabeledClippings
|
||||||
|
get2DCounts
|
||||||
|
countAgg
|
||||||
|
(fMeasureOnCounts beta)
|
||||||
|
noGraph
|
||||||
|
where
|
||||||
|
parseLabeledClippings = controlledParse lineLabeledClippingsParser
|
||||||
|
get2DCounts (expected, got) = (coveredBy expected got,
|
||||||
|
totalArea expected,
|
||||||
|
totalArea got)
|
||||||
|
|
||||||
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep noGraph
|
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep noGraph
|
||||||
where
|
where
|
||||||
parseClippings = controlledParse lineClippingsParser
|
parseClippings = controlledParse lineClippingsParser
|
||||||
@ -736,8 +748,8 @@ gevalCore' MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
|
|||||||
where
|
where
|
||||||
intoWords = Right . Data.Text.words
|
intoWords = Right . Data.Text.words
|
||||||
|
|
||||||
countAgg :: (Num n, Monad m) => ConduitM (n, Int, Int) o m (n, Int, Int)
|
countAgg :: (Num n, Num v, Monad m) => ConduitM (n, v, v) o m (n, v, v)
|
||||||
countAgg = CC.foldl countFolder (fromInteger 0, 0, 0)
|
countAgg = CC.foldl countFolder (fromInteger 0, fromInteger 0, fromInteger 0)
|
||||||
|
|
||||||
gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
|
gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
|
||||||
(V.Vector (Double, Double) -> Double) -> -- ^ correlation function
|
(V.Vector (Double, Double) -> Double) -> -- ^ correlation function
|
||||||
|
@ -128,6 +128,23 @@ GEval sample challenge — mark numbers
|
|||||||
This is a sample/toy classification challenge for Gonito framework with Probabilistic-Soft-F-measure as the metric.
|
This is a sample/toy classification challenge for Gonito framework with Probabilistic-Soft-F-measure as the metric.
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
readmeMDContents (Soft2DFMeasure _) testName = [i|
|
||||||
|
Sample challenge for clippings
|
||||||
|
==============================
|
||||||
|
|
||||||
|
The metric is Soft2D-F-score, i.e. F-score for clipping with partial
|
||||||
|
hits (when two rectangles overlaps) taken into account.
|
||||||
|
|
||||||
|
Format
|
||||||
|
------
|
||||||
|
|
||||||
|
Each clipping found in a corresponding PDF/DjVu file. Each clipping should be given as P/X0,Y0,X1,Y1, where:
|
||||||
|
|
||||||
|
P — DjVu page number (starting from 1)
|
||||||
|
X0, Y0, X1, Y1 — clipping coordinates (in pixels)
|
||||||
|
|
||||||
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents NMI testName = [i|
|
readmeMDContents NMI testName = [i|
|
||||||
Cluster proverbs
|
Cluster proverbs
|
||||||
================
|
================
|
||||||
@ -466,6 +483,7 @@ Love and hate LOVE HATE
|
|||||||
I am sad SADNESS
|
I am sad SADNESS
|
||||||
I am so sad and hateful SADNESS HATE
|
I am so sad and hateful SADNESS HATE
|
||||||
|]
|
|]
|
||||||
|
trainContents (Soft2DFMeasure _) = trainContents ClippEU
|
||||||
trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu
|
trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu
|
||||||
1/30,40,100,1000 bar.djvu
|
1/30,40,100,1000 bar.djvu
|
||||||
|]
|
|]
|
||||||
@ -530,6 +548,7 @@ devInContents MultiLabelLikelihood = devInContents MultiLabelLogLoss
|
|||||||
devInContents MultiLabelLogLoss = [hereLit|I am in love
|
devInContents MultiLabelLogLoss = [hereLit|I am in love
|
||||||
I am a sad hater
|
I am a sad hater
|
||||||
|]
|
|]
|
||||||
|
devInContents (Soft2DFMeasure _) = devInContents ClippEU
|
||||||
devInContents ClippEU = [hereLit|file1.djvu
|
devInContents ClippEU = [hereLit|file1.djvu
|
||||||
file2.djvu
|
file2.djvu
|
||||||
|]
|
|]
|
||||||
@ -592,6 +611,9 @@ devExpectedContents MultiLabelLikelihood = devExpectedContents MultiLabelLogLoss
|
|||||||
devExpectedContents MultiLabelLogLoss = [hereLit|LOVE
|
devExpectedContents MultiLabelLogLoss = [hereLit|LOVE
|
||||||
SADNESS LOVE
|
SADNESS LOVE
|
||||||
|]
|
|]
|
||||||
|
devExpectedContents (Soft2DFMeasure _) = [hereLit|
|
||||||
|
10/10,20,30,100 3/0,50,500,500
|
||||||
|
|]
|
||||||
devExpectedContents ClippEU = [hereLit|
|
devExpectedContents ClippEU = [hereLit|
|
||||||
10/10,20,30,100/5 3/0,50,500,500/5
|
10/10,20,30,100/5 3/0,50,500,500/5
|
||||||
|]
|
|]
|
||||||
@ -656,6 +678,7 @@ testInContents MultiLabelLikelihood = testInContents MultiLabelLogLoss
|
|||||||
testInContents MultiLabelLogLoss = [hereLit|I am very sad
|
testInContents MultiLabelLogLoss = [hereLit|I am very sad
|
||||||
I hate
|
I hate
|
||||||
|]
|
|]
|
||||||
|
testInContents (Soft2DFMeasure _) = testInContents ClippEU
|
||||||
testInContents ClippEU = [hereLit|file3.djvu
|
testInContents ClippEU = [hereLit|file3.djvu
|
||||||
file4.djvu
|
file4.djvu
|
||||||
|]
|
|]
|
||||||
@ -720,6 +743,9 @@ testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLo
|
|||||||
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS
|
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS
|
||||||
HATE
|
HATE
|
||||||
|]
|
|]
|
||||||
|
testExpectedContents (Soft2DFMeasure _) = [hereLit|3/0,0,100,100
|
||||||
|
1/10,10,1000,1000
|
||||||
|
|]
|
||||||
testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10
|
testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10
|
||||||
1/10,10,1000,1000/10
|
1/10,10,1000,1000/10
|
||||||
|]
|
|]
|
||||||
|
@ -16,7 +16,7 @@ import Data.Text
|
|||||||
import Data.Monoid ((<>))
|
import Data.Monoid ((<>))
|
||||||
|
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
import GEval.ClippEU
|
import GEval.Clippings
|
||||||
import Data.Attoparsec.Text (parseOnly)
|
import Data.Attoparsec.Text (parseOnly)
|
||||||
|
|
||||||
-- here metrics and their basic properties are listed,
|
-- here metrics and their basic properties are listed,
|
||||||
@ -28,7 +28,7 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
|
|||||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||||
| BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
|
| BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
|
||||||
| MultiLabelLogLoss | MultiLabelLikelihood
|
| MultiLabelLogLoss | MultiLabelLikelihood
|
||||||
| SoftFMeasure Double | ProbabilisticSoftFMeasure Double
|
| SoftFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
instance Show Metric where
|
instance Show Metric where
|
||||||
@ -45,6 +45,7 @@ instance Show Metric where
|
|||||||
show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
|
show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
|
||||||
show (SoftFMeasure beta) = "Soft-F" ++ (show beta)
|
show (SoftFMeasure beta) = "Soft-F" ++ (show beta)
|
||||||
show (ProbabilisticSoftFMeasure beta) = "Probabilistic-Soft-F" ++ (show beta)
|
show (ProbabilisticSoftFMeasure beta) = "Probabilistic-Soft-F" ++ (show beta)
|
||||||
|
show (Soft2DFMeasure beta) = "Soft2D-F" ++ (show beta)
|
||||||
show NMI = "NMI"
|
show NMI = "NMI"
|
||||||
show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
|
show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
|
||||||
nbOfBits == defaultLogLossHashedSize
|
nbOfBits == defaultLogLossHashedSize
|
||||||
@ -91,6 +92,9 @@ instance Read Metric where
|
|||||||
readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
|
readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
|
||||||
[(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
|
[(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
|
||||||
_ -> []
|
_ -> []
|
||||||
|
readsPrec p ('S':'o':'f':'t':'2':'D':'-':'F':theRest) = case readsPrec p theRest of
|
||||||
|
[(beta, theRest)] -> [(Soft2DFMeasure beta, theRest)]
|
||||||
|
_ -> []
|
||||||
readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
|
readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
|
||||||
[(beta, theRest)] -> [(SoftFMeasure beta, theRest)]
|
[(beta, theRest)] -> [(SoftFMeasure beta, theRest)]
|
||||||
_ -> []
|
_ -> []
|
||||||
@ -134,6 +138,7 @@ getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
|||||||
getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
|
getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
|
||||||
getMetricOrdering (SoftFMeasure _) = TheHigherTheBetter
|
getMetricOrdering (SoftFMeasure _) = TheHigherTheBetter
|
||||||
getMetricOrdering (ProbabilisticSoftFMeasure _) = TheHigherTheBetter
|
getMetricOrdering (ProbabilisticSoftFMeasure _) = TheHigherTheBetter
|
||||||
|
getMetricOrdering (Soft2DFMeasure _) = TheHigherTheBetter
|
||||||
getMetricOrdering NMI = TheHigherTheBetter
|
getMetricOrdering NMI = TheHigherTheBetter
|
||||||
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
||||||
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
|
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
|
||||||
@ -164,6 +169,7 @@ fixedNumberOfColumnsInExpected _ = True
|
|||||||
fixedNumberOfColumnsInInput :: Metric -> Bool
|
fixedNumberOfColumnsInInput :: Metric -> Bool
|
||||||
fixedNumberOfColumnsInInput (SoftFMeasure _) = False
|
fixedNumberOfColumnsInInput (SoftFMeasure _) = False
|
||||||
fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False
|
fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False
|
||||||
|
fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False
|
||||||
fixedNumberOfColumnsInInput _ = True
|
fixedNumberOfColumnsInInput _ = True
|
||||||
|
|
||||||
perfectOutLineFromExpectedLine :: Metric -> Text -> Text
|
perfectOutLineFromExpectedLine :: Metric -> Text -> Text
|
||||||
|
@ -21,6 +21,7 @@ import GEval.PrecisionRecall (weightedHarmonicMean)
|
|||||||
import Text.Regex.PCRE.Heavy
|
import Text.Regex.PCRE.Heavy
|
||||||
import Data.Either (fromRight)
|
import Data.Either (fromRight)
|
||||||
import Data.String.Here
|
import Data.String.Here
|
||||||
|
import Data.Maybe (fromMaybe)
|
||||||
|
|
||||||
import Data.List (intercalate)
|
import Data.List (intercalate)
|
||||||
import Text.Printf
|
import Text.Printf
|
||||||
@ -64,6 +65,9 @@ listOfAvailableMetrics = [RMSE,
|
|||||||
ProbabilisticSoftFMeasure 1.0,
|
ProbabilisticSoftFMeasure 1.0,
|
||||||
ProbabilisticSoftFMeasure 2.0,
|
ProbabilisticSoftFMeasure 2.0,
|
||||||
ProbabilisticSoftFMeasure 0.25,
|
ProbabilisticSoftFMeasure 0.25,
|
||||||
|
Soft2DFMeasure 1.0,
|
||||||
|
Soft2DFMeasure 2.0,
|
||||||
|
Soft2DFMeasure 0.25,
|
||||||
CharMatch]
|
CharMatch]
|
||||||
|
|
||||||
extraInfo :: EvaluationScheme -> Maybe String
|
extraInfo :: EvaluationScheme -> Maybe String
|
||||||
@ -120,14 +124,30 @@ formatEvaluationSchemeDescription :: EvaluationScheme -> String
|
|||||||
formatEvaluationSchemeDescription scheme@(EvaluationScheme metric _) = show scheme ++ "\n" ++ description
|
formatEvaluationSchemeDescription scheme@(EvaluationScheme metric _) = show scheme ++ "\n" ++ description
|
||||||
where description = if isEvaluationSchemeDescribed scheme
|
where description = if isEvaluationSchemeDescribed scheme
|
||||||
then (getEvaluationSchemeDescription scheme)
|
then (getEvaluationSchemeDescription scheme)
|
||||||
|
++ "\n"
|
||||||
|
++ (formatDescription metric)
|
||||||
++ "\nExample\n"
|
++ "\nExample\n"
|
||||||
++ (pasteLines "Expected output" "Sample output")
|
++ (pasteLines "Expected output" "Sample output")
|
||||||
++ concat (map (\(exp, out) -> pasteLines exp out) $ zip (lines $ testExpectedContents metric)
|
++ concat (map (\(exp, out) -> pasteLines exp out) $ zip (lines $ testExpectedContents metric)
|
||||||
(lines $ outContents metric))
|
(lines $ outContents metric))
|
||||||
++ "\nMetric value: " ++ (printf "%.4f" $ expectedScore scheme)
|
++ "\nMetric value: " ++ (printf "%.4f" $ expectedScore scheme)
|
||||||
|
++ (case scoreExplanation scheme of
|
||||||
|
Just expl -> "\n(" ++ expl ++ ")"
|
||||||
|
Nothing -> "")
|
||||||
else noDescription
|
else noDescription
|
||||||
noDescription = [hereLit|THE METRIC HAS NO DESCRIPTION YET, PLEASE ADD AN ISSUE TO https://gitlab.com/filipg/geval/issues
|
noDescription = [hereLit|THE METRIC HAS NO DESCRIPTION YET, PLEASE ADD AN ISSUE TO https://gitlab.com/filipg/geval/issues
|
||||||
IF YOU WANT TO HAVE IT DESCRIBED|]
|
IF YOU WANT TO HAVE IT DESCRIBED|]
|
||||||
|
|
||||||
|
formatDescription :: Metric -> String
|
||||||
|
formatDescription (SoftFMeasure _) = [hereLit|Each line is a sequence of entities separated by spaces, each entity is of
|
||||||
|
the form LABEL:SPAN, where LABEL is any label and SPAN is defined using single integers, intervals or such
|
||||||
|
units separated with commas.
|
||||||
|
|]
|
||||||
|
|
||||||
|
scoreExplanation :: EvaluationScheme -> Maybe String
|
||||||
|
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
|
||||||
|
= Just [hereLit|We have a partial (0.75) success for the entity `inwords:1-4`, hence Recall = 0.75/1 = 0.75,
|
||||||
|
Precision = (0 + 0.75 + 0) / 3 = 0.25, so F-score = 0.375|]
|
||||||
|
|
||||||
pasteLines :: String -> String -> String
|
pasteLines :: String -> String -> String
|
||||||
pasteLines a b = printf "%-35s %s\n" a b
|
pasteLines a b = printf "%-35s %s\n" a b
|
||||||
|
@ -55,13 +55,13 @@ f2MeasureOnCounts = fMeasureOnCounts 2.0
|
|||||||
f1MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double
|
f1MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double
|
||||||
f1MeasureOnCounts = fMeasureOnCounts 1.0
|
f1MeasureOnCounts = fMeasureOnCounts 1.0
|
||||||
|
|
||||||
fMeasureOnCounts :: ConvertibleToDouble n => Double -> (n, Int, Int) -> Double
|
fMeasureOnCounts :: (ConvertibleToDouble n, Integral v) => Double -> (n, v, v) -> Double
|
||||||
fMeasureOnCounts beta (tp, nbExpected, nbGot) =
|
fMeasureOnCounts beta (tp, nbExpected, nbGot) =
|
||||||
(1 + betaSquared) * p * r `safeDoubleDiv` (betaSquared * p + r)
|
(1 + betaSquared) * p * r `safeDoubleDiv` (betaSquared * p + r)
|
||||||
where betaSquared = beta ^ 2
|
where betaSquared = beta ^ 2
|
||||||
(p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)
|
(p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)
|
||||||
|
|
||||||
countFolder :: Num n => (n, Int, Int) -> (n, Int, Int) -> (n, Int, Int)
|
countFolder :: (Num n, Num v) => (n, v, v) -> (n, v, v) -> (n, v, v)
|
||||||
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
|
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
|
||||||
|
|
||||||
getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int)
|
getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int)
|
||||||
@ -80,7 +80,7 @@ precisionAndRecall matchFun expected got
|
|||||||
= precisionAndRecallFromCounts (tp, length expected, length got)
|
= precisionAndRecallFromCounts (tp, length expected, length got)
|
||||||
where tp = maxMatch matchFun expected got
|
where tp = maxMatch matchFun expected got
|
||||||
|
|
||||||
precisionAndRecallFromCounts :: ConvertibleToDouble n => (n, Int, Int) -> (Double, Double)
|
precisionAndRecallFromCounts :: (ConvertibleToDouble n, Integral v) => (n, v, v) -> (Double, Double)
|
||||||
precisionAndRecallFromCounts (tp, nbExpected, nbGot) =
|
precisionAndRecallFromCounts (tp, nbExpected, nbGot) =
|
||||||
(tp /. nbGot, tp /. nbExpected)
|
(tp /. nbGot, tp /. nbExpected)
|
||||||
|
|
||||||
|
24
test/Spec.hs
24
test/Spec.hs
@ -10,7 +10,7 @@ import GEval.Common
|
|||||||
import GEval.EvaluationScheme
|
import GEval.EvaluationScheme
|
||||||
import GEval.OptionsParser
|
import GEval.OptionsParser
|
||||||
import GEval.BLEU
|
import GEval.BLEU
|
||||||
import GEval.ClippEU
|
import GEval.Clippings
|
||||||
import GEval.PrecisionRecall
|
import GEval.PrecisionRecall
|
||||||
import GEval.ClusteringMetrics
|
import GEval.ClusteringMetrics
|
||||||
import GEval.BIO
|
import GEval.BIO
|
||||||
@ -31,7 +31,6 @@ import GEval.CreateChallenge
|
|||||||
import GEval.Validation
|
import GEval.Validation
|
||||||
|
|
||||||
import Data.Map.Strict
|
import Data.Map.Strict
|
||||||
|
|
||||||
import Data.Conduit.List (consume)
|
import Data.Conduit.List (consume)
|
||||||
|
|
||||||
import System.FilePath
|
import System.FilePath
|
||||||
@ -227,6 +226,21 @@ main = hspec $ do
|
|||||||
r `shouldBe` [Clipping (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)),
|
r `shouldBe` [Clipping (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)),
|
||||||
Clipping (PageNumber 10) (Rectangle (Point 20 30) (Point 40 50)),
|
Clipping (PageNumber 10) (Rectangle (Point 20 30) (Point 40 50)),
|
||||||
Clipping (PageNumber 18) (Rectangle (Point 0 1) (Point 500 3))]
|
Clipping (PageNumber 18) (Rectangle (Point 0 1) (Point 500 3))]
|
||||||
|
it "parsing labeled rectangles" $ do
|
||||||
|
let (Right r) = parseOnly (lineLabeledClippingsParser <* endOfInput) "2/0,0,2,3 foo:5/10,10,20,20 "
|
||||||
|
r `shouldBe` [LabeledClipping Nothing $ Clipping (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)),
|
||||||
|
LabeledClipping (Just "foo") $ Clipping (PageNumber 5) (Rectangle (Point 10 10) (Point 20 20))]
|
||||||
|
it "check partition" $ do
|
||||||
|
partitionClippings (LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 0 0) (Point 100 50)))
|
||||||
|
(LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 10 20) (Point 200 300)))
|
||||||
|
`shouldBe` Just (Rectangle (Point 10 20) (Point 100 50),
|
||||||
|
[LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 10 0) (Point 100 19)),
|
||||||
|
LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 0 0) (Point 9 50))],
|
||||||
|
[LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 10 51) (Point 100 300)),
|
||||||
|
LabeledClipping Nothing (Clipping (PageNumber 5) $ Rectangle (Point 101 20) (Point 200 300))])
|
||||||
|
partitionClippings (LabeledClipping (Just "bar") (Clipping (PageNumber 10) (Rectangle (Point 100 100) (Point 200 149))))
|
||||||
|
(LabeledClipping (Just "bar") (Clipping (PageNumber 10) (Rectangle (Point 100 201) (Point 200 300))))
|
||||||
|
`shouldBe` Nothing
|
||||||
it "no rectangles" $ do
|
it "no rectangles" $ do
|
||||||
let (Right r) = parseOnly (lineClippingsParser <* endOfInput) ""
|
let (Right r) = parseOnly (lineClippingsParser <* endOfInput) ""
|
||||||
r `shouldBe` []
|
r `shouldBe` []
|
||||||
@ -260,6 +274,9 @@ main = hspec $ do
|
|||||||
runGEvalTest "probabilistic-soft-f1-simple" `shouldReturnAlmost` 0.33333333333333
|
runGEvalTest "probabilistic-soft-f1-simple" `shouldReturnAlmost` 0.33333333333333
|
||||||
it "simple test with perfect calibration" $ do
|
it "simple test with perfect calibration" $ do
|
||||||
runGEvalTest "probabilistic-soft-f1-calibrated" `shouldReturnAlmost` 0.88888888888
|
runGEvalTest "probabilistic-soft-f1-calibrated" `shouldReturnAlmost` 0.88888888888
|
||||||
|
describe "Soft2D-F1" $ do
|
||||||
|
it "simple test" $ do
|
||||||
|
runGEvalTest "soft2d-f1-simple" `shouldReturnAlmost` 0.30152621462832535
|
||||||
describe "test edit-distance library" $ do
|
describe "test edit-distance library" $ do
|
||||||
it "for handling UTF8" $ do
|
it "for handling UTF8" $ do
|
||||||
levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1
|
levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1
|
||||||
@ -318,6 +335,9 @@ main = hspec $ do
|
|||||||
it "just parse" $ do
|
it "just parse" $ do
|
||||||
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
|
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
|
||||||
Annotation "baz" (IS.fromList [4,5,6])]
|
Annotation "baz" (IS.fromList [4,5,6])]
|
||||||
|
it "just parse 2" $ do
|
||||||
|
parseAnnotations "inwords:1-3 indigits:5" `shouldBe` Right [Annotation "inwords" (IS.fromList [1,2,3]),
|
||||||
|
Annotation "indigits" (IS.fromList [5])]
|
||||||
it "empty" $ do
|
it "empty" $ do
|
||||||
parseAnnotations "" `shouldBe` Right []
|
parseAnnotations "" `shouldBe` Right []
|
||||||
it "empty (just spaces)" $ do
|
it "empty (just spaces)" $ do
|
||||||
|
@ -0,0 +1,3 @@
|
|||||||
|
foo:2/10,20,100,50 bar:10/100,150,200,300
|
||||||
|
foo:3/4,8,10,1000
|
||||||
|
3/10,30,100,100
|
|
1
test/soft2d-f1-simple/soft2d-f1-simple/config.txt
Normal file
1
test/soft2d-f1-simple/soft2d-f1-simple/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric Soft2D-F1
|
@ -0,0 +1,3 @@
|
|||||||
|
foo:1/10,20,100,50 bar:10/100,100,200,200
|
||||||
|
|
||||||
|
3/10,30,200,200
|
|
Loading…
Reference in New Issue
Block a user