Change the meaning of Soft2D-F1 metric.
Now it is averaged per line.
This commit is contained in:
parent
6b63740c4a
commit
e4a6ed347d
@ -48,6 +48,8 @@ module GEval.Core
|
|||||||
somethingWrongWithFilesMessage
|
somethingWrongWithFilesMessage
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
import GEval.Metric
|
import GEval.Metric
|
||||||
import GEval.EvaluationScheme
|
import GEval.EvaluationScheme
|
||||||
|
|
||||||
@ -649,15 +651,16 @@ gevalCore' (ProbabilisticSoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnot
|
|||||||
|
|
||||||
gevalCore' (Soft2DFMeasure beta) _ = gevalCoreWithoutInput parseLabeledClippings
|
gevalCore' (Soft2DFMeasure beta) _ = gevalCoreWithoutInput parseLabeledClippings
|
||||||
parseLabeledClippings
|
parseLabeledClippings
|
||||||
get2DCounts
|
count2DFScore
|
||||||
countAgg
|
averageC
|
||||||
(fMeasureOnCounts beta)
|
id
|
||||||
noGraph
|
noGraph
|
||||||
where
|
where
|
||||||
parseLabeledClippings = controlledParse lineLabeledClippingsParser
|
parseLabeledClippings = controlledParse lineLabeledClippingsParser
|
||||||
get2DCounts (expected, got) = (coveredBy expected got,
|
count2DFScore (expected, got) = fMeasureOnCounts beta (tpArea, expArea, gotArea)
|
||||||
totalArea expected,
|
where tpArea = coveredBy expected got
|
||||||
totalArea got)
|
expArea = totalArea expected
|
||||||
|
gotArea = totalArea got
|
||||||
|
|
||||||
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep noGraph
|
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep noGraph
|
||||||
where
|
where
|
||||||
|
@ -743,8 +743,8 @@ testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLo
|
|||||||
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS
|
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS
|
||||||
HATE
|
HATE
|
||||||
|]
|
|]
|
||||||
testExpectedContents (Soft2DFMeasure _) = [hereLit|3/0,0,100,100
|
testExpectedContents (Soft2DFMeasure _) = [hereLit|foo:3/0,0,100,100
|
||||||
1/10,10,1000,1000
|
bar:1/50,50,1000,1000
|
||||||
|]
|
|]
|
||||||
testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10
|
testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10
|
||||||
1/10,10,1000,1000/10
|
1/10,10,1000,1000/10
|
||||||
|
@ -87,6 +87,7 @@ isEvaluationSchemeDescribed _ = False
|
|||||||
|
|
||||||
isMetricDescribed :: Metric -> Bool
|
isMetricDescribed :: Metric -> Bool
|
||||||
isMetricDescribed (SoftFMeasure _) = True
|
isMetricDescribed (SoftFMeasure _) = True
|
||||||
|
isMetricDescribed (Soft2DFMeasure _) = True
|
||||||
isMetricDescribed _ = False
|
isMetricDescribed _ = False
|
||||||
|
|
||||||
getEvaluationSchemeDescription :: EvaluationScheme -> String
|
getEvaluationSchemeDescription :: EvaluationScheme -> String
|
||||||
@ -96,18 +97,33 @@ getMetricDescription :: Metric -> String
|
|||||||
getMetricDescription (SoftFMeasure _) =
|
getMetricDescription (SoftFMeasure _) =
|
||||||
[i|"Soft" F-measure on intervals, i.e. partial "hits" are considered. For instance,
|
[i|"Soft" F-measure on intervals, i.e. partial "hits" are considered. For instance,
|
||||||
if a label `foo` is expected for the span 2-9 and this label is returned but with
|
if a label `foo` is expected for the span 2-9 and this label is returned but with
|
||||||
the span 8-12, it is counted as 1/4 for recall and 2/5 for precision.
|
the span 8-12, it is counted as 2/8=0.25 instead of 0 or 1 when precision/recall counts
|
||||||
|
are gathered.
|
||||||
|
|]
|
||||||
|
getMetricDescription (Soft2DFMeasure _) =
|
||||||
|
[i|"Soft" F-measure on rectangles, i.e. precision and recall are calculated for areas. For instance,
|
||||||
|
if a label `foo` is expected for the rectangle (0, 0)-(100, 200) and this label is returned but with
|
||||||
|
the rectangle (50, 100)-(150, 150), it is treated as recall=1/8 and precision=1/2. For each item (line) F-score
|
||||||
|
is evaluated separately and finally averaged.
|
||||||
|]
|
|]
|
||||||
|
|
||||||
outContents :: Metric -> String
|
outContents :: Metric -> String
|
||||||
outContents (SoftFMeasure _) = [hereLit|inwords:1-4
|
outContents (SoftFMeasure _) = [hereLit|inwords:1-4
|
||||||
inwords:1-3 indigits:5
|
inwords:1-3 indigits:5
|
||||||
|]
|
|]
|
||||||
|
outContents (Soft2DFMeasure _) = [hereLit|foo:3/250,130,340,217
|
||||||
|
bar:1/0,0,100,200 foo:1/40,50,1000,1000 bar:1/400,600,1000,1000
|
||||||
|
|]
|
||||||
|
|
||||||
expectedScore :: EvaluationScheme -> MetricValue
|
expectedScore :: EvaluationScheme -> MetricValue
|
||||||
expectedScore (EvaluationScheme (SoftFMeasure beta) []) = weightedHarmonicMean beta precision recall
|
expectedScore (EvaluationScheme (SoftFMeasure beta) [])
|
||||||
where precision = 0.25
|
= let precision = 0.25
|
||||||
recall = 0.75
|
recall = 0.75
|
||||||
|
in weightedHarmonicMean beta precision recall
|
||||||
|
expectedScore (EvaluationScheme (Soft2DFMeasure beta) [])
|
||||||
|
= let precision = 0.21117747440273
|
||||||
|
recall = 0.27423822714681
|
||||||
|
in (weightedHarmonicMean beta precision recall) / 2.0
|
||||||
|
|
||||||
listOfAvailableEvaluationSchemes :: [EvaluationScheme]
|
listOfAvailableEvaluationSchemes :: [EvaluationScheme]
|
||||||
listOfAvailableEvaluationSchemes = map (\m -> EvaluationScheme m []) listOfAvailableMetrics
|
listOfAvailableEvaluationSchemes = map (\m -> EvaluationScheme m []) listOfAvailableMetrics
|
||||||
@ -143,11 +159,20 @@ formatDescription (SoftFMeasure _) = [hereLit|Each line is a sequence of entitie
|
|||||||
the form LABEL:SPAN, where LABEL is any label and SPAN is defined using single integers, intervals or such
|
the form LABEL:SPAN, where LABEL is any label and SPAN is defined using single integers, intervals or such
|
||||||
units separated with commas.
|
units separated with commas.
|
||||||
|]
|
|]
|
||||||
|
formatDescription (Soft2DFMeasure _) = [hereLit|Each line is a sequence of entities separated by spaces, each entity is of
|
||||||
|
the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, PAGE is the page number (starting from 1) and
|
||||||
|
(X0, Y0) and (X1, Y1) are clipping corners.
|
||||||
|
|]
|
||||||
|
|
||||||
scoreExplanation :: EvaluationScheme -> Maybe String
|
scoreExplanation :: EvaluationScheme -> Maybe String
|
||||||
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
|
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
|
||||||
= Just [hereLit|We have a partial (0.75) success for the entity `inwords:1-4`, hence Recall = 0.75/1 = 0.75,
|
= Just [hereLit|We have a partial (0.75) success for the entity `inwords:1-4`, hence Recall = 0.75/1 = 0.75,
|
||||||
Precision = (0 + 0.75 + 0) / 3 = 0.25, so F-score = 0.375|]
|
Precision = (0 + 0.75 + 0) / 3 = 0.25, so F-score = 0.375|]
|
||||||
|
scoreExplanation (EvaluationScheme (Soft2DFMeasure _) [])
|
||||||
|
= Just [hereLit|The F-score for the first item is 0 (the entity was found in the completely wrong place).
|
||||||
|
As far as the second item is concerned, the total area that is covered by the output is 50*150+600*400=247500.
|
||||||
|
Hence, recall is 247500/902500=0.274 and precision is 247500/(20000+912000+240000)=0.211. Therefore, the F-score
|
||||||
|
for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
|
||||||
|
|
||||||
pasteLines :: String -> String -> String
|
pasteLines :: String -> String -> String
|
||||||
pasteLines a b = printf "%-35s %s\n" a b
|
pasteLines a b = printf "%-35s %s\n" a b
|
||||||
|
@ -276,7 +276,7 @@ main = hspec $ do
|
|||||||
runGEvalTest "probabilistic-soft-f1-calibrated" `shouldReturnAlmost` 0.88888888888
|
runGEvalTest "probabilistic-soft-f1-calibrated" `shouldReturnAlmost` 0.88888888888
|
||||||
describe "Soft2D-F1" $ do
|
describe "Soft2D-F1" $ do
|
||||||
it "simple test" $ do
|
it "simple test" $ do
|
||||||
runGEvalTest "soft2d-f1-simple" `shouldReturnAlmost` 0.30152621462832535
|
runGEvalTest "soft2d-f1-simple" `shouldReturnAlmost` 0.218457349437945
|
||||||
describe "test edit-distance library" $ do
|
describe "test edit-distance library" $ do
|
||||||
it "for handling UTF8" $ do
|
it "for handling UTF8" $ do
|
||||||
levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1
|
levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1
|
||||||
@ -551,7 +551,7 @@ main = hspec $ do
|
|||||||
let outFile = tempDir </> "test-A" </> "out.tsv"
|
let outFile = tempDir </> "test-A" </> "out.tsv"
|
||||||
writeFile outFile (outContents metric)
|
writeFile outFile (outContents metric)
|
||||||
obtainedScore <- (runGEval ["--expected-directory", tempDir, "--out-directory", tempDir]) >>= extractVal
|
obtainedScore <- (runGEval ["--expected-directory", tempDir, "--out-directory", tempDir]) >>= extractVal
|
||||||
obtainedScore `shouldBe` (expectedScore scheme)
|
obtainedScore `shouldBeAlmost` (expectedScore scheme)
|
||||||
describe "submit" $ do
|
describe "submit" $ do
|
||||||
it "current branch" $ do
|
it "current branch" $ do
|
||||||
runGitTest "branch-test" (\_ -> getCurrentBranch) `shouldReturn` "develop"
|
runGitTest "branch-test" (\_ -> getCurrentBranch) `shouldReturn` "develop"
|
||||||
|
Loading…
Reference in New Issue
Block a user