Documentation on Probabilistic-MultiLabel-F1 metric

This commit is contained in:
Filip Gralinski 2019-09-07 15:48:13 +02:00
parent b540cba7da
commit 5998f8a316
3 changed files with 42 additions and 10 deletions

2
.gitignore vendored
View File

@ -1,2 +1,4 @@
*~ *~
.stack-work .stack-work
.shake/
geval

View File

@ -297,6 +297,7 @@ in the expected file (but not in the output file).
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
-- The probabilistic variant reuses the README text of MultiLabelFMeasure with the same beta.
readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
readmeMDContents (MultiLabelFMeasure beta) testName = [i| readmeMDContents (MultiLabelFMeasure beta) testName = [i|
Tag names and their component Tag names and their component
============================= =============================
@ -308,9 +309,7 @@ Tags:
* surname * surname
* first-name * first-name
For each tag a sequence of token IDs separated with commas should be given (after a colon). For each tag a sequence of token IDs separated with commas should be given (after a slash).
The metric is F1 on labels.
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
readmeMDContents MultiLabelLikelihood testName = readmeMDContents MultiLabelLogLoss testName readmeMDContents MultiLabelLikelihood testName = readmeMDContents MultiLabelLogLoss testName
@ -474,9 +473,10 @@ B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
trainContents TokenAccuracy = [hereLit|* V N I like cats trainContents TokenAccuracy = [hereLit|* V N I like cats
* * V * N I can see the rainbow * * V * N I can see the rainbow
|] |]
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5 trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
Steven bloody Brown person:1,3 first-name:1 surname:3 trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5
James and James first-name:1 firstname:3 Steven bloody Brown person/1,3 first-name/1 surname/3
James and James first-name/1 firstname/3
|] |]
trainContents MultiLabelLikelihood = [hereLit|I hate you! HATE trainContents MultiLabelLikelihood = [hereLit|I hate you! HATE
Love and hate LOVE HATE Love and hate LOVE HATE
@ -540,6 +540,7 @@ Mr Jan Kowalski
devInContents TokenAccuracy = [hereLit|The cats on the mat devInContents TokenAccuracy = [hereLit|The cats on the mat
Ala has a cat Ala has a cat
|] |]
-- The probabilistic variant reuses the dev input sample of MultiLabelFMeasure with the same beta.
devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
I see him I see him
Barbara Barbara
@ -603,9 +604,10 @@ O B-firstname/JAN B-surname/KOWALSKI
devExpectedContents TokenAccuracy = [hereLit|* N * * N devExpectedContents TokenAccuracy = [hereLit|* N * * N
N V * N N V * N
|] |]
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2 devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
first-name:1 first-name/1
|] |]
devExpectedContents MultiLabelLikelihood = devExpectedContents MultiLabelLogLoss devExpectedContents MultiLabelLikelihood = devExpectedContents MultiLabelLogLoss
devExpectedContents MultiLabelLogLoss = [hereLit|LOVE devExpectedContents MultiLabelLogLoss = [hereLit|LOVE
@ -670,6 +672,7 @@ No name here
testInContents TokenAccuracy = [hereLit|I have cats testInContents TokenAccuracy = [hereLit|I have cats
I know I know
|] |]
-- The probabilistic variant reuses the test input sample of MultiLabelFMeasure with the same beta.
testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
Nobody is there Nobody is there
I saw Marketa I saw Marketa
@ -735,9 +738,10 @@ O O O
testExpectedContents TokenAccuracy = [hereLit|* V N testExpectedContents TokenAccuracy = [hereLit|* V N
* V * V
|] |]
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3 testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
first-name:3 first-name/3
|] |]
testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLoss testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLoss
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS

View File

@ -47,6 +47,9 @@ listOfAvailableMetrics = [RMSE,
MultiLabelFMeasure 1.0, MultiLabelFMeasure 1.0,
MultiLabelFMeasure 2.0, MultiLabelFMeasure 2.0,
MultiLabelFMeasure 0.25, MultiLabelFMeasure 0.25,
ProbabilisticMultiLabelFMeasure 1.0,
ProbabilisticMultiLabelFMeasure 2.0,
ProbabilisticMultiLabelFMeasure 0.25,
MultiLabelLikelihood, MultiLabelLikelihood,
MAP, MAP,
BLEU, BLEU,
@ -88,6 +91,7 @@ isEvaluationSchemeDescribed _ = False
isMetricDescribed :: Metric -> Bool isMetricDescribed :: Metric -> Bool
isMetricDescribed (SoftFMeasure _) = True isMetricDescribed (SoftFMeasure _) = True
isMetricDescribed (Soft2DFMeasure _) = True isMetricDescribed (Soft2DFMeasure _) = True
-- ProbabilisticMultiLabelFMeasure counts as a described metric, whatever its parameter value.
isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
isMetricDescribed _ = False isMetricDescribed _ = False
getEvaluationSchemeDescription :: EvaluationScheme -> String getEvaluationSchemeDescription :: EvaluationScheme -> String
@ -106,6 +110,15 @@ if a label `foo` is expected for the rectangle (0, 0)-(100, 200) and this label
the span (50, 100)-(150, 150), it is treated as recall=1/8 and precision=1/2. For each item (line) F-score the span (50, 100)-(150, 150), it is treated as recall=1/8 and precision=1/2. For each item (line) F-score
is evaluated separately and finally averaged. is evaluated separately and finally averaged.
|] |]
-- Human-readable description of the probabilistic multi-label F-measure.
-- The parameter of the metric is ignored: the same text is returned for
-- every value. The text is emitted as-is, so its wording is user-facing.
getMetricDescription (ProbabilisticMultiLabelFMeasure _) =
  [i|F-measure generalised so that labels could be annotated with probabilities and the quality
of probabilities is assessed as well. It is calculated as the harmonic mean of calibration and recall
where calibration measures the quality of probabilities (how well they are calibrated, e.g.
if we have 10 items with probability 0.5 and 5 of them are correct, then the calibration
is perfect).
|]
outContents :: Metric -> String outContents :: Metric -> String
outContents (SoftFMeasure _) = [hereLit|inwords:1-4 outContents (SoftFMeasure _) = [hereLit|inwords:1-4
@ -114,6 +127,10 @@ inwords:1-3 indigits:5
outContents (Soft2DFMeasure _) = [hereLit|foo:3/250,130,340,217 outContents (Soft2DFMeasure _) = [hereLit|foo:3/250,130,340,217
bar:1/0,0,100,200 foo:1/40,50,1000,1000 bar:1/400,600,1000,1000 bar:1/0,0,100,200 foo:1/40,50,1000,1000 bar:1/400,600,1000,1000
|] |]
-- Sample output contents for the probabilistic multi-label F-measure.
-- Every label in this sample is followed by a colon and a number
-- (presumably the label's probability; see formatDescription for this metric).
outContents (ProbabilisticMultiLabelFMeasure _) = [hereLit|first-name/1:0.8 surname/3:1.0
surname/1:0.4
first-name/3:0.9
|]
expectedScore :: EvaluationScheme -> MetricValue expectedScore :: EvaluationScheme -> MetricValue
expectedScore (EvaluationScheme (SoftFMeasure beta) []) expectedScore (EvaluationScheme (SoftFMeasure beta) [])
@ -124,6 +141,10 @@ expectedScore (EvaluationScheme (Soft2DFMeasure beta) [])
= let precision = 0.211622914314256 = let precision = 0.211622914314256
recall = 0.2749908502976 recall = 0.2749908502976
in (weightedHarmonicMean beta precision recall) / 2.0 in (weightedHarmonicMean beta precision recall) / 2.0
-- Expected score for the probabilistic multi-label F-measure: the
-- beta-weighted harmonic mean of the two fixed figures bound below.
-- Only matches schemes whose second field is the empty list.
expectedScore (EvaluationScheme (ProbabilisticMultiLabelFMeasure beta) [])
  = weightedHarmonicMean beta precision recall
  where
    precision = 0.6569596940847289
    recall = 0.675
listOfAvailableEvaluationSchemes :: [EvaluationScheme] listOfAvailableEvaluationSchemes :: [EvaluationScheme]
listOfAvailableEvaluationSchemes = map (\m -> EvaluationScheme m []) listOfAvailableMetrics listOfAvailableEvaluationSchemes = map (\m -> EvaluationScheme m []) listOfAvailableMetrics
@ -163,6 +184,10 @@ formatDescription (Soft2DFMeasure _) = [hereLit|Each line is a sequence of entit
the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page number (starting from 1) and the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page number (starting from 1) and
(X0, Y0) and (X1, Y1) are clipping corners. (X0, Y0) and (X1, Y1) are clipping corners.
|] |]
-- Output-format description text for the probabilistic multi-label F-measure;
-- the metric's parameter is ignored.
formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line a number of labels (entities) can be given. A label probability
can be provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
|]
scoreExplanation :: EvaluationScheme -> Maybe String scoreExplanation :: EvaluationScheme -> Maybe String
scoreExplanation (EvaluationScheme (SoftFMeasure _) []) scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
@ -173,6 +198,7 @@ scoreExplanation (EvaluationScheme (Soft2DFMeasure _) [])
As far as the second item is concerned, the total area that covered by the output is 50*150+600*400=247500. As far as the second item is concerned, the total area that covered by the output is 50*150+600*400=247500.
Hence, recall is 247500/902500=0.274 and precision - 247500/(20000+912000+240000)=0.211. Therefore, the F-score Hence, recall is 247500/902500=0.274 and precision - 247500/(20000+912000+240000)=0.211. Therefore, the F-score
for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|] for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
-- No worked score explanation is supplied for the probabilistic multi-label F-measure.
scoreExplanation (EvaluationScheme (ProbabilisticMultiLabelFMeasure _) []) = Nothing
pasteLines :: String -> String -> String pasteLines :: String -> String -> String
pasteLines a b = printf "%-35s %s\n" a b pasteLines a b = printf "%-35s %s\n" a b