Documentation on Probabilistic-MultiLabel-F1 metric

Filip Gralinski 2019-09-07 15:48:13 +02:00
parent b540cba7da
commit 5998f8a316
3 changed files with 42 additions and 10 deletions

.gitignore

@@ -1,2 +1,4 @@
*~
.stack-work
.shake/
geval


@@ -297,6 +297,7 @@ in the expected file (but not in the output file).
|] ++ (commonReadmeMDContents testName)
readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
readmeMDContents (MultiLabelFMeasure beta) testName = [i|
Tag names and their components
==============================
@@ -308,9 +309,7 @@ Tags:
* surname
* first-name
For each tag a sequence of token IDs separated with commas should be given (after a colon).
The metric is F1 on labels.
For each tag a sequence of token IDs separated with commas should be given (after a slash).
|] ++ (commonReadmeMDContents testName)
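
As a rough illustration (not part of this commit, and with a hypothetical helper name), an annotation such as person/3,4,5 from the expected file could be split into a label and its token IDs along these lines:

-- Sketch only: split "person/3,4,5" into ("person", [3,4,5]).
parseAnnotation :: String -> (String, [Int])
parseAnnotation s = (label, ids)
  where
    (label, rest) = break (== '/') s
    ids = map read (words (map commaToSpace (drop 1 rest)))
    commaToSpace c = if c == ',' then ' ' else c

-- parseAnnotation "surname/5"    == ("surname", [5])
-- parseAnnotation "person/3,4,5" == ("person", [3,4,5])
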
readmeMDContents MultiLabelLikelihood testName = readmeMDContents MultiLabelLogLoss testName
@@ -474,9 +473,10 @@ B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
trainContents TokenAccuracy = [hereLit|* V N I like cats
* * V * N I can see the rainbow
|]
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5
Steven bloody Brown person:1,3 first-name:1 surname:3
James and James first-name:1 firstname:3
trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5
Steven bloody Brown person/1,3 first-name/1 surname/3
James and James first-name/1 firstname/3
|]
trainContents MultiLabelLikelihood = [hereLit|I hate you! HATE
Love and hate LOVE HATE
@@ -540,6 +540,7 @@ Mr Jan Kowalski
devInContents TokenAccuracy = [hereLit|The cats on the mat
Ala has a cat
|]
devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
I see him
Barbara
@@ -603,9 +604,10 @@ O B-firstname/JAN B-surname/KOWALSKI
devExpectedContents TokenAccuracy = [hereLit|* N * * N
N V * N
|]
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2
devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
first-name:1
first-name/1
|]
devExpectedContents MultiLabelLikelihood = devExpectedContents MultiLabelLogLoss
devExpectedContents MultiLabelLogLoss = [hereLit|LOVE
@@ -670,6 +672,7 @@ No name here
testInContents TokenAccuracy = [hereLit|I have cats
I know
|]
testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
Nobody is there
I saw Marketa
@@ -735,9 +738,10 @@ O O O
testExpectedContents TokenAccuracy = [hereLit|* V N
* V
|]
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3
testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
first-name:3
first-name/3
|]
testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLoss
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS


@@ -47,6 +47,9 @@ listOfAvailableMetrics = [RMSE,
MultiLabelFMeasure 1.0,
MultiLabelFMeasure 2.0,
MultiLabelFMeasure 0.25,
ProbabilisticMultiLabelFMeasure 1.0,
ProbabilisticMultiLabelFMeasure 2.0,
ProbabilisticMultiLabelFMeasure 0.25,
MultiLabelLikelihood,
MAP,
BLEU,
@@ -88,6 +91,7 @@ isEvaluationSchemeDescribed _ = False
isMetricDescribed :: Metric -> Bool
isMetricDescribed (SoftFMeasure _) = True
isMetricDescribed (Soft2DFMeasure _) = True
isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
isMetricDescribed _ = False
getEvaluationSchemeDescription :: EvaluationScheme -> String
@@ -106,6 +110,15 @@ if a label `foo` is expected for the rectangle (0, 0)-(100, 200) and this label is returned but with
the span (50, 100)-(150, 150), it is treated as recall=1/8 and precision=1/2. For each item (line) F-score
is evaluated separately and finally averaged.
|]
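
A quick check of the figures above, as a standalone sketch (names such as rectArea are ours, not from this repository): the expected rectangle has area 100*200=20000, the returned span has area 100*50=5000, and their overlap is 50*50=2500, hence recall 2500/20000=1/8 and precision 2500/5000=1/2.

-- Sketch only: verify the recall=1/8 and precision=1/2 example.
type Rect = ((Double, Double), (Double, Double))

rectArea :: Rect -> Double
rectArea ((x0, y0), (x1, y1)) = (x1 - x0) * (y1 - y0)

rectOverlap :: Rect -> Rect -> Double
rectOverlap ((ax0, ay0), (ax1, ay1)) ((bx0, by0), (bx1, by1)) =
  max 0 (min ax1 bx1 - max ax0 bx0) * max 0 (min ay1 by1 - max ay0 by0)

main :: IO ()
main = do
  let expectedRect = ((0, 0), (100, 200))
      outputRect   = ((50, 100), (150, 150))
      common       = rectOverlap expectedRect outputRect
  print (common / rectArea expectedRect)  -- recall: 0.125
  print (common / rectArea outputRect)    -- precision: 0.5
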
getMetricDescription (ProbabilisticMultiLabelFMeasure _) =
[i|F-measure generalised so that labels can be annotated with probabilities and the quality
of the probabilities is assessed as well. It is calculated as the harmonic mean of calibration and recall,
where calibration measures the quality of the probabilities (how well they are calibrated; e.g.
if we have 10 items with probability 0.5 and 5 of them are correct, then the calibration
is perfect).
|]
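
To make the calibration example concrete, here is a toy sketch, not the exact formula used by GEval: compare the probability mass assigned to the returned labels with the number of labels that actually turned out to be correct.

-- Toy calibration only (hypothetical helper, not this repository's code):
-- 10 labels at probability 0.5 with 5 of them correct is perfectly calibrated.
toyCalibration :: [(Double, Bool)] -> Double
toyCalibration preds = 1 - abs (expected - actual) / fromIntegral (length preds)
  where
    expected = sum (map fst preds)                       -- probability mass
    actual   = fromIntegral (length (filter snd preds))  -- labels that were correct

main :: IO ()
main = print (toyCalibration (replicate 5 (0.5, True) ++ replicate 5 (0.5, False)))  -- 1.0
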
outContents :: Metric -> String
outContents (SoftFMeasure _) = [hereLit|inwords:1-4
@@ -114,6 +127,10 @@ inwords:1-3 indigits:5
outContents (Soft2DFMeasure _) = [hereLit|foo:3/250,130,340,217
bar:1/0,0,100,200 foo:1/40,50,1000,1000 bar:1/400,600,1000,1000
|]
outContents (ProbabilisticMultiLabelFMeasure _) = [hereLit|first-name/1:0.8 surname/3:1.0
surname/1:0.4
first-name/3:0.9
|]
expectedScore :: EvaluationScheme -> MetricValue
expectedScore (EvaluationScheme (SoftFMeasure beta) [])
@@ -124,6 +141,10 @@ expectedScore (EvaluationScheme (Soft2DFMeasure beta) [])
= let precision = 0.211622914314256
recall = 0.2749908502976
in (weightedHarmonicMean beta precision recall) / 2.0
expectedScore (EvaluationScheme (ProbabilisticMultiLabelFMeasure beta) [])
= let precision = 0.6569596940847289
recall = 0.675
in weightedHarmonicMean beta precision recall
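
For reference, a minimal sketch assuming weightedHarmonicMean is the standard F_beta combination (the sketch name below is ours); plugging in the precision and recall above with beta = 1.0 gives roughly 0.666.

-- Sketch only: F_beta = (1 + beta^2) * p * r / (beta^2 * p + r).
weightedHarmonicMeanSketch :: Double -> Double -> Double -> Double
weightedHarmonicMeanSketch beta p r = (1 + beta * beta) * p * r / (beta * beta * p + r)

main :: IO ()
main = print (weightedHarmonicMeanSketch 1.0 0.6569596940847289 0.675)  -- ~0.6659
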
listOfAvailableEvaluationSchemes :: [EvaluationScheme]
listOfAvailableEvaluationSchemes = map (\m -> EvaluationScheme m []) listOfAvailableMetrics
@ -163,6 +184,10 @@ formatDescription (Soft2DFMeasure _) = [hereLit|Each line is a sequence of entit
the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page number (starting from 1) and
(X0, Y0) and (X1, Y1) are clipping corners.
|]
formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line, a number of labels (entities) can be given. A label probability
can be provided after a colon (e.g. "foo:0.7"). By default, a probability of 1.0 is assumed.
|]
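
A rough sketch of how a single item in such a line could be interpreted (splitProbability is a hypothetical helper, not code from this commit): the probability, if present, follows the last colon; otherwise 1.0 is assumed.

import Data.Char (isDigit)

-- Sketch only: separate an optional probability given after the last colon.
splitProbability :: String -> (String, Double)
splitProbability s =
  case break (== ':') (reverse s) of
    (revProb, ':' : revLabel)
      | not (null revProb) && all (\c -> isDigit c || c == '.') revProb
        -> (reverse revLabel, read (reverse revProb))
    _ -> (s, 1.0)

-- splitProbability "foo:0.7"          == ("foo", 0.7)
-- splitProbability "first-name/1:0.8" == ("first-name/1", 0.8)
-- splitProbability "surname/3"        == ("surname/3", 1.0)
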
scoreExplanation :: EvaluationScheme -> Maybe String
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
@@ -173,6 +198,7 @@ scoreExplanation (EvaluationScheme (Soft2DFMeasure _) [])
As far as the second item is concerned, the total area covered by the output is 50*150+600*400=247500.
Hence, recall is 247500/902500=0.274 and precision is 247500/(20000+912000+240000)=0.211. Therefore, the F-score
for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
scoreExplanation (EvaluationScheme (ProbabilisticMultiLabelFMeasure _) []) = Nothing
pasteLines :: String -> String -> String
pasteLines a b = printf "%-35s %s\n" a b