Documentation on Probabilistic-MultiLabel-F1 metric
This commit is contained in:
parent
b540cba7da
commit
5998f8a316
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
|||||||
*~
|
*~
|
||||||
.stack-work
|
.stack-work
|
||||||
|
.shake/
|
||||||
|
geval
|
||||||
|
@ -297,6 +297,7 @@ in the expected file (but not in the output file).
|
|||||||
|
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
|
||||||
readmeMDContents (MultiLabelFMeasure beta) testName = [i|
|
readmeMDContents (MultiLabelFMeasure beta) testName = [i|
|
||||||
Tag names and their component
|
Tag names and their component
|
||||||
=============================
|
=============================
|
||||||
@ -308,9 +309,7 @@ Tags:
|
|||||||
* surname
|
* surname
|
||||||
* first-name
|
* first-name
|
||||||
|
|
||||||
For each tag a sequence of token IDs separated with commas should be given (after a colon).
|
For each tag a sequence of token IDs separated with commas should be given (after a slash).
|
||||||
|
|
||||||
The metric is F1 on labels.
|
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents MultiLabelLikelihood testName = readmeMDContents MultiLabelLogLoss testName
|
readmeMDContents MultiLabelLikelihood testName = readmeMDContents MultiLabelLogLoss testName
|
||||||
@ -474,9 +473,10 @@ B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
|||||||
trainContents TokenAccuracy = [hereLit|* V N I like cats
|
trainContents TokenAccuracy = [hereLit|* V N I like cats
|
||||||
* * V * N I can see the rainbow
|
* * V * N I can see the rainbow
|
||||||
|]
|
|]
|
||||||
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5
|
trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
|
||||||
Steven bloody Brown person:1,3 first-name:1 surname:3
|
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5
|
||||||
James and James first-name:1 firstname:3
|
Steven bloody Brown person/1,3 first-name/1 surname/3
|
||||||
|
James and James first-name/1 firstname/3
|
||||||
|]
|
|]
|
||||||
trainContents MultiLabelLikelihood = [hereLit|I hate you! HATE
|
trainContents MultiLabelLikelihood = [hereLit|I hate you! HATE
|
||||||
Love and hate LOVE HATE
|
Love and hate LOVE HATE
|
||||||
@ -540,6 +540,7 @@ Mr Jan Kowalski
|
|||||||
devInContents TokenAccuracy = [hereLit|The cats on the mat
|
devInContents TokenAccuracy = [hereLit|The cats on the mat
|
||||||
Ala has a cat
|
Ala has a cat
|
||||||
|]
|
|]
|
||||||
|
devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
|
||||||
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
|
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
|
||||||
I see him
|
I see him
|
||||||
Barbara
|
Barbara
|
||||||
@ -603,9 +604,10 @@ O B-firstname/JAN B-surname/KOWALSKI
|
|||||||
devExpectedContents TokenAccuracy = [hereLit|* N * * N
|
devExpectedContents TokenAccuracy = [hereLit|* N * * N
|
||||||
N V * N
|
N V * N
|
||||||
|]
|
|]
|
||||||
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2
|
devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
|
||||||
|
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
|
||||||
|
|
||||||
first-name:1
|
first-name/1
|
||||||
|]
|
|]
|
||||||
devExpectedContents MultiLabelLikelihood = devExpectedContents MultiLabelLogLoss
|
devExpectedContents MultiLabelLikelihood = devExpectedContents MultiLabelLogLoss
|
||||||
devExpectedContents MultiLabelLogLoss = [hereLit|LOVE
|
devExpectedContents MultiLabelLogLoss = [hereLit|LOVE
|
||||||
@ -670,6 +672,7 @@ No name here
|
|||||||
testInContents TokenAccuracy = [hereLit|I have cats
|
testInContents TokenAccuracy = [hereLit|I have cats
|
||||||
I know
|
I know
|
||||||
|]
|
|]
|
||||||
|
testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
|
||||||
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
|
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
|
||||||
Nobody is there
|
Nobody is there
|
||||||
I saw Marketa
|
I saw Marketa
|
||||||
@ -735,9 +738,10 @@ O O O
|
|||||||
testExpectedContents TokenAccuracy = [hereLit|* V N
|
testExpectedContents TokenAccuracy = [hereLit|* V N
|
||||||
* V
|
* V
|
||||||
|]
|
|]
|
||||||
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3
|
testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
|
||||||
|
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
|
||||||
|
|
||||||
first-name:3
|
first-name/3
|
||||||
|]
|
|]
|
||||||
testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLoss
|
testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLoss
|
||||||
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS
|
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS
|
||||||
|
@ -47,6 +47,9 @@ listOfAvailableMetrics = [RMSE,
|
|||||||
MultiLabelFMeasure 1.0,
|
MultiLabelFMeasure 1.0,
|
||||||
MultiLabelFMeasure 2.0,
|
MultiLabelFMeasure 2.0,
|
||||||
MultiLabelFMeasure 0.25,
|
MultiLabelFMeasure 0.25,
|
||||||
|
ProbabilisticMultiLabelFMeasure 1.0,
|
||||||
|
ProbabilisticMultiLabelFMeasure 2.0,
|
||||||
|
ProbabilisticMultiLabelFMeasure 0.25,
|
||||||
MultiLabelLikelihood,
|
MultiLabelLikelihood,
|
||||||
MAP,
|
MAP,
|
||||||
BLEU,
|
BLEU,
|
||||||
@ -88,6 +91,7 @@ isEvaluationSchemeDescribed _ = False
|
|||||||
isMetricDescribed :: Metric -> Bool
|
isMetricDescribed :: Metric -> Bool
|
||||||
isMetricDescribed (SoftFMeasure _) = True
|
isMetricDescribed (SoftFMeasure _) = True
|
||||||
isMetricDescribed (Soft2DFMeasure _) = True
|
isMetricDescribed (Soft2DFMeasure _) = True
|
||||||
|
isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
|
||||||
isMetricDescribed _ = False
|
isMetricDescribed _ = False
|
||||||
|
|
||||||
getEvaluationSchemeDescription :: EvaluationScheme -> String
|
getEvaluationSchemeDescription :: EvaluationScheme -> String
|
||||||
@ -106,6 +110,15 @@ if a label `foo` is expected for the rectangle (0, 0)-(100, 200) and this label
|
|||||||
the span (50, 100)-(150, 150), it is treated as recall=1/8 and precision=1/2. For each item (line) F-score
|
the span (50, 100)-(150, 150), it is treated as recall=1/8 and precision=1/2. For each item (line) F-score
|
||||||
is evaluated separately and finally averaged.
|
is evaluated separately and finally averaged.
|
||||||
|]
|
|]
|
||||||
|
getMetricDescription (ProbabilisticMultiLabelFMeasure _) =
|
||||||
|
[i|F-measure generalised so that labels could be annotated with probabilities and the quality
|
||||||
|
of probabilities is assessed as well. It is calculated as the harmonic mean of calibration and recall
|
||||||
|
where calibration measures the quality of probabilities (how well they are calibrated, e.g.
|
||||||
|
if we have 10 items with probability 0.5 and 5 of them are correct, then the calibration
|
||||||
|
is perfect).
|
||||||
|
|]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
outContents :: Metric -> String
|
outContents :: Metric -> String
|
||||||
outContents (SoftFMeasure _) = [hereLit|inwords:1-4
|
outContents (SoftFMeasure _) = [hereLit|inwords:1-4
|
||||||
@ -114,6 +127,10 @@ inwords:1-3 indigits:5
|
|||||||
outContents (Soft2DFMeasure _) = [hereLit|foo:3/250,130,340,217
|
outContents (Soft2DFMeasure _) = [hereLit|foo:3/250,130,340,217
|
||||||
bar:1/0,0,100,200 foo:1/40,50,1000,1000 bar:1/400,600,1000,1000
|
bar:1/0,0,100,200 foo:1/40,50,1000,1000 bar:1/400,600,1000,1000
|
||||||
|]
|
|]
|
||||||
|
outContents (ProbabilisticMultiLabelFMeasure _) = [hereLit|first-name/1:0.8 surname/3:1.0
|
||||||
|
surname/1:0.4
|
||||||
|
first-name/3:0.9
|
||||||
|
|]
|
||||||
|
|
||||||
expectedScore :: EvaluationScheme -> MetricValue
|
expectedScore :: EvaluationScheme -> MetricValue
|
||||||
expectedScore (EvaluationScheme (SoftFMeasure beta) [])
|
expectedScore (EvaluationScheme (SoftFMeasure beta) [])
|
||||||
@ -124,6 +141,10 @@ expectedScore (EvaluationScheme (Soft2DFMeasure beta) [])
|
|||||||
= let precision = 0.211622914314256
|
= let precision = 0.211622914314256
|
||||||
recall = 0.2749908502976
|
recall = 0.2749908502976
|
||||||
in (weightedHarmonicMean beta precision recall) / 2.0
|
in (weightedHarmonicMean beta precision recall) / 2.0
|
||||||
|
expectedScore (EvaluationScheme (ProbabilisticMultiLabelFMeasure beta) [])
|
||||||
|
= let precision = 0.6569596940847289
|
||||||
|
recall = 0.675
|
||||||
|
in weightedHarmonicMean beta precision recall
|
||||||
|
|
||||||
listOfAvailableEvaluationSchemes :: [EvaluationScheme]
|
listOfAvailableEvaluationSchemes :: [EvaluationScheme]
|
||||||
listOfAvailableEvaluationSchemes = map (\m -> EvaluationScheme m []) listOfAvailableMetrics
|
listOfAvailableEvaluationSchemes = map (\m -> EvaluationScheme m []) listOfAvailableMetrics
|
||||||
@ -163,6 +184,10 @@ formatDescription (Soft2DFMeasure _) = [hereLit|Each line is a sequence of entit
|
|||||||
the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page number (starting from 1) and
|
the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page number (starting from 1) and
|
||||||
(X0, Y0) and (X1, Y1) are clipping corners.
|
(X0, Y0) and (X1, Y1) are clipping corners.
|
||||||
|]
|
|]
|
||||||
|
formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line a number of labels (entities) can be given. A label probability
|
||||||
|
can be provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
|
||||||
|
|]
|
||||||
|
|
||||||
|
|
||||||
scoreExplanation :: EvaluationScheme -> Maybe String
|
scoreExplanation :: EvaluationScheme -> Maybe String
|
||||||
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
|
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
|
||||||
@ -173,6 +198,7 @@ scoreExplanation (EvaluationScheme (Soft2DFMeasure _) [])
|
|||||||
As far as the second item is concerned, the total area that is covered by the output is 50*150+600*400=247500.
|
As far as the second item is concerned, the total area that is covered by the output is 50*150+600*400=247500.
|
||||||
Hence, recall is 247500/902500=0.274 and precision - 247500/(20000+912000+240000)=0.211. Therefore, the F-score
|
Hence, recall is 247500/902500=0.274 and precision - 247500/(20000+912000+240000)=0.211. Therefore, the F-score
|
||||||
for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
|
for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
|
||||||
|
scoreExplanation (EvaluationScheme (ProbabilisticMultiLabelFMeasure _) []) = Nothing
|
||||||
|
|
||||||
pasteLines :: String -> String -> String
|
pasteLines :: String -> String -> String
|
||||||
pasteLines a b = printf "%-35s %s\n" a b
|
pasteLines a b = printf "%-35s %s\n" a b
|
||||||
|
Loading…
Reference in New Issue
Block a user