Documentation on Probabilistic-MultiLabel-F1 metric
This commit is contained in:
parent
b540cba7da
commit
5998f8a316
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
||||
*~
|
||||
.stack-work
|
||||
.shake/
|
||||
geval
|
||||
|
@ -297,6 +297,7 @@ in the expected file (but not in the output file).
|
||||
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
|
||||
readmeMDContents (MultiLabelFMeasure beta) testName = [i|
|
||||
Tag names and their component
|
||||
=============================
|
||||
@ -308,9 +309,7 @@ Tags:
|
||||
* surname
|
||||
* first-name
|
||||
|
||||
For each tag a sequence of token IDs separated with commas should be given (after a colon).
|
||||
|
||||
The metric is F1 on labels.
|
||||
For each tag a sequence of token IDs separated with commas should be given (after a slash).
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents MultiLabelLikelihood testName = readmeMDContents MultiLabelLogLoss testName
|
||||
@ -474,9 +473,10 @@ B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
||||
trainContents TokenAccuracy = [hereLit|* V N I like cats
|
||||
* * V * N I can see the rainbow
|
||||
|]
|
||||
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5
|
||||
Steven bloody Brown person:1,3 first-name:1 surname:3
|
||||
James and James first-name:1 firstname:3
|
||||
trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
|
||||
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5
|
||||
Steven bloody Brown person/1,3 first-name/1 surname/3
|
||||
James and James first-name/1 firstname/3
|
||||
|]
|
||||
trainContents MultiLabelLikelihood = [hereLit|I hate you! HATE
|
||||
Love and hate LOVE HATE
|
||||
@ -540,6 +540,7 @@ Mr Jan Kowalski
|
||||
devInContents TokenAccuracy = [hereLit|The cats on the mat
|
||||
Ala has a cat
|
||||
|]
|
||||
devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
|
||||
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
|
||||
I see him
|
||||
Barbara
|
||||
@ -603,9 +604,10 @@ O B-firstname/JAN B-surname/KOWALSKI
|
||||
devExpectedContents TokenAccuracy = [hereLit|* N * * N
|
||||
N V * N
|
||||
|]
|
||||
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2
|
||||
devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
|
||||
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
|
||||
|
||||
first-name:1
|
||||
first-name/1
|
||||
|]
|
||||
devExpectedContents MultiLabelLikelihood = devExpectedContents MultiLabelLogLoss
|
||||
devExpectedContents MultiLabelLogLoss = [hereLit|LOVE
|
||||
@ -670,6 +672,7 @@ No name here
|
||||
testInContents TokenAccuracy = [hereLit|I have cats
|
||||
I know
|
||||
|]
|
||||
testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
|
||||
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
|
||||
Nobody is there
|
||||
I saw Marketa
|
||||
@ -735,9 +738,10 @@ O O O
|
||||
testExpectedContents TokenAccuracy = [hereLit|* V N
|
||||
* V
|
||||
|]
|
||||
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3
|
||||
testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
|
||||
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
|
||||
|
||||
first-name:3
|
||||
first-name/3
|
||||
|]
|
||||
testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLoss
|
||||
testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS
|
||||
|
@ -47,6 +47,9 @@ listOfAvailableMetrics = [RMSE,
|
||||
MultiLabelFMeasure 1.0,
|
||||
MultiLabelFMeasure 2.0,
|
||||
MultiLabelFMeasure 0.25,
|
||||
ProbabilisticMultiLabelFMeasure 1.0,
|
||||
ProbabilisticMultiLabelFMeasure 2.0,
|
||||
ProbabilisticMultiLabelFMeasure 0.25,
|
||||
MultiLabelLikelihood,
|
||||
MAP,
|
||||
BLEU,
|
||||
@ -88,6 +91,7 @@ isEvaluationSchemeDescribed _ = False
|
||||
isMetricDescribed :: Metric -> Bool
|
||||
isMetricDescribed (SoftFMeasure _) = True
|
||||
isMetricDescribed (Soft2DFMeasure _) = True
|
||||
isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
|
||||
isMetricDescribed _ = False
|
||||
|
||||
getEvaluationSchemeDescription :: EvaluationScheme -> String
|
||||
@ -106,6 +110,15 @@ if a label `foo` is expected for the rectangle (0, 0)-(100, 200) and this label
|
||||
the span (50, 100)-(150, 150), it is treated as recall=1/8 and precision=1/2. For each item (line) F-score
|
||||
is evaluated separately and finally averaged.
|
||||
|]
|
||||
getMetricDescription (ProbabilisticMultiLabelFMeasure _) =
|
||||
[i|F-measure generalised so that labels could be annotated with probabilities and the quality
|
||||
of probabilities is assessed as well. It is calculated as the harmonic mean of calibration and recall
|
||||
where calibration measures the quality of probabilities (how well they are calibrated, e.g.
|
||||
if we have 10 items with probability 0.5 and 5 of them are correct, then the calibration
|
||||
is perfect).
|
||||
|]
|
||||
|
||||
|
||||
|
||||
outContents :: Metric -> String
|
||||
outContents (SoftFMeasure _) = [hereLit|inwords:1-4
|
||||
@ -114,6 +127,10 @@ inwords:1-3 indigits:5
|
||||
outContents (Soft2DFMeasure _) = [hereLit|foo:3/250,130,340,217
|
||||
bar:1/0,0,100,200 foo:1/40,50,1000,1000 bar:1/400,600,1000,1000
|
||||
|]
|
||||
outContents (ProbabilisticMultiLabelFMeasure _) = [hereLit|first-name/1:0.8 surname/3:1.0
|
||||
surname/1:0.4
|
||||
first-name/3:0.9
|
||||
|]
|
||||
|
||||
expectedScore :: EvaluationScheme -> MetricValue
|
||||
expectedScore (EvaluationScheme (SoftFMeasure beta) [])
|
||||
@ -124,6 +141,10 @@ expectedScore (EvaluationScheme (Soft2DFMeasure beta) [])
|
||||
= let precision = 0.211622914314256
|
||||
recall = 0.2749908502976
|
||||
in (weightedHarmonicMean beta precision recall) / 2.0
|
||||
expectedScore (EvaluationScheme (ProbabilisticMultiLabelFMeasure beta) [])
|
||||
= let precision = 0.6569596940847289
|
||||
recall = 0.675
|
||||
in weightedHarmonicMean beta precision recall
|
||||
|
||||
listOfAvailableEvaluationSchemes :: [EvaluationScheme]
|
||||
listOfAvailableEvaluationSchemes = map (\m -> EvaluationScheme m []) listOfAvailableMetrics
|
||||
@ -163,6 +184,10 @@ formatDescription (Soft2DFMeasure _) = [hereLit|Each line is a sequence of entit
|
||||
the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page number (starting from 1) and
|
||||
(X0, Y0) and (X1, Y1) are clipping corners.
|
||||
|]
|
||||
formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line a number of labels (entities) can be given. A label probability
|
||||
can be provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
|
||||
|]
|
||||
|
||||
|
||||
scoreExplanation :: EvaluationScheme -> Maybe String
|
||||
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
|
||||
@ -173,6 +198,7 @@ scoreExplanation (EvaluationScheme (Soft2DFMeasure _) [])
|
||||
As far as the second item is concerned, the total area covered by the output is 50*150+600*400=247500.
|
||||
Hence, recall is 247500/902500=0.274 and precision - 247500/(20000+912000+240000)=0.211. Therefore, the F-score
|
||||
for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
|
||||
scoreExplanation (EvaluationScheme (ProbabilisticMultiLabelFMeasure _) []) = Nothing
|
||||
|
||||
pasteLines :: String -> String -> String
|
||||
pasteLines a b = printf "%-35s %s\n" a b
|
||||
|
Loading…
Reference in New Issue
Block a user