Add SegmentAccuracy metric
This commit is contained in:
parent
fb74f568bb
commit
03aacdef98
@ -4,11 +4,12 @@
|
|||||||
module GEval.Annotation
|
module GEval.Annotation
|
||||||
(parseAnnotations, Annotation(..),
|
(parseAnnotations, Annotation(..),
|
||||||
parseObtainedAnnotations, ObtainedAnnotation(..),
|
parseObtainedAnnotations, ObtainedAnnotation(..),
|
||||||
matchScore, intSetParser)
|
matchScore, intSetParser, segmentAccuracy, parseSegmentAnnotations)
|
||||||
where
|
where
|
||||||
|
|
||||||
import qualified Data.IntSet as IS
|
import qualified Data.IntSet as IS
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
import Data.Set (intersection, fromList)
|
||||||
|
|
||||||
import Data.Attoparsec.Text
|
import Data.Attoparsec.Text
|
||||||
import Data.Attoparsec.Combinator
|
import Data.Attoparsec.Combinator
|
||||||
@ -17,11 +18,12 @@ import GEval.Common (sepByWhitespaces, (/.))
|
|||||||
import GEval.Probability
|
import GEval.Probability
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe)
|
||||||
|
import Data.Either (partitionEithers)
|
||||||
|
|
||||||
import GEval.PrecisionRecall(weightedMaxMatching)
|
import GEval.PrecisionRecall(weightedMaxMatching)
|
||||||
|
|
||||||
data Annotation = Annotation T.Text IS.IntSet
|
data Annotation = Annotation T.Text IS.IntSet
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show, Ord)
|
||||||
|
|
||||||
data ObtainedAnnotation = ObtainedAnnotation Annotation Double
|
data ObtainedAnnotation = ObtainedAnnotation Annotation Double
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show)
|
||||||
@ -52,6 +54,36 @@ obtainedAnnotationParser = do
|
|||||||
parseAnnotations :: T.Text -> Either String [Annotation]
|
parseAnnotations :: T.Text -> Either String [Annotation]
|
||||||
parseAnnotations t = parseOnly (annotationsParser <* endOfInput) t
|
parseAnnotations t = parseOnly (annotationsParser <* endOfInput) t
|
||||||
|
|
||||||
|
parseSegmentAnnotations :: T.Text -> Either String [Annotation]
|
||||||
|
parseSegmentAnnotations t = case parseAnnotationsWithColons t of
|
||||||
|
Left m -> Left m
|
||||||
|
Right annotations -> if areSegmentsDisjoint annotations
|
||||||
|
then (Right annotations)
|
||||||
|
else (Left "Overlapping segments")
|
||||||
|
|
||||||
|
areSegmentsDisjoint :: [Annotation] -> Bool
|
||||||
|
areSegmentsDisjoint = areIntSetsDisjoint . map (\(Annotation _ s) -> s)
|
||||||
|
|
||||||
|
areIntSetsDisjoint :: [IS.IntSet] -> Bool
|
||||||
|
areIntSetsDisjoint ss = snd $ foldr step (IS.empty, True) ss
|
||||||
|
where step _ w@(_, False) = w
|
||||||
|
step s (u, True) = (s `IS.union` u, s `IS.disjoint` u)
|
||||||
|
|
||||||
|
-- unfortunately, attoparsec does not seem to back-track properly
|
||||||
|
-- so we need a special function if labels can contain colons
|
||||||
|
parseAnnotationsWithColons :: T.Text -> Either String [Annotation]
|
||||||
|
parseAnnotationsWithColons t = case partitionEithers (map parseAnnotationWithColons $ T.words t) of
|
||||||
|
([], annotations) -> Right annotations
|
||||||
|
((firstProblem:_), _) -> Left firstProblem
|
||||||
|
|
||||||
|
parseAnnotationWithColons :: T.Text -> Either String Annotation
|
||||||
|
parseAnnotationWithColons t = if T.null label
|
||||||
|
then Left "Colon expected"
|
||||||
|
else case parseOnly (intSetParser <* endOfInput) position of
|
||||||
|
Left m -> Left m
|
||||||
|
Right s -> Right (Annotation (T.init label) s)
|
||||||
|
where (label, position) = T.breakOnEnd ":" t
|
||||||
|
|
||||||
annotationsParser :: Parser [Annotation]
|
annotationsParser :: Parser [Annotation]
|
||||||
annotationsParser = sepByWhitespaces annotationParser
|
annotationsParser = sepByWhitespaces annotationParser
|
||||||
|
|
||||||
@ -70,3 +102,7 @@ intervalParser = do
|
|||||||
startIx <- decimal
|
startIx <- decimal
|
||||||
endIx <- (string "-" *> decimal <|> pure startIx)
|
endIx <- (string "-" *> decimal <|> pure startIx)
|
||||||
pure $ IS.fromList [startIx..endIx]
|
pure $ IS.fromList [startIx..endIx]
|
||||||
|
|
||||||
|
segmentAccuracy :: [Annotation] -> [Annotation] -> Double
|
||||||
|
segmentAccuracy expected output = (fromIntegral $ length matched) / (fromIntegral $ length expected)
|
||||||
|
where matched = (fromList expected) `intersection` (fromList output)
|
||||||
|
@ -706,6 +706,13 @@ gevalCoreOnSources TokenAccuracy _ = gevalCoreWithoutInput intoTokens
|
|||||||
| otherwise = (h, t + 1)
|
| otherwise = (h, t + 1)
|
||||||
hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0)
|
hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0)
|
||||||
|
|
||||||
|
gevalCoreOnSources SegmentAccuracy _ = gevalCoreWithoutInput parseSegmentAnnotations
|
||||||
|
parseSegmentAnnotations
|
||||||
|
(uncurry segmentAccuracy)
|
||||||
|
averageC
|
||||||
|
id
|
||||||
|
noGraph
|
||||||
|
|
||||||
gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
|
gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
|
||||||
(Right . parseIntoProbList)
|
(Right . parseIntoProbList)
|
||||||
(uncurry countLogLossOnProbList)
|
(uncurry countLogLossOnProbList)
|
||||||
|
@ -297,6 +297,19 @@ in the expected file (but not in the output file).
|
|||||||
|
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
readmeMDContents SegmentAccuracy testName = [i|
|
||||||
|
Segment a sentence and tag with POS tags
|
||||||
|
========================================
|
||||||
|
|
||||||
|
This is a sample, toy challenge for SegmentAccuracy.
|
||||||
|
|
||||||
|
For each sentence, give a sequence of POS tags, each one with
|
||||||
|
its position (1-indexed). For instance, `N:1-10` means a nouns
|
||||||
|
starting from the beginning (the first character) up to to the tenth
|
||||||
|
character (inclusively).
|
||||||
|
|
||||||
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
|
readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
|
||||||
readmeMDContents (MultiLabelFMeasure beta) testName = [i|
|
readmeMDContents (MultiLabelFMeasure beta) testName = [i|
|
||||||
Tag names and their component
|
Tag names and their component
|
||||||
@ -473,6 +486,9 @@ B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
|||||||
trainContents TokenAccuracy = [hereLit|* V N I like cats
|
trainContents TokenAccuracy = [hereLit|* V N I like cats
|
||||||
* * V * N I can see the rainbow
|
* * V * N I can see the rainbow
|
||||||
|]
|
|]
|
||||||
|
trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19 The student's smart
|
||||||
|
N:1-6 N:8-10 V:12-13 A:15-18 Mary's dog is nice
|
||||||
|
|]
|
||||||
trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
|
trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
|
||||||
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5
|
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5
|
||||||
Steven bloody Brown person/1,3 first-name/1 surname/3
|
Steven bloody Brown person/1,3 first-name/1 surname/3
|
||||||
@ -540,6 +556,9 @@ Mr Jan Kowalski
|
|||||||
devInContents TokenAccuracy = [hereLit|The cats on the mat
|
devInContents TokenAccuracy = [hereLit|The cats on the mat
|
||||||
Ala has a cat
|
Ala has a cat
|
||||||
|]
|
|]
|
||||||
|
devInContents SegmentAccuracy = [hereLit|John is smart
|
||||||
|
Mary's intelligent
|
||||||
|
|]
|
||||||
devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
|
devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
|
||||||
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
|
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
|
||||||
I see him
|
I see him
|
||||||
@ -604,6 +623,9 @@ O B-firstname/JAN B-surname/KOWALSKI
|
|||||||
devExpectedContents TokenAccuracy = [hereLit|* N * * N
|
devExpectedContents TokenAccuracy = [hereLit|* N * * N
|
||||||
N V * N
|
N V * N
|
||||||
|]
|
|]
|
||||||
|
devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13
|
||||||
|
N:1-4 V:6-7 A:9-19
|
||||||
|
|]
|
||||||
devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
|
devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
|
||||||
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
|
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
|
||||||
|
|
||||||
@ -673,6 +695,9 @@ No name here
|
|||||||
testInContents TokenAccuracy = [hereLit|I have cats
|
testInContents TokenAccuracy = [hereLit|I have cats
|
||||||
I know
|
I know
|
||||||
|]
|
|]
|
||||||
|
testInContents SegmentAccuracy = [hereLit|Mary's cat is old
|
||||||
|
John is young
|
||||||
|
|]
|
||||||
testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
|
testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
|
||||||
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
|
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
|
||||||
Nobody is there
|
Nobody is there
|
||||||
@ -738,6 +763,9 @@ O O O
|
|||||||
testExpectedContents TokenAccuracy = [hereLit|* V N
|
testExpectedContents TokenAccuracy = [hereLit|* V N
|
||||||
* V
|
* V
|
||||||
|]
|
|]
|
||||||
|
testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17
|
||||||
|
N:1-4 V:6-7 A:9-13
|
||||||
|
|]
|
||||||
testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
|
testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
|
||||||
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
|
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ import Data.Attoparsec.Text (parseOnly)
|
|||||||
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
|
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
|
||||||
| FMeasure Double | MacroFMeasure Double | NMI
|
| FMeasure Double | MacroFMeasure Double | NMI
|
||||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||||
| BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
|
| BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
|
||||||
| MultiLabelLogLoss | MultiLabelLikelihood
|
| MultiLabelLogLoss | MultiLabelLikelihood
|
||||||
| SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
|
| SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
@ -67,6 +67,7 @@ instance Show Metric where
|
|||||||
show BIOF1 = "BIO-F1"
|
show BIOF1 = "BIO-F1"
|
||||||
show BIOF1Labels = "BIO-F1-Labels"
|
show BIOF1Labels = "BIO-F1-Labels"
|
||||||
show TokenAccuracy = "TokenAccuracy"
|
show TokenAccuracy = "TokenAccuracy"
|
||||||
|
show SegmentAccuracy = "SegmentAccuracy"
|
||||||
show MAE = "MAE"
|
show MAE = "MAE"
|
||||||
show SMAPE = "SMAPE"
|
show SMAPE = "SMAPE"
|
||||||
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
|
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
|
||||||
@ -118,6 +119,7 @@ instance Read Metric where
|
|||||||
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
|
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
|
||||||
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
||||||
readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)]
|
readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)]
|
||||||
|
readsPrec _ ('S':'e':'g':'m':'e':'n':'t':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(SegmentAccuracy, theRest)]
|
||||||
readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
|
readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
|
||||||
readsPrec _ ('S':'M':'A':'P':'E':theRest) = [(SMAPE, theRest)]
|
readsPrec _ ('S':'M':'A':'P':'E':theRest) = [(SMAPE, theRest)]
|
||||||
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
|
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
|
||||||
@ -154,6 +156,7 @@ getMetricOrdering Likelihood = TheHigherTheBetter
|
|||||||
getMetricOrdering BIOF1 = TheHigherTheBetter
|
getMetricOrdering BIOF1 = TheHigherTheBetter
|
||||||
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
||||||
getMetricOrdering TokenAccuracy = TheHigherTheBetter
|
getMetricOrdering TokenAccuracy = TheHigherTheBetter
|
||||||
|
getMetricOrdering SegmentAccuracy = TheHigherTheBetter
|
||||||
getMetricOrdering MAE = TheLowerTheBetter
|
getMetricOrdering MAE = TheLowerTheBetter
|
||||||
getMetricOrdering SMAPE = TheLowerTheBetter
|
getMetricOrdering SMAPE = TheLowerTheBetter
|
||||||
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
|
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
|
||||||
|
@ -63,6 +63,7 @@ listOfAvailableMetrics = [RMSE,
|
|||||||
BIOF1,
|
BIOF1,
|
||||||
BIOF1Labels,
|
BIOF1Labels,
|
||||||
TokenAccuracy,
|
TokenAccuracy,
|
||||||
|
SegmentAccuracy,
|
||||||
SoftFMeasure 1.0,
|
SoftFMeasure 1.0,
|
||||||
SoftFMeasure 2.0,
|
SoftFMeasure 2.0,
|
||||||
SoftFMeasure 0.25,
|
SoftFMeasure 0.25,
|
||||||
@ -94,6 +95,7 @@ isMetricDescribed (SoftFMeasure _) = True
|
|||||||
isMetricDescribed (Soft2DFMeasure _) = True
|
isMetricDescribed (Soft2DFMeasure _) = True
|
||||||
isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
|
isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
|
||||||
isMetricDescribed GLEU = True
|
isMetricDescribed GLEU = True
|
||||||
|
isMetricDescribed SegmentAccuracy = True
|
||||||
isMetricDescribed _ = False
|
isMetricDescribed _ = False
|
||||||
|
|
||||||
getEvaluationSchemeDescription :: EvaluationScheme -> String
|
getEvaluationSchemeDescription :: EvaluationScheme -> String
|
||||||
@ -134,7 +136,11 @@ metric on a corpus level but does not have its drawbacks for our per
|
|||||||
sentence reward objective.
|
sentence reward objective.
|
||||||
see: https://arxiv.org/pdf/1609.08144.pdf
|
see: https://arxiv.org/pdf/1609.08144.pdf
|
||||||
|]
|
|]
|
||||||
|
getMetricDescription SegmentAccuracy =
|
||||||
|
[i|Accuracy counted for segments, i.e. labels with positions.
|
||||||
|
The percentage of labels in the ground truth retrieved in the actual output is returned.
|
||||||
|
Accuracy is calculated separately for each item and then averaged.
|
||||||
|
|]
|
||||||
|
|
||||||
outContents :: Metric -> String
|
outContents :: Metric -> String
|
||||||
outContents (SoftFMeasure _) = [hereLit|inwords:1-4
|
outContents (SoftFMeasure _) = [hereLit|inwords:1-4
|
||||||
@ -147,7 +153,10 @@ outContents (ProbabilisticMultiLabelFMeasure _) = [hereLit|first-name/1:0.8 surn
|
|||||||
surname/1:0.4
|
surname/1:0.4
|
||||||
first-name/3:0.9
|
first-name/3:0.9
|
||||||
|]
|
|]
|
||||||
outContents GLEU = [hereLit|Alice has a black
|
outContents GLEU = [hereLit|Alice has a black
|
||||||
|
|]
|
||||||
|
outContents SegmentAccuracy = [hereLit|N:1-4 V:5-6 N:8-10 V:12-13 A:15-17
|
||||||
|
N:1-4 V:6-7 A:9-13
|
||||||
|]
|
|]
|
||||||
|
|
||||||
expectedScore :: EvaluationScheme -> MetricValue
|
expectedScore :: EvaluationScheme -> MetricValue
|
||||||
@ -165,6 +174,8 @@ expectedScore (EvaluationScheme (ProbabilisticMultiLabelFMeasure beta) [])
|
|||||||
in weightedHarmonicMean beta precision recall
|
in weightedHarmonicMean beta precision recall
|
||||||
expectedScore (EvaluationScheme GLEU [])
|
expectedScore (EvaluationScheme GLEU [])
|
||||||
= 0.7142857142857143
|
= 0.7142857142857143
|
||||||
|
expectedScore (EvaluationScheme SegmentAccuracy [])
|
||||||
|
= 0.875
|
||||||
|
|
||||||
helpMetricParameterMetricsList :: String
|
helpMetricParameterMetricsList :: String
|
||||||
helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
|
helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
|
||||||
@ -213,7 +224,14 @@ the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page numbe
|
|||||||
formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line a number of labels (entities) can be given. A label probability
|
formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line a number of labels (entities) can be given. A label probability
|
||||||
can be provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
|
can be provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
|
||||||
|]
|
|]
|
||||||
formatDescription GLEU = [hereLit|In each line a there is a space sparated sentence of words.
|
formatDescription GLEU = [hereLit|In each line a there is a space sparated sentence of words.
|
||||||
|
|]
|
||||||
|
formatDescription SegmentAccuracy = [hereLit|Labels can be any strings (without spaces), whereas is a list of
|
||||||
|
1-based indexes or spans separated by commas (spans are inclusive
|
||||||
|
ranges, e.g. "10-14"). For instance, "foo:bar:2,4-7,10" is a
|
||||||
|
label "foo:bar" for positions 2, 4, 5, 6, 7 and 10. Note that no
|
||||||
|
overlapping segments can be returned (evaluation will fail in
|
||||||
|
such a case).
|
||||||
|]
|
|]
|
||||||
|
|
||||||
scoreExplanation :: EvaluationScheme -> Maybe String
|
scoreExplanation :: EvaluationScheme -> Maybe String
|
||||||
@ -227,13 +245,16 @@ Hence, recall is 247500/902500=0.274 and precision - 247500/(20000+912000+240000
|
|||||||
for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
|
for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
|
||||||
scoreExplanation (EvaluationScheme (ProbabilisticMultiLabelFMeasure _) []) = Nothing
|
scoreExplanation (EvaluationScheme (ProbabilisticMultiLabelFMeasure _) []) = Nothing
|
||||||
scoreExplanation (EvaluationScheme GLEU [])
|
scoreExplanation (EvaluationScheme GLEU [])
|
||||||
= Just [hereLit|To find out GLEU score we first count number of tp (true positives) fp(false positives) and fn(false negatives).
|
= Just [hereLit|To find out GLEU score we first count number of tp (true positives) fp(false positives) and fn(false negatives).
|
||||||
We have 4 matching unigrams ("Alice", "has", "a", "black") , 3 bigrams ("Alice has", "has a", "a black"), 2 trigrams ("Alice has a", "has a black") and 1 tetragram ("Alice has a black"),
|
We have 4 matching unigrams ("Alice", "has", "a", "black") , 3 bigrams ("Alice has", "has a", "a black"), 2 trigrams ("Alice has a", "has a black") and 1 tetragram ("Alice has a black"),
|
||||||
so tp=10. We have no fp, therefore fp=0. There are 4 fn - ("cat", "black cat", "a black cat", "has a black cat").
|
so tp=10. We have no fp, therefore fp=0. There are 4 fn - ("cat", "black cat", "a black cat", "has a black cat").
|
||||||
Now we have to calculate precision and recall:
|
Now we have to calculate precision and recall:
|
||||||
Precision is tp / (tp+fp) = 10/(10+0) = 1,
|
Precision is tp / (tp+fp) = 10/(10+0) = 1,
|
||||||
recall is tp / (tp+fn) = 10 / (10+4) = 10/14 =~ 0.71428...
|
recall is tp / (tp+fn) = 10 / (10+4) = 10/14 =~ 0.71428...
|
||||||
The GLEU score is min(precision,recall)=0.71428 |]
|
The GLEU score is min(precision,recall)=0.71428 |]
|
||||||
|
scoreExplanation (EvaluationScheme SegmentAccuracy [])
|
||||||
|
= Just [hereLit|Out of 4 segments in the expected output for the first item, 3 were retrieved correcly (accuracy is 3/4=0.75).
|
||||||
|
The second item was retrieved perfectly (accuracy is 1.0). Hence, the average is (0.75+1.0)/2=0.875.|]
|
||||||
|
|
||||||
pasteLines :: String -> String -> String
|
pasteLines :: String -> String -> String
|
||||||
pasteLines a b = printf "%-35s %s\n" a b
|
pasteLines a b = printf "%-35s %s\n" a b
|
||||||
|
@ -146,6 +146,9 @@ main = hspec $ do
|
|||||||
describe "TokenAccuracy" $ do
|
describe "TokenAccuracy" $ do
|
||||||
it "simple example" $ do
|
it "simple example" $ do
|
||||||
runGEvalTest "token-accuracy-simple" `shouldReturnAlmost` 0.5
|
runGEvalTest "token-accuracy-simple" `shouldReturnAlmost` 0.5
|
||||||
|
describe "SegmentAccuracy" $ do
|
||||||
|
it "simple test" $ do
|
||||||
|
runGEvalTest "segment-accuracy-simple" `shouldReturnAlmost` 0.4444444
|
||||||
describe "precision count" $ do
|
describe "precision count" $ do
|
||||||
it "simple test" $ do
|
it "simple test" $ do
|
||||||
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
|
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
|
||||||
@ -342,6 +345,11 @@ main = hspec $ do
|
|||||||
it "just parse" $ do
|
it "just parse" $ do
|
||||||
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
|
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
|
||||||
Annotation "baz" (IS.fromList [4,5,6])]
|
Annotation "baz" (IS.fromList [4,5,6])]
|
||||||
|
it "just parse wit colons" $ do
|
||||||
|
parseSegmentAnnotations "foo:x:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo:x" (IS.fromList [3,7,8,9,10]),
|
||||||
|
Annotation "baz" (IS.fromList [4,5,6])]
|
||||||
|
it "just parse wit colons" $ do
|
||||||
|
parseSegmentAnnotations "foo:x:3,7-10 baz:2-6" `shouldBe` Left "Overlapping segments"
|
||||||
it "just parse 2" $ do
|
it "just parse 2" $ do
|
||||||
parseAnnotations "inwords:1-3 indigits:5" `shouldBe` Right [Annotation "inwords" (IS.fromList [1,2,3]),
|
parseAnnotations "inwords:1-3 indigits:5" `shouldBe` Right [Annotation "inwords" (IS.fromList [1,2,3]),
|
||||||
Annotation "indigits" (IS.fromList [5])]
|
Annotation "indigits" (IS.fromList [5])]
|
||||||
|
@ -0,0 +1,3 @@
|
|||||||
|
foo:0 baq:1-2 baz:3
|
||||||
|
aaa:0-1
|
||||||
|
xyz:0 bbb:x:1
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric SegmentAccuracy
|
@ -0,0 +1,3 @@
|
|||||||
|
foo:0 bar:1-2 baz:3
|
||||||
|
aaa:0-2
|
||||||
|
xyz:0 bbb:x:1 ccc:x:2
|
|
Loading…
Reference in New Issue
Block a user