Add SegmentAccuracy metric

This commit is contained in:
Filip Gralinski 2019-11-17 21:59:20 +01:00
parent fb74f568bb
commit 03aacdef98
9 changed files with 119 additions and 9 deletions

View File

@ -4,11 +4,12 @@
module GEval.Annotation module GEval.Annotation
(parseAnnotations, Annotation(..), (parseAnnotations, Annotation(..),
parseObtainedAnnotations, ObtainedAnnotation(..), parseObtainedAnnotations, ObtainedAnnotation(..),
matchScore, intSetParser) matchScore, intSetParser, segmentAccuracy, parseSegmentAnnotations)
where where
import qualified Data.IntSet as IS import qualified Data.IntSet as IS
import qualified Data.Text as T import qualified Data.Text as T
import Data.Set (intersection, fromList)
import Data.Attoparsec.Text import Data.Attoparsec.Text
import Data.Attoparsec.Combinator import Data.Attoparsec.Combinator
@ -17,11 +18,12 @@ import GEval.Common (sepByWhitespaces, (/.))
import GEval.Probability import GEval.Probability
import Data.Char import Data.Char
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Either (partitionEithers)
import GEval.PrecisionRecall(weightedMaxMatching) import GEval.PrecisionRecall(weightedMaxMatching)
data Annotation = Annotation T.Text IS.IntSet data Annotation = Annotation T.Text IS.IntSet
deriving (Eq, Show) deriving (Eq, Show, Ord)
data ObtainedAnnotation = ObtainedAnnotation Annotation Double data ObtainedAnnotation = ObtainedAnnotation Annotation Double
deriving (Eq, Show) deriving (Eq, Show)
@ -52,6 +54,36 @@ obtainedAnnotationParser = do
parseAnnotations :: T.Text -> Either String [Annotation] parseAnnotations :: T.Text -> Either String [Annotation]
parseAnnotations t = parseOnly (annotationsParser <* endOfInput) t parseAnnotations t = parseOnly (annotationsParser <* endOfInput) t
-- | Parse a whitespace-separated list of @LABEL:POSITIONS@ segments (labels
-- may themselves contain colons) and reject the whole line when two
-- segments share a position.
--
-- Returns the parse error message on malformed input, or
-- @Left "Overlapping segments"@ when the segments are not pairwise disjoint.
parseSegmentAnnotations :: T.Text -> Either String [Annotation]
parseSegmentAnnotations t = parseAnnotationsWithColons t >>= requireDisjoint
  where
    requireDisjoint annotations
      | areSegmentsDisjoint annotations = Right annotations
      | otherwise = Left "Overlapping segments"
-- | True when no position is claimed by more than one annotation.
-- Labels are ignored; only the position sets are compared.
areSegmentsDisjoint :: [Annotation] -> Bool
areSegmentsDisjoint annotations =
  areIntSetsDisjoint [positions | Annotation _ positions <- annotations]
-- | True when the given sets are pairwise disjoint, i.e. no integer occurs
-- in more than one of them. An empty list is trivially disjoint.
areIntSetsDisjoint :: [IS.IntSet] -> Bool
areIntSetsDisjoint = maybe False (const True) . foldr absorb (Just IS.empty)
  where
    -- The accumulator is the union of the sets folded so far, or Nothing
    -- as soon as an overlap has been detected.
    absorb _ Nothing = Nothing
    absorb s (Just seen)
      | s `IS.disjoint` seen = Just (s `IS.union` seen)
      | otherwise = Nothing
-- Labels may contain colons and attoparsec does not seem to back-track
-- properly in that case, so the line is split into words by hand and
-- every word is parsed on its own.
-- The message of the first word that fails to parse is returned, if any.
parseAnnotationsWithColons :: T.Text -> Either String [Annotation]
parseAnnotationsWithColons = mapM parseAnnotationWithColons . T.words
-- | Parse a single @LABEL:POSITIONS@ word. The text is split at the LAST
-- colon, so the label itself may contain colons.
--
-- Fails with @"Colon expected"@ when the word has no colon at all, or with
-- the parser's message when the part after the last colon is not a valid
-- position set.
parseAnnotationWithColons :: T.Text -> Either String Annotation
parseAnnotationWithColons t
  | T.null labelWithColon = Left "Colon expected"
  | otherwise =
      -- labelWithColon still ends with the separating colon; drop it.
      Annotation (T.init labelWithColon)
        <$> parseOnly (intSetParser <* endOfInput) positions
  where
    (labelWithColon, positions) = T.breakOnEnd ":" t
annotationsParser :: Parser [Annotation] annotationsParser :: Parser [Annotation]
annotationsParser = sepByWhitespaces annotationParser annotationsParser = sepByWhitespaces annotationParser
@ -70,3 +102,7 @@ intervalParser = do
startIx <- decimal startIx <- decimal
endIx <- (string "-" *> decimal <|> pure startIx) endIx <- (string "-" *> decimal <|> pure startIx)
pure $ IS.fromList [startIx..endIx] pure $ IS.fromList [startIx..endIx]
-- | Fraction of the expected segments that occur verbatim (same label and
-- same positions) in the output.
--
-- An item with no expected segments is scored 1.0, since nothing could have
-- been missed. The plain quotient would be 0/0 = NaN there, and a NaN would
-- propagate into any aggregate computed over the per-item scores.
segmentAccuracy :: [Annotation] -> [Annotation] -> Double
segmentAccuracy expected output
  | null expected = 1.0
  | otherwise = fromIntegral (length matched) / fromIntegral (length expected)
  where
    -- Set intersection: a segment counts only on an exact match.
    matched = fromList expected `intersection` fromList output

View File

@ -706,6 +706,13 @@ gevalCoreOnSources TokenAccuracy _ = gevalCoreWithoutInput intoTokens
| otherwise = (h, t + 1) | otherwise = (h, t + 1)
hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0) hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0)
-- SegmentAccuracy: the same parser (parseSegmentAnnotations) is passed for
-- both parser arguments, each parsed pair is scored with segmentAccuracy and
-- the per-item scores are combined with averageC (presumably a plain mean --
-- confirm against its definition). No final transformation (id), no graph.
gevalCoreOnSources SegmentAccuracy _ = gevalCoreWithoutInput parseSegmentAnnotations
parseSegmentAnnotations
(uncurry segmentAccuracy)
averageC
id
noGraph
gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
(Right . parseIntoProbList) (Right . parseIntoProbList)
(uncurry countLogLossOnProbList) (uncurry countLogLossOnProbList)

View File

@ -297,6 +297,19 @@ in the expected file (but not in the output file).
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
-- README text for the sample SegmentAccuracy challenge; the common README
-- part for the given test name is appended.
readmeMDContents SegmentAccuracy testName = [i|
Segment a sentence and tag with POS tags
========================================
This is a sample, toy challenge for SegmentAccuracy.
For each sentence, give a sequence of POS tags, each one with
its position (1-indexed). For instance, `N:1-10` means a noun
starting from the beginning (the first character) up to the tenth
character (inclusively).
|] ++ (commonReadmeMDContents testName)
readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
readmeMDContents (MultiLabelFMeasure beta) testName = [i| readmeMDContents (MultiLabelFMeasure beta) testName = [i|
Tag names and their component Tag names and their component
@ -473,6 +486,9 @@ B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
trainContents TokenAccuracy = [hereLit|* V N I like cats trainContents TokenAccuracy = [hereLit|* V N I like cats
* * V * N I can see the rainbow * * V * N I can see the rainbow
|] |]
-- Sample training data for the SegmentAccuracy toy challenge: each line
-- holds the expected LABEL:SPAN segments followed by the sentence itself.
trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19 The student's smart
N:1-6 N:8-10 V:12-13 A:15-18 Mary's dog is nice
|]
trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta) trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5 trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5
Steven bloody Brown person/1,3 first-name/1 surname/3 Steven bloody Brown person/1,3 first-name/1 surname/3
@ -540,6 +556,9 @@ Mr Jan Kowalski
devInContents TokenAccuracy = [hereLit|The cats on the mat devInContents TokenAccuracy = [hereLit|The cats on the mat
Ala has a cat Ala has a cat
|] |]
-- Sample dev-set input for the SegmentAccuracy toy challenge: one sentence
-- per line.
devInContents SegmentAccuracy = [hereLit|John is smart
Mary's intelligent
|]
devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta) devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
I see him I see him
@ -604,6 +623,9 @@ O B-firstname/JAN B-surname/KOWALSKI
devExpectedContents TokenAccuracy = [hereLit|* N * * N devExpectedContents TokenAccuracy = [hereLit|* N * * N
N V * N N V * N
|] |]
-- Expected segments for the sample dev-set sentences "John is smart" and
-- "Mary's intelligent" (1-indexed, inclusive character spans). In the second
-- sentence "'s" occupies characters 5-6 and "intelligent" characters 8-18.
devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13
N:1-4 V:5-6 A:8-18
|]
devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta) devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2 devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
@ -673,6 +695,9 @@ No name here
testInContents TokenAccuracy = [hereLit|I have cats testInContents TokenAccuracy = [hereLit|I have cats
I know I know
|] |]
-- Sample test-set input for the SegmentAccuracy toy challenge: one sentence
-- per line.
testInContents SegmentAccuracy = [hereLit|Mary's cat is old
John is young
|]
testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta) testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
Nobody is there Nobody is there
@ -738,6 +763,9 @@ O O O
testExpectedContents TokenAccuracy = [hereLit|* V N testExpectedContents TokenAccuracy = [hereLit|* V N
* V * V
|] |]
-- Expected LABEL:SPAN segments for the sample test-set sentences, one line
-- per sentence.
testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17
N:1-4 V:6-7 A:9-13
|]
testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta) testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3 testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3

View File

@ -26,7 +26,7 @@ import Data.Attoparsec.Text (parseOnly)
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
| FMeasure Double | MacroFMeasure Double | NMI | FMeasure Double | MacroFMeasure Double | NMI
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
| BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double | BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
| MultiLabelLogLoss | MultiLabelLikelihood | MultiLabelLogLoss | MultiLabelLikelihood
| SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
deriving (Eq) deriving (Eq)
@ -67,6 +67,7 @@ instance Show Metric where
show BIOF1 = "BIO-F1" show BIOF1 = "BIO-F1"
show BIOF1Labels = "BIO-F1-Labels" show BIOF1Labels = "BIO-F1-Labels"
show TokenAccuracy = "TokenAccuracy" show TokenAccuracy = "TokenAccuracy"
show SegmentAccuracy = "SegmentAccuracy"
show MAE = "MAE" show MAE = "MAE"
show SMAPE = "SMAPE" show SMAPE = "SMAPE"
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta) show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
@ -118,6 +119,7 @@ instance Read Metric where
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)] readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)]
readsPrec _ ('S':'e':'g':'m':'e':'n':'t':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(SegmentAccuracy, theRest)]
readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)] readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
readsPrec _ ('S':'M':'A':'P':'E':theRest) = [(SMAPE, theRest)] readsPrec _ ('S':'M':'A':'P':'E':theRest) = [(SMAPE, theRest)]
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)] readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
@ -154,6 +156,7 @@ getMetricOrdering Likelihood = TheHigherTheBetter
getMetricOrdering BIOF1 = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter
getMetricOrdering BIOF1Labels = TheHigherTheBetter getMetricOrdering BIOF1Labels = TheHigherTheBetter
getMetricOrdering TokenAccuracy = TheHigherTheBetter getMetricOrdering TokenAccuracy = TheHigherTheBetter
getMetricOrdering SegmentAccuracy = TheHigherTheBetter
getMetricOrdering MAE = TheLowerTheBetter getMetricOrdering MAE = TheLowerTheBetter
getMetricOrdering SMAPE = TheLowerTheBetter getMetricOrdering SMAPE = TheLowerTheBetter
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter

View File

@ -63,6 +63,7 @@ listOfAvailableMetrics = [RMSE,
BIOF1, BIOF1,
BIOF1Labels, BIOF1Labels,
TokenAccuracy, TokenAccuracy,
SegmentAccuracy,
SoftFMeasure 1.0, SoftFMeasure 1.0,
SoftFMeasure 2.0, SoftFMeasure 2.0,
SoftFMeasure 0.25, SoftFMeasure 0.25,
@ -94,6 +95,7 @@ isMetricDescribed (SoftFMeasure _) = True
isMetricDescribed (Soft2DFMeasure _) = True isMetricDescribed (Soft2DFMeasure _) = True
isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
isMetricDescribed GLEU = True isMetricDescribed GLEU = True
isMetricDescribed SegmentAccuracy = True
isMetricDescribed _ = False isMetricDescribed _ = False
getEvaluationSchemeDescription :: EvaluationScheme -> String getEvaluationSchemeDescription :: EvaluationScheme -> String
@ -134,7 +136,11 @@ metric on a corpus level but does not have its drawbacks for our per
sentence reward objective. sentence reward objective.
see: https://arxiv.org/pdf/1609.08144.pdf see: https://arxiv.org/pdf/1609.08144.pdf
|] |]
-- Textual description of the SegmentAccuracy metric.
getMetricDescription SegmentAccuracy =
[i|Accuracy counted for segments, i.e. labels with positions.
The percentage of labels in the ground truth retrieved in the actual output is returned.
Accuracy is calculated separately for each item and then averaged.
|]
outContents :: Metric -> String outContents :: Metric -> String
outContents (SoftFMeasure _) = [hereLit|inwords:1-4 outContents (SoftFMeasure _) = [hereLit|inwords:1-4
@ -149,6 +155,9 @@ first-name/3:0.9
|] |]
outContents GLEU = [hereLit|Alice has a black outContents GLEU = [hereLit|Alice has a black
|] |]
-- Sample output for SegmentAccuracy, one line of LABEL:SPAN segments per
-- item.
outContents SegmentAccuracy = [hereLit|N:1-4 V:5-6 N:8-10 V:12-13 A:15-17
N:1-4 V:6-7 A:9-13
|]
expectedScore :: EvaluationScheme -> MetricValue expectedScore :: EvaluationScheme -> MetricValue
expectedScore (EvaluationScheme (SoftFMeasure beta) []) expectedScore (EvaluationScheme (SoftFMeasure beta) [])
@ -165,6 +174,8 @@ expectedScore (EvaluationScheme (ProbabilisticMultiLabelFMeasure beta) [])
in weightedHarmonicMean beta precision recall in weightedHarmonicMean beta precision recall
expectedScore (EvaluationScheme GLEU []) expectedScore (EvaluationScheme GLEU [])
= 0.7142857142857143 = 0.7142857142857143
-- Score for the sample SegmentAccuracy data: (0.75 + 1.0) / 2.
expectedScore (EvaluationScheme SegmentAccuracy [])
= 0.875
helpMetricParameterMetricsList :: String helpMetricParameterMetricsList :: String
helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
@ -215,6 +226,13 @@ can be provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
|] |]
formatDescription GLEU = [hereLit|In each line a there is a space sparated sentence of words. formatDescription GLEU = [hereLit|In each line a there is a space sparated sentence of words.
|] |]
-- Description of the line format accepted for SegmentAccuracy.
formatDescription SegmentAccuracy = [hereLit|Labels can be any strings (without spaces), whereas the position part
(after the last colon) is a list of
1-based indexes or spans separated by commas (spans are inclusive
ranges, e.g. "10-14"). For instance, "foo:bar:2,4-7,10" is a
label "foo:bar" for positions 2, 4, 5, 6, 7 and 10. Note that no
overlapping segments can be returned (evaluation will fail in
such a case).
|]
scoreExplanation :: EvaluationScheme -> Maybe String scoreExplanation :: EvaluationScheme -> Maybe String
scoreExplanation (EvaluationScheme (SoftFMeasure _) []) scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
@ -234,6 +252,9 @@ Now we have to calculate precision and recall:
Precision is tp / (tp+fp) = 10/(10+0) = 1, Precision is tp / (tp+fp) = 10/(10+0) = 1,
recall is tp / (tp+fn) = 10 / (10+4) = 10/14 =~ 0.71428... recall is tp / (tp+fn) = 10 / (10+4) = 10/14 =~ 0.71428...
The GLEU score is min(precision,recall)=0.71428 |] The GLEU score is min(precision,recall)=0.71428 |]
-- Worked explanation of how the sample SegmentAccuracy score is obtained.
scoreExplanation (EvaluationScheme SegmentAccuracy [])
  = Just [hereLit|Out of 4 segments in the expected output for the first item, 3 were retrieved correctly (accuracy is 3/4=0.75).
The second item was retrieved perfectly (accuracy is 1.0). Hence, the average is (0.75+1.0)/2=0.875.|]
pasteLines :: String -> String -> String pasteLines :: String -> String -> String
pasteLines a b = printf "%-35s %s\n" a b pasteLines a b = printf "%-35s %s\n" a b

View File

@ -146,6 +146,9 @@ main = hspec $ do
describe "TokenAccuracy" $ do describe "TokenAccuracy" $ do
it "simple example" $ do it "simple example" $ do
runGEvalTest "token-accuracy-simple" `shouldReturnAlmost` 0.5 runGEvalTest "token-accuracy-simple" `shouldReturnAlmost` 0.5
describe "SegmentAccuracy" $ do
it "simple test" $ do
runGEvalTest "segment-accuracy-simple" `shouldReturnAlmost` 0.4444444
describe "precision count" $ do describe "precision count" $ do
it "simple test" $ do it "simple test" $ do
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2 precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
@ -342,6 +345,11 @@ main = hspec $ do
it "just parse" $ do it "just parse" $ do
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]), parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
Annotation "baz" (IS.fromList [4,5,6])] Annotation "baz" (IS.fromList [4,5,6])]
-- Only the last colon separates the label from the positions, so "foo:x"
-- is a valid label.
it "just parse with colons" $ do
  parseSegmentAnnotations "foo:x:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo:x" (IS.fromList [3,7,8,9,10]),
                                                                    Annotation "baz" (IS.fromList [4,5,6])]
-- Position 3 is claimed by both segments, so the line must be rejected.
it "reject overlapping segments" $ do
  parseSegmentAnnotations "foo:x:3,7-10 baz:2-6" `shouldBe` Left "Overlapping segments"
it "just parse 2" $ do it "just parse 2" $ do
parseAnnotations "inwords:1-3 indigits:5" `shouldBe` Right [Annotation "inwords" (IS.fromList [1,2,3]), parseAnnotations "inwords:1-3 indigits:5" `shouldBe` Right [Annotation "inwords" (IS.fromList [1,2,3]),
Annotation "indigits" (IS.fromList [5])] Annotation "indigits" (IS.fromList [5])]

View File

@ -0,0 +1,3 @@
foo:0 baq:1-2 baz:3
aaa:0-1
xyz:0 bbb:x:1
1 foo:0 baq:1-2 baz:3
2 aaa:0-1
3 xyz:0 bbb:x:1

View File

@ -0,0 +1 @@
--metric SegmentAccuracy

View File

@ -0,0 +1,3 @@
foo:0 bar:1-2 baz:3
aaa:0-2
xyz:0 bbb:x:1 ccc:x:2
1 foo:0 bar:1-2 baz:3
2 aaa:0-2
3 xyz:0 bbb:x:1 ccc:x:2