From 2e816c4e384d5e87bf6df68c81809815053df8af Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Tue, 23 Oct 2018 16:26:05 +0200 Subject: [PATCH] Add TokenAccuracy metric --- src/GEval/Core.hs | 27 ++++++++++++++++++- src/GEval/CreateChallenge.hs | 24 +++++++++++++++++ src/GEval/OptionsParser.hs | 2 +- test/Spec.hs | 3 +++ .../test-A/out.tsv | 3 +++ .../token-accuracy-simple/config.txt | 1 + .../token-accuracy-simple/test-A/expected.tsv | 3 +++ 7 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 test/token-accuracy-simple/token-accuracy-simple-solution/test-A/out.tsv create mode 100644 test/token-accuracy-simple/token-accuracy-simple/config.txt create mode 100644 test/token-accuracy-simple/token-accuracy-simple/test-A/expected.tsv diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index da3a12d..e055207 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -108,7 +108,7 @@ defaultLogLossHashedSize = 10 data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU | FMeasure Double | MacroFMeasure Double | NMI | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood - | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double + | BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double | MultiLabelLogLoss | MultiLabelLikelihood | SoftFMeasure Double deriving (Eq) @@ -145,6 +145,7 @@ instance Show Metric where show Likelihood = "Likelihood" show BIOF1 = "BIO-F1" show BIOF1Labels = "BIO-F1-Labels" + show TokenAccuracy = "TokenAccuracy" show MAE = "MAE" show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta) show MultiLabelLogLoss = "MultiLabel-Logloss" @@ -185,6 +186,7 @@ instance Read Metric where readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] + readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)] readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)] readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)] readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)] @@ -216,6 +218,7 @@ getMetricOrdering LogLoss = TheLowerTheBetter getMetricOrdering Likelihood = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter getMetricOrdering BIOF1Labels = TheHigherTheBetter +getMetricOrdering TokenAccuracy = TheHigherTheBetter getMetricOrdering MAE = TheLowerTheBetter getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter @@ -293,6 +296,7 @@ data GEvalException = NoExpectedFile FilePath | EmptyOutput | UnexpectedData Word32 String | UnexpectedMultipleOutputs + | OtherException String deriving (Eq) instance Exception GEvalException @@ -313,6 +317,7 @@ instance Show GEvalException where show EmptyOutput = "The output file is empty" show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]" show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file" + show (OtherException message) = message somethingWrongWithFilesMessage :: String -> FilePath -> String somethingWrongWithFilesMessage msg filePath = Prelude.concat @@ -682,6 +687,26 @@ gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWit entities <- parseBioSequenceIntoEntities s return $ Prelude.map eraseNormalisation entities +gevalCore' TokenAccuracy _ = gevalCoreWithoutInput intoTokens + intoTokens + countHitsAndTotals + hitsAndTotalsAgg + (\(hits, total) -> hits /. total) + where intoTokens = Right . Data.Text.words + countHitsAndTotals :: ([Text], [Text]) -> (Int, Int) + countHitsAndTotals (es, os) = + if Prelude.length os /= Prelude.length es + then throw $ OtherException "wrong number of tokens" + else Prelude.foldl matchFun + (0, 0) + (Prelude.zip es os) + matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int) + matchFun (h, t) (e, o) + | e == (pack "*") = (h, t) + | o == e = (h + 1, t + 1) + | otherwise = (h, t + 1) + hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0) + gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords getWords (getCounts (==)) diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 557430e..6a6fb2d 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -256,6 +256,15 @@ The output should be given in the BIO format with the normalized forms given aft The metric is F1 counted on entities (not labels). |] ++ (commonReadmeMDContents testName) +readmeMDContents TokenAccuracy testName = [i| +Get part of speech tags for each token +====================================== + +This is a sample challenge for TokenAccuracy. We just +count the accuracy per token and skip entries marked as "*" +in the expected file. +|] ++ (commonReadmeMDContents testName) + readmeMDContents (MultiLabelFMeasure beta) testName = [i| Tag names and their component ============================= @@ -400,6 +409,9 @@ trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surnam O O O O O There is no name here B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman |] +trainContents TokenAccuracy = [hereLit|* V N I like cats +* * V * N I can see the rainbow +|] trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5 Steven bloody Brown person:1,3 first-name:1 surname:3 James and James first-name:1 firstname:3 @@ -458,6 +470,9 @@ devInContents BIOF1Labels = devInContents BIOF1 devInContents BIOF1 = [hereLit|Adam and Eve Mr Jan Kowalski |] +devInContents TokenAccuracy = [hereLit|The cats on the mat +Ala has a cat +|] devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here I see him Barbara @@ -513,6 +528,9 @@ devExpectedContents BIOF1Labels = devExpectedContents BIOF1 devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE O B-firstname/JAN B-surname/KOWALSKI |] +devExpectedContents TokenAccuracy = [hereLit|* N * * N +N V * N +|] devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2 first-name:1 @@ -570,6 +588,9 @@ testInContents BIOF1Labels = testInContents BIOF1 testInContents BIOF1 = [hereLit|Alan Tring No name here |] +testInContents TokenAccuracy = [hereLit|I have cats +I know +|] testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith Nobody is there I saw Marketa @@ -624,6 +645,9 @@ testExpectedContents BIOF1Labels = testExpectedContents BIOF1 testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING O O O |] +testExpectedContents TokenAccuracy = [hereLit|* V N +* V +|] testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3 first-name:3 diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 8828b7d..4d4438b 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -169,7 +169,7 @@ metricReader = many $ option auto -- actually `some` should be used inst ( long "metric" -- --metric might be in the config.txt file... <> short 'm' <> metavar "METRIC" - <> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" ) + <> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MultiLabel-Likelihood, MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, TokenAccuracy, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" ) altMetricReader :: Parser (Maybe Metric) altMetricReader = optional $ option auto diff --git a/test/Spec.hs b/test/Spec.hs index af9a246..de53f85 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -118,6 +118,9 @@ main = hspec $ do runGEvalTest "macro-f1-simple" `shouldReturnAlmost` 0.266666 it "perfect soltion" $ runGEvalTest "macro-f-measure-perfect" `shouldReturnAlmost` 1.00000 + describe "TokenAccuracy" $ do + it "simple example" $ do + runGEvalTest "token-accuracy-simple" `shouldReturnAlmost` 0.5 describe "precision count" $ do it "simple test" $ do precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2 diff --git a/test/token-accuracy-simple/token-accuracy-simple-solution/test-A/out.tsv b/test/token-accuracy-simple/token-accuracy-simple-solution/test-A/out.tsv new file mode 100644 index 0000000..dd3da94 --- /dev/null +++ b/test/token-accuracy-simple/token-accuracy-simple-solution/test-A/out.tsv @@ -0,0 +1,3 @@ +foo xyz * baz +baz +bar foo baz diff --git a/test/token-accuracy-simple/token-accuracy-simple/config.txt b/test/token-accuracy-simple/token-accuracy-simple/config.txt new file mode 100644 index 0000000..d87ea51 --- /dev/null +++ b/test/token-accuracy-simple/token-accuracy-simple/config.txt @@ -0,0 +1 @@ +--metric TokenAccuracy diff --git a/test/token-accuracy-simple/token-accuracy-simple/test-A/expected.tsv b/test/token-accuracy-simple/token-accuracy-simple/test-A/expected.tsv new file mode 100644 index 0000000..525332f --- /dev/null +++ b/test/token-accuracy-simple/token-accuracy-simple/test-A/expected.tsv @@ -0,0 +1,3 @@ +foo * * bar +baz +foo bar baz