From 2e816c4e384d5e87bf6df68c81809815053df8af Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@ceti.pl>
Date: Tue, 23 Oct 2018 16:26:05 +0200
Subject: [PATCH] Add TokenAccuracy metric

---
 src/GEval/Core.hs                             | 27 ++++++++++++++++++-
 src/GEval/CreateChallenge.hs                  | 24 +++++++++++++++++
 src/GEval/OptionsParser.hs                    |  2 +-
 test/Spec.hs                                  |  3 +++
 .../test-A/out.tsv                            |  3 +++
 .../token-accuracy-simple/config.txt          |  1 +
 .../token-accuracy-simple/test-A/expected.tsv |  3 +++
 7 files changed, 61 insertions(+), 2 deletions(-)
 create mode 100644 test/token-accuracy-simple/token-accuracy-simple-solution/test-A/out.tsv
 create mode 100644 test/token-accuracy-simple/token-accuracy-simple/config.txt
 create mode 100644 test/token-accuracy-simple/token-accuracy-simple/test-A/expected.tsv

diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index da3a12d..e055207 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -108,7 +108,7 @@ defaultLogLossHashedSize = 10
 data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
               | FMeasure Double | MacroFMeasure Double | NMI
               | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
-              | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
+              | BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
               | MultiLabelLogLoss | MultiLabelLikelihood
               | SoftFMeasure Double
               deriving (Eq)
@@ -145,6 +145,7 @@ instance Show Metric where
   show Likelihood = "Likelihood"
   show BIOF1 = "BIO-F1"
   show BIOF1Labels = "BIO-F1-Labels"
+  show TokenAccuracy = "TokenAccuracy"
   show MAE = "MAE"
   show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
   show MultiLabelLogLoss = "MultiLabel-Logloss"
@@ -185,6 +186,7 @@ instance Read Metric where
   readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
   readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
   readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
+  readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)]
   readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
   readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
   readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)]
@@ -216,6 +218,7 @@ getMetricOrdering LogLoss = TheLowerTheBetter
 getMetricOrdering Likelihood = TheHigherTheBetter
 getMetricOrdering BIOF1 = TheHigherTheBetter
 getMetricOrdering BIOF1Labels = TheHigherTheBetter
+getMetricOrdering TokenAccuracy = TheHigherTheBetter
 getMetricOrdering MAE = TheLowerTheBetter
 getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
 getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
@@ -293,6 +296,7 @@ data GEvalException = NoExpectedFile FilePath
                       | EmptyOutput
                       | UnexpectedData Word32 String
                       | UnexpectedMultipleOutputs
+                      | OtherException String
                       deriving (Eq)
 
 instance Exception GEvalException
@@ -313,6 +317,7 @@ instance Show GEvalException where
   show EmptyOutput = "The output file is empty"
   show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]"
   show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file"
+  show (OtherException message) = message
 
 somethingWrongWithFilesMessage :: String -> FilePath -> String
 somethingWrongWithFilesMessage msg filePath = Prelude.concat
@@ -682,6 +687,26 @@ gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWit
            entities <- parseBioSequenceIntoEntities s
            return $ Prelude.map eraseNormalisation entities
 
+gevalCore' TokenAccuracy _ =
+    gevalCoreWithoutInput tokenize tokenize scoreLine sumScores toAccuracy
+  where
+    -- Expected and output lines are both split into whitespace-separated tokens.
+    tokenize = Right . Data.Text.words
+    toAccuracy (hitCount, totalCount) = hitCount /. totalCount
+    sumScores = CC.foldl (\(hA, tA) (hB, tB) -> (hA + hB, tA + tB)) (0, 0)
+    -- (hits, scored tokens) for one line; the token counts must agree.
+    scoreLine :: ([Text], [Text]) -> (Int, Int)
+    scoreLine (expected, got)
+      | Prelude.length got /= Prelude.length expected =
+          throw $ OtherException "wrong number of tokens"
+      | otherwise = Prelude.foldl tally (0, 0) (Prelude.zip expected got)
+    -- Expected "*" is skipped entirely; other tokens count, hit or miss.
+    tally :: (Int, Int) -> (Text, Text) -> (Int, Int)
+    tally acc@(hits, total) (want, have)
+      | want == pack "*" = acc
+      | have == want = (hits + 1, total + 1)
+      | otherwise = (hits, total + 1)
+
 gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
                                                                getWords
                                                                (getCounts (==))
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 557430e..6a6fb2d 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -256,6 +256,15 @@ The output should be given in the BIO format with the normalized forms given aft
 The metric is F1 counted on entities (not labels).
 |] ++ (commonReadmeMDContents testName)
 
+readmeMDContents TokenAccuracy testName = [i|
+Get part of speech tags for each token
+======================================
+
+This is a sample challenge for TokenAccuracy. We just
+count the accuracy per token and skip entries marked as "*"
+in the expected file.
+|] ++ (commonReadmeMDContents testName)
+
 readmeMDContents (MultiLabelFMeasure beta) testName = [i|
 Tag names and their component
 =============================
@@ -400,6 +409,9 @@ trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surnam
 O O O O O	There is no name here
 B-firstname/JOHN I-surname/VON I-surname/NEUMANN	John von Nueman
 |]
+trainContents TokenAccuracy = [hereLit|* V N	I like cats
+* * V * N	I can see the rainbow
+|]
 trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith	person:3,4,5 first-name:4 surname:5
 Steven bloody Brown	person:1,3 first-name:1 surname:3
 James and James	first-name:1 firstname:3
@@ -458,6 +470,9 @@ devInContents BIOF1Labels = devInContents BIOF1
 devInContents BIOF1 = [hereLit|Adam and Eve
 Mr Jan Kowalski
 |]
+devInContents TokenAccuracy = [hereLit|The cats on the mat
+Ala has a cat
+|]
 devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
 I see him
 Barbara
@@ -513,6 +528,9 @@ devExpectedContents BIOF1Labels = devExpectedContents BIOF1
 devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
 O B-firstname/JAN B-surname/KOWALSKI
 |]
+devExpectedContents TokenAccuracy = [hereLit|* N * * N
+N V * N
+|]
 devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2
 
 first-name:1
@@ -570,6 +588,9 @@ testInContents BIOF1Labels = testInContents BIOF1
 testInContents BIOF1 = [hereLit|Alan Tring
 No name here
 |]
+testInContents TokenAccuracy = [hereLit|I have cats
+I know
+|]
 testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
 Nobody is there
 I saw Marketa
@@ -624,6 +645,9 @@ testExpectedContents BIOF1Labels = testExpectedContents BIOF1
 testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
 O O O
 |]
+testExpectedContents TokenAccuracy = [hereLit|* V N
+* V
+|]
 testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3
 
 first-name:3
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index 8828b7d..4d4438b 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -169,7 +169,7 @@ metricReader = many $ option auto         -- actually `some` should be used inst
                ( long "metric"            -- --metric might be in the config.txt file...
                  <> short 'm'
                  <> metavar "METRIC"
-                 <> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" )
+                 <> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MultiLabel-Likelihood, MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, TokenAccuracy, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" )
 
 altMetricReader :: Parser (Maybe Metric)
 altMetricReader = optional $ option auto
diff --git a/test/Spec.hs b/test/Spec.hs
index af9a246..de53f85 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -118,6 +118,9 @@ main = hspec $ do
       runGEvalTest "macro-f1-simple" `shouldReturnAlmost` 0.266666
     it "perfect soltion" $
       runGEvalTest "macro-f-measure-perfect" `shouldReturnAlmost` 1.00000
+  describe "TokenAccuracy" $ do
+    it "simple example" $ do
+       runGEvalTest "token-accuracy-simple" `shouldReturnAlmost` 0.5
   describe "precision count" $ do
     it "simple test" $ do
       precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
diff --git a/test/token-accuracy-simple/token-accuracy-simple-solution/test-A/out.tsv b/test/token-accuracy-simple/token-accuracy-simple-solution/test-A/out.tsv
new file mode 100644
index 0000000..dd3da94
--- /dev/null
+++ b/test/token-accuracy-simple/token-accuracy-simple-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+foo xyz * baz
+baz
+bar foo baz
diff --git a/test/token-accuracy-simple/token-accuracy-simple/config.txt b/test/token-accuracy-simple/token-accuracy-simple/config.txt
new file mode 100644
index 0000000..d87ea51
--- /dev/null
+++ b/test/token-accuracy-simple/token-accuracy-simple/config.txt
@@ -0,0 +1 @@
+--metric TokenAccuracy
diff --git a/test/token-accuracy-simple/token-accuracy-simple/test-A/expected.tsv b/test/token-accuracy-simple/token-accuracy-simple/test-A/expected.tsv
new file mode 100644
index 0000000..525332f
--- /dev/null
+++ b/test/token-accuracy-simple/token-accuracy-simple/test-A/expected.tsv
@@ -0,0 +1,3 @@
+foo * * bar
+baz
+foo bar baz