Add TokenAccuracy metric

This commit is contained in:
Filip Gralinski 2018-10-23 16:26:05 +02:00
parent 30c37c2b40
commit 2e816c4e38
7 changed files with 61 additions and 2 deletions

View File

@ -108,7 +108,7 @@ defaultLogLossHashedSize = 10
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
| FMeasure Double | MacroFMeasure Double | NMI | FMeasure Double | MacroFMeasure Double | NMI
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double | BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
| MultiLabelLogLoss | MultiLabelLikelihood | MultiLabelLogLoss | MultiLabelLikelihood
| SoftFMeasure Double | SoftFMeasure Double
deriving (Eq) deriving (Eq)
@ -145,6 +145,7 @@ instance Show Metric where
show Likelihood = "Likelihood" show Likelihood = "Likelihood"
show BIOF1 = "BIO-F1" show BIOF1 = "BIO-F1"
show BIOF1Labels = "BIO-F1-Labels" show BIOF1Labels = "BIO-F1-Labels"
show TokenAccuracy = "TokenAccuracy"
show MAE = "MAE" show MAE = "MAE"
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta) show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
show MultiLabelLogLoss = "MultiLabel-Logloss" show MultiLabelLogLoss = "MultiLabel-Logloss"
@ -185,6 +186,7 @@ instance Read Metric where
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)]
readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)] readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)] readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)] readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)]
@ -216,6 +218,7 @@ getMetricOrdering LogLoss = TheLowerTheBetter
getMetricOrdering Likelihood = TheHigherTheBetter getMetricOrdering Likelihood = TheHigherTheBetter
getMetricOrdering BIOF1 = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter
getMetricOrdering BIOF1Labels = TheHigherTheBetter getMetricOrdering BIOF1Labels = TheHigherTheBetter
getMetricOrdering TokenAccuracy = TheHigherTheBetter
getMetricOrdering MAE = TheLowerTheBetter getMetricOrdering MAE = TheLowerTheBetter
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
@ -293,6 +296,7 @@ data GEvalException = NoExpectedFile FilePath
| EmptyOutput | EmptyOutput
| UnexpectedData Word32 String | UnexpectedData Word32 String
| UnexpectedMultipleOutputs | UnexpectedMultipleOutputs
| OtherException String
deriving (Eq) deriving (Eq)
instance Exception GEvalException instance Exception GEvalException
@ -313,6 +317,7 @@ instance Show GEvalException where
show EmptyOutput = "The output file is empty" show EmptyOutput = "The output file is empty"
show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]" show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]"
show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file" show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file"
show (OtherException message) = message
somethingWrongWithFilesMessage :: String -> FilePath -> String somethingWrongWithFilesMessage :: String -> FilePath -> String
somethingWrongWithFilesMessage msg filePath = Prelude.concat somethingWrongWithFilesMessage msg filePath = Prelude.concat
@ -682,6 +687,26 @@ gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWit
entities <- parseBioSequenceIntoEntities s entities <- parseBioSequenceIntoEntities s
return $ Prelude.map eraseNormalisation entities return $ Prelude.map eraseNormalisation entities
-- TokenAccuracy: both the expected line and the output line are split into
-- whitespace-separated tokens and compared position by position. Positions
-- whose expected token is "*" are left out of the score entirely. The final
-- value is the number of matching tokens divided (with the project's `/.`)
-- by the number of scored tokens, summed over all lines.
gevalCore' TokenAccuracy _ = gevalCoreWithoutInput intoTokens
                                                   intoTokens
                                                   countHitsAndTotals
                                                   hitsAndTotalsAgg
                                                   (\(correct, counted) -> correct /. counted)
  where
    -- Tokenisation cannot fail, hence always `Right`.
    intoTokens line = Right (Data.Text.words line)

    -- For one (expected, output) pair of token lists, return
    -- (number of matching tokens, number of scored tokens).
    -- Throws `OtherException` when the two lines differ in token count.
    countHitsAndTotals :: ([Text], [Text]) -> (Int, Int)
    countHitsAndTotals (es, os)
      | Prelude.length os /= Prelude.length es = throw $ OtherException "wrong number of tokens"
      | otherwise = (Prelude.length matching, Prelude.length considered)
      where
        -- positions that take part in scoring: expected token is not "*"
        considered = [pair | pair@(e, _) <- Prelude.zip es os, e /= pack "*"]
        -- scored positions where the output agrees with the expectation
        matching = [pair | pair@(e, o) <- considered, o == e]

    -- Component-wise sum of the per-line (hits, total) pairs.
    hitsAndTotalsAgg = CC.foldl (\(hitsAcc, totalAcc) (hits, total) -> (hitsAcc + hits, totalAcc + total)) (0, 0)
gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
getWords getWords
(getCounts (==)) (getCounts (==))

View File

@ -256,6 +256,15 @@ The output should be given in the BIO format with the normalized forms given aft
The metric is F1 counted on entities (not labels). The metric is F1 counted on entities (not labels).
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
-- README.md template for the TokenAccuracy sample challenge: a short
-- task-specific introduction followed by the shared README section
-- produced by `commonReadmeMDContents` for the given test name.
readmeMDContents TokenAccuracy testName = [i|
Get part of speech tags for each token
======================================
This is a sample challenge for TokenAccuracy. We just
count the accuracy per token and skip entries marked as "*"
in the expected file.
|] ++ (commonReadmeMDContents testName)
readmeMDContents (MultiLabelFMeasure beta) testName = [i| readmeMDContents (MultiLabelFMeasure beta) testName = [i|
Tag names and their component Tag names and their component
============================= =============================
@ -400,6 +409,9 @@ trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surnam
O O O O O There is no name here O O O O O There is no name here
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|] |]
-- Sample training data for the TokenAccuracy challenge. Each line appears to
-- hold the expected tags (with "*" placeholders) followed by the sentence
-- they describe, one tag per token.
-- NOTE(review): the separator between the tag column and the sentence column
-- is not distinguishable here — confirm it is what the challenge format expects.
trainContents TokenAccuracy = [hereLit|* V N I like cats
* * V * N I can see the rainbow
|]
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5 trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5
Steven bloody Brown person:1,3 first-name:1 surname:3 Steven bloody Brown person:1,3 first-name:1 surname:3
James and James first-name:1 firstname:3 James and James first-name:1 firstname:3
@ -458,6 +470,9 @@ devInContents BIOF1Labels = devInContents BIOF1
devInContents BIOF1 = [hereLit|Adam and Eve devInContents BIOF1 = [hereLit|Adam and Eve
Mr Jan Kowalski Mr Jan Kowalski
|] |]
-- Sample dev-set input for the TokenAccuracy challenge: one sentence per line.
devInContents TokenAccuracy = [hereLit|The cats on the mat
Ala has a cat
|]
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
I see him I see him
Barbara Barbara
@ -513,6 +528,9 @@ devExpectedContents BIOF1Labels = devExpectedContents BIOF1
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
O B-firstname/JAN B-surname/KOWALSKI O B-firstname/JAN B-surname/KOWALSKI
|] |]
-- Sample dev-set expected output for the TokenAccuracy challenge: one tag per
-- token of the corresponding `devInContents TokenAccuracy` line ("*" entries
-- are the ones the metric skips, per the challenge README text).
devExpectedContents TokenAccuracy = [hereLit|* N * * N
N V * N
|]
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2 devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2
first-name:1 first-name:1
@ -570,6 +588,9 @@ testInContents BIOF1Labels = testInContents BIOF1
testInContents BIOF1 = [hereLit|Alan Tring testInContents BIOF1 = [hereLit|Alan Tring
No name here No name here
|] |]
-- Sample test-set input for the TokenAccuracy challenge: one sentence per line.
testInContents TokenAccuracy = [hereLit|I have cats
I know
|]
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
Nobody is there Nobody is there
I saw Marketa I saw Marketa
@ -624,6 +645,9 @@ testExpectedContents BIOF1Labels = testExpectedContents BIOF1
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
O O O O O O
|] |]
-- Sample test-set expected output for the TokenAccuracy challenge: one tag per
-- token of the corresponding `testInContents TokenAccuracy` line ("*" entries
-- are the ones the metric skips, per the challenge README text).
testExpectedContents TokenAccuracy = [hereLit|* V N
* V
|]
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3 testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3
first-name:3 first-name:3

View File

@ -169,7 +169,7 @@ metricReader = many $ option auto -- actually `some` should be used inst
( long "metric" -- --metric might be in the config.txt file... ( long "metric" -- --metric might be in the config.txt file...
<> short 'm' <> short 'm'
<> metavar "METRIC" <> metavar "METRIC"
<> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" ) <> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MultiLabel-Likelihood, MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, TokenAccuracy, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" )
altMetricReader :: Parser (Maybe Metric) altMetricReader :: Parser (Maybe Metric)
altMetricReader = optional $ option auto altMetricReader = optional $ option auto

View File

@ -118,6 +118,9 @@ main = hspec $ do
runGEvalTest "macro-f1-simple" `shouldReturnAlmost` 0.266666 runGEvalTest "macro-f1-simple" `shouldReturnAlmost` 0.266666
it "perfect soltion" $ it "perfect soltion" $
runGEvalTest "macro-f-measure-perfect" `shouldReturnAlmost` 1.00000 runGEvalTest "macro-f-measure-perfect" `shouldReturnAlmost` 1.00000
describe "TokenAccuracy" $ do
it "simple example" $ do
runGEvalTest "token-accuracy-simple" `shouldReturnAlmost` 0.5
describe "precision count" $ do describe "precision count" $ do
it "simple test" $ do it "simple test" $ do
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2 precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2

View File

@ -0,0 +1,3 @@
foo xyz * baz
baz
bar foo baz
1 foo xyz * baz
2 baz
3 bar foo baz

View File

@ -0,0 +1 @@
--metric TokenAccuracy

View File

@ -0,0 +1,3 @@
foo * * bar
baz
foo bar baz
1 foo * * bar
2 baz
3 foo bar baz