Add TokenAccuracy metric
This commit is contained in:
parent
30c37c2b40
commit
2e816c4e38
@ -108,7 +108,7 @@ defaultLogLossHashedSize = 10
|
|||||||
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
|
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
|
||||||
| FMeasure Double | MacroFMeasure Double | NMI
|
| FMeasure Double | MacroFMeasure Double | NMI
|
||||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||||
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
|
| BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
|
||||||
| MultiLabelLogLoss | MultiLabelLikelihood
|
| MultiLabelLogLoss | MultiLabelLikelihood
|
||||||
| SoftFMeasure Double
|
| SoftFMeasure Double
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
@ -145,6 +145,7 @@ instance Show Metric where
|
|||||||
show Likelihood = "Likelihood"
|
show Likelihood = "Likelihood"
|
||||||
show BIOF1 = "BIO-F1"
|
show BIOF1 = "BIO-F1"
|
||||||
show BIOF1Labels = "BIO-F1-Labels"
|
show BIOF1Labels = "BIO-F1-Labels"
|
||||||
|
show TokenAccuracy = "TokenAccuracy"
|
||||||
show MAE = "MAE"
|
show MAE = "MAE"
|
||||||
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
|
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
|
||||||
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
||||||
@ -185,6 +186,7 @@ instance Read Metric where
|
|||||||
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
|
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
|
||||||
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
|
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
|
||||||
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
||||||
|
readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)]
|
||||||
readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
|
readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
|
||||||
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
|
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
|
||||||
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)]
|
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)]
|
||||||
@ -216,6 +218,7 @@ getMetricOrdering LogLoss = TheLowerTheBetter
|
|||||||
getMetricOrdering Likelihood = TheHigherTheBetter
|
getMetricOrdering Likelihood = TheHigherTheBetter
|
||||||
getMetricOrdering BIOF1 = TheHigherTheBetter
|
getMetricOrdering BIOF1 = TheHigherTheBetter
|
||||||
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
||||||
|
getMetricOrdering TokenAccuracy = TheHigherTheBetter
|
||||||
getMetricOrdering MAE = TheLowerTheBetter
|
getMetricOrdering MAE = TheLowerTheBetter
|
||||||
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
|
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
|
||||||
getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
|
getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
|
||||||
@ -293,6 +296,7 @@ data GEvalException = NoExpectedFile FilePath
|
|||||||
| EmptyOutput
|
| EmptyOutput
|
||||||
| UnexpectedData Word32 String
|
| UnexpectedData Word32 String
|
||||||
| UnexpectedMultipleOutputs
|
| UnexpectedMultipleOutputs
|
||||||
|
| OtherException String
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
instance Exception GEvalException
|
instance Exception GEvalException
|
||||||
@ -313,6 +317,7 @@ instance Show GEvalException where
|
|||||||
show EmptyOutput = "The output file is empty"
|
show EmptyOutput = "The output file is empty"
|
||||||
show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]"
|
show (UnexpectedData lineNo message) = "Line " ++ (show lineNo) ++ ": Unexpected data [" ++ message ++ "]"
|
||||||
show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file"
|
show UnexpectedMultipleOutputs = "Multiple outputs are not possible in this mode, use -o option to select an output file"
|
||||||
|
show (OtherException message) = message
|
||||||
|
|
||||||
somethingWrongWithFilesMessage :: String -> FilePath -> String
|
somethingWrongWithFilesMessage :: String -> FilePath -> String
|
||||||
somethingWrongWithFilesMessage msg filePath = Prelude.concat
|
somethingWrongWithFilesMessage msg filePath = Prelude.concat
|
||||||
@ -682,6 +687,26 @@ gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWit
|
|||||||
entities <- parseBioSequenceIntoEntities s
|
entities <- parseBioSequenceIntoEntities s
|
||||||
return $ Prelude.map eraseNormalisation entities
|
return $ Prelude.map eraseNormalisation entities
|
||||||
|
|
||||||
|
-- TokenAccuracy: per-token accuracy between an expected line and an output line.
-- Both sides are split into whitespace-separated tokens (the same `intoTokens`
-- parser is passed twice), every line pair is reduced to a (hits, total) pair,
-- the pairs are summed over all lines, and the final score is hits /. total.
-- NOTE(review): `/.` is defined outside this view - presumably a division of
-- the two Int counts yielding a fractional score; confirm against its definition.
gevalCore' TokenAccuracy _ = gevalCoreWithoutInput intoTokens
|
||||||
|
intoTokens
|
||||||
|
countHitsAndTotals
|
||||||
|
hitsAndTotalsAgg
|
||||||
|
(\(hits, total) -> hits /. total)
|
||||||
|
-- Tokenisation never fails: the line is split with Data.Text.words and always wrapped in Right.
where intoTokens = Right . Data.Text.words
|
||||||
|
-- Turns one pair of token lists into (hits, total) counts for that line.
-- `es` is compared against the "*" skip marker in matchFun, so it is presumably
-- the expected side and `os` the system output - confirm the tuple order against
-- gevalCoreWithoutInput.
countHitsAndTotals :: ([Text], [Text]) -> (Int, Int)
|
||||||
|
countHitsAndTotals (es, os) =
|
||||||
|
-- The two token lists must be of equal length; otherwise an OtherException is thrown.
-- `throw` is used from pure code here, so the exception is raised when this result is evaluated.
if Prelude.length os /= Prelude.length es
|
||||||
|
then throw $ OtherException "wrong number of tokens"
|
||||||
|
-- NOTE(review): Prelude.foldl is the lazy left fold; a strict fold (foldl') may be
-- preferable for the (Int, Int) accumulator if it is in scope.
else Prelude.foldl matchFun
|
||||||
|
(0, 0)
|
||||||
|
(Prelude.zip es os)
|
||||||
|
-- Accumulator step: (h, t) are the running hit and total counts,
-- (e, o) is one pair of tokens taken from `es` and `os` respectively.
matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int)
|
||||||
|
matchFun (h, t) (e, o)
|
||||||
|
-- A "*" token on the `e` side is skipped: it counts neither as a hit nor towards the total.
| e == (pack "*") = (h, t)
|
||||||
|
-- Exact token match: one more hit and one more counted token.
| o == e = (h + 1, t + 1)
|
||||||
|
-- Mismatch: only the total grows.
| otherwise = (h, t + 1)
|
||||||
|
-- Sums the per-line (hits, total) pairs component-wise, starting from (0, 0).
-- NOTE(review): `CC` is an import alias not visible here - presumably conduit combinators; confirm.
hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0)
|
||||||
|
|
||||||
gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
|
gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
|
||||||
getWords
|
getWords
|
||||||
(getCounts (==))
|
(getCounts (==))
|
||||||
|
@ -256,6 +256,15 @@ The output should be given in the BIO format with the normalized forms given aft
|
|||||||
The metric is F1 counted on entities (not labels).
|
The metric is F1 counted on entities (not labels).
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
readmeMDContents TokenAccuracy testName = [i|
|
||||||
|
Get part of speech tags for each token
|
||||||
|
======================================
|
||||||
|
|
||||||
|
This is a sample challenge for TokenAccuracy. We just
|
||||||
|
count the accuracy per token and skip entries marked as "*"
|
||||||
|
in the expected file.
|
||||||
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents (MultiLabelFMeasure beta) testName = [i|
|
readmeMDContents (MultiLabelFMeasure beta) testName = [i|
|
||||||
Tag names and their component
|
Tag names and their component
|
||||||
=============================
|
=============================
|
||||||
@ -400,6 +409,9 @@ trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surnam
|
|||||||
O O O O O There is no name here
|
O O O O O There is no name here
|
||||||
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
||||||
|]
|
|]
|
||||||
|
trainContents TokenAccuracy = [hereLit|* V N I like cats
|
||||||
|
* * V * N I can see the rainbow
|
||||||
|
|]
|
||||||
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5
|
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5
|
||||||
Steven bloody Brown person:1,3 first-name:1 surname:3
|
Steven bloody Brown person:1,3 first-name:1 surname:3
|
||||||
James and James first-name:1 firstname:3
|
James and James first-name:1 firstname:3
|
||||||
@ -458,6 +470,9 @@ devInContents BIOF1Labels = devInContents BIOF1
|
|||||||
devInContents BIOF1 = [hereLit|Adam and Eve
|
devInContents BIOF1 = [hereLit|Adam and Eve
|
||||||
Mr Jan Kowalski
|
Mr Jan Kowalski
|
||||||
|]
|
|]
|
||||||
|
devInContents TokenAccuracy = [hereLit|The cats on the mat
|
||||||
|
Ala has a cat
|
||||||
|
|]
|
||||||
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
|
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
|
||||||
I see him
|
I see him
|
||||||
Barbara
|
Barbara
|
||||||
@ -513,6 +528,9 @@ devExpectedContents BIOF1Labels = devExpectedContents BIOF1
|
|||||||
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
|
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
|
||||||
O B-firstname/JAN B-surname/KOWALSKI
|
O B-firstname/JAN B-surname/KOWALSKI
|
||||||
|]
|
|]
|
||||||
|
devExpectedContents TokenAccuracy = [hereLit|* N * * N
|
||||||
|
N V * N
|
||||||
|
|]
|
||||||
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2
|
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2
|
||||||
|
|
||||||
first-name:1
|
first-name:1
|
||||||
@ -570,6 +588,9 @@ testInContents BIOF1Labels = testInContents BIOF1
|
|||||||
testInContents BIOF1 = [hereLit|Alan Tring
|
testInContents BIOF1 = [hereLit|Alan Tring
|
||||||
No name here
|
No name here
|
||||||
|]
|
|]
|
||||||
|
testInContents TokenAccuracy = [hereLit|I have cats
|
||||||
|
I know
|
||||||
|
|]
|
||||||
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
|
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
|
||||||
Nobody is there
|
Nobody is there
|
||||||
I saw Marketa
|
I saw Marketa
|
||||||
@ -624,6 +645,9 @@ testExpectedContents BIOF1Labels = testExpectedContents BIOF1
|
|||||||
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
|
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
|
||||||
O O O
|
O O O
|
||||||
|]
|
|]
|
||||||
|
testExpectedContents TokenAccuracy = [hereLit|* V N
|
||||||
|
* V
|
||||||
|
|]
|
||||||
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3
|
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3
|
||||||
|
|
||||||
first-name:3
|
first-name:3
|
||||||
|
@ -169,7 +169,7 @@ metricReader = many $ option auto -- actually `some` should be used inst
|
|||||||
( long "metric" -- --metric might be in the config.txt file...
|
( long "metric" -- --metric might be in the config.txt file...
|
||||||
<> short 'm'
|
<> short 'm'
|
||||||
<> metavar "METRIC"
|
<> metavar "METRIC"
|
||||||
<> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" )
|
<> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MultiLabel-Likelihood, MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, TokenAccuracy, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" )
|
||||||
|
|
||||||
altMetricReader :: Parser (Maybe Metric)
|
altMetricReader :: Parser (Maybe Metric)
|
||||||
altMetricReader = optional $ option auto
|
altMetricReader = optional $ option auto
|
||||||
|
@ -118,6 +118,9 @@ main = hspec $ do
|
|||||||
runGEvalTest "macro-f1-simple" `shouldReturnAlmost` 0.266666
|
runGEvalTest "macro-f1-simple" `shouldReturnAlmost` 0.266666
|
||||||
it "perfect soltion" $
|
it "perfect soltion" $
|
||||||
runGEvalTest "macro-f-measure-perfect" `shouldReturnAlmost` 1.00000
|
runGEvalTest "macro-f-measure-perfect" `shouldReturnAlmost` 1.00000
|
||||||
|
describe "TokenAccuracy" $ do
|
||||||
|
it "simple example" $ do
|
||||||
|
runGEvalTest "token-accuracy-simple" `shouldReturnAlmost` 0.5
|
||||||
describe "precision count" $ do
|
describe "precision count" $ do
|
||||||
it "simple test" $ do
|
it "simple test" $ do
|
||||||
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
|
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
|
||||||
|
@ -0,0 +1,3 @@
|
|||||||
|
foo xyz * baz
|
||||||
|
baz
|
||||||
|
bar foo baz
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric TokenAccuracy
|
@ -0,0 +1,3 @@
|
|||||||
|
foo * * bar
|
||||||
|
baz
|
||||||
|
foo bar baz
|
|
Loading…
Reference in New Issue
Block a user