Add CER metric

(Character-Error Rate)
Filip Gralinski 2020-10-17 16:55:40 +02:00
parent 51c29aabf6
commit 819fbecedc
12 changed files with 121 additions and 6 deletions

View File

@ -158,6 +158,7 @@ isPreprocessable Spearman = False
isPreprocessable BLEU = True
isPreprocessable GLEU = True
isPreprocessable WER = True
isPreprocessable CER = True
isPreprocessable Accuracy = True
isPreprocessable ClippEU = False
isPreprocessable (FMeasure _) = False
@ -691,7 +692,19 @@ gevalCoreOnSources (Mean WER)
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
gevalCoreOnSources (Mean CER)
= gevalCoreWithoutInputOnItemTargets (Right . getString)
(Right . getString)
((uncurry (/.)) . (uncurry werStep))
averageC
id
noGraph
where
-- repeated as below, as it will be refactored into dependent types soon anyway
getString (RawItemTarget t) = unpack t
getString (PartiallyParsedItemTarget ts) = Prelude.unwords $ Prelude.map unpack ts
gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1, WER and CER for the time being"
-- only MultiLabel-F1 handled for JSONs for the time being...
gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) =
@ -925,6 +938,11 @@ continueGEvalCalculations SAWER WER = defineContinuation werAgg werFinal noGraph
werFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
werFinal (errors, ref) = errors /. ref
continueGEvalCalculations SACER CER = defineContinuation cerAgg cerFinal noGraph
where cerAgg = CC.foldl cerFuse (0, 0)
cerFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
cerFinal (errors, ref) = errors /. ref
continueGEvalCalculations SAAccuracy Accuracy = defineContinuation averageC id noGraph
continueGEvalCalculations SAFMeasure (FMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
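The corpus-level CER continuation above mirrors the WER one: each item contributes an (errors, referenceLength) pair, cerAgg folds the pairs by summing both components, and cerFinal divides the totals once at the end. This weights long items more heavily than the Mean/CER variant defined earlier, which averages per-item rates. A minimal standalone sketch of the two behaviours (plain lists instead of GEval's conduit machinery; the names corpusCER and meanCER are illustrative, not part of the GEval API):

import Data.List (foldl')

-- corpus-level: sum the (errors, referenceLength) pairs, divide once
corpusCER :: [(Int, Int)] -> Double
corpusCER items = fromIntegral errs / fromIntegral refs
  where (errs, refs) = foldl' fuse (0, 0) items
        fuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)

-- Mean/CER-style: average the per-item error rates
meanCER :: [(Int, Int)] -> Double
meanCER items = sum [fromIntegral e / fromIntegral r | (e, r) <- items]
                  / fromIntegral (length items)

main :: IO ()
main = do
  print (corpusCER [(2, 10), (0, 30)])  -- 2/40 = 0.05
  print (meanCER   [(2, 10), (0, 30)])  -- (0.2 + 0.0) / 2 = 0.1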

View File

@ -105,6 +105,15 @@ Directory structure
* `${testName}/in.tsv` Finnish input data for the test set
* `${testName}/expected.tsv` Māori reference translation for the test set
|]
readmeMDContents WER testName = readmeMDContents BLEU testName
readmeMDContents CER testName = [i|
GEval simple OCR challenge
==========================
Do OCR.
This is a sample fake challenge for Gonito framework. Replace it with
the description of your challenge.|] ++ (commonReadmeMDContents testName)
readmeMDContents Accuracy testName = [i|
GEval sample classification challenge
@ -417,7 +426,8 @@ Directory structure
* `README.md` this file
* `config.txt` configuration file
* `train/` directory with training data
* `train/in.tsv` input data for the train set
* `train/expected.tsv` expected (reference) data for the train set
* `dev-0/` directory with dev (test) data
* `dev-0/in.tsv` input data for the dev set
* `dev-0/expected.tsv` expected (reference) data for the dev set
@ -469,6 +479,11 @@ trainContents BLEU = [hereLit|alussa loi jumala taivaan ja maan he mea hanga na
ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu
ja jumalan henki liikkui vetten päällä na ka whakapaho te wairua o te atua i runga i te kare o nga wai
|]
trainContents WER = trainContents BLEU
trainContents CER = [hereLit|Hannibal ad portas train1.pdf
equo ne credite train2.pdf
errare humanum est train3.pdf
|]
trainContents Accuracy = [hereLit|Y 10 none yes
N -2 strong no
@ -568,6 +583,10 @@ devInContents GLEU = devInContents BLEU
devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli
ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä
|]
devInContents WER = devInContents BLEU
devInContents CER = [hereLit|dev1.pdf
dev2.pdf
|]
devInContents Accuracy = [hereLit|-8 none no
1 mild no
|]
@ -636,6 +655,10 @@ devExpectedContents GLEU = devExpectedContents BLEU
devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama
a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri
|]
devExpectedContents WER = devExpectedContents BLEU
devExpectedContents CER = [hereLit|et facta est lux
Et tu, Brute?
|]
devExpectedContents Accuracy = [hereLit|N
Y
|]
@ -702,11 +725,15 @@ devExpectedContents _ = [hereLit|0.82
testInContents :: Metric -> String
testInContents (Mean metric) = testInContents metric
testInContents GLEU = [hereLit|Alicella on musta kissa.
|]
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
ja tuli ehtoo , ja tuli aamu , ensimmäinen päivä
|]
testInContents WER = testInContents BLEU
testInContents CER = [hereLit|test1.pdf
test2.pdf
|]
testInContents Accuracy = [hereLit|2 mild yes
-5 mild no
|]
@ -776,6 +803,10 @@ testExpectedContents (Mean metric) = testExpectedContents metric
testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
a ko te ahiahi , ko te ata , he ra kotahi
|]
testExpectedContents CER = [hereLit|esse est percipi
tabula rasa
|]
testExpectedContents WER = testExpectedContents BLEU
testExpectedContents Accuracy = [hereLit|N
Y
|]
@ -848,6 +879,8 @@ inHeaderContents :: Metric -> Maybe [String]
inHeaderContents (Mean metric) = inHeaderContents metric
inHeaderContents GLEU = Nothing
inHeaderContents BLEU = Nothing
inHeaderContents WER = Nothing
inHeaderContents CER = Just ["Filename"]
inHeaderContents Accuracy = Just ["Temperature", "Wind", "Rain"]
inHeaderContents (FMeasure _) = Just ["seismic",
"seismoacoustic",
@ -894,6 +927,8 @@ outHeaderContents :: Metric -> Maybe [String]
outHeaderContents (Mean metric) = outHeaderContents metric
outHeaderContents BLEU = Nothing
outHeaderContents GLEU = Nothing
outHeaderContents WER = Nothing
outHeaderContents CER = Just ["OCRedText"]
outHeaderContents Accuracy = Just ["ShouldYouKidForWalk"]
outHeaderContents (FMeasure _) = Just ["IsSeismicBump"]
outHeaderContents (MacroFMeasure _) = Just ["LanguageCode"]

View File

@ -25,7 +25,7 @@ import Data.Attoparsec.Text (parseOnly)
-- the evaluation procedures are defined in GEval.Core
-- | evaluation metric
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy | ClippEU
| FMeasure Double | MacroFMeasure Double | NMI
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
| BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
@ -48,6 +48,7 @@ instance Show Metric where
show BLEU = "BLEU"
show GLEU = "GLEU"
show WER = "WER"
show CER = "CER"
show Accuracy = "Accuracy"
show ClippEU = "ClippEU"
show (FMeasure beta) = "F" ++ (show beta)
@ -119,6 +120,7 @@ instance Read Metric where
readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)]
readsPrec _ ('G':'L':'E':'U':theRest) = [(GLEU, theRest)]
readsPrec _ ('W':'E':'R':theRest) = [(WER, theRest)]
readsPrec _ ('C':'E':'R':theRest) = [(CER, theRest)]
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
@ -178,6 +180,7 @@ getMetricOrdering Spearman = TheHigherTheBetter
getMetricOrdering BLEU = TheHigherTheBetter
getMetricOrdering GLEU = TheHigherTheBetter
getMetricOrdering WER = TheLowerTheBetter
getMetricOrdering CER = TheLowerTheBetter
getMetricOrdering Accuracy = TheHigherTheBetter
getMetricOrdering ClippEU = TheHigherTheBetter
getMetricOrdering (FMeasure _) = TheHigherTheBetter

View File

@ -47,7 +47,7 @@ import GEval.MatchingSpecification
-- | Helper type so that singleton can be used.
-- | (The problem is that some metrics are parametrized by Double
-- | Word32 and this is not handled by the singleton library.)
singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy | AClippEU
| AFMeasure | AMacroFMeasure | ANMI
| ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood
| ABIOF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
@ -66,6 +66,7 @@ toHelper Spearman = ASpearman
toHelper BLEU = ABLEU
toHelper GLEU = AGLEU
toHelper WER = AWER
toHelper CER = ACER
toHelper Accuracy = AAccuracy
toHelper ClippEU = AClippEU
toHelper (FMeasure _) = AFMeasure
@ -104,6 +105,7 @@ type family ParsedExpectedType (t :: AMetric) :: * where
ParsedExpectedType ABLEU = [[String]]
ParsedExpectedType AGLEU = [[String]]
ParsedExpectedType AWER = [String]
ParsedExpectedType ACER = String
ParsedExpectedType AAccuracy = Text
ParsedExpectedType AClippEU = [ClippingSpec]
ParsedExpectedType AFMeasure = Bool
@ -138,6 +140,7 @@ expectedParser SASpearman = doubleParser
expectedParser SABLEU = alternativeSentencesParser
expectedParser SAGLEU = alternativeSentencesParser
expectedParser SAWER = intoStringWords
expectedParser SACER = Right . unpack
expectedParser SAAccuracy = onlyStrip
expectedParser SAClippEU = controlledParse lineClippingSpecsParser
expectedParser SAFMeasure = zeroOneParser
@ -185,6 +188,7 @@ outputParser SASpearman = expectedParser SASpearman
outputParser SABLEU = Right . Prelude.words . unpack
outputParser SAGLEU = Right . Prelude.words . unpack
outputParser SAWER = expectedParser SAWER
outputParser SACER = expectedParser SACER
outputParser SAAccuracy = expectedParser SAAccuracy
outputParser SAClippEU = controlledParse lineClippingsParser
outputParser SAFMeasure = probToZeroOneParser
@ -236,6 +240,7 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
ItemIntermediateRepresentationType AWER = (Int, Int)
ItemIntermediateRepresentationType ACER = (Int, Int)
ItemIntermediateRepresentationType t = Double
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
@ -246,6 +251,8 @@ itemStep SASpearman = id
itemStep SABLEU = uncurry bleuStep
itemStep SAGLEU = uncurry gleuStep
itemStep SAWER = uncurry werStep
-- strings are character lists, so we could re-use werStep
itemStep SACER = uncurry werStep
itemStep SAAccuracy = hitOrMiss
itemStep SAClippEU = clippEUMatchStep
itemStep SAFMeasure = getCount
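The comment on SACER above relies on the fact that a Haskell String is just [Char], so a word-level edit-distance step also works character by character. A self-contained sketch of that idea (a generic levenshtein/errorStep written for illustration here, not the actual werStep from GEval; shown only to make clear why the reuse is safe):

import Data.List (foldl')

-- plain Levenshtein distance over any list of comparable elements
levenshtein :: Eq a => [a] -> [a] -> Int
levenshtein xs ys = last (foldl' step [0 .. length ys] xs)
  where
    step row@(d:ds) x = scanl next (d + 1) (zip3 ys row ds)
      where next left (y, diag, up) =
              minimum [left + 1, up + 1, diag + if x == y then 0 else 1]
    step [] _ = error "impossible: the DP row is never empty"

-- (edit errors, reference length): on [String] this behaves like a WER step,
-- on String (i.e. [Char]) like a CER step
errorStep :: Eq a => [a] -> [a] -> (Int, Int)
errorStep expected got = (levenshtein expected got, length expected)

main :: IO ()
main = do
  print (errorStep (words "to be or not") (words "to be not"))  -- (1,4): one word deleted
  print (errorStep "percipi" "perctp")                          -- (2,7): one substitution, one deletion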

View File

@ -58,6 +58,7 @@ listOfAvailableMetrics = [RMSE,
BLEU,
GLEU,
WER,
CER,
NMI,
ClippEU,
LogLossHashed defaultLogLossHashedSize,
@ -78,6 +79,7 @@ listOfAvailableMetrics = [RMSE,
CharMatch]
extraInfo :: EvaluationScheme -> Maybe String
extraInfo (EvaluationScheme CER []) = Just "Character-Error Rate"
extraInfo (EvaluationScheme GLEU []) = Just "\"Google GLEU\" not the grammar correction metric"
extraInfo (EvaluationScheme BLEU [LowerCasing,
RegexpMatch _]) = Just "BLEU on lowercased strings, only Latin characters and digits considered"
@ -97,6 +99,8 @@ isMetricDescribed (SoftFMeasure _) = True
isMetricDescribed (Soft2DFMeasure _) = True
isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
isMetricDescribed GLEU = True
isMetricDescribed WER = True
isMetricDescribed CER = True
isMetricDescribed SegmentAccuracy = True
isMetricDescribed _ = False
@ -138,6 +142,17 @@ metric on a corpus level but does not have its drawbacks for our per
sentence reward objective.
see: https://arxiv.org/pdf/1609.08144.pdf
|]
getMetricDescription WER =
[i|WER (Word-Error Rate) is the number of word-level mistakes divided
by the number of words in the expected output. Possible mistakes are
deletions, insertions and substitutions as in the Levenshtein distance.
|]
getMetricDescription CER =
[i|CER (Character-Error Rate) is the number of character-level mistakes divided
by the total length of the expected output. Possible mistakes are
deletions, insertions and substitutions as in the Levenshtein distance.
|]
getMetricDescription SegmentAccuracy =
[i|Accuracy counted for segments, i.e. labels with positions.
The percentage of labels in the ground truth retrieved in the actual output is returned.
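Restated as a formula (added here for clarity; the wording above is what the commit actually ships): with S substitutions, D deletions and I insertions in a minimal edit script, and N the length of the expected output,

\mathrm{WER} = \frac{S + D + I}{N_{\text{words}}}, \qquad
\mathrm{CER} = \frac{S + D + I}{N_{\text{characters}}}

Both metrics are 0 for a perfect output and can exceed 1 when the output contains many insertions relative to the reference.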
@ -157,6 +172,12 @@ first-name/3:0.9
|]
outContents GLEU = [hereLit|Alice has a black
|]
outContents WER = [hereLit|na ka huainaua e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
a ko te ahiahi , ko ata , he ra ko kotahi
|]
outContents CER = [hereLit|esse esi perctp
tabula rasai
|]
outContents SegmentAccuracy = [hereLit|N:1-4 V:5-6 N:8-10 V:12-13 A:15-17
N:1-4 V:6-7 A:9-13
|]
@ -178,6 +199,10 @@ expectedScore (EvaluationScheme GLEU [])
= 0.7142857142857143
expectedScore (EvaluationScheme SegmentAccuracy [])
= 0.875
expectedScore (EvaluationScheme WER [])
= 0.08571
expectedScore (EvaluationScheme CER [])
= 0.14814
helpMetricParameterMetricsList :: String
helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
@ -226,7 +251,7 @@ the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page numbe
formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line a number of labels (entities) can be given. A label probability
can be provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
|]
formatDescription GLEU = [hereLit|In each line there is a space-separated sequence of words.
|]
formatDescription SegmentAccuracy = [hereLit|Labels can be any strings (without spaces), whereas is a list of
1-based indexes or spans separated by commas (spans are inclusive
@ -235,6 +260,9 @@ label "foo:bar" for positions 2, 4, 5, 6, 7 and 10. Note that no
overlapping segments can be returned (evaluation will fail in
such a case).
|]
formatDescription WER = formatDescription GLEU
formatDescription CER = [hereLit|Any text; whitespace and punctuation marks are also considered.
|]
scoreExplanation :: EvaluationScheme -> Maybe String
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
@ -257,6 +285,14 @@ Now we have to calculate precision and recall:
scoreExplanation (EvaluationScheme SegmentAccuracy [])
= Just [hereLit|Out of 4 segments in the expected output for the first item, 3 were retrieved correctly (accuracy is 3/4=0.75).
The second item was retrieved perfectly (accuracy is 1.0). Hence, the average is (0.75+1.0)/2=0.875.|]
scoreExplanation (EvaluationScheme WER [])
= Just [hereLit|The total length of expected output (in words) is 35. There are 3 errors
(1 word substituted, 1 inserted, 1 deleted) in the actual output. Hence,
WER = (1+1+1) / 35 = 3 / 35 = 0.08571.|]
scoreExplanation (EvaluationScheme CER [])
= Just [hereLit|The total length of expected output (in characters) is 27. There are 4 errors
(2 characters substituted, 1 inserted, 1 deleted) in the actual output. Hence,
CER = (2+1+1) / 27 = 4 / 27 = 0.14814.|]
pasteLines :: String -> String -> String
pasteLines a b = printf "%-35s %s\n" a b
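A quick check of the rounded figures quoted in the two score explanations above (3 word-level errors out of 35 words, 4 character-level errors out of 27 characters); this snippet is illustrative only and not part of the commit:

main :: IO ()
main = mapM_ print ([3 / 35, 4 / 27] :: [Double])
-- 8.571428571428571e-2  (WER, quoted as 0.08571)
-- 0.14814814814814814   (CER, quoted as 0.14814)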

View File

@ -128,6 +128,12 @@ main = hspec $ do
describe "WER" $ do describe "WER" $ do
it "simple example" $ it "simple example" $
runGEvalTest "wer-simple" `shouldReturnAlmost` 0.5555555555 runGEvalTest "wer-simple" `shouldReturnAlmost` 0.5555555555
describe "CER" $ do
it "simple example" $
runGEvalTest "cer-simple" `shouldReturnAlmost` 0.28947368421
describe "CER" $ do
it "simple example (Mean/CER)" $
runGEvalTest "cer-mean-simple" `shouldReturnAlmost` 0.277777777777778
describe "Accuracy" $ do describe "Accuracy" $ do
it "simple example" $ it "simple example" $
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6 runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6

View File

@ -0,0 +1,2 @@
To be or mot to be
Thas is the

View File

@ -0,0 +1 @@
--metric Mean/CER

View File

@ -0,0 +1,2 @@
To be or not to be
That is the question

View File

@ -0,0 +1,2 @@
To be or mot to be
Thas is the

View File

@ -0,0 +1 @@
--metric CER

View File

@ -0,0 +1,2 @@
To be or not to be
That is the question