Add CER metric
(Character-Error Rate)
This commit is contained in:
parent
51c29aabf6
commit
819fbecedc
@ -158,6 +158,7 @@ isPreprocessable Spearman = False
|
|||||||
isPreprocessable BLEU = True
|
isPreprocessable BLEU = True
|
||||||
isPreprocessable GLEU = True
|
isPreprocessable GLEU = True
|
||||||
isPreprocessable WER = True
|
isPreprocessable WER = True
|
||||||
|
isPreprocessable CER = True
|
||||||
isPreprocessable Accuracy = True
|
isPreprocessable Accuracy = True
|
||||||
isPreprocessable ClippEU = False
|
isPreprocessable ClippEU = False
|
||||||
isPreprocessable (FMeasure _) = False
|
isPreprocessable (FMeasure _) = False
|
||||||
@ -691,7 +692,19 @@ gevalCoreOnSources (Mean WER)
|
|||||||
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
|
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
|
||||||
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
|
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
|
||||||
|
|
||||||
gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being"
|
gevalCoreOnSources (Mean CER)
|
||||||
|
= gevalCoreWithoutInputOnItemTargets (Right . getString)
|
||||||
|
(Right . getString)
|
||||||
|
((uncurry (/.)) . (uncurry werStep))
|
||||||
|
averageC
|
||||||
|
id
|
||||||
|
noGraph
|
||||||
|
where
|
||||||
|
-- repeated as below, as it will be refactored into dependent types soon anyway
|
||||||
|
getString (RawItemTarget t) = unpack t
|
||||||
|
getString (PartiallyParsedItemTarget ts) = Prelude.unwords $ Prelude.map unpack ts
|
||||||
|
|
||||||
|
gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1, WER and CER for the time being"
|
||||||
|
|
||||||
-- only MultiLabel-F1 handled for JSONs for the time being...
|
-- only MultiLabel-F1 handled for JSONs for the time being...
|
||||||
gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) =
|
gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) =
|
||||||
@ -925,6 +938,11 @@ continueGEvalCalculations SAWER WER = defineContinuation werAgg werFinal noGraph
|
|||||||
werFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
|
werFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
|
||||||
werFinal (errors, ref) = errors /. ref
|
werFinal (errors, ref) = errors /. ref
|
||||||
|
|
||||||
|
continueGEvalCalculations SACER CER = defineContinuation cerAgg cerFinal noGraph
|
||||||
|
where cerAgg = CC.foldl cerFuse (0, 0)
|
||||||
|
cerFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
|
||||||
|
cerFinal (errors, ref) = errors /. ref
|
||||||
|
|
||||||
continueGEvalCalculations SAAccuracy Accuracy = defineContinuation averageC id noGraph
|
continueGEvalCalculations SAAccuracy Accuracy = defineContinuation averageC id noGraph
|
||||||
|
|
||||||
continueGEvalCalculations SAFMeasure (FMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
|
continueGEvalCalculations SAFMeasure (FMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
|
||||||
|
@ -105,6 +105,15 @@ Directory structure
|
|||||||
* `${testName}/in.tsv` — Finnish input data for the test set
|
* `${testName}/in.tsv` — Finnish input data for the test set
|
||||||
* `${testName}/expected.tsv` — Māori reference translation for the test set
|
* `${testName}/expected.tsv` — Māori reference translation for the test set
|
||||||
|]
|
|]
|
||||||
|
readmeMDContents WER testName = readmeMDContents BLEU testName
|
||||||
|
readmeMDContents CER testName = [i|
|
||||||
|
GEval simple OCR challenge
|
||||||
|
==========================
|
||||||
|
|
||||||
|
Do OCR.
|
||||||
|
|
||||||
|
This is a sample fake challenge for Gonito framework. Replace it with
|
||||||
|
the description of your challenge.|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents Accuracy testName = [i|
|
readmeMDContents Accuracy testName = [i|
|
||||||
GEval sample classification challenge
|
GEval sample classification challenge
|
||||||
@ -417,7 +426,8 @@ Directory structure
|
|||||||
* `README.md` — this file
|
* `README.md` — this file
|
||||||
* `config.txt` — configuration file
|
* `config.txt` — configuration file
|
||||||
* `train/` — directory with training data
|
* `train/` — directory with training data
|
||||||
* `train/train.tsv` — sample train set
|
* `train/in.tsv` — input data for the train set
|
||||||
|
* `train/expected.tsv` — expected (reference) data for the train set
|
||||||
* `dev-0/` — directory with dev (test) data
|
* `dev-0/` — directory with dev (test) data
|
||||||
* `dev-0/in.tsv` — input data for the dev set
|
* `dev-0/in.tsv` — input data for the dev set
|
||||||
* `dev-0/expected.tsv` — expected (reference) data for the dev set
|
* `dev-0/expected.tsv` — expected (reference) data for the dev set
|
||||||
@ -469,6 +479,11 @@ trainContents BLEU = [hereLit|alussa loi jumala taivaan ja maan he mea hanga na
|
|||||||
ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu
|
ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu
|
||||||
ja jumalan henki liikkui vetten päällä na ka whakapaho te wairua o te atua i runga i te kare o nga wai
|
ja jumalan henki liikkui vetten päällä na ka whakapaho te wairua o te atua i runga i te kare o nga wai
|
||||||
|]
|
|]
|
||||||
|
trainContents WER = trainContents BLEU
|
||||||
|
trainContents CER = [hereLit|Hannibal ad portas train1.pdf
|
||||||
|
equo ne credite train2.pdf
|
||||||
|
errare humanum est train3.pdf
|
||||||
|
|]
|
||||||
|
|
||||||
trainContents Accuracy = [hereLit|Y 10 none yes
|
trainContents Accuracy = [hereLit|Y 10 none yes
|
||||||
N -2 strong no
|
N -2 strong no
|
||||||
@ -568,6 +583,10 @@ devInContents GLEU = devInContents BLEU
|
|||||||
devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli
|
devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli
|
||||||
ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä
|
ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä
|
||||||
|]
|
|]
|
||||||
|
devInContents WER = devInContents BLEU
|
||||||
|
devInContents CER = [hereLit|dev1.pdf
|
||||||
|
dev2.pdf
|
||||||
|
|]
|
||||||
devInContents Accuracy = [hereLit|-8 none no
|
devInContents Accuracy = [hereLit|-8 none no
|
||||||
1 mild no
|
1 mild no
|
||||||
|]
|
|]
|
||||||
@ -636,6 +655,10 @@ devExpectedContents GLEU = devExpectedContents BLEU
|
|||||||
devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama
|
devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama
|
||||||
a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri
|
a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri
|
||||||
|]
|
|]
|
||||||
|
devExpectedContents WER = devExpectedContents BLEU
|
||||||
|
devExpectedContents CER = [hereLit|et facta est lux
|
||||||
|
Et tu, Brute?
|
||||||
|
|]
|
||||||
devExpectedContents Accuracy = [hereLit|N
|
devExpectedContents Accuracy = [hereLit|N
|
||||||
Y
|
Y
|
||||||
|]
|
|]
|
||||||
@ -702,11 +725,15 @@ devExpectedContents _ = [hereLit|0.82
|
|||||||
|
|
||||||
testInContents :: Metric -> String
|
testInContents :: Metric -> String
|
||||||
testInContents (Mean metric) = testInContents metric
|
testInContents (Mean metric) = testInContents metric
|
||||||
testInContents GLEU = [hereLit|Alice has a black
|
testInContents GLEU = [hereLit|Alicella on musta kissa.
|
||||||
|]
|
|]
|
||||||
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
|
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
|
||||||
ja tuli ehtoo , ja tuli aamu , ensimmäinen päivä
|
ja tuli ehtoo , ja tuli aamu , ensimmäinen päivä
|
||||||
|]
|
|]
|
||||||
|
testInContents WER = testInContents BLEU
|
||||||
|
testInContents CER = [hereLit|test1.pdf
|
||||||
|
test2.pdf
|
||||||
|
|]
|
||||||
testInContents Accuracy = [hereLit|2 mild yes
|
testInContents Accuracy = [hereLit|2 mild yes
|
||||||
-5 mild no
|
-5 mild no
|
||||||
|]
|
|]
|
||||||
@ -776,6 +803,10 @@ testExpectedContents (Mean metric) = testExpectedContents metric
|
|||||||
testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
|
testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
|
||||||
a ko te ahiahi , ko te ata , he ra kotahi
|
a ko te ahiahi , ko te ata , he ra kotahi
|
||||||
|]
|
|]
|
||||||
|
testExpectedContents CER = [hereLit|esse est percipi
|
||||||
|
tabula rasa
|
||||||
|
|]
|
||||||
|
testExpectedContents WER = testExpectedContents BLEU
|
||||||
testExpectedContents Accuracy = [hereLit|N
|
testExpectedContents Accuracy = [hereLit|N
|
||||||
Y
|
Y
|
||||||
|]
|
|]
|
||||||
@ -848,6 +879,8 @@ inHeaderContents :: Metric -> Maybe [String]
|
|||||||
inHeaderContents (Mean metric) = inHeaderContents metric
|
inHeaderContents (Mean metric) = inHeaderContents metric
|
||||||
inHeaderContents GLEU = Nothing
|
inHeaderContents GLEU = Nothing
|
||||||
inHeaderContents BLEU = Nothing
|
inHeaderContents BLEU = Nothing
|
||||||
|
inHeaderContents WER = Nothing
|
||||||
|
inHeaderContents CER = Just ["Filename"]
|
||||||
inHeaderContents Accuracy = Just ["Temperature", "Wind", "Rain"]
|
inHeaderContents Accuracy = Just ["Temperature", "Wind", "Rain"]
|
||||||
inHeaderContents (FMeasure _) = Just ["seismic",
|
inHeaderContents (FMeasure _) = Just ["seismic",
|
||||||
"seismoacoustic",
|
"seismoacoustic",
|
||||||
@ -894,6 +927,8 @@ outHeaderContents :: Metric -> Maybe [String]
|
|||||||
outHeaderContents (Mean metric) = outHeaderContents metric
|
outHeaderContents (Mean metric) = outHeaderContents metric
|
||||||
outHeaderContents BLEU = Nothing
|
outHeaderContents BLEU = Nothing
|
||||||
outHeaderContents GLEU = Nothing
|
outHeaderContents GLEU = Nothing
|
||||||
|
outHeaderContents WER = Nothing
|
||||||
|
outHeaderContents CER = Just ["OCRedText"]
|
||||||
outHeaderContents Accuracy = Just ["ShouldYouKidForWalk"]
|
outHeaderContents Accuracy = Just ["ShouldYouKidForWalk"]
|
||||||
outHeaderContents (FMeasure _) = Just ["IsSeismicBump"]
|
outHeaderContents (FMeasure _) = Just ["IsSeismicBump"]
|
||||||
outHeaderContents (MacroFMeasure _) = Just ["LanguageCode"]
|
outHeaderContents (MacroFMeasure _) = Just ["LanguageCode"]
|
||||||
|
@ -25,7 +25,7 @@ import Data.Attoparsec.Text (parseOnly)
|
|||||||
-- the evaluation procedures are defined in GEval.Core
|
-- the evaluation procedures are defined in GEval.Core
|
||||||
|
|
||||||
-- | evaluation metric
|
-- | evaluation metric
|
||||||
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
|
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy | ClippEU
|
||||||
| FMeasure Double | MacroFMeasure Double | NMI
|
| FMeasure Double | MacroFMeasure Double | NMI
|
||||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||||
| BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
|
| BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
|
||||||
@ -48,6 +48,7 @@ instance Show Metric where
|
|||||||
show BLEU = "BLEU"
|
show BLEU = "BLEU"
|
||||||
show GLEU = "GLEU"
|
show GLEU = "GLEU"
|
||||||
show WER = "WER"
|
show WER = "WER"
|
||||||
|
show CER = "CER"
|
||||||
show Accuracy = "Accuracy"
|
show Accuracy = "Accuracy"
|
||||||
show ClippEU = "ClippEU"
|
show ClippEU = "ClippEU"
|
||||||
show (FMeasure beta) = "F" ++ (show beta)
|
show (FMeasure beta) = "F" ++ (show beta)
|
||||||
@ -119,6 +120,7 @@ instance Read Metric where
|
|||||||
readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)]
|
readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)]
|
||||||
readsPrec _ ('G':'L':'E':'U':theRest) = [(GLEU, theRest)]
|
readsPrec _ ('G':'L':'E':'U':theRest) = [(GLEU, theRest)]
|
||||||
readsPrec _ ('W':'E':'R':theRest) = [(WER, theRest)]
|
readsPrec _ ('W':'E':'R':theRest) = [(WER, theRest)]
|
||||||
|
readsPrec _ ('C':'E':'R':theRest) = [(CER, theRest)]
|
||||||
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
|
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
|
||||||
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
|
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
|
||||||
readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
|
readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
|
||||||
@ -178,6 +180,7 @@ getMetricOrdering Spearman = TheHigherTheBetter
|
|||||||
getMetricOrdering BLEU = TheHigherTheBetter
|
getMetricOrdering BLEU = TheHigherTheBetter
|
||||||
getMetricOrdering GLEU = TheHigherTheBetter
|
getMetricOrdering GLEU = TheHigherTheBetter
|
||||||
getMetricOrdering WER = TheLowerTheBetter
|
getMetricOrdering WER = TheLowerTheBetter
|
||||||
|
getMetricOrdering CER = TheLowerTheBetter
|
||||||
getMetricOrdering Accuracy = TheHigherTheBetter
|
getMetricOrdering Accuracy = TheHigherTheBetter
|
||||||
getMetricOrdering ClippEU = TheHigherTheBetter
|
getMetricOrdering ClippEU = TheHigherTheBetter
|
||||||
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||||
|
@ -47,7 +47,7 @@ import GEval.MatchingSpecification
|
|||||||
-- | Helper type so that singleton can be used.
|
-- | Helper type so that singleton can be used.
|
||||||
-- | (The problem is that some metrics are parametrized by Double
|
-- | (The problem is that some metrics are parametrized by Double
|
||||||
-- | Word32 and this is not handled by the singleton library.)
|
-- | Word32 and this is not handled by the singleton library.)
|
||||||
singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | AAccuracy | AClippEU
|
singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy | AClippEU
|
||||||
| AFMeasure | AMacroFMeasure | ANMI
|
| AFMeasure | AMacroFMeasure | ANMI
|
||||||
| ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood
|
| ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood
|
||||||
| ABIOF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
|
| ABIOF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
|
||||||
@ -66,6 +66,7 @@ toHelper Spearman = ASpearman
|
|||||||
toHelper BLEU = ABLEU
|
toHelper BLEU = ABLEU
|
||||||
toHelper GLEU = AGLEU
|
toHelper GLEU = AGLEU
|
||||||
toHelper WER = AWER
|
toHelper WER = AWER
|
||||||
|
toHelper CER = ACER
|
||||||
toHelper Accuracy = AAccuracy
|
toHelper Accuracy = AAccuracy
|
||||||
toHelper ClippEU = AClippEU
|
toHelper ClippEU = AClippEU
|
||||||
toHelper (FMeasure _) = AFMeasure
|
toHelper (FMeasure _) = AFMeasure
|
||||||
@ -104,6 +105,7 @@ type family ParsedExpectedType (t :: AMetric) :: * where
|
|||||||
ParsedExpectedType ABLEU = [[String]]
|
ParsedExpectedType ABLEU = [[String]]
|
||||||
ParsedExpectedType AGLEU = [[String]]
|
ParsedExpectedType AGLEU = [[String]]
|
||||||
ParsedExpectedType AWER = [String]
|
ParsedExpectedType AWER = [String]
|
||||||
|
ParsedExpectedType ACER = String
|
||||||
ParsedExpectedType AAccuracy = Text
|
ParsedExpectedType AAccuracy = Text
|
||||||
ParsedExpectedType AClippEU = [ClippingSpec]
|
ParsedExpectedType AClippEU = [ClippingSpec]
|
||||||
ParsedExpectedType AFMeasure = Bool
|
ParsedExpectedType AFMeasure = Bool
|
||||||
@ -138,6 +140,7 @@ expectedParser SASpearman = doubleParser
|
|||||||
expectedParser SABLEU = alternativeSentencesParser
|
expectedParser SABLEU = alternativeSentencesParser
|
||||||
expectedParser SAGLEU = alternativeSentencesParser
|
expectedParser SAGLEU = alternativeSentencesParser
|
||||||
expectedParser SAWER = intoStringWords
|
expectedParser SAWER = intoStringWords
|
||||||
|
expectedParser SACER = Right . unpack
|
||||||
expectedParser SAAccuracy = onlyStrip
|
expectedParser SAAccuracy = onlyStrip
|
||||||
expectedParser SAClippEU = controlledParse lineClippingSpecsParser
|
expectedParser SAClippEU = controlledParse lineClippingSpecsParser
|
||||||
expectedParser SAFMeasure = zeroOneParser
|
expectedParser SAFMeasure = zeroOneParser
|
||||||
@ -185,6 +188,7 @@ outputParser SASpearman = expectedParser SASpearman
|
|||||||
outputParser SABLEU = Right . Prelude.words . unpack
|
outputParser SABLEU = Right . Prelude.words . unpack
|
||||||
outputParser SAGLEU = Right . Prelude.words . unpack
|
outputParser SAGLEU = Right . Prelude.words . unpack
|
||||||
outputParser SAWER = expectedParser SAWER
|
outputParser SAWER = expectedParser SAWER
|
||||||
|
outputParser SACER = expectedParser SACER
|
||||||
outputParser SAAccuracy = expectedParser SAAccuracy
|
outputParser SAAccuracy = expectedParser SAAccuracy
|
||||||
outputParser SAClippEU = controlledParse lineClippingsParser
|
outputParser SAClippEU = controlledParse lineClippingsParser
|
||||||
outputParser SAFMeasure = probToZeroOneParser
|
outputParser SAFMeasure = probToZeroOneParser
|
||||||
@ -236,6 +240,7 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
|
|||||||
ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
|
ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
|
||||||
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
|
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
|
||||||
ItemIntermediateRepresentationType AWER = (Int, Int)
|
ItemIntermediateRepresentationType AWER = (Int, Int)
|
||||||
|
ItemIntermediateRepresentationType ACER = (Int, Int)
|
||||||
ItemIntermediateRepresentationType t = Double
|
ItemIntermediateRepresentationType t = Double
|
||||||
|
|
||||||
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
|
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
|
||||||
@ -246,6 +251,8 @@ itemStep SASpearman = id
|
|||||||
itemStep SABLEU = uncurry bleuStep
|
itemStep SABLEU = uncurry bleuStep
|
||||||
itemStep SAGLEU = uncurry gleuStep
|
itemStep SAGLEU = uncurry gleuStep
|
||||||
itemStep SAWER = uncurry werStep
|
itemStep SAWER = uncurry werStep
|
||||||
|
-- strings are character lists, so we could re-use werStep
|
||||||
|
itemStep SACER = uncurry werStep
|
||||||
itemStep SAAccuracy = hitOrMiss
|
itemStep SAAccuracy = hitOrMiss
|
||||||
itemStep SAClippEU = clippEUMatchStep
|
itemStep SAClippEU = clippEUMatchStep
|
||||||
itemStep SAFMeasure = getCount
|
itemStep SAFMeasure = getCount
|
||||||
|
@ -58,6 +58,7 @@ listOfAvailableMetrics = [RMSE,
|
|||||||
BLEU,
|
BLEU,
|
||||||
GLEU,
|
GLEU,
|
||||||
WER,
|
WER,
|
||||||
|
CER,
|
||||||
NMI,
|
NMI,
|
||||||
ClippEU,
|
ClippEU,
|
||||||
LogLossHashed defaultLogLossHashedSize,
|
LogLossHashed defaultLogLossHashedSize,
|
||||||
@ -78,6 +79,7 @@ listOfAvailableMetrics = [RMSE,
|
|||||||
CharMatch]
|
CharMatch]
|
||||||
|
|
||||||
extraInfo :: EvaluationScheme -> Maybe String
|
extraInfo :: EvaluationScheme -> Maybe String
|
||||||
|
extraInfo (EvaluationScheme CER []) = Just "Character-Error Rate"
|
||||||
extraInfo (EvaluationScheme GLEU []) = Just "\"Google GLEU\" not the grammar correction metric"
|
extraInfo (EvaluationScheme GLEU []) = Just "\"Google GLEU\" not the grammar correction metric"
|
||||||
extraInfo (EvaluationScheme BLEU [LowerCasing,
|
extraInfo (EvaluationScheme BLEU [LowerCasing,
|
||||||
RegexpMatch _]) = Just "BLEU on lowercased strings, only Latin characters and digits considered"
|
RegexpMatch _]) = Just "BLEU on lowercased strings, only Latin characters and digits considered"
|
||||||
@ -97,6 +99,8 @@ isMetricDescribed (SoftFMeasure _) = True
|
|||||||
isMetricDescribed (Soft2DFMeasure _) = True
|
isMetricDescribed (Soft2DFMeasure _) = True
|
||||||
isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
|
isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
|
||||||
isMetricDescribed GLEU = True
|
isMetricDescribed GLEU = True
|
||||||
|
isMetricDescribed WER = True
|
||||||
|
isMetricDescribed CER = True
|
||||||
isMetricDescribed SegmentAccuracy = True
|
isMetricDescribed SegmentAccuracy = True
|
||||||
isMetricDescribed _ = False
|
isMetricDescribed _ = False
|
||||||
|
|
||||||
@ -138,6 +142,17 @@ metric on a corpus level but does not have its drawbacks for our per
|
|||||||
sentence reward objective.
|
sentence reward objective.
|
||||||
see: https://arxiv.org/pdf/1609.08144.pdf
|
see: https://arxiv.org/pdf/1609.08144.pdf
|
||||||
|]
|
|]
|
||||||
|
getMetricDescription WER =
|
||||||
|
[i|WER (Word-Error Rate) is the number of word-level mistakes divided
|
||||||
|
by the number of words in the expected output. Possible mistakes are
|
||||||
|
deletions, insertions and substitutions — as in the Levenshtein distance.
|
||||||
|
|]
|
||||||
|
getMetricDescription CER =
|
||||||
|
[i|CER (Character-Error Rate) is the number of character-level mistakes divided
|
||||||
|
by the total length of the expected output. Possible mistakes are
|
||||||
|
deletions, insertions and substitutions — as in the Levenshtein distance.
|
||||||
|
|]
|
||||||
|
|
||||||
getMetricDescription SegmentAccuracy =
|
getMetricDescription SegmentAccuracy =
|
||||||
[i|Accuracy counted for segments, i.e. labels with positions.
|
[i|Accuracy counted for segments, i.e. labels with positions.
|
||||||
The percentage of labels in the ground truth retrieved in the actual output is returned.
|
The percentage of labels in the ground truth retrieved in the actual output is returned.
|
||||||
@ -157,6 +172,12 @@ first-name/3:0.9
|
|||||||
|]
|
|]
|
||||||
outContents GLEU = [hereLit|Alice has a black
|
outContents GLEU = [hereLit|Alice has a black
|
||||||
|]
|
|]
|
||||||
|
outContents WER = [hereLit|na ka huainaua e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
|
||||||
|
a ko te ahiahi , ko ata , he ra ko kotahi
|
||||||
|
|]
|
||||||
|
outContents CER = [hereLit|esse esi perctp
|
||||||
|
tabula rasai
|
||||||
|
|]
|
||||||
outContents SegmentAccuracy = [hereLit|N:1-4 V:5-6 N:8-10 V:12-13 A:15-17
|
outContents SegmentAccuracy = [hereLit|N:1-4 V:5-6 N:8-10 V:12-13 A:15-17
|
||||||
N:1-4 V:6-7 A:9-13
|
N:1-4 V:6-7 A:9-13
|
||||||
|]
|
|]
|
||||||
@ -178,6 +199,10 @@ expectedScore (EvaluationScheme GLEU [])
|
|||||||
= 0.7142857142857143
|
= 0.7142857142857143
|
||||||
expectedScore (EvaluationScheme SegmentAccuracy [])
|
expectedScore (EvaluationScheme SegmentAccuracy [])
|
||||||
= 0.875
|
= 0.875
|
||||||
|
expectedScore (EvaluationScheme WER [])
|
||||||
|
= 0.08571
|
||||||
|
expectedScore (EvaluationScheme CER [])
|
||||||
|
= 0.14814
|
||||||
|
|
||||||
helpMetricParameterMetricsList :: String
|
helpMetricParameterMetricsList :: String
|
||||||
helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
|
helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
|
||||||
@ -226,7 +251,7 @@ the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page numbe
|
|||||||
formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line a number of labels (entities) can be given. A label probability
|
formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line a number of labels (entities) can be given. A label probability
|
||||||
can be provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
|
can be provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
|
||||||
|]
|
|]
|
||||||
formatDescription GLEU = [hereLit|In each line a there is a space sparated sentence of words.
|
formatDescription GLEU = [hereLit|In each line there is a space-separated sequence of words.
|
||||||
|]
|
|]
|
||||||
formatDescription SegmentAccuracy = [hereLit|Labels can be any strings (without spaces), whereas is a list of
|
formatDescription SegmentAccuracy = [hereLit|Labels can be any strings (without spaces), whereas is a list of
|
||||||
1-based indexes or spans separated by commas (spans are inclusive
|
1-based indexes or spans separated by commas (spans are inclusive
|
||||||
@ -235,6 +260,9 @@ label "foo:bar" for positions 2, 4, 5, 6, 7 and 10. Note that no
|
|||||||
overlapping segments can be returned (evaluation will fail in
|
overlapping segments can be returned (evaluation will fail in
|
||||||
such a case).
|
such a case).
|
||||||
|]
|
|]
|
||||||
|
formatDescription WER = formatDescription GLEU
|
||||||
|
formatDescription CER = [hereLit|Any text, whitespace and punctuation marks are also considered.
|
||||||
|
|]
|
||||||
|
|
||||||
scoreExplanation :: EvaluationScheme -> Maybe String
|
scoreExplanation :: EvaluationScheme -> Maybe String
|
||||||
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
|
scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
|
||||||
@ -257,6 +285,14 @@ Now we have to calculate precision and recall:
|
|||||||
scoreExplanation (EvaluationScheme SegmentAccuracy [])
|
scoreExplanation (EvaluationScheme SegmentAccuracy [])
|
||||||
= Just [hereLit|Out of 4 segments in the expected output for the first item, 3 were retrieved correcly (accuracy is 3/4=0.75).
|
= Just [hereLit|Out of 4 segments in the expected output for the first item, 3 were retrieved correcly (accuracy is 3/4=0.75).
|
||||||
The second item was retrieved perfectly (accuracy is 1.0). Hence, the average is (0.75+1.0)/2=0.875.|]
|
The second item was retrieved perfectly (accuracy is 1.0). Hence, the average is (0.75+1.0)/2=0.875.|]
|
||||||
|
scoreExplanation (EvaluationScheme WER [])
|
||||||
|
= Just [hereLit|The total length of expected output (in words) is 35. There are 3 errors
|
||||||
|
(1 word substituted, 1 inserted, 1 deleted) in the actual output. Hence,
|
||||||
|
WER = (1+1+1) / 35 = 3 / 35 = 0.08571.|]
|
||||||
|
scoreExplanation (EvaluationScheme CER [])
|
||||||
|
= Just [hereLit|The total length of expected output (in characters) is 27. There are 4 errors
|
||||||
|
(2 characters substituted, 1 inserted, 1 deleted) in the actual output. Hence,
|
||||||
|
CER = (2+1+1) / 27 = 4 / 27 = 0.14814.|]
|
||||||
|
|
||||||
pasteLines :: String -> String -> String
|
pasteLines :: String -> String -> String
|
||||||
pasteLines a b = printf "%-35s %s\n" a b
|
pasteLines a b = printf "%-35s %s\n" a b
|
||||||
|
@ -128,6 +128,12 @@ main = hspec $ do
|
|||||||
describe "WER" $ do
|
describe "WER" $ do
|
||||||
it "simple example" $
|
it "simple example" $
|
||||||
runGEvalTest "wer-simple" `shouldReturnAlmost` 0.5555555555
|
runGEvalTest "wer-simple" `shouldReturnAlmost` 0.5555555555
|
||||||
|
describe "CER" $ do
|
||||||
|
it "simple example" $
|
||||||
|
runGEvalTest "cer-simple" `shouldReturnAlmost` 0.28947368421
|
||||||
|
describe "CER" $ do
|
||||||
|
it "simple example (Mean/CER)" $
|
||||||
|
runGEvalTest "cer-mean-simple" `shouldReturnAlmost` 0.277777777777778
|
||||||
describe "Accuracy" $ do
|
describe "Accuracy" $ do
|
||||||
it "simple example" $
|
it "simple example" $
|
||||||
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
|
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
|
||||||
|
@ -0,0 +1,2 @@
|
|||||||
|
To be or mot to be
|
||||||
|
Thas is the
|
|
1
test/cer-mean-simple/cer-mean-simple/config.txt
Normal file
1
test/cer-mean-simple/cer-mean-simple/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric Mean/CER
|
2
test/cer-mean-simple/cer-mean-simple/test-A/expected.tsv
Normal file
2
test/cer-mean-simple/cer-mean-simple/test-A/expected.tsv
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
To be or not to be
|
||||||
|
That is the question
|
|
2
test/cer-simple/cer-simple-solution/test-A/out.tsv
Normal file
2
test/cer-simple/cer-simple-solution/test-A/out.tsv
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
To be or mot to be
|
||||||
|
Thas is the
|
|
1
test/cer-simple/cer-simple/config.txt
Normal file
1
test/cer-simple/cer-simple/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric CER
|
2
test/cer-simple/cer-simple/test-A/expected.tsv
Normal file
2
test/cer-simple/cer-simple/test-A/expected.tsv
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
To be or not to be
|
||||||
|
That is the question
|
|
Loading…
Reference in New Issue
Block a user