Matching specification can be used for Accuracy
This commit is contained in:
parent
53a5a19af2
commit
eecd71e8e1
@ -159,7 +159,7 @@ isPreprocessable BLEU = True
|
|||||||
isPreprocessable GLEU = True
|
isPreprocessable GLEU = True
|
||||||
isPreprocessable WER = True
|
isPreprocessable WER = True
|
||||||
isPreprocessable CER = True
|
isPreprocessable CER = True
|
||||||
isPreprocessable Accuracy = True
|
isPreprocessable (Accuracy _) = True
|
||||||
isPreprocessable ClippEU = False
|
isPreprocessable ClippEU = False
|
||||||
isPreprocessable (FMeasure _) = False
|
isPreprocessable (FMeasure _) = False
|
||||||
isPreprocessable (MacroFMeasure _) = False
|
isPreprocessable (MacroFMeasure _) = False
|
||||||
@ -963,7 +963,7 @@ continueGEvalCalculations SACER CER = defineContinuation cerAgg cerFinal noGraph
|
|||||||
cerFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
|
cerFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
|
||||||
cerFinal (errors, ref) = errors /. ref
|
cerFinal (errors, ref) = errors /. ref
|
||||||
|
|
||||||
continueGEvalCalculations SAAccuracy Accuracy = defineContinuation averageC id noGraph
|
continueGEvalCalculations (SAAccuracy _) (Accuracy _) = defineContinuation averageC id noGraph
|
||||||
|
|
||||||
continueGEvalCalculations SAFMeasure (FMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
|
continueGEvalCalculations SAFMeasure (FMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
|
||||||
|
|
||||||
|
@ -124,7 +124,7 @@ Compute distance between two points on a sphere given their longitudes and latit
|
|||||||
This is a sample fake challenge for Gonito framework. Replace it with
|
This is a sample fake challenge for Gonito framework. Replace it with
|
||||||
the description of your challenge.|] ++ (commonReadmeMDContents testName)
|
the description of your challenge.|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents Accuracy testName = [i|
|
readmeMDContents (Accuracy _) testName = [i|
|
||||||
GEval sample classification challenge
|
GEval sample classification challenge
|
||||||
=====================================
|
=====================================
|
||||||
|
|
||||||
@ -509,7 +509,7 @@ trainContents Haversine = [hereLit|30.47547 -90.100911 some text
|
|||||||
33.399478 -110.87095 Another text
|
33.399478 -110.87095 Another text
|
||||||
|]
|
|]
|
||||||
|
|
||||||
trainContents Accuracy = [hereLit|Y 10 none yes
|
trainContents (Accuracy _) = [hereLit|Y 10 none yes
|
||||||
N -2 strong no
|
N -2 strong no
|
||||||
Y -3 mild no
|
Y -3 mild no
|
||||||
N -1 mild yes
|
N -1 mild yes
|
||||||
@ -612,7 +612,7 @@ devInContents WER = devInContents BLEU
|
|||||||
devInContents CER = [hereLit|dev1.pdf
|
devInContents CER = [hereLit|dev1.pdf
|
||||||
dev2.pdf
|
dev2.pdf
|
||||||
|]
|
|]
|
||||||
devInContents Accuracy = [hereLit|-8 none no
|
devInContents (Accuracy _) = [hereLit|-8 none no
|
||||||
1 mild no
|
1 mild no
|
||||||
|]
|
|]
|
||||||
devInContents NMI = [hereLit|When in Rome, do as the Romans.
|
devInContents NMI = [hereLit|When in Rome, do as the Romans.
|
||||||
@ -687,7 +687,7 @@ devExpectedContents WER = devExpectedContents BLEU
|
|||||||
devExpectedContents CER = [hereLit|et facta est lux
|
devExpectedContents CER = [hereLit|et facta est lux
|
||||||
Et tu, Brute?
|
Et tu, Brute?
|
||||||
|]
|
|]
|
||||||
devExpectedContents Accuracy = [hereLit|N
|
devExpectedContents (Accuracy _) = [hereLit|N
|
||||||
Y
|
Y
|
||||||
|]
|
|]
|
||||||
devExpectedContents (FMeasure _) = [hereLit|0
|
devExpectedContents (FMeasure _) = [hereLit|0
|
||||||
@ -765,7 +765,7 @@ testInContents WER = testInContents BLEU
|
|||||||
testInContents CER = [hereLit|test1.pdf
|
testInContents CER = [hereLit|test1.pdf
|
||||||
test2.pdf
|
test2.pdf
|
||||||
|]
|
|]
|
||||||
testInContents Accuracy = [hereLit|2 mild yes
|
testInContents (Accuracy _) = [hereLit|2 mild yes
|
||||||
-5 mild no
|
-5 mild no
|
||||||
|]
|
|]
|
||||||
testInContents (FMeasure _) = [hereLit|b b W 15210 527 -64 -56 a 0 0 0 0 0 0 0 0 0 0
|
testInContents (FMeasure _) = [hereLit|b b W 15210 527 -64 -56 a 0 0 0 0 0 0 0 0 0 0
|
||||||
@ -843,7 +843,7 @@ testExpectedContents CER = [hereLit|esse est percipi
|
|||||||
tabula rasa
|
tabula rasa
|
||||||
|]
|
|]
|
||||||
testExpectedContents WER = testExpectedContents BLEU
|
testExpectedContents WER = testExpectedContents BLEU
|
||||||
testExpectedContents Accuracy = [hereLit|N
|
testExpectedContents (Accuracy _) = [hereLit|N
|
||||||
Y
|
Y
|
||||||
|]
|
|]
|
||||||
testExpectedContents (FMeasure _) = [hereLit|0
|
testExpectedContents (FMeasure _) = [hereLit|0
|
||||||
@ -922,7 +922,7 @@ inHeaderContents GLEU = Nothing
|
|||||||
inHeaderContents BLEU = Nothing
|
inHeaderContents BLEU = Nothing
|
||||||
inHeaderContents WER = Nothing
|
inHeaderContents WER = Nothing
|
||||||
inHeaderContents CER = Just ["Filename"]
|
inHeaderContents CER = Just ["Filename"]
|
||||||
inHeaderContents Accuracy = Just ["Temperature", "Wind", "Rain"]
|
inHeaderContents (Accuracy _) = Just ["Temperature", "Wind", "Rain"]
|
||||||
inHeaderContents (FMeasure _) = Just ["seismic",
|
inHeaderContents (FMeasure _) = Just ["seismic",
|
||||||
"seismoacoustic",
|
"seismoacoustic",
|
||||||
"shift",
|
"shift",
|
||||||
@ -972,7 +972,7 @@ outHeaderContents BLEU = Nothing
|
|||||||
outHeaderContents GLEU = Nothing
|
outHeaderContents GLEU = Nothing
|
||||||
outHeaderContents WER = Nothing
|
outHeaderContents WER = Nothing
|
||||||
outHeaderContents CER = Just ["OCRedText"]
|
outHeaderContents CER = Just ["OCRedText"]
|
||||||
outHeaderContents Accuracy = Just ["ShouldYouKidForWalk"]
|
outHeaderContents (Accuracy _) = Just ["ShouldYouKidForWalk"]
|
||||||
outHeaderContents (FMeasure _) = Just ["IsSeismicBump"]
|
outHeaderContents (FMeasure _) = Just ["IsSeismicBump"]
|
||||||
outHeaderContents (MacroFMeasure _) = Just ["LanguageCode"]
|
outHeaderContents (MacroFMeasure _) = Just ["LanguageCode"]
|
||||||
outHeaderContents (ProbabilisticSoftFMeasure b) = outHeaderContents (SoftFMeasure b)
|
outHeaderContents (ProbabilisticSoftFMeasure b) = outHeaderContents (SoftFMeasure b)
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
{-# LANGUAGE EmptyCase #-}
|
{-# LANGUAGE EmptyCase #-}
|
||||||
{-# LANGUAGE UndecidableInstances #-}
|
{-# LANGUAGE UndecidableInstances #-}
|
||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE QuasiQuotes, FlexibleContexts #-}
|
||||||
|
|
||||||
-- | This module is for defining possible "matching specifications",
|
-- | This module is for defining possible "matching specifications",
|
||||||
-- i.e. the way tokens are matched against one another in metrics such as MultiLabel-F1.
|
-- i.e. the way tokens are matched against one another in metrics such as MultiLabel-F1.
|
||||||
@ -18,9 +19,10 @@ module GEval.MatchingSpecification
|
|||||||
import Data.Singletons.TH
|
import Data.Singletons.TH
|
||||||
import Data.Text
|
import Data.Text
|
||||||
import Data.List.Extra (breakOn)
|
import Data.List.Extra (breakOn)
|
||||||
import Data.Char (isLetter)
|
import Data.Char (isLetter, toLower)
|
||||||
import Data.List (find)
|
import Data.List (find)
|
||||||
import Data.Maybe (isJust)
|
import Data.Maybe (isJust, listToMaybe, fromJust, catMaybes, fromMaybe)
|
||||||
|
import Text.Regex.PCRE.Heavy
|
||||||
|
|
||||||
import Text.EditDistance
|
import Text.EditDistance
|
||||||
|
|
||||||
@ -32,12 +34,18 @@ singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. ide
|
|||||||
| SmartMatch MatchingSpecification -- ^ do fuzzy matching only on values
|
| SmartMatch MatchingSpecification -- ^ do fuzzy matching only on values
|
||||||
-- containing letters
|
-- containing letters
|
||||||
| Harden MatchingSpecification -- ^ harden a soft match
|
| Harden MatchingSpecification -- ^ harden a soft match
|
||||||
|
| LenientHarden MatchingSpecification -- ^ harden a soft match (lenient variant)
|
||||||
|
| Lower MatchingSpecification -- ^ lower-case inputs
|
||||||
|
| ExtractNumber MatchingSpecification -- ^ try extracting numbers first
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|]
|
|]
|
||||||
|
|
||||||
hardeningThreshold :: Double
|
hardeningThreshold :: Double
|
||||||
hardeningThreshold = 0.8
|
hardeningThreshold = 0.8
|
||||||
|
|
||||||
|
lenientHardeningThreshold :: Double
|
||||||
|
lenientHardeningThreshold = 0.5
|
||||||
|
|
||||||
getMatchingFunctionForString :: MatchingSpecification -> String -> String -> Double
|
getMatchingFunctionForString :: MatchingSpecification -> String -> String -> Double
|
||||||
getMatchingFunctionForString ExactMatch got expected
|
getMatchingFunctionForString ExactMatch got expected
|
||||||
| got == expected = 1.0
|
| got == expected = 1.0
|
||||||
@ -60,6 +68,50 @@ getMatchingFunctionForString (Harden smatchSpec) got expected = if softMatch >=
|
|||||||
else 0.0
|
else 0.0
|
||||||
where softMatch = getMatchingFunctionForString smatchSpec got expected
|
where softMatch = getMatchingFunctionForString smatchSpec got expected
|
||||||
|
|
||||||
|
getMatchingFunctionForString (LenientHarden smatchSpec) got expected = if softMatch >= lenientHardeningThreshold
|
||||||
|
then 1.0
|
||||||
|
else 0.0
|
||||||
|
where softMatch = getMatchingFunctionForString smatchSpec got expected
|
||||||
|
|
||||||
|
getMatchingFunctionForString (Lower smatchSpec) got expected =
|
||||||
|
getMatchingFunctionForString smatchSpec (lowerS got)
|
||||||
|
(lowerS expected)
|
||||||
|
where lowerS = Prelude.map Data.Char.toLower
|
||||||
|
|
||||||
|
getMatchingFunctionForString (ExtractNumber smatchSpec) got expected =
|
||||||
|
if isJust en
|
||||||
|
then
|
||||||
|
if gn == en
|
||||||
|
then 1.0
|
||||||
|
else 0.0
|
||||||
|
else m
|
||||||
|
where m = getMatchingFunctionForString smatchSpec got expected
|
||||||
|
gn = extractNumber got
|
||||||
|
en = extractNumber expected
|
||||||
|
|
||||||
|
extractNumber :: String -> Maybe String
|
||||||
|
extractNumber s = case extractArabicNumber s of
|
||||||
|
Just n -> Just n
|
||||||
|
Nothing -> fmap show (listToMaybe $ catMaybes $ Prelude.map extractRomanNumber $ Prelude.words s)
|
||||||
|
|
||||||
|
extractArabicNumber :: String -> Maybe String
|
||||||
|
extractArabicNumber s = fst <$> listToMaybe (scan [re|[-+]?\d*\.\d+|\d+|] s)
|
||||||
|
|
||||||
|
extractRomanNumber s = case romanToInt s of
|
||||||
|
Just v -> if v > 0
|
||||||
|
then Just v
|
||||||
|
else Nothing
|
||||||
|
Nothing -> Nothing
|
||||||
|
|
||||||
|
-- see https://wiki.haskell.org/Roman_numerals
|
||||||
|
romanToInt :: String -> Maybe Int
|
||||||
|
romanToInt = fst . Prelude.foldr step (Just 0, 0)
|
||||||
|
. Prelude.map (flip lookup (Prelude.zip "IVXLCDM" [1, 5, 10, 50, 100, 500, 1000]))
|
||||||
|
where step :: Maybe Int -> (Maybe Int, Int) -> (Maybe Int, Int)
|
||||||
|
step _ (Nothing, p) = (Nothing, p)
|
||||||
|
step Nothing (_, p) = (Nothing, p)
|
||||||
|
step (Just p) (Just t, s) = if p >= s then (Just (t+p), p) else (Just (t-p), p)
|
||||||
|
|
||||||
-- | Whether suitable for fuzzy matching when in the "smart" match mode.
|
-- | Whether suitable for fuzzy matching when in the "smart" match mode.
|
||||||
-- At the moment we check whether it contains at least one letter
|
-- At the moment we check whether it contains at least one letter
|
||||||
-- (we require the exact match for, for instance, numbers written with digits.
|
-- (we require the exact match for, for instance, numbers written with digits.
|
||||||
|
@ -25,7 +25,7 @@ import Data.Attoparsec.Text (parseOnly)
|
|||||||
-- the evaluation procedures are defined in GEval.Core
|
-- the evaluation procedures are defined in GEval.Core
|
||||||
|
|
||||||
-- | evaluation metric
|
-- | evaluation metric
|
||||||
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy | ClippEU
|
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy MatchingSpecification | ClippEU
|
||||||
| FMeasure Double | MacroFMeasure Double | NMI
|
| FMeasure Double | MacroFMeasure Double | NMI
|
||||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||||
| BIOF1 | BIOWeightedF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
|
| BIOF1 | BIOWeightedF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
|
||||||
@ -50,7 +50,14 @@ instance Show Metric where
|
|||||||
show GLEU = "GLEU"
|
show GLEU = "GLEU"
|
||||||
show WER = "WER"
|
show WER = "WER"
|
||||||
show CER = "CER"
|
show CER = "CER"
|
||||||
show Accuracy = "Accuracy"
|
show (Accuracy ExactMatch) = "Accuracy"
|
||||||
|
show (Accuracy FuzzyMatch) = "Fuzzy/" ++ (show $ Accuracy ExactMatch)
|
||||||
|
show (Accuracy (CutLabel matchSpec)) = "CutLabel/" ++ (show $ Accuracy matchSpec)
|
||||||
|
show (Accuracy (SmartMatch matchSpec)) = "Smart/" ++ (show $ Accuracy matchSpec)
|
||||||
|
show (Accuracy (Harden matchSpec)) = "Harden/" ++ (show $ Accuracy matchSpec)
|
||||||
|
show (Accuracy (LenientHarden matchSpec)) = "LenientHarden/" ++ (show $ Accuracy matchSpec)
|
||||||
|
show (Accuracy (Lower matchSpec)) = "Lower/" ++ (show $ Accuracy matchSpec)
|
||||||
|
show (Accuracy (ExtractNumber matchSpec)) = "ExtractNumber/" ++ (show $ Accuracy matchSpec)
|
||||||
show ClippEU = "ClippEU"
|
show ClippEU = "ClippEU"
|
||||||
show (FMeasure beta) = "F" ++ (show beta)
|
show (FMeasure beta) = "F" ++ (show beta)
|
||||||
show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
|
show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
|
||||||
@ -88,6 +95,9 @@ instance Show Metric where
|
|||||||
show (MultiLabelFMeasure beta (CutLabel matchSpec)) = "CutLabel/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
show (MultiLabelFMeasure beta (CutLabel matchSpec)) = "CutLabel/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||||
show (MultiLabelFMeasure beta (SmartMatch matchSpec)) = "Smart/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
show (MultiLabelFMeasure beta (SmartMatch matchSpec)) = "Smart/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||||
show (MultiLabelFMeasure beta (Harden matchSpec)) = "Harden/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
show (MultiLabelFMeasure beta (Harden matchSpec)) = "Harden/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||||
|
show (MultiLabelFMeasure beta (LenientHarden matchSpec)) = "LenientHarden/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||||
|
show (MultiLabelFMeasure beta (Lower matchSpec)) = "Lower/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||||
|
show (MultiLabelFMeasure beta (ExtractNumber matchSpec)) = "ExtractNumber/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||||
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
||||||
show MultiLabelLikelihood = "MultiLabel-Likelihood"
|
show MultiLabelLikelihood = "MultiLabel-Likelihood"
|
||||||
show Haversine = "Haversine"
|
show Haversine = "Haversine"
|
||||||
@ -98,6 +108,8 @@ applyMatchingSpecification :: (MatchingSpecification -> MatchingSpecification)
|
|||||||
-> Metric
|
-> Metric
|
||||||
applyMatchingSpecification fun (MultiLabelFMeasure beta matchSpec)
|
applyMatchingSpecification fun (MultiLabelFMeasure beta matchSpec)
|
||||||
= MultiLabelFMeasure beta (fun matchSpec)
|
= MultiLabelFMeasure beta (fun matchSpec)
|
||||||
|
applyMatchingSpecification fun (Accuracy matchSpec)
|
||||||
|
= Accuracy (fun matchSpec)
|
||||||
applyMatchingSpecification _ metric = error $ "Matching specification cannot be applied to the " ++ (show metric) ++ " metric"
|
applyMatchingSpecification _ metric = error $ "Matching specification cannot be applied to the " ++ (show metric) ++ " metric"
|
||||||
|
|
||||||
instance Read Metric where
|
instance Read Metric where
|
||||||
@ -116,6 +128,15 @@ instance Read Metric where
|
|||||||
readsPrec p ('H':'a':'r':'d':'e':'n':'/':theRest) = case readsPrec p theRest of
|
readsPrec p ('H':'a':'r':'d':'e':'n':'/':theRest) = case readsPrec p theRest of
|
||||||
[(metric, theRest)] -> [(applyMatchingSpecification Harden metric, theRest)]
|
[(metric, theRest)] -> [(applyMatchingSpecification Harden metric, theRest)]
|
||||||
_ -> []
|
_ -> []
|
||||||
|
readsPrec p ('L':'e':'n':'i':'e':'n':'t':'H':'a':'r':'d':'e':'n':'/':theRest) = case readsPrec p theRest of
|
||||||
|
[(metric, theRest)] -> [(applyMatchingSpecification LenientHarden metric, theRest)]
|
||||||
|
_ -> []
|
||||||
|
readsPrec p ('L':'o':'w':'e':'r':'/':theRest) = case readsPrec p theRest of
|
||||||
|
[(metric, theRest)] -> [(applyMatchingSpecification Lower metric, theRest)]
|
||||||
|
_ -> []
|
||||||
|
readsPrec p ('E':'x':'t':'r':'a':'c':'t':'N':'u':'m':'b':'e':'r':'/':theRest) = case readsPrec p theRest of
|
||||||
|
[(metric, theRest)] -> [(applyMatchingSpecification ExtractNumber metric, theRest)]
|
||||||
|
_ -> []
|
||||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||||
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
||||||
readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
|
readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
|
||||||
@ -124,7 +145,7 @@ instance Read Metric where
|
|||||||
readsPrec _ ('G':'L':'E':'U':theRest) = [(GLEU, theRest)]
|
readsPrec _ ('G':'L':'E':'U':theRest) = [(GLEU, theRest)]
|
||||||
readsPrec _ ('W':'E':'R':theRest) = [(WER, theRest)]
|
readsPrec _ ('W':'E':'R':theRest) = [(WER, theRest)]
|
||||||
readsPrec _ ('C':'E':'R':theRest) = [(CER, theRest)]
|
readsPrec _ ('C':'E':'R':theRest) = [(CER, theRest)]
|
||||||
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
|
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy ExactMatch, theRest)]
|
||||||
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
|
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
|
||||||
readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
|
readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
|
||||||
readsPrec p ('F':'L':'C':'-':'F':theRest) = case readsPrec p theRest of
|
readsPrec p ('F':'L':'C':'-':'F':theRest) = case readsPrec p theRest of
|
||||||
@ -186,7 +207,7 @@ getMetricOrdering BLEU = TheHigherTheBetter
|
|||||||
getMetricOrdering GLEU = TheHigherTheBetter
|
getMetricOrdering GLEU = TheHigherTheBetter
|
||||||
getMetricOrdering WER = TheLowerTheBetter
|
getMetricOrdering WER = TheLowerTheBetter
|
||||||
getMetricOrdering CER = TheLowerTheBetter
|
getMetricOrdering CER = TheLowerTheBetter
|
||||||
getMetricOrdering Accuracy = TheHigherTheBetter
|
getMetricOrdering (Accuracy _) = TheHigherTheBetter
|
||||||
getMetricOrdering ClippEU = TheHigherTheBetter
|
getMetricOrdering ClippEU = TheHigherTheBetter
|
||||||
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||||
getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
|
getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
|
||||||
@ -230,6 +251,8 @@ fixedNumberOfColumnsInExpected (Mean metric) = fixedNumberOfColumnsInExpected me
|
|||||||
fixedNumberOfColumnsInExpected MAP = False
|
fixedNumberOfColumnsInExpected MAP = False
|
||||||
fixedNumberOfColumnsInExpected BLEU = False
|
fixedNumberOfColumnsInExpected BLEU = False
|
||||||
fixedNumberOfColumnsInExpected GLEU = False
|
fixedNumberOfColumnsInExpected GLEU = False
|
||||||
|
fixedNumberOfColumnsInExpected (Accuracy ExactMatch) = True
|
||||||
|
fixedNumberOfColumnsInExpected (Accuracy _) = False
|
||||||
fixedNumberOfColumnsInExpected _ = True
|
fixedNumberOfColumnsInExpected _ = True
|
||||||
|
|
||||||
fixedNumberOfColumnsInInput :: Metric -> Bool
|
fixedNumberOfColumnsInInput :: Metric -> Bool
|
||||||
@ -246,6 +269,8 @@ perfectOutLineFromExpectedLine (LikelihoodHashed _) t = t <> ":1.0"
|
|||||||
perfectOutLineFromExpectedLine BLEU t = getFirstColumn t
|
perfectOutLineFromExpectedLine BLEU t = getFirstColumn t
|
||||||
perfectOutLineFromExpectedLine GLEU t = getFirstColumn t
|
perfectOutLineFromExpectedLine GLEU t = getFirstColumn t
|
||||||
perfectOutLineFromExpectedLine ClippEU t = cleanMarginFromClippEU t
|
perfectOutLineFromExpectedLine ClippEU t = cleanMarginFromClippEU t
|
||||||
|
perfectOutLineFromExpectedLine (Accuracy ExactMatch) t = t
|
||||||
|
perfectOutLineFromExpectedLine (Accuracy _) t = getFirstColumn t
|
||||||
perfectOutLineFromExpectedLine _ t = t
|
perfectOutLineFromExpectedLine _ t = t
|
||||||
|
|
||||||
getFirstColumn :: Text -> Text
|
getFirstColumn :: Text -> Text
|
||||||
|
@ -50,7 +50,7 @@ import qualified Data.HashMap.Strict as M
|
|||||||
-- | Helper type so that singleton can be used.
|
-- | Helper type so that singleton can be used.
|
||||||
-- | (The problem is that some metrics are parametrized by Double
|
-- | (The problem is that some metrics are parametrized by Double
|
||||||
-- | Word32 and this is not handled by the singleton libary.)
|
-- | Word32 and this is not handled by the singleton libary.)
|
||||||
singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy | AClippEU
|
singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy MatchingSpecification | AClippEU
|
||||||
| AFMeasure | AMacroFMeasure | ANMI
|
| AFMeasure | AMacroFMeasure | ANMI
|
||||||
| ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood
|
| ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood
|
||||||
| ABIOF1 | ABIOWeightedF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
|
| ABIOF1 | ABIOWeightedF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
|
||||||
@ -70,7 +70,7 @@ toHelper BLEU = ABLEU
|
|||||||
toHelper GLEU = AGLEU
|
toHelper GLEU = AGLEU
|
||||||
toHelper WER = AWER
|
toHelper WER = AWER
|
||||||
toHelper CER = ACER
|
toHelper CER = ACER
|
||||||
toHelper Accuracy = AAccuracy
|
toHelper (Accuracy matchingSpec) = AAccuracy matchingSpec
|
||||||
toHelper ClippEU = AClippEU
|
toHelper ClippEU = AClippEU
|
||||||
toHelper (FMeasure _) = AFMeasure
|
toHelper (FMeasure _) = AFMeasure
|
||||||
toHelper (MacroFMeasure _) = AMacroFMeasure
|
toHelper (MacroFMeasure _) = AMacroFMeasure
|
||||||
@ -111,7 +111,7 @@ type family ParsedExpectedType (t :: AMetric) :: * where
|
|||||||
ParsedExpectedType AGLEU = [[String]]
|
ParsedExpectedType AGLEU = [[String]]
|
||||||
ParsedExpectedType AWER = [String]
|
ParsedExpectedType AWER = [String]
|
||||||
ParsedExpectedType ACER = String
|
ParsedExpectedType ACER = String
|
||||||
ParsedExpectedType AAccuracy = Text
|
ParsedExpectedType (AAccuracy _) = Text
|
||||||
ParsedExpectedType AClippEU = [ClippingSpec]
|
ParsedExpectedType AClippEU = [ClippingSpec]
|
||||||
ParsedExpectedType AFMeasure = Bool
|
ParsedExpectedType AFMeasure = Bool
|
||||||
ParsedExpectedType AMacroFMeasure = Maybe Text
|
ParsedExpectedType AMacroFMeasure = Maybe Text
|
||||||
@ -148,7 +148,7 @@ expectedParser SABLEU = alternativeSentencesParser
|
|||||||
expectedParser SAGLEU = alternativeSentencesParser
|
expectedParser SAGLEU = alternativeSentencesParser
|
||||||
expectedParser SAWER = intoStringWords
|
expectedParser SAWER = intoStringWords
|
||||||
expectedParser SACER = Right . unpack
|
expectedParser SACER = Right . unpack
|
||||||
expectedParser SAAccuracy = onlyStrip
|
expectedParser (SAAccuracy _) = onlyStrip
|
||||||
expectedParser SAClippEU = controlledParse lineClippingSpecsParser
|
expectedParser SAClippEU = controlledParse lineClippingSpecsParser
|
||||||
expectedParser SAFMeasure = zeroOneParser
|
expectedParser SAFMeasure = zeroOneParser
|
||||||
expectedParser SAMacroFMeasure = justStrip
|
expectedParser SAMacroFMeasure = justStrip
|
||||||
@ -199,7 +199,7 @@ outputParser SABLEU = Right . Prelude.words . unpack
|
|||||||
outputParser SAGLEU = Right . Prelude.words . unpack
|
outputParser SAGLEU = Right . Prelude.words . unpack
|
||||||
outputParser SAWER = expectedParser SAWER
|
outputParser SAWER = expectedParser SAWER
|
||||||
outputParser SACER = expectedParser SACER
|
outputParser SACER = expectedParser SACER
|
||||||
outputParser SAAccuracy = expectedParser SAAccuracy
|
outputParser p@(SAAccuracy _) = expectedParser p
|
||||||
outputParser SAClippEU = controlledParse lineClippingsParser
|
outputParser SAClippEU = controlledParse lineClippingsParser
|
||||||
outputParser SAFMeasure = probToZeroOneParser
|
outputParser SAFMeasure = probToZeroOneParser
|
||||||
outputParser SAMacroFMeasure = Right . predictedParser . strip
|
outputParser SAMacroFMeasure = Right . predictedParser . strip
|
||||||
@ -257,6 +257,13 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
|
|||||||
ItemIntermediateRepresentationType AHaversine = Double
|
ItemIntermediateRepresentationType AHaversine = Double
|
||||||
ItemIntermediateRepresentationType t = Double
|
ItemIntermediateRepresentationType t = Double
|
||||||
|
|
||||||
|
findBest :: (Text -> Text -> Double) -> (Text -> Text -> Double)
|
||||||
|
findBest fun expected got = Prelude.maximum $ Prelude.map (fun got) expectedVals
|
||||||
|
where expectedVals = case splitOn "\t" expected of
|
||||||
|
[] -> [""]
|
||||||
|
l -> l
|
||||||
|
|
||||||
|
|
||||||
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
|
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
|
||||||
itemStep SARMSE = itemSquaredError
|
itemStep SARMSE = itemSquaredError
|
||||||
itemStep SAMSE = itemSquaredError
|
itemStep SAMSE = itemSquaredError
|
||||||
@ -267,7 +274,8 @@ itemStep SAGLEU = uncurry gleuStep
|
|||||||
itemStep SAWER = uncurry werStep
|
itemStep SAWER = uncurry werStep
|
||||||
-- strings are character lists, so we could re-use werStep
|
-- strings are character lists, so we could re-use werStep
|
||||||
itemStep SACER = uncurry werStep
|
itemStep SACER = uncurry werStep
|
||||||
itemStep SAAccuracy = hitOrMiss
|
itemStep (SAAccuracy SExactMatch) = hitOrMiss
|
||||||
|
itemStep (SAAccuracy smatchSpec) = uncurry (findBest $ getMatchingFunctionForText $ fromSing smatchSpec)
|
||||||
itemStep SAClippEU = clippEUMatchStep
|
itemStep SAClippEU = clippEUMatchStep
|
||||||
itemStep SAFMeasure = getCount
|
itemStep SAFMeasure = getCount
|
||||||
itemStep SAMacroFMeasure = getClassesInvolved
|
itemStep SAMacroFMeasure = getClassesInvolved
|
||||||
|
@ -37,7 +37,7 @@ listOfAvailableMetrics = [RMSE,
|
|||||||
SMAPE,
|
SMAPE,
|
||||||
Pearson,
|
Pearson,
|
||||||
Spearman,
|
Spearman,
|
||||||
Accuracy,
|
Accuracy ExactMatch,
|
||||||
LogLoss,
|
LogLoss,
|
||||||
Likelihood,
|
Likelihood,
|
||||||
FMeasure 1.0,
|
FMeasure 1.0,
|
||||||
|
@ -9,6 +9,7 @@ import GEval.MetricsMeta (listOfAvailableEvaluationSchemes, isEvaluationSchemeDe
|
|||||||
import GEval.Core
|
import GEval.Core
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
import GEval.EvaluationScheme
|
import GEval.EvaluationScheme
|
||||||
|
import GEval.MatchingSpecification
|
||||||
import GEval.OptionsParser
|
import GEval.OptionsParser
|
||||||
import GEval.BLEU
|
import GEval.BLEU
|
||||||
import GEval.Clippings
|
import GEval.Clippings
|
||||||
@ -149,6 +150,8 @@ main = hspec $ do
|
|||||||
runGEvalTest "accuracy-filtering" `shouldReturnAlmost` 0.6666
|
runGEvalTest "accuracy-filtering" `shouldReturnAlmost` 0.6666
|
||||||
it "with filtering 2" $
|
it "with filtering 2" $
|
||||||
runGEvalTest "accuracy-multiple-filtering" `shouldReturnAlmost` 0.5
|
runGEvalTest "accuracy-multiple-filtering" `shouldReturnAlmost` 0.5
|
||||||
|
it "with fuzzy match" $
|
||||||
|
runGEvalTest "fuzzy-match-accuracy" `shouldReturnAlmost` 0.6
|
||||||
describe "F-measure" $ do
|
describe "F-measure" $ do
|
||||||
it "simple example" $
|
it "simple example" $
|
||||||
runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
|
runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
|
||||||
@ -282,7 +285,7 @@ main = hspec $ do
|
|||||||
runGEvalTest "clippeu-simple" `shouldReturnAlmost` 0.399999999999
|
runGEvalTest "clippeu-simple" `shouldReturnAlmost` 0.399999999999
|
||||||
describe "evaluation metric specification is parsed" $ do
|
describe "evaluation metric specification is parsed" $ do
|
||||||
it "for simple names" $ do
|
it "for simple names" $ do
|
||||||
let metrics = [RMSE, MSE, BLEU, Accuracy, ClippEU]
|
let metrics = [RMSE, MSE, BLEU, Accuracy ExactMatch, ClippEU]
|
||||||
let parsedMetrics = Prelude.map (read . show) metrics
|
let parsedMetrics = Prelude.map (read . show) metrics
|
||||||
metrics `shouldBe` parsedMetrics
|
metrics `shouldBe` parsedMetrics
|
||||||
it "for F-Measure" $ do
|
it "for F-Measure" $ do
|
||||||
|
@ -0,0 +1,10 @@
|
|||||||
|
foo bar baz
|
||||||
|
pig
|
||||||
|
Hentry the Saw
|
||||||
|
12
|
||||||
|
123
|
||||||
|
1
|
||||||
|
WARSZAWA
|
||||||
|
52 XYZU
|
||||||
|
Test XII
|
||||||
|
FRANCE
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric LenientHarden/ExtractNumber/Lower/Fuzzy/Accuracy
|
@ -0,0 +1,10 @@
|
|||||||
|
foo bar baz
|
||||||
|
squirrel
|
||||||
|
Henry the Hammer
|
||||||
|
1
|
||||||
|
12
|
||||||
|
12
|
||||||
|
Warszawa
|
||||||
|
Abcd 52
|
||||||
|
Bla 12
|
||||||
|
Poland France Italy
|
Can't render this file because it has a wrong number of fields in line 10.
|
Loading…
Reference in New Issue
Block a user