Matching specification can be used for Accuracy

This commit is contained in:
Filip Gralinski 2021-07-19 16:37:43 +02:00
parent 53a5a19af2
commit eecd71e8e1
10 changed files with 133 additions and 24 deletions

View File

@ -159,7 +159,7 @@ isPreprocessable BLEU = True
isPreprocessable GLEU = True
isPreprocessable WER = True
isPreprocessable CER = True
isPreprocessable Accuracy = True
isPreprocessable (Accuracy _) = True
isPreprocessable ClippEU = False
isPreprocessable (FMeasure _) = False
isPreprocessable (MacroFMeasure _) = False
@ -963,7 +963,7 @@ continueGEvalCalculations SACER CER = defineContinuation cerAgg cerFinal noGraph
cerFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
cerFinal (errors, ref) = errors /. ref
continueGEvalCalculations SAAccuracy Accuracy = defineContinuation averageC id noGraph
continueGEvalCalculations (SAAccuracy _) (Accuracy _) = defineContinuation averageC id noGraph
continueGEvalCalculations SAFMeasure (FMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph

View File

@ -124,7 +124,7 @@ Compute distance between two points on a sphere given their longitudes and latit
This is a sample fake challenge for Gonito framework. Replace it with
the description of your challenge.|] ++ (commonReadmeMDContents testName)
readmeMDContents Accuracy testName = [i|
readmeMDContents (Accuracy _) testName = [i|
GEval sample classification challenge
=====================================
@ -509,7 +509,7 @@ trainContents Haversine = [hereLit|30.47547 -90.100911 some text
33.399478 -110.87095 Another text
|]
trainContents Accuracy = [hereLit|Y 10 none yes
trainContents (Accuracy _) = [hereLit|Y 10 none yes
N -2 strong no
Y -3 mild no
N -1 mild yes
@ -612,7 +612,7 @@ devInContents WER = devInContents BLEU
devInContents CER = [hereLit|dev1.pdf
dev2.pdf
|]
devInContents Accuracy = [hereLit|-8 none no
devInContents (Accuracy _) = [hereLit|-8 none no
1 mild no
|]
devInContents NMI = [hereLit|When in Rome, do as the Romans.
@ -687,7 +687,7 @@ devExpectedContents WER = devExpectedContents BLEU
devExpectedContents CER = [hereLit|et facta est lux
Et tu, Brute?
|]
devExpectedContents Accuracy = [hereLit|N
devExpectedContents (Accuracy _) = [hereLit|N
Y
|]
devExpectedContents (FMeasure _) = [hereLit|0
@ -765,7 +765,7 @@ testInContents WER = testInContents BLEU
testInContents CER = [hereLit|test1.pdf
test2.pdf
|]
testInContents Accuracy = [hereLit|2 mild yes
testInContents (Accuracy _) = [hereLit|2 mild yes
-5 mild no
|]
testInContents (FMeasure _) = [hereLit|b b W 15210 527 -64 -56 a 0 0 0 0 0 0 0 0 0 0
@ -843,7 +843,7 @@ testExpectedContents CER = [hereLit|esse est percipi
tabula rasa
|]
testExpectedContents WER = testExpectedContents BLEU
testExpectedContents Accuracy = [hereLit|N
testExpectedContents (Accuracy _) = [hereLit|N
Y
|]
testExpectedContents (FMeasure _) = [hereLit|0
@ -922,7 +922,7 @@ inHeaderContents GLEU = Nothing
inHeaderContents BLEU = Nothing
inHeaderContents WER = Nothing
inHeaderContents CER = Just ["Filename"]
inHeaderContents Accuracy = Just ["Temperature", "Wind", "Rain"]
inHeaderContents (Accuracy _) = Just ["Temperature", "Wind", "Rain"]
inHeaderContents (FMeasure _) = Just ["seismic",
"seismoacoustic",
"shift",
@ -972,7 +972,7 @@ outHeaderContents BLEU = Nothing
outHeaderContents GLEU = Nothing
outHeaderContents WER = Nothing
outHeaderContents CER = Just ["OCRedText"]
outHeaderContents Accuracy = Just ["ShouldYouKidForWalk"]
outHeaderContents (Accuracy _) = Just ["ShouldYouKidForWalk"]
outHeaderContents (FMeasure _) = Just ["IsSeismicBump"]
outHeaderContents (MacroFMeasure _) = Just ["LanguageCode"]
outHeaderContents (ProbabilisticSoftFMeasure b) = outHeaderContents (SoftFMeasure b)

View File

@ -5,6 +5,7 @@
{-# LANGUAGE EmptyCase #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE QuasiQuotes, FlexibleContexts #-}
-- | This module is for defining possible "matching specifications",
-- i.e. the way tokens are matched against one another in metrics such as MultiLabel-F1.
@ -18,9 +19,10 @@ module GEval.MatchingSpecification
import Data.Singletons.TH
import Data.Text
import Data.List.Extra (breakOn)
import Data.Char (isLetter)
import Data.Char (isLetter, toLower)
import Data.List (find)
import Data.Maybe (isJust)
import Data.Maybe (isJust, listToMaybe, fromJust, catMaybes, fromMaybe)
import Text.Regex.PCRE.Heavy
import Text.EditDistance
@ -32,12 +34,18 @@ singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. ide
| SmartMatch MatchingSpecification -- ^ do fuzzy matching only on values
-- containing letters
| Harden MatchingSpecification -- ^ harden a soft match
| LenientHarden MatchingSpecification -- ^ harden a soft match (lenient variant)
| Lower MatchingSpecification -- ^ lower-case inputs
| ExtractNumber MatchingSpecification -- ^ try extracting numbers first
deriving (Eq)
|]
hardeningThreshold :: Double
hardeningThreshold = 0.8
lenientHardeningThreshold :: Double
lenientHardeningThreshold = 0.5
getMatchingFunctionForString :: MatchingSpecification -> String -> String -> Double
getMatchingFunctionForString ExactMatch got expected
| got == expected = 1.0
@ -60,6 +68,50 @@ getMatchingFunctionForString (Harden smatchSpec) got expected = if softMatch >=
else 0.0
where softMatch = getMatchingFunctionForString smatchSpec got expected
getMatchingFunctionForString (LenientHarden smatchSpec) got expected = if softMatch >= lenientHardeningThreshold
then 1.0
else 0.0
where softMatch = getMatchingFunctionForString smatchSpec got expected
getMatchingFunctionForString (Lower smatchSpec) got expected =
getMatchingFunctionForString smatchSpec (lowerS got)
(lowerS expected)
where lowerS = Prelude.map Data.Char.toLower
getMatchingFunctionForString (ExtractNumber smatchSpec) got expected =
if isJust en
then
if gn == en
then 1.0
else 0.0
else m
where m = getMatchingFunctionForString smatchSpec got expected
gn = extractNumber got
en = extractNumber expected
extractNumber :: String -> Maybe String
extractNumber s = case extractArabicNumber s of
Just n -> Just n
Nothing -> fmap show (listToMaybe $ catMaybes $ Prelude.map extractRomanNumber $ Prelude.words s)
extractArabicNumber :: String -> Maybe String
extractArabicNumber s = fst <$> listToMaybe (scan [re|[-+]?\d*\.\d+|\d+|] s)
extractRomanNumber s = case romanToInt s of
Just v -> if v > 0
then Just v
else Nothing
Nothing -> Nothing
-- see https://wiki.haskell.org/Roman_numerals
romanToInt :: String -> Maybe Int
romanToInt = fst . Prelude.foldr step (Just 0, 0)
. Prelude.map (flip lookup (Prelude.zip "IVXLCDM" [1, 5, 10, 50, 100, 500, 1000]))
where step :: Maybe Int -> (Maybe Int, Int) -> (Maybe Int, Int)
step _ (Nothing, p) = (Nothing, p)
step Nothing (_, p) = (Nothing, p)
step (Just p) (Just t, s) = if p >= s then (Just (t+p), p) else (Just (t-p), p)
-- | Whether suitable for fuzzy matching when in the "smart" match mode.
-- At the moment we check whether it contains at least one letter
-- (we require the exact match for, for instance, numbers written with digits.

View File

@ -25,7 +25,7 @@ import Data.Attoparsec.Text (parseOnly)
-- the evaluation procedures are defined in GEval.Core
-- | evaluation metric
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy | ClippEU
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy MatchingSpecification | ClippEU
| FMeasure Double | MacroFMeasure Double | NMI
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
| BIOF1 | BIOWeightedF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
@ -50,7 +50,14 @@ instance Show Metric where
show GLEU = "GLEU"
show WER = "WER"
show CER = "CER"
show Accuracy = "Accuracy"
show (Accuracy ExactMatch) = "Accuracy"
show (Accuracy FuzzyMatch) = "Fuzzy/" ++ (show $ Accuracy ExactMatch)
show (Accuracy (CutLabel matchSpec)) = "CutLabel/" ++ (show $ Accuracy matchSpec)
show (Accuracy (SmartMatch matchSpec)) = "Smart/" ++ (show $ Accuracy matchSpec)
show (Accuracy (Harden matchSpec)) = "Harden/" ++ (show $ Accuracy matchSpec)
show (Accuracy (LenientHarden matchSpec)) = "LenientHarden/" ++ (show $ Accuracy matchSpec)
show (Accuracy (Lower matchSpec)) = "Lower/" ++ (show $ Accuracy matchSpec)
show (Accuracy (ExtractNumber matchSpec)) = "ExtractNumber/" ++ (show $ Accuracy matchSpec)
show ClippEU = "ClippEU"
show (FMeasure beta) = "F" ++ (show beta)
show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
@ -88,6 +95,9 @@ instance Show Metric where
show (MultiLabelFMeasure beta (CutLabel matchSpec)) = "CutLabel/" ++ (show $ MultiLabelFMeasure beta matchSpec)
show (MultiLabelFMeasure beta (SmartMatch matchSpec)) = "Smart/" ++ (show $ MultiLabelFMeasure beta matchSpec)
show (MultiLabelFMeasure beta (Harden matchSpec)) = "Harden/" ++ (show $ MultiLabelFMeasure beta matchSpec)
show (MultiLabelFMeasure beta (LenientHarden matchSpec)) = "LenientHarden/" ++ (show $ MultiLabelFMeasure beta matchSpec)
show (MultiLabelFMeasure beta (Lower matchSpec)) = "Lower/" ++ (show $ MultiLabelFMeasure beta matchSpec)
show (MultiLabelFMeasure beta (ExtractNumber matchSpec)) = "ExtractNumber/" ++ (show $ MultiLabelFMeasure beta matchSpec)
show MultiLabelLogLoss = "MultiLabel-Logloss"
show MultiLabelLikelihood = "MultiLabel-Likelihood"
show Haversine = "Haversine"
@ -98,6 +108,8 @@ applyMatchingSpecification :: (MatchingSpecification -> MatchingSpecification)
-> Metric
applyMatchingSpecification fun (MultiLabelFMeasure beta matchSpec)
= MultiLabelFMeasure beta (fun matchSpec)
applyMatchingSpecification fun (Accuracy matchSpec)
= Accuracy (fun matchSpec)
applyMatchingSpecification _ metric = error $ "Matching specification cannot be applied to the " ++ (show metric) ++ " metric"
instance Read Metric where
@ -116,6 +128,15 @@ instance Read Metric where
readsPrec p ('H':'a':'r':'d':'e':'n':'/':theRest) = case readsPrec p theRest of
[(metric, theRest)] -> [(applyMatchingSpecification Harden metric, theRest)]
_ -> []
readsPrec p ('L':'e':'n':'i':'e':'n':'t':'H':'a':'r':'d':'e':'n':'/':theRest) = case readsPrec p theRest of
[(metric, theRest)] -> [(applyMatchingSpecification LenientHarden metric, theRest)]
_ -> []
readsPrec p ('L':'o':'w':'e':'r':'/':theRest) = case readsPrec p theRest of
[(metric, theRest)] -> [(applyMatchingSpecification Lower metric, theRest)]
_ -> []
readsPrec p ('E':'x':'t':'r':'a':'c':'t':'N':'u':'m':'b':'e':'r':'/':theRest) = case readsPrec p theRest of
[(metric, theRest)] -> [(applyMatchingSpecification ExtractNumber metric, theRest)]
_ -> []
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
@ -124,7 +145,7 @@ instance Read Metric where
readsPrec _ ('G':'L':'E':'U':theRest) = [(GLEU, theRest)]
readsPrec _ ('W':'E':'R':theRest) = [(WER, theRest)]
readsPrec _ ('C':'E':'R':theRest) = [(CER, theRest)]
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy ExactMatch, theRest)]
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
readsPrec p ('F':'L':'C':'-':'F':theRest) = case readsPrec p theRest of
@ -186,7 +207,7 @@ getMetricOrdering BLEU = TheHigherTheBetter
getMetricOrdering GLEU = TheHigherTheBetter
getMetricOrdering WER = TheLowerTheBetter
getMetricOrdering CER = TheLowerTheBetter
getMetricOrdering Accuracy = TheHigherTheBetter
getMetricOrdering (Accuracy _) = TheHigherTheBetter
getMetricOrdering ClippEU = TheHigherTheBetter
getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
@ -230,6 +251,8 @@ fixedNumberOfColumnsInExpected (Mean metric) = fixedNumberOfColumnsInExpected me
fixedNumberOfColumnsInExpected MAP = False
fixedNumberOfColumnsInExpected BLEU = False
fixedNumberOfColumnsInExpected GLEU = False
fixedNumberOfColumnsInExpected (Accuracy ExactMatch) = True
fixedNumberOfColumnsInExpected (Accuracy _) = False
fixedNumberOfColumnsInExpected _ = True
fixedNumberOfColumnsInInput :: Metric -> Bool
@ -246,6 +269,8 @@ perfectOutLineFromExpectedLine (LikelihoodHashed _) t = t <> ":1.0"
perfectOutLineFromExpectedLine BLEU t = getFirstColumn t
perfectOutLineFromExpectedLine GLEU t = getFirstColumn t
perfectOutLineFromExpectedLine ClippEU t = cleanMarginFromClippEU t
perfectOutLineFromExpectedLine (Accuracy ExactMatch) t = t
perfectOutLineFromExpectedLine (Accuracy _) t = getFirstColumn t
perfectOutLineFromExpectedLine _ t = t
getFirstColumn :: Text -> Text

View File

@ -50,7 +50,7 @@ import qualified Data.HashMap.Strict as M
-- | Helper type so that singleton can be used.
-- | (The problem is that some metrics are parametrized by Double
-- | Word32 and this is not handled by the singleton libary.)
singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy | AClippEU
singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy MatchingSpecification | AClippEU
| AFMeasure | AMacroFMeasure | ANMI
| ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood
| ABIOF1 | ABIOWeightedF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
@ -70,7 +70,7 @@ toHelper BLEU = ABLEU
toHelper GLEU = AGLEU
toHelper WER = AWER
toHelper CER = ACER
toHelper Accuracy = AAccuracy
toHelper (Accuracy matchingSpec) = AAccuracy matchingSpec
toHelper ClippEU = AClippEU
toHelper (FMeasure _) = AFMeasure
toHelper (MacroFMeasure _) = AMacroFMeasure
@ -111,7 +111,7 @@ type family ParsedExpectedType (t :: AMetric) :: * where
ParsedExpectedType AGLEU = [[String]]
ParsedExpectedType AWER = [String]
ParsedExpectedType ACER = String
ParsedExpectedType AAccuracy = Text
ParsedExpectedType (AAccuracy _) = Text
ParsedExpectedType AClippEU = [ClippingSpec]
ParsedExpectedType AFMeasure = Bool
ParsedExpectedType AMacroFMeasure = Maybe Text
@ -148,7 +148,7 @@ expectedParser SABLEU = alternativeSentencesParser
expectedParser SAGLEU = alternativeSentencesParser
expectedParser SAWER = intoStringWords
expectedParser SACER = Right . unpack
expectedParser SAAccuracy = onlyStrip
expectedParser (SAAccuracy _) = onlyStrip
expectedParser SAClippEU = controlledParse lineClippingSpecsParser
expectedParser SAFMeasure = zeroOneParser
expectedParser SAMacroFMeasure = justStrip
@ -199,7 +199,7 @@ outputParser SABLEU = Right . Prelude.words . unpack
outputParser SAGLEU = Right . Prelude.words . unpack
outputParser SAWER = expectedParser SAWER
outputParser SACER = expectedParser SACER
outputParser SAAccuracy = expectedParser SAAccuracy
outputParser p@(SAAccuracy _) = expectedParser p
outputParser SAClippEU = controlledParse lineClippingsParser
outputParser SAFMeasure = probToZeroOneParser
outputParser SAMacroFMeasure = Right . predictedParser . strip
@ -257,6 +257,13 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
ItemIntermediateRepresentationType AHaversine = Double
ItemIntermediateRepresentationType t = Double
findBest :: (Text -> Text -> Double) -> (Text -> Text -> Double)
findBest fun expected got = Prelude.maximum $ Prelude.map (fun got) expectedVals
where expectedVals = case splitOn "\t" expected of
[] -> [""]
l -> l
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
itemStep SARMSE = itemSquaredError
itemStep SAMSE = itemSquaredError
@ -267,7 +274,8 @@ itemStep SAGLEU = uncurry gleuStep
itemStep SAWER = uncurry werStep
-- strings are character lists, so we could re-use werStep
itemStep SACER = uncurry werStep
itemStep SAAccuracy = hitOrMiss
itemStep (SAAccuracy SExactMatch) = hitOrMiss
itemStep (SAAccuracy smatchSpec) = uncurry (findBest $ getMatchingFunctionForText $ fromSing smatchSpec)
itemStep SAClippEU = clippEUMatchStep
itemStep SAFMeasure = getCount
itemStep SAMacroFMeasure = getClassesInvolved

View File

@ -37,7 +37,7 @@ listOfAvailableMetrics = [RMSE,
SMAPE,
Pearson,
Spearman,
Accuracy,
Accuracy ExactMatch,
LogLoss,
Likelihood,
FMeasure 1.0,

View File

@ -9,6 +9,7 @@ import GEval.MetricsMeta (listOfAvailableEvaluationSchemes, isEvaluationSchemeDe
import GEval.Core
import GEval.Common
import GEval.EvaluationScheme
import GEval.MatchingSpecification
import GEval.OptionsParser
import GEval.BLEU
import GEval.Clippings
@ -149,6 +150,8 @@ main = hspec $ do
runGEvalTest "accuracy-filtering" `shouldReturnAlmost` 0.6666
it "with filtering 2" $
runGEvalTest "accuracy-multiple-filtering" `shouldReturnAlmost` 0.5
it "with fuzzy match" $
runGEvalTest "fuzzy-match-accuracy" `shouldReturnAlmost` 0.6
describe "F-measure" $ do
it "simple example" $
runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
@ -282,7 +285,7 @@ main = hspec $ do
runGEvalTest "clippeu-simple" `shouldReturnAlmost` 0.399999999999
describe "evaluation metric specification is parsed" $ do
it "for simple names" $ do
let metrics = [RMSE, MSE, BLEU, Accuracy, ClippEU]
let metrics = [RMSE, MSE, BLEU, Accuracy ExactMatch, ClippEU]
let parsedMetrics = Prelude.map (read . show) metrics
metrics `shouldBe` parsedMetrics
it "for F-Measure" $ do

View File

@ -0,0 +1,10 @@
foo bar baz
pig
Hentry the Saw
12
123
1
WARSZAWA
52 XYZU
Test XII
FRANCE
1 foo bar baz
2 pig
3 Hentry the Saw
4 12
5 123
6 1
7 WARSZAWA
8 52 XYZU
9 Test XII
10 FRANCE

View File

@ -0,0 +1 @@
--metric LenientHarden/ExtractNumber/Lower/Fuzzy/Accuracy

View File

@ -0,0 +1,10 @@
foo bar baz
squirrel
Henry the Hammer
1
12
12
Warszawa
Abcd 52
Bla 12
Poland France Italy
Can't render this file because it has a wrong number of fields in line 10.