From 6e20e79f5b6b9ea5f081bc577e022b91b97fc73c Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 21 Sep 2019 14:26:22 +0200 Subject: [PATCH] Start working on usind dependent types --- geval.cabal | 2 + src/GEval/BIO.hs | 5 + src/GEval/Core.hs | 3 - src/GEval/MetricsMechanics.hs | 168 ++++++++++++++++++++++++++++++++++ 4 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 src/GEval/MetricsMechanics.hs diff --git a/geval.cabal b/geval.cabal index e48552b..0939f45 100644 --- a/geval.cabal +++ b/geval.cabal @@ -17,6 +17,7 @@ library hs-source-dirs: src exposed-modules: GEval.Core , GEval.Metric + , GEval.MetricsMechanics , GEval.MetricsMeta , GEval.EvaluationScheme , GEval.CreateChallenge @@ -100,6 +101,7 @@ library , filemanip , temporary , utf8-string + , singletons default-language: Haskell2010 executable geval diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs index 1f4b75b..f909dd3 100644 --- a/src/GEval/BIO.hs +++ b/src/GEval/BIO.hs @@ -2,6 +2,7 @@ module GEval.BIO (BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities, + parseBioSequenceIntoEntitiesWithoutNormalization, TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO, eraseNormalisation) where @@ -45,6 +46,10 @@ gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, len parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity] parseBioSequenceIntoEntities t = labelsIntoEntities =<< (parseOnly (bioSequenceParser <* endOfInput) t) +parseBioSequenceIntoEntitiesWithoutNormalization s = do + entities <- parseBioSequenceIntoEntities s + return $ Prelude.map eraseNormalisation entities + labelsIntoEntities :: [BIOLabel] -> Either String [TaggedEntity] labelsIntoEntities labels = labelsIntoEntities' $ zip labels [1..] diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 1897fb7..bc78759 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -681,9 +681,6 @@ gevalCoreOnSources MAP _ = gevalCoreWithoutInput (Right . DLS.splitOn "\t" . unp gevalCoreOnSources BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph gevalCoreOnSources BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts noGraph - where parseBioSequenceIntoEntitiesWithoutNormalization s = do - entities <- parseBioSequenceIntoEntities s - return $ Prelude.map eraseNormalisation entities gevalCoreOnSources TokenAccuracy _ = gevalCoreWithoutInput intoTokens intoTokens diff --git a/src/GEval/MetricsMechanics.hs b/src/GEval/MetricsMechanics.hs new file mode 100644 index 0000000..f4ace4b --- /dev/null +++ b/src/GEval/MetricsMechanics.hs @@ -0,0 +1,168 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE EmptyCase #-} + +module GEval.MetricsMechanics + where + +import Data.Singletons.TH + +import GEval.Metric + +import Data.Text +import Data.Text.Read as TR +import qualified Data.List.Split as DLS +import Data.Attoparsec.Text (parseOnly) + +import Control.Monad ((<=<)) + +import GEval.Annotation (Annotation, parseAnnotations) +import GEval.Clippings (ClippingSpec, LabeledClipping, lineClippingsParser, lineClippingSpecsParser, lineLabeledClippingsParser) +import GEval.BIO (TaggedEntity, parseBioSequenceIntoEntities, parseBioSequenceIntoEntitiesWithoutNormalization) + +-- | Helper type so that singleton can be used. +-- | (The problem is that some metrics are parametrized by Double +-- | Word32 and this is not handled by the singleton libary.) +singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | AAccuracy | AClippEU + | AFMeasure | AMacroFMeasure | ANMI + | ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood + | ABIOF1 | ABIOF1Labels | ATokenAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure + | AMultiLabelLogLoss | AMultiLabelLikelihood + | ASoftFMeasure | AProbabilisticMultiLabelFMeasure | AProbabilisticSoftFMeasure | ASoft2DFMeasure + deriving (Eq) + |] + +-- | Convert a metric to a helper type without parameters +toHelper :: Metric -> AMetric +toHelper RMSE = ARMSE +toHelper MSE = AMSE +toHelper Pearson = APearson +toHelper Spearman = ASpearman +toHelper BLEU = ABLEU +toHelper GLEU = AGLEU +toHelper WER = AWER +toHelper Accuracy = AAccuracy +toHelper ClippEU = AClippEU +toHelper (FMeasure _) = AFMeasure +toHelper (MacroFMeasure _) = AMacroFMeasure +toHelper NMI = ANMI +toHelper (LogLossHashed _) = ALogLossHashed +toHelper CharMatch = ACharMatch +toHelper MAP = AMAP +toHelper LogLoss = ALogLoss +toHelper Likelihood = ALikelihood +toHelper BIOF1 = ABIOF1 +toHelper BIOF1Labels = ABIOF1Labels +toHelper TokenAccuracy = ATokenAccuracy +toHelper (LikelihoodHashed _) = ALikelihoodHashed +toHelper MAE = AMAE +toHelper SMAPE = ASMAPE +toHelper (MultiLabelFMeasure _) = AMultiLabelFMeasure +toHelper MultiLabelLogLoss = AMultiLabelLogLoss +toHelper MultiLabelLikelihood = AMultiLabelLikelihood +toHelper (SoftFMeasure _) = ASoftFMeasure +toHelper (ProbabilisticMultiLabelFMeasure _) = AProbabilisticMultiLabelFMeasure +toHelper (ProbabilisticSoftFMeasure _) = AProbabilisticSoftFMeasure +toHelper (Soft2DFMeasure _) = ASoft2DFMeasure + +type family ParsedInputType (t :: AMetric) :: * where + ParsedInputType ACharMatch = Text + ParsedInputType _ = () + +type family ParsedExpectedType (t :: AMetric) :: * where + ParsedExpectedType ARMSE = Double + ParsedExpectedType AMSE = Double + ParsedExpectedType APearson = Double + ParsedExpectedType ASpearman = Double + ParsedExpectedType ABLEU = [[String]] + ParsedExpectedType AGLEU = [[String]] + ParsedExpectedType AWER = [String] + ParsedExpectedType AAccuracy = Text + ParsedExpectedType AClippEU = [ClippingSpec] + ParsedExpectedType AFMeasure = Bool + ParsedExpectedType AMacroFMeasure = Maybe Text + ParsedExpectedType ASoftFMeasure = [Annotation] + ParsedExpectedType AProbabilisticMultiLabelFMeasure = [Text] + ParsedExpectedType AProbabilisticSoftFMeasure = [Annotation] + ParsedExpectedType ASoft2DFMeasure = [LabeledClipping] + ParsedExpectedType ANMI = Text + ParsedExpectedType ALogLossHashed = Text + ParsedExpectedType ALikelihoodHashed = Text + ParsedExpectedType ACharMatch = Text + ParsedExpectedType AMAP = [String] + ParsedExpectedType ALogLoss = Double + ParsedExpectedType ALikelihood = Double + ParsedExpectedType ABIOF1 = [TaggedEntity] + ParsedExpectedType ABIOF1Labels = [TaggedEntity] + ParsedExpectedType ATokenAccuracy = [Text] + ParsedExpectedType AMAE = Double + ParsedExpectedType ASMAPE = Double + ParsedExpectedType AMultiLabelFMeasure = [Text] + ParsedExpectedType AMultiLabelLogLoss = [Text] + ParsedExpectedType AMultiLabelLikelihood = [Text] + +expectedParser :: SAMetric t -> Text -> Either String (ParsedExpectedType t) +expectedParser SARMSE = doubleParser +expectedParser SAMSE = doubleParser +expectedParser SAPearson = doubleParser +expectedParser SASpearman = doubleParser +expectedParser SABLEU = alternativeSentencesParser +expectedParser SAGLEU = alternativeSentencesParser +expectedParser SAWER = intoStringWords +expectedParser SAAccuracy = onlyStrip +expectedParser SAClippEU = controlledParse lineClippingSpecsParser +expectedParser SAFMeasure = zeroOneParser +expectedParser SAMacroFMeasure = justStrip +expectedParser SASoftFMeasure = parseAnnotations +expectedParser SAProbabilisticMultiLabelFMeasure = intoWords +expectedParser SAProbabilisticSoftFMeasure = parseAnnotations +expectedParser SASoft2DFMeasure = controlledParse lineLabeledClippingsParser +expectedParser SANMI = onlyStrip +expectedParser SALogLossHashed = onlyStrip +expectedParser SALikelihoodHashed = onlyStrip +expectedParser SACharMatch = Right +expectedParser SAMAP = splitByTabs +expectedParser SALogLoss = doubleParser +expectedParser SALikelihood = doubleParser +expectedParser SABIOF1 = parseBioSequenceIntoEntities +expectedParser SABIOF1Labels = parseBioSequenceIntoEntitiesWithoutNormalization +expectedParser SATokenAccuracy = intoWords +expectedParser SAMAE = doubleParser +expectedParser SASMAPE = doubleParser +expectedParser SAMultiLabelFMeasure = intoWords +expectedParser SAMultiLabelLogLoss = intoWords +expectedParser SAMultiLabelLikelihood = intoWords + +doubleParser = getValue . TR.double + +intoWords = Right . Data.Text.words + +intoStringWords = Right . Prelude.words . unpack + +alternativeSentencesParser = Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack + +onlyStrip = Right . strip + +justStrip = Right . Just . strip + +splitByTabs = Right . DLS.splitOn "\t" . unpack + +zeroOneParser = expected <=< (getValue . TR.decimal) + where expected 1 = Right True + expected 0 = Right False + expected _ = Left "expected 0 or 1" + +getValue :: Num a => Either String (a, Text) -> Either String a +getValue (Right (x, reminder)) = + if Data.Text.null reminder || Data.Text.head reminder == '\t' + then Right x + else Left "number expected" +getValue (Left s) = Left s + +controlledParse parser t = + case parseOnly parser t of + (Right v) -> Right v + (Left _) -> Left "cannot parse line"