A dead-end when working on fuzzy matching

2020-07-01 18:24:45 +02:00 · 2020-07-01 18:24:45 +02:00 · 4e3ff20e2c
commit 4e3ff20e2c
parent 32bf424e6c
11 changed files with 129 additions and 52 deletions
--- a/geval.cabal
+++ b/geval.cabal
@ -57,6 +57,7 @@ library
                     , GEval.DataSource
                     , GEval.Model
                     , GEval.ModelTraining
+                     , GEval.MatchingSpecification
                     , Paths_geval
  build-depends:       base >= 4.7 && < 5
                     , cond
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@ -6,7 +6,6 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE PackageImports #-}

-
 module GEval.Core
    ( geval,
      gevalCore,
@ -113,6 +112,7 @@ import GEval.Annotation
 import GEval.BlackBoxDebugging
 import Data.Conduit.Bootstrap
 import GEval.DataSource
+import GEval.MatchingSpecification

 import qualified Data.HashMap.Strict as M
 import qualified Data.Vector as V
@ -522,7 +522,7 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
 -- some metrics are handled by Bootstrap due to legacy issues,
 -- fix on the way
 handleBootstrap :: Metric -> Bool
-handleBootstrap (Mean (MultiLabelFMeasure _)) = True
+handleBootstrap (Mean (MultiLabelFMeasure _ _)) = True
 handleBootstrap (Mean _) = False
 handleBootstrap CharMatch = False
 handleBootstrap (LogLossHashed _) = False
@ -567,15 +567,18 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
                          -> m (MetricOutput)           -- ^ metric values for the output against the expected output

 -- for the time being hardcoded
-gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do
+gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta matchingSpec)) lsSpec = do
  gevalRunPipeline parserSpec (trans step) finalPipeline context
  where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
        context = fromSpecificationToWithoutInput lsSpec
-        step = itemStep SAMultiLabelFMeasure
-        expParser = expectedParser SAMultiLabelFMeasure
-        outParser = outputParser SAMultiLabelFMeasure
+        step = case toSing matchingSpec of
+          SomeSing s -> itemStep (SAMultiLabelFMeasure s)
+        expParser = case toSing matchingSpec of
+          SomeSing s -> expectedParser (SAMultiLabelFMeasure s)
+        outParser = case toSing matchingSpec of
+          SomeSing s -> outputParser (SAMultiLabelFMeasure s)
        finalPipeline = fixer (
-          CL.map (fMeasureOnCounts beta)
+          CL.map (fMeasureOnCounts' beta)
          .| (bootstrapC numberOfSamples
              $ continueGEvalCalculations SAMSE MSE))
        trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
@ -630,7 +633,7 @@ gevalCoreOnSources (LogLossHashed nbOfBits) = helperLogLossHashed nbOfBits id
 gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood


-gevalCoreOnSources (Mean (MultiLabelFMeasure beta))
+gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec))
  = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
                                       (Right . getWords)
                                       ((fMeasureOnCounts beta) . (getCounts (==)))
@ -661,7 +664,8 @@ gevalCoreOnSources (Mean WER)
 gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being"

 -- only MultiLabel-F1 handled for JSONs for the time being...
-gevalCoreOnSources (MultiLabelFMeasure beta) = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
+gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) =
+  gevalCoreWithoutInputOnItemTargets (Right . intoWords)
                                     (Right . getWords)
                                     (getCounts (==))
                                     countAgg
@ -846,14 +850,13 @@ gevalRunPipeline' parserSpec itemStep finalPipeline context = do
       <$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
       <*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| finalPipeline)

-
-
-continueGEvalCalculations :: (MonadIO m) =>
+continueGEvalCalculations :: forall m t . (MonadIO m) =>
                            SAMetric t
                            -> Metric
                            -> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput

-continueGEvalCalculations SAMultiLabelFMeasure (MultiLabelFMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
+continueGEvalCalculations (SAMultiLabelFMeasure matchingSpec) (MultiLabelFMeasure beta matchingSpec')
+  = defineContinuation countAgg (fMeasureOnCounts beta) noGraph

 continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph

--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@ -10,6 +10,7 @@ import GEval.EvaluationScheme
 import GEval.Common (GEvalException(..))
 import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName)
 import GEval.Submit (tokenFileName)
+import GEval.MatchingSpecification (MatchingSpecification(ExactMatch))
 import qualified System.Directory as D
 import Control.Conditional (whenM)

@ -331,8 +332,8 @@ character (inclusively).

 |] ++ (commonReadmeMDContents testName)

-readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
-readmeMDContents (MultiLabelFMeasure beta) testName = [i|
+readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta ExactMatch) testName
+readmeMDContents (MultiLabelFMeasure beta _) testName = [i|
 Tag names and their component
 =============================

@ -533,8 +534,8 @@ trainContents TokenAccuracy = [hereLit|* V N	I like cats
 trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19	The student's smart
 N:1-6 N:8-10 V:12-13 A:15-18	Mary's dog is nice
 |]
-trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
-trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith	person/3,4,5 first-name/4 surname/5
+trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta ExactMatch)
+trainContents (MultiLabelFMeasure _ _) = [hereLit|I know Mr John Smith	person/3,4,5 first-name/4 surname/5
 Steven bloody Brown	person/1,3 first-name/1 surname/3
 James and James	first-name/1 firstname/3
 |]
@ -606,8 +607,8 @@ Ala has a cat
 devInContents SegmentAccuracy = [hereLit|John is smart
 Mary's intelligent
 |]
-devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
-devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
+devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta ExactMatch)
+devInContents (MultiLabelFMeasure _ _) = [hereLit|Jan Kowalski is here
 I see him
 Barbara
 |]
@ -674,8 +675,8 @@ N V * N
 devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13
 N:1-4 V:6-7 A:9-19
 |]
-devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
-devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
+devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta ExactMatch)
+devExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,2 first-name/1 surname/2

 first-name/1
 |]
@ -747,8 +748,8 @@ I know
 testInContents SegmentAccuracy = [hereLit|Mary's cat is old
 John is young
 |]
-testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
-testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
+testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta ExactMatch)
+testInContents (MultiLabelFMeasure _ _) = [hereLit|John bloody Smith
 Nobody is there
 I saw Marketa
 |]
@ -816,8 +817,8 @@ testExpectedContents TokenAccuracy = [hereLit|* V N
 testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17
 N:1-4 V:6-7 A:9-13
 |]
-testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
-testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
+testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta ExactMatch)
+testExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 surname/3

 first-name/3
 |]
@ -875,8 +876,8 @@ inHeaderContents BIOF1Labels = inHeaderContents BIOF1
 inHeaderContents BIOF1 = Just ["Text"]
 inHeaderContents TokenAccuracy = Just ["TokenizedText"]
 inHeaderContents SegmentAccuracy = Just ["Segment"]
-inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta)
-inHeaderContents (MultiLabelFMeasure _) = Just ["Text"]
+inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta ExactMatch)
+inHeaderContents (MultiLabelFMeasure _ _) = Just ["Text"]
 inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss
 inHeaderContents MultiLabelLogLoss = Just ["Utterance"]
 inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU
@ -903,8 +904,8 @@ outHeaderContents BIOF1Labels = outHeaderContents BIOF1
 outHeaderContents BIOF1 = Just ["BIOOutput"]
 outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"]
 outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"]
-outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta)
-outHeaderContents (MultiLabelFMeasure _) = Just ["Entities"]
+outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta ExactMatch)
+outHeaderContents (MultiLabelFMeasure _ _) = Just ["Entities"]
 outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss
 outHeaderContents MultiLabelLogLoss = Just ["Emotion"]
 outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"]
--- a/src/GEval/MatchingSpecification.hs
+++ b/src/GEval/MatchingSpecification.hs
@ -0,0 +1,32 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE EmptyCase #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+-- | This module is for defining possible "matching specifications",
+-- i.e. the way tokens are matched against one another in metrics such as MultiLabel-F1.
+--
+-- Not all metrics could be affected by matching specifications (e.g. they would
+-- not make sense for metrics comparing numbers, such as MSE or MAE).
+
+module GEval.MatchingSpecification
+  where
+
+import Data.Singletons.TH
+import Data.Text
+
+-- | The data type for storing a matching specification
+singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. identity is required
+                                         | FuzzyMatch -- ^ fuzzy match by Levenshtein distance
+                                         | CutLabel MatchingSpecification -- ^ require that the label (part before up to `=`)
+                                                                          -- is matched and then proceed with some matching spec.
+                           deriving (Eq)
+             |]
+
+getMatchingFunction :: MatchingSpecification -> Text -> Text -> Double
+getMatchingFunction ExactMatch = (\a b -> 1.0)
+getMatchingFunction FuzzyMatch = (\a b -> 1.0)
+getMatchingFunction (CutLabel smatchSpec)= getMatchingFunction smatchSpec
--- a/src/GEval/Metric.hs
+++ b/src/GEval/Metric.hs
@ -18,6 +18,7 @@ import Data.Monoid ((<>))

 import GEval.Common
 import GEval.Clippings
+import GEval.MatchingSpecification
 import Data.Attoparsec.Text (parseOnly)

 -- here metrics and their basic properties are listed,
@ -27,7 +28,8 @@ import Data.Attoparsec.Text (parseOnly)
 data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
              | FMeasure Double | MacroFMeasure Double | NMI
              | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
-              | BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
+              | BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
+              | MultiLabelFMeasure Double MatchingSpecification
              | MultiLabelLogLoss | MultiLabelLikelihood
              | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double
              | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
@ -78,15 +80,30 @@ instance Show Metric where
  show SegmentAccuracy = "SegmentAccuracy"
  show MAE = "MAE"
  show SMAPE = "SMAPE"
-  show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
+  show (MultiLabelFMeasure beta ExactMatch) = "MultiLabel-F" ++ (show beta)
+  show (MultiLabelFMeasure beta FuzzyMatch) = "Fuzzy/" ++ (show $ MultiLabelFMeasure beta ExactMatch)
+  show (MultiLabelFMeasure beta (CutLabel matchSpec)) = "CutLabel/" ++ (show $ MultiLabelFMeasure beta matchSpec)
  show MultiLabelLogLoss = "MultiLabel-Logloss"
  show MultiLabelLikelihood = "MultiLabel-Likelihood"
  show (Mean metric) = "Mean/" ++ (show metric)

+applyMatchingSpecification :: (MatchingSpecification -> MatchingSpecification)
+                           -> Metric
+                           -> Metric
+applyMatchingSpecification fun (MultiLabelFMeasure beta matchSpec)
+  = MultiLabelFMeasure beta (fun matchSpec)
+applyMatchingSpecification _ metric = error $ "Matching specification cannot be applied to the " ++ (show metric) ++ " metric"
+
 instance Read Metric where
  readsPrec p ('M':'e':'a':'n':'/':theRest) = case readsPrec p theRest of
    [(metric, theRest)] -> [(Mean metric, theRest)]
    _ -> []
+  readsPrec p ('F':'u':'z':'z':'y':'/':theRest) = case readsPrec p theRest of
+    [(metric, theRest)] -> [(applyMatchingSpecification (const FuzzyMatch) metric, theRest)]
+    _ -> []
+  readsPrec p ('C':'u':'t':'L':'a':'b':'e':'l':'/':theRest) = case readsPrec p theRest of
+    [(metric, theRest)] -> [(applyMatchingSpecification CutLabel metric, theRest)]
+    _ -> []
  readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
  readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
  readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
@ -107,7 +124,7 @@ instance Read Metric where
    [(beta, theRest)] -> [(MacroFMeasure beta, theRest)]
    _ -> []
  readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
-    [(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
+    [(beta, theRest)] -> [(MultiLabelFMeasure beta ExactMatch, theRest)]
    _ -> []
  readsPrec p ('S':'o':'f':'t':'2':'D':'-':'F':theRest) = case readsPrec p theRest of
    [(beta, theRest)] -> [(Soft2DFMeasure beta, theRest)]
@ -175,7 +192,7 @@ getMetricOrdering TokenAccuracy = TheHigherTheBetter
 getMetricOrdering SegmentAccuracy = TheHigherTheBetter
 getMetricOrdering MAE = TheLowerTheBetter
 getMetricOrdering SMAPE = TheLowerTheBetter
-getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
+getMetricOrdering (MultiLabelFMeasure _ _) = TheHigherTheBetter
 getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
 getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
 getMetricOrdering (Mean metric) = getMetricOrdering metric
--- a/src/GEval/MetricsMechanics.hs
+++ b/src/GEval/MetricsMechanics.hs
@ -5,6 +5,8 @@
 {-# LANGUAGE TemplateHaskell #-}
 {-# LANGUAGE EmptyCase #-}
 {-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts #-}

 module GEval.MetricsMechanics
  where
@ -40,6 +42,7 @@ import GEval.Clippings (Clipping, ClippingSpec, LabeledClipping, lineClippingsPa
 import GEval.BIO (TaggedEntity, parseBioSequenceIntoEntities, parseBioSequenceIntoEntitiesWithoutNormalization)
 import GEval.LogLossHashed (parseWordSpecs, wordSpecToPair)
 import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..), countLogLossOnProbList)
+import GEval.MatchingSpecification

 -- | Helper type so that singleton can be used.
 -- | (The problem is that some metrics are parametrized by Double
@ -47,7 +50,7 @@ import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..), countL
 singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | AAccuracy | AClippEU
                             | AFMeasure | AMacroFMeasure | ANMI
                             | ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood
-                             | ABIOF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure
+                             | ABIOF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
                             | AMultiLabelLogLoss | AMultiLabelLikelihood
                             | ASoftFMeasure | AProbabilisticMultiLabelFMeasure | AProbabilisticSoftFMeasure | ASoft2DFMeasure
                             | AFLCFMeasure
@ -80,7 +83,7 @@ toHelper SegmentAccuracy = ASegmentAccuracy
 toHelper (LikelihoodHashed _) = ALikelihoodHashed
 toHelper MAE = AMAE
 toHelper SMAPE = ASMAPE
-toHelper (MultiLabelFMeasure _) = AMultiLabelFMeasure
+toHelper (MultiLabelFMeasure _ matchingSpec) = AMultiLabelFMeasure matchingSpec
 toHelper MultiLabelLogLoss = AMultiLabelLogLoss
 toHelper MultiLabelLikelihood = AMultiLabelLikelihood
 toHelper (SoftFMeasure _) = ASoftFMeasure
@ -123,7 +126,7 @@ type family ParsedExpectedType (t :: AMetric) :: * where
  ParsedExpectedType ASegmentAccuracy = [Annotation]
  ParsedExpectedType AMAE = Double
  ParsedExpectedType ASMAPE = Double
-  ParsedExpectedType AMultiLabelFMeasure = [Text]
+  ParsedExpectedType (AMultiLabelFMeasure _) = [Text]
  ParsedExpectedType AMultiLabelLogLoss = [Text]
  ParsedExpectedType AMultiLabelLikelihood = [Text]

@ -157,7 +160,7 @@ expectedParser SATokenAccuracy = intoWords
 expectedParser SASegmentAccuracy = parseSegmentAnnotations
 expectedParser SAMAE = doubleParser
 expectedParser SASMAPE = doubleParser
-expectedParser SAMultiLabelFMeasure = intoWords
+expectedParser (SAMultiLabelFMeasure _) = intoWords
 expectedParser SAMultiLabelLogLoss = intoWords
 expectedParser SAMultiLabelLikelihood = intoWords

@ -204,7 +207,7 @@ outputParser SATokenAccuracy = intoWords
 outputParser SASegmentAccuracy = parseSegmentAnnotations
 outputParser SAMAE = doubleParser
 outputParser SASMAPE = doubleParser
-outputParser SAMultiLabelFMeasure = intoWords
+outputParser (SAMultiLabelFMeasure _) = intoWords
 outputParser SAMultiLabelLogLoss = Right . parseIntoProbList
 outputParser SAMultiLabelLikelihood = Right . parseIntoProbList

@ -225,13 +228,17 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
  ItemIntermediateRepresentationType AProbabilisticSoftFMeasure = ([Double], [Double], Double, Int)
  ItemIntermediateRepresentationType APearson = (Double, Double)
  ItemIntermediateRepresentationType ASpearman = (Double, Double)
-  ItemIntermediateRepresentationType AMultiLabelFMeasure = (Int, Int, Int)
+  ItemIntermediateRepresentationType (AMultiLabelFMeasure ms) = (MatchingCount ms, Int, Int)
  ItemIntermediateRepresentationType ALogLossHashed = (Text, Text)
  ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
  ItemIntermediateRepresentationType ACharMatch = (Text, Text)
  ItemIntermediateRepresentationType AWER = (Int, Int)
  ItemIntermediateRepresentationType t = Double

+type family MatchingCount (t :: MatchingSpecification) where
+  MatchingCount ExactMatch = Int
+  MatchingCount _ = Double
+
 itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
 itemStep SARMSE = itemSquaredError
 itemStep SAMSE = itemSquaredError
@ -262,7 +269,10 @@ itemStep SATokenAccuracy = countHitsAndTotals
 itemStep SASegmentAccuracy = uncurry segmentAccuracy
 itemStep SAMAE = itemAbsoluteError
 itemStep SASMAPE = smape
-itemStep SAMultiLabelFMeasure = getCounts (==)
+itemStep (SAMultiLabelFMeasure SExactMatch) = getCounts (==)
+itemStep (SAMultiLabelFMeasure SFuzzyMatch) = getWeightedCounts (getMatchingFunction $ fromSing SFuzzyMatch)
+itemStep (SAMultiLabelFMeasure smatchSpec@(SCutLabel _))
+  = getWeightedCounts (getMatchingFunction $ fromSing smatchSpec)
 itemStep SAMultiLabelLogLoss = uncurry countLogLossOnProbList
 itemStep SAMultiLabelLikelihood = uncurry countLogLossOnProbList

@ -354,10 +364,13 @@ getClassesInvolved (Just a, Just b) = if a == b
                                      then (Just a, Just a, Just a)
                                      else (Nothing, Just a, Just b)

-getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
+getWeightedCounts :: (a -> b -> Double) -> ([a], [b]) -> (Double, Int, Int)
+getWeightedCounts matchFun (expected, got) = (weightedMaxMatch matchFun expected got,
                                              Prelude.length expected,
                                              Prelude.length got)

+getSoftCounts args = getWeightedCounts matchScore args
+
 getSoft2DCounts (expected, got) = (tpArea, expArea, gotArea)
  where tpArea = coveredBy expected got
        expArea = totalArea expected
--- a/src/GEval/MetricsMeta.hs
+++ b/src/GEval/MetricsMeta.hs
@ -18,6 +18,7 @@ import GEval.Metric
 import GEval.EvaluationScheme
 import GEval.CreateChallenge (testExpectedContents)
 import GEval.PrecisionRecall (weightedHarmonicMean)
+import GEval.MatchingSpecification (MatchingSpecification(ExactMatch))

 import Text.Regex.PCRE.Heavy
 import Data.Either (fromRight)
@ -45,10 +46,10 @@ listOfAvailableMetrics = [RMSE,
                          MacroFMeasure 1.0,
                          MacroFMeasure 2.0,
                          MacroFMeasure 0.25,
-                          MultiLabelFMeasure 1.0,
-                          MultiLabelFMeasure 2.0,
-                          MultiLabelFMeasure 0.25,
-                          Mean (MultiLabelFMeasure 1.0),
+                          MultiLabelFMeasure 1.0 ExactMatch,
+                          MultiLabelFMeasure 2.0 ExactMatch,
+                          MultiLabelFMeasure 0.25 ExactMatch,
+                          Mean (MultiLabelFMeasure 1.0 ExactMatch),
                          ProbabilisticMultiLabelFMeasure 1.0,
                          ProbabilisticMultiLabelFMeasure 2.0,
                          ProbabilisticMultiLabelFMeasure 0.25,
--- a/test/Spec.hs
+++ b/test/Spec.hs
@ -347,6 +347,8 @@ main = hspec $ do
      runGEvalTest "multilabel-f1-ie" `shouldReturnAlmost` 0.1111111111
    it "information extraction with flags" $ do
      runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
+    it "information extraction with fuzzy matching" $ do
+      runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.6928
  describe "Mean/MultiLabel-F" $ do
    it "simple" $ do
      runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
--- a/test/multilabel-f1-ie-fuzzy/multilabel-f1-ie-fuzzy-solution/test-A/out.tsv
+++ b/test/multilabel-f1-ie-fuzzy/multilabel-f1-ie-fuzzy-solution/test-A/out.tsv
@ -0,0 +1,3 @@
+important-person=JOHN_BROWN important-person=JOHN_SMITH company-name=Axaxaxaxas_Mlo profit=12031
+company-name=Foo_Bar profit=1220
+company-name=Whatever important-person=PIERRE_MENARD
--- a/test/multilabel-f1-ie-fuzzy/multilabel-f1-ie-fuzzy/config.txt
+++ b/test/multilabel-f1-ie-fuzzy/multilabel-f1-ie-fuzzy/config.txt
@ -0,0 +1 @@
+--metric CutLabel/Fuzzy/MultiLabel-F1:ls<_(inc|ltd)\.?(\s|$)><\2>
--- a/test/multilabel-f1-ie-fuzzy/multilabel-f1-ie-fuzzy/test-A/expected.tsv
+++ b/test/multilabel-f1-ie-fuzzy/multilabel-f1-ie-fuzzy/test-A/expected.tsv
@ -0,0 +1,3 @@
+company-name=Axaxaxas_Mlö profit=12031 important-person=John_Smith important-person=James_Brown
+company-name=Orbis_Tertius profit=1020 important-person=Anna_Smith
+company-name=Whatever_Inc profit=5600 important-person=Pierre_Menard
				`@ -0,0 +1 @@`
				`--metric CutLabel/Fuzzy/MultiLabel-F1:ls<_(inc\|ltd)\.?(\s\|$)><\2>`