From 32290d371559f4f02a3bbdc6bbc40edc7913f879 Mon Sep 17 00:00:00 2001
From: welp <welperooni@gmail.com>
Date: Mon, 13 Jul 2020 16:20:36 +0200
Subject: [PATCH] pure percantage format feature commit

---
 src/GEval/Common.hs          |  5 ++++
 src/GEval/Core.hs            | 49 ++++++++++++++++------------------
 src/GEval/CreateChallenge.hs | 51 ++++++++++++++++++------------------
 src/GEval/Formatting.hs      | 34 ++++++++++++++----------
 src/GEval/OptionsParser.hs   | 15 ++++++-----
 test/Spec.hs                 | 14 ++--------
 6 files changed, 85 insertions(+), 83 deletions(-)

diff --git a/src/GEval/Common.hs b/src/GEval/Common.hs
index 4a9cc4e..c8b581d 100644
--- a/src/GEval/Common.hs
+++ b/src/GEval/Common.hs
@@ -15,6 +15,11 @@ type MetricValue = Double
 
 data GraphSeries = GraphSeries [(Double, Double)]
 
+data FormattingOptions = FormattingOptions {
+  decimalPlaces :: Maybe Int,
+  asPercentage :: Bool
+  }
+
 data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue]
 
 instance Show MetricResult where
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 01b0a82..ebe7b8d 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -6,6 +6,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE PackageImports #-}
 
+
 module GEval.Core
     ( geval,
       gevalCore,
@@ -112,7 +113,6 @@ import GEval.Annotation
 import GEval.BlackBoxDebugging
 import Data.Conduit.Bootstrap
 import GEval.DataSource
-import GEval.MatchingSpecification
 
 import qualified Data.HashMap.Strict as M
 import qualified Data.Vector as V
@@ -182,7 +182,7 @@ data GEvalSpecification = GEvalSpecification
                             gesExpectedFile :: String,
                             gesInputFile :: String,
                             gesMetrics :: [EvaluationScheme],
-                            gesPrecision :: Maybe Int,
+                            gesFormatting :: FormattingOptions,
                             gesTokenizer :: Maybe Tokenizer,
                             gesGonitoHost :: Maybe String,
                             gesToken :: Maybe String,
@@ -253,7 +253,7 @@ defaultGEvalSpecification = GEvalSpecification {
   gesExpectedFile = defaultExpectedFile,
   gesInputFile = defaultInputFile,
   gesMetrics = [EvaluationScheme defaultMetric []],
-  gesPrecision = Nothing,
+  gesFormatting = FormattingOptions Nothing False,
   gesTokenizer = Nothing,
   gesGonitoHost = Nothing,
   gesToken = Nothing,
@@ -522,7 +522,7 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
 -- some metrics are handled by Bootstrap due to legacy issues,
 -- fix on the way
 handleBootstrap :: Metric -> Bool
-handleBootstrap (Mean (MultiLabelFMeasure _ _)) = True
+handleBootstrap (Mean (MultiLabelFMeasure _)) = True
 handleBootstrap (Mean _) = False
 handleBootstrap CharMatch = False
 handleBootstrap (LogLossHashed _) = False
@@ -567,16 +567,13 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
                           -> m (MetricOutput)           -- ^ metric values for the output against the expected output
 
 -- for the time being hardcoded
-gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta matchingSpec)) lsSpec = do
+gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do
   gevalRunPipeline parserSpec (trans step) finalPipeline context
   where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
         context = fromSpecificationToWithoutInput lsSpec
-        step = case toSing matchingSpec of
-          SomeSing s -> itemStep (SAMultiLabelFMeasure s)
-        expParser = case toSing matchingSpec of
-          SomeSing s -> expectedParser (SAMultiLabelFMeasure s)
-        outParser = case toSing matchingSpec of
-          SomeSing s -> outputParser (SAMultiLabelFMeasure s)
+        step = itemStep SAMultiLabelFMeasure
+        expParser = expectedParser SAMultiLabelFMeasure
+        outParser = outputParser SAMultiLabelFMeasure
         finalPipeline = fixer (
           CL.map (fMeasureOnCounts beta)
           .| (bootstrapC numberOfSamples
@@ -633,10 +630,10 @@ gevalCoreOnSources (LogLossHashed nbOfBits) = helperLogLossHashed nbOfBits id
 gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood
 
 
-gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec))
+gevalCoreOnSources (Mean (MultiLabelFMeasure beta))
   = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
                                        (Right . getWords)
-                                       ((fMeasureOnCounts beta) . (getWeightedCounts (getMatchingFunctionForString matchingSpec)))
+                                       ((fMeasureOnCounts beta) . (getCounts (==)))
                                        averageC
                                        id
                                        noGraph
@@ -664,13 +661,12 @@ gevalCoreOnSources (Mean WER)
 gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being"
 
 -- only MultiLabel-F1 handled for JSONs for the time being...
-gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) =
-  gevalCoreWithoutInputOnItemTargets (Right . intoWords)
-                                     (Right . getWords)
-                                     (getWeightedCounts (getMatchingFunctionForString matchingSpec))
-                                     countAgg
-                                     (fMeasureOnCounts beta)
-                                     noGraph
+gevalCoreOnSources (MultiLabelFMeasure beta) = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
+                                                                            (Right . getWords)
+                                                                            (getCounts (==))
+                                                                            countAgg
+                                                                            (fMeasureOnCounts beta)
+                                                                            noGraph
     where
       getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
       getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
@@ -748,9 +744,9 @@ countFragAgg :: (Num n, Num v, Monad m) => ConduitM (n, n, v, v) o m (n, n, v, v
 countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0)
 
 gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
-                                (V.Vector (Double, Double) -> Double) -> -- ^ correlation function
-                                LineSourcesSpecification (ResourceT m) ->
-                                m (MetricOutput)             -- ^ metric values for the output against the expected output
+                                (V.Vector (Double, Double) -> Double) -- ^ correlation function
+                                -> LineSourcesSpecification (ResourceT m)
+                                -> m (MetricOutput)             -- ^ metric values for the output against the expected output
 gevalCoreByCorrelationMeasure correlationFunction =
   gevalCoreWithoutInput SAPearson correlationC finalStep noGraph
   where correlationC = CC.foldl (flip (:)) []
@@ -850,13 +846,14 @@ gevalRunPipeline' parserSpec itemStep finalPipeline context = do
        <$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
        <*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| finalPipeline)
 
-continueGEvalCalculations :: forall m t . (MonadIO m) =>
+
+
+continueGEvalCalculations :: (MonadIO m) =>
                             SAMetric t
                             -> Metric
                             -> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput
 
-continueGEvalCalculations (SAMultiLabelFMeasure matchingSpec) (MultiLabelFMeasure beta matchingSpec')
-  = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
+continueGEvalCalculations SAMultiLabelFMeasure (MultiLabelFMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
 
 continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph
 
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 69e1150..6649e80 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -7,10 +7,9 @@ module GEval.CreateChallenge
 
 import GEval.Metric
 import GEval.EvaluationScheme
-import GEval.Common (GEvalException(..))
+import GEval.Common (GEvalException(..), FormattingOptions(..))
 import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName)
 import GEval.Submit (tokenFileName)
-import GEval.MatchingSpecification (MatchingSpecification(ExactMatch))
 import qualified System.Directory as D
 import Control.Conditional (whenM)
 
@@ -22,6 +21,9 @@ import Data.String.Here
 
 import Data.List (intercalate)
 import Data.List.Split (splitOn)
+import Data.Bool
+
+import Text.Printf
 
 createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
 createChallenge withDataFiles expectedDirectory spec = do
@@ -31,7 +33,7 @@ createChallenge withDataFiles expectedDirectory spec = do
   D.createDirectoryIfMissing False testDirectory
   createFile (expectedDirectory </> ".gitignore") $ gitignoreContents
   createFile (expectedDirectory </> "README.md") $ readmeMDContents metric testName
-  createFile (expectedDirectory </> configFileName) $ configContents metrics precision testName
+  createFile (expectedDirectory </> configFileName) $ configContents metrics format testName
   createHeaderFile expectedDirectory "in-header.tsv" $ inHeaderContents metric
   createHeaderFile expectedDirectory "out-header.tsv" $ outHeaderContents metric
   if withDataFiles
@@ -49,7 +51,7 @@ createChallenge withDataFiles expectedDirectory spec = do
       return ()
   where metric = gesMainMetric spec
         metrics = gesMetrics spec
-        precision = gesPrecision spec
+        format = gesFormatting spec
         testName = gesTestName spec
         trainDirectory = expectedDirectory </> "train"
         devDirectory = expectedDirectory </> "dev-0"
@@ -332,8 +334,8 @@ character (inclusively).
 
 |] ++ (commonReadmeMDContents testName)
 
-readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta ExactMatch) testName
-readmeMDContents (MultiLabelFMeasure beta _) testName = [i|
+readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
+readmeMDContents (MultiLabelFMeasure beta) testName = [i|
 Tag names and their component
 =============================
 
@@ -423,18 +425,17 @@ Directory structure
 |]
 
 
-configContents :: [EvaluationScheme] -> Maybe Int -> String -> String
-configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++
+configContents :: [EvaluationScheme] -> FormattingOptions -> String -> String
+configContents schemes format testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++
                                  (if testName /= defaultTestName
                                      then
                                         " --test-name " ++ testName
                                      else
                                      "") ++
-                                 (precisionOpt precision) ++
+                                 (precisionOpt format) ++
                                  inHeaderOpts ++
                                  outHeaderOpts
-    where precisionOpt Nothing = ""
-          precisionOpt (Just p) = " --precision " ++ (show p)
+    where precisionOpt (FormattingOptions m b) =  maybe "" (printf "--precision %d ") m ++ bool "" "--show-as-percentage" b
           ((EvaluationScheme mainMetric _):_) = schemes
           inHeaderOpts = getHeaderOpts "in-header" inHeaderContents
           outHeaderOpts = getHeaderOpts "out-header" outHeaderContents
@@ -534,8 +535,8 @@ trainContents TokenAccuracy = [hereLit|* V N	I like cats
 trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19	The student's smart
 N:1-6 N:8-10 V:12-13 A:15-18	Mary's dog is nice
 |]
-trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta ExactMatch)
-trainContents (MultiLabelFMeasure _ _) = [hereLit|I know Mr John Smith	person/3,4,5 first-name/4 surname/5
+trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
+trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith	person/3,4,5 first-name/4 surname/5
 Steven bloody Brown	person/1,3 first-name/1 surname/3
 James and James	first-name/1 firstname/3
 |]
@@ -607,8 +608,8 @@ Ala has a cat
 devInContents SegmentAccuracy = [hereLit|John is smart
 Mary's intelligent
 |]
-devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta ExactMatch)
-devInContents (MultiLabelFMeasure _ _) = [hereLit|Jan Kowalski is here
+devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
+devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
 I see him
 Barbara
 |]
@@ -675,8 +676,8 @@ N V * N
 devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13
 N:1-4 V:6-7 A:9-19
 |]
-devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta ExactMatch)
-devExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,2 first-name/1 surname/2
+devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
+devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
 
 first-name/1
 |]
@@ -748,8 +749,8 @@ I know
 testInContents SegmentAccuracy = [hereLit|Mary's cat is old
 John is young
 |]
-testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta ExactMatch)
-testInContents (MultiLabelFMeasure _ _) = [hereLit|John bloody Smith
+testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
+testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
 Nobody is there
 I saw Marketa
 |]
@@ -817,8 +818,8 @@ testExpectedContents TokenAccuracy = [hereLit|* V N
 testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17
 N:1-4 V:6-7 A:9-13
 |]
-testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta ExactMatch)
-testExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 surname/3
+testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
+testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
 
 first-name/3
 |]
@@ -876,8 +877,8 @@ inHeaderContents BIOF1Labels = inHeaderContents BIOF1
 inHeaderContents BIOF1 = Just ["Text"]
 inHeaderContents TokenAccuracy = Just ["TokenizedText"]
 inHeaderContents SegmentAccuracy = Just ["Segment"]
-inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta ExactMatch)
-inHeaderContents (MultiLabelFMeasure _ _) = Just ["Text"]
+inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta)
+inHeaderContents (MultiLabelFMeasure _) = Just ["Text"]
 inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss
 inHeaderContents MultiLabelLogLoss = Just ["Utterance"]
 inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU
@@ -904,8 +905,8 @@ outHeaderContents BIOF1Labels = outHeaderContents BIOF1
 outHeaderContents BIOF1 = Just ["BIOOutput"]
 outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"]
 outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"]
-outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta ExactMatch)
-outHeaderContents (MultiLabelFMeasure _ _) = Just ["Entities"]
+outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta)
+outHeaderContents (MultiLabelFMeasure _) = Just ["Entities"]
 outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss
 outHeaderContents MultiLabelLogLoss = Just ["Emotion"]
 outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"]
diff --git a/src/GEval/Formatting.hs b/src/GEval/Formatting.hs
index 149c1b9..39a9799 100644
--- a/src/GEval/Formatting.hs
+++ b/src/GEval/Formatting.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE LambdaCase #-}
+
 module GEval.Formatting
        (formatTheResult, formatSimpleResult, formatTheResultWithErrorBounds)
        where
@@ -7,25 +9,29 @@ import Data.Conduit.Bootstrap
 import Text.Printf
 
 
-formatTheResult :: Maybe Int -> MetricResult -> String
-formatTheResult mPrecision (SimpleRun val) = formatSimpleResult mPrecision val
-formatTheResult mPrecision (BootstrapResampling vals) = formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound)
+formatTheResult :: FormattingOptions -> MetricResult -> String
+formatTheResult format (SimpleRun val) = formatSimpleResult format val
+formatTheResult format (BootstrapResampling vals) = formatTheResultWithErrorBounds format pointEstimate (Just errorBound)
   where pointEstimate = (upperBound + lowerBound) / 2.0
         errorBound = (upperBound - lowerBound) / 2.0
         (lowerBound, upperBound) = getConfidenceBounds defaultConfidenceLevel vals
 
-formatTheResultWithErrorBounds :: Maybe Int -> MetricValue -> Maybe MetricValue -> String
-formatTheResultWithErrorBounds mPrecision pointEstimate Nothing = formatSimpleResult mPrecision pointEstimate
-formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound) = (formatSimpleResult correctedPrecision pointEstimate)
+formatTheResultWithErrorBounds :: FormattingOptions -> MetricValue -> Maybe MetricValue -> String
+formatTheResultWithErrorBounds format pointEstimate Nothing = formatSimpleResult format pointEstimate
+formatTheResultWithErrorBounds format pointEstimate (Just errorBound) = (formatSimpleResult formatWithCorrectedPrecision pointEstimate)
                                                                             ++ "±"
-                                                                            ++ (formatSimpleResult correctedPrecision errorBound)
+                                                                            ++ (formatSimpleResult formatWithCorrectedPrecision errorBound)
     where errorBoundMagnitude = (floor (logBase 10.0 errorBound)) - 1
-          correctedPrecision = Just $ selectLowerPrecision (max (-errorBoundMagnitude) 0) mPrecision
+          formatWithCorrectedPrecision = selectLowerPrecision (max (-errorBoundMagnitude) 0) format
 
-formatSimpleResult :: Maybe Int -> MetricValue -> String
-formatSimpleResult Nothing = show
-formatSimpleResult (Just prec) = printf "%0.*f" prec
+formatSimpleResult :: FormattingOptions -> MetricValue -> String
+formatSimpleResult = \case
+  FormattingOptions (Just prec) True -> printf "%.*f" (prec-2) . (*100)
+  FormattingOptions (Just prec) _    -> printf "0.*f" prec
+  _                                  -> show
 
-selectLowerPrecision :: Int -> Maybe Int -> Int
-selectLowerPrecision p Nothing = p
-selectLowerPrecision p (Just p') = min p p'
+selectLowerPrecision :: Int -> FormattingOptions -> FormattingOptions
+selectLowerPrecision p = \case
+  a@(FormattingOptions _ True)    -> a
+  FormattingOptions (Just prec) _ -> FormattingOptions (Just $ min prec p) False
+  _                               -> FormattingOptions (Just p) False
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index fdddad9..0290083 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -6,7 +6,7 @@ module GEval.OptionsParser
         runGEvalGetOptions,
         getOptions,
         metricReader,
-        precisionArgParser
+        formatParser
         ) where
 
 import Paths_geval (version)
@@ -138,12 +138,15 @@ optionsParser = GEvalOptions
        <> help "Mark worst features when in the line-by-line mode")
 
 
-precisionArgParser :: Parser Int
-precisionArgParser = option auto
-    ( long "precision"
+formatParser :: Parser FormattingOptions
+formatParser = FormattingOptions
+    <$> (optional $ option auto ( long "precision"
       <> short 'p'
       <> metavar "NUMBER-OF-FRACTIONAL-DIGITS"
-      <> help "Arithmetic precision, i.e. the number of fractional digits to be shown" )
+      <> help "Arithmetic precision, i.e. the number of fractional digits to be shown" ))
+    <*> switch ( long "show-as-percentage"
+      <> short '%'
+      <> help "Returns the result as a percentage (i.e. maximum value of 100 instead of 1)" )
 
 specParser :: Parser GEvalSpecification
 specParser = GEvalSpecification
@@ -191,7 +194,7 @@ specParser = GEvalSpecification
     <> metavar "INPUT"
     <> help "The name of the file with the input (applicable only for some metrics)" )
   <*> ((flip fromMaybe) <$> (singletonMaybe <$> altMetricReader) <*> metricReader)
-  <*> optional precisionArgParser
+  <*> formatParser
   <*> (optional $ option auto
        ( long "tokenizer"
          <> short 'T'
diff --git a/test/Spec.hs b/test/Spec.hs
index 43b8a3d..d208fc6 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -343,16 +343,6 @@ main = hspec $ do
       runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
     it "labels given with probs and numbers" $ do
       runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
-    it "information extraction" $ do
-      runGEvalTest "multilabel-f1-ie" `shouldReturnAlmost` 0.1111111111
-    it "information extraction with flags" $ do
-      runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
-    it "information extraction with fuzzy matching" $ do
-      runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
-    it "information extraction with smart fuzzy matching" $ do
-      runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444
-    it "information extraction with smart fuzzy matching hardened" $ do
-      runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555
   describe "Mean/MultiLabel-F" $ do
     it "simple" $ do
       runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
@@ -478,7 +468,7 @@ main = hspec $ do
             gesExpectedFile = "expected.tsv",
             gesInputFile = "in.tsv",
             gesMetrics = [EvaluationScheme Likelihood []],
-            gesPrecision = Nothing,
+            gesFormatting = FormattingOptions Nothing False,
             gesTokenizer = Nothing,
             gesGonitoHost = Nothing,
             gesToken = Nothing,
@@ -606,7 +596,7 @@ main = hspec $ do
             let spec = defaultGEvalSpecification {
                   gesExpectedDirectory = Just tempDir,
                   gesMetrics = [scheme],
-                  gesPrecision = Just 4 }
+                  gesFormatting = FormattingOptions (Just 4) False }
             createChallenge True tempDir spec
             validationChallenge tempDir spec
   describe "test sample outputs" $ do