pure percantage format feature commit

2020-07-13 16:20:36 +02:00 · 2020-07-13 16:20:36 +02:00 · 32290d3715
commit 32290d3715
parent 0a484b9c92
6 changed files with 85 additions and 83 deletions
--- a/src/GEval/Common.hs
+++ b/src/GEval/Common.hs
@ -15,6 +15,11 @@ type MetricValue = Double
 data GraphSeries = GraphSeries [(Double, Double)]
 data FormattingOptions = FormattingOptions {
  decimalPlaces :: Maybe Int,
  asPercentage :: Bool
  }
 data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue]
 instance Show MetricResult where
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@ -6,6 +6,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE PackageImports #-}
 module GEval.Core
    ( geval,
      gevalCore,
@ -112,7 +113,6 @@ import GEval.Annotation
 import GEval.BlackBoxDebugging
 import Data.Conduit.Bootstrap
 import GEval.DataSource
 import GEval.MatchingSpecification
 import qualified Data.HashMap.Strict as M
 import qualified Data.Vector as V
@ -182,7 +182,7 @@ data GEvalSpecification = GEvalSpecification
                            gesExpectedFile :: String,
                            gesInputFile :: String,
                            gesMetrics :: [EvaluationScheme],
-                            gesPrecision :: Maybe Int,
+                            gesFormatting :: FormattingOptions,
                            gesTokenizer :: Maybe Tokenizer,
                            gesGonitoHost :: Maybe String,
                            gesToken :: Maybe String,
@ -253,7 +253,7 @@ defaultGEvalSpecification = GEvalSpecification {
  gesExpectedFile = defaultExpectedFile,
  gesInputFile = defaultInputFile,
  gesMetrics = [EvaluationScheme defaultMetric []],
-  gesPrecision = Nothing,
+  gesFormatting = FormattingOptions Nothing False,
  gesTokenizer = Nothing,
  gesGonitoHost = Nothing,
  gesToken = Nothing,
@ -522,7 +522,7 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
 -- some metrics are handled by Bootstrap due to legacy issues,
 -- fix on the way
 handleBootstrap :: Metric -> Bool
-handleBootstrap (Mean (MultiLabelFMeasure _ _)) = True
+handleBootstrap (Mean (MultiLabelFMeasure _)) = True
 handleBootstrap (Mean _) = False
 handleBootstrap CharMatch = False
 handleBootstrap (LogLossHashed _) = False
@ -567,16 +567,13 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
                          -> m (MetricOutput)           -- ^ metric values for the output against the expected output
 -- for the time being hardcoded
-gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta matchingSpec)) lsSpec = do
+gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do
  gevalRunPipeline parserSpec (trans step) finalPipeline context
  where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
        context = fromSpecificationToWithoutInput lsSpec
-        step = case toSing matchingSpec of
+        step = itemStep SAMultiLabelFMeasure
-          SomeSing s -> itemStep (SAMultiLabelFMeasure s)
+        expParser = expectedParser SAMultiLabelFMeasure
-        expParser = case toSing matchingSpec of
+        outParser = outputParser SAMultiLabelFMeasure
          SomeSing s -> expectedParser (SAMultiLabelFMeasure s)
        outParser = case toSing matchingSpec of
          SomeSing s -> outputParser (SAMultiLabelFMeasure s)
        finalPipeline = fixer (
          CL.map (fMeasureOnCounts beta)
          .| (bootstrapC numberOfSamples
@ -633,10 +630,10 @@ gevalCoreOnSources (LogLossHashed nbOfBits) = helperLogLossHashed nbOfBits id
 gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood
-gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec))
+gevalCoreOnSources (Mean (MultiLabelFMeasure beta))
  = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
                                       (Right . getWords)
-                                       ((fMeasureOnCounts beta) . (getWeightedCounts (getMatchingFunctionForString matchingSpec)))
+                                       ((fMeasureOnCounts beta) . (getCounts (==)))
                                       averageC
                                       id
                                       noGraph
@ -664,10 +661,9 @@ gevalCoreOnSources (Mean WER)
 gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being"
 -- only MultiLabel-F1 handled for JSONs for the time being...
-gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) =
+gevalCoreOnSources (MultiLabelFMeasure beta) = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
  gevalCoreWithoutInputOnItemTargets (Right . intoWords)
                                                                            (Right . getWords)
-                                     (getWeightedCounts (getMatchingFunctionForString matchingSpec))
+                                                                            (getCounts (==))
                                                                            countAgg
                                                                            (fMeasureOnCounts beta)
                                                                            noGraph
@ -748,9 +744,9 @@ countFragAgg :: (Num n, Num v, Monad m) => ConduitM (n, n, v, v) o m (n, n, v, v
 countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0)
 gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
-                                (V.Vector (Double, Double) -> Double) -> -- ^ correlation function
+                                (V.Vector (Double, Double) -> Double) -- ^ correlation function
-                                LineSourcesSpecification (ResourceT m) ->
+                                -> LineSourcesSpecification (ResourceT m)
-                                m (MetricOutput)             -- ^ metric values for the output against the expected output
+                                -> m (MetricOutput)             -- ^ metric values for the output against the expected output
 gevalCoreByCorrelationMeasure correlationFunction =
  gevalCoreWithoutInput SAPearson correlationC finalStep noGraph
  where correlationC = CC.foldl (flip (:)) []
@ -850,13 +846,14 @@ gevalRunPipeline' parserSpec itemStep finalPipeline context = do
       <$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
       <*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| finalPipeline)
-continueGEvalCalculations :: forall m t . (MonadIO m) =>
+
 continueGEvalCalculations :: (MonadIO m) =>
                            SAMetric t
                            -> Metric
                            -> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput
-continueGEvalCalculations (SAMultiLabelFMeasure matchingSpec) (MultiLabelFMeasure beta matchingSpec')
+continueGEvalCalculations SAMultiLabelFMeasure (MultiLabelFMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
  = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
 continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@ -7,10 +7,9 @@ module GEval.CreateChallenge
 import GEval.Metric
 import GEval.EvaluationScheme
-import GEval.Common (GEvalException(..))
+import GEval.Common (GEvalException(..), FormattingOptions(..))
 import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName)
 import GEval.Submit (tokenFileName)
 import GEval.MatchingSpecification (MatchingSpecification(ExactMatch))
 import qualified System.Directory as D
 import Control.Conditional (whenM)
@ -22,6 +21,9 @@ import Data.String.Here
 import Data.List (intercalate)
 import Data.List.Split (splitOn)
 import Data.Bool
 import Text.Printf
 createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
 createChallenge withDataFiles expectedDirectory spec = do
@ -31,7 +33,7 @@ createChallenge withDataFiles expectedDirectory spec = do
  D.createDirectoryIfMissing False testDirectory
  createFile (expectedDirectory </> ".gitignore") $ gitignoreContents
  createFile (expectedDirectory </> "README.md") $ readmeMDContents metric testName
-  createFile (expectedDirectory </> configFileName) $ configContents metrics precision testName
+  createFile (expectedDirectory </> configFileName) $ configContents metrics format testName
  createHeaderFile expectedDirectory "in-header.tsv" $ inHeaderContents metric
  createHeaderFile expectedDirectory "out-header.tsv" $ outHeaderContents metric
  if withDataFiles
@ -49,7 +51,7 @@ createChallenge withDataFiles expectedDirectory spec = do
      return ()
  where metric = gesMainMetric spec
        metrics = gesMetrics spec
-        precision = gesPrecision spec
+        format = gesFormatting spec
        testName = gesTestName spec
        trainDirectory = expectedDirectory </> "train"
        devDirectory = expectedDirectory </> "dev-0"
@ -332,8 +334,8 @@ character (inclusively).
 |] ++ (commonReadmeMDContents testName)
-readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta ExactMatch) testName
+readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
-readmeMDContents (MultiLabelFMeasure beta _) testName = [i|
+readmeMDContents (MultiLabelFMeasure beta) testName = [i|
 Tag names and their component
 =============================
@ -423,18 +425,17 @@ Directory structure
 |]
-configContents :: [EvaluationScheme] -> Maybe Int -> String -> String
+configContents :: [EvaluationScheme] -> FormattingOptions -> String -> String
-configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++
+configContents schemes format testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++
                                 (if testName /= defaultTestName
                                     then
                                        " --test-name " ++ testName
                                     else
                                     "") ++
-                                 (precisionOpt precision) ++
+                                 (precisionOpt format) ++
                                 inHeaderOpts ++
                                 outHeaderOpts
-    where precisionOpt Nothing = ""
+    where precisionOpt (FormattingOptions m b) =  maybe "" (printf "--precision %d ") m ++ bool "" "--show-as-percentage" b
          precisionOpt (Just p) = " --precision " ++ (show p)
          ((EvaluationScheme mainMetric _):_) = schemes
          inHeaderOpts = getHeaderOpts "in-header" inHeaderContents
          outHeaderOpts = getHeaderOpts "out-header" outHeaderContents
@ -534,8 +535,8 @@ trainContents TokenAccuracy = [hereLit|* V N	I like cats
 trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19	The student's smart
 N:1-6 N:8-10 V:12-13 A:15-18	Mary's dog is nice
 |]
-trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta ExactMatch)
+trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
-trainContents (MultiLabelFMeasure _ _) = [hereLit|I know Mr John Smith	person/3,4,5 first-name/4 surname/5
+trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith	person/3,4,5 first-name/4 surname/5
 Steven bloody Brown	person/1,3 first-name/1 surname/3
 James and James	first-name/1 firstname/3
 |]
@ -607,8 +608,8 @@ Ala has a cat
 devInContents SegmentAccuracy = [hereLit|John is smart
 Mary's intelligent
 |]
-devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta ExactMatch)
+devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
-devInContents (MultiLabelFMeasure _ _) = [hereLit|Jan Kowalski is here
+devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
 I see him
 Barbara
 |]
@ -675,8 +676,8 @@ N V * N
 devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13
 N:1-4 V:6-7 A:9-19
 |]
-devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta ExactMatch)
+devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
-devExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,2 first-name/1 surname/2
+devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
 first-name/1
 |]
@ -748,8 +749,8 @@ I know
 testInContents SegmentAccuracy = [hereLit|Mary's cat is old
 John is young
 |]
-testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta ExactMatch)
+testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
-testInContents (MultiLabelFMeasure _ _) = [hereLit|John bloody Smith
+testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
 Nobody is there
 I saw Marketa
 |]
@ -817,8 +818,8 @@ testExpectedContents TokenAccuracy = [hereLit|* V N
 testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17
 N:1-4 V:6-7 A:9-13
 |]
-testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta ExactMatch)
+testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
-testExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 surname/3
+testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
 first-name/3
 |]
@ -876,8 +877,8 @@ inHeaderContents BIOF1Labels = inHeaderContents BIOF1
 inHeaderContents BIOF1 = Just ["Text"]
 inHeaderContents TokenAccuracy = Just ["TokenizedText"]
 inHeaderContents SegmentAccuracy = Just ["Segment"]
-inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta ExactMatch)
+inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta)
-inHeaderContents (MultiLabelFMeasure _ _) = Just ["Text"]
+inHeaderContents (MultiLabelFMeasure _) = Just ["Text"]
 inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss
 inHeaderContents MultiLabelLogLoss = Just ["Utterance"]
 inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU
@ -904,8 +905,8 @@ outHeaderContents BIOF1Labels = outHeaderContents BIOF1
 outHeaderContents BIOF1 = Just ["BIOOutput"]
 outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"]
 outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"]
-outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta ExactMatch)
+outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta)
-outHeaderContents (MultiLabelFMeasure _ _) = Just ["Entities"]
+outHeaderContents (MultiLabelFMeasure _) = Just ["Entities"]
 outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss
 outHeaderContents MultiLabelLogLoss = Just ["Emotion"]
 outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"]
--- a/src/GEval/Formatting.hs
+++ b/src/GEval/Formatting.hs
@ -1,3 +1,5 @@
 {-# LANGUAGE LambdaCase #-}
 module GEval.Formatting
       (formatTheResult, formatSimpleResult, formatTheResultWithErrorBounds)
       where
@ -7,25 +9,29 @@ import Data.Conduit.Bootstrap
 import Text.Printf
-formatTheResult :: Maybe Int -> MetricResult -> String
+formatTheResult :: FormattingOptions -> MetricResult -> String
-formatTheResult mPrecision (SimpleRun val) = formatSimpleResult mPrecision val
+formatTheResult format (SimpleRun val) = formatSimpleResult format val
-formatTheResult mPrecision (BootstrapResampling vals) = formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound)
+formatTheResult format (BootstrapResampling vals) = formatTheResultWithErrorBounds format pointEstimate (Just errorBound)
  where pointEstimate = (upperBound + lowerBound) / 2.0
        errorBound = (upperBound - lowerBound) / 2.0
        (lowerBound, upperBound) = getConfidenceBounds defaultConfidenceLevel vals
-formatTheResultWithErrorBounds :: Maybe Int -> MetricValue -> Maybe MetricValue -> String
+formatTheResultWithErrorBounds :: FormattingOptions -> MetricValue -> Maybe MetricValue -> String
-formatTheResultWithErrorBounds mPrecision pointEstimate Nothing = formatSimpleResult mPrecision pointEstimate
+formatTheResultWithErrorBounds format pointEstimate Nothing = formatSimpleResult format pointEstimate
-formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound) = (formatSimpleResult correctedPrecision pointEstimate)
+formatTheResultWithErrorBounds format pointEstimate (Just errorBound) = (formatSimpleResult formatWithCorrectedPrecision pointEstimate)
                                                                            ++ "±"
-                                                                            ++ (formatSimpleResult correctedPrecision errorBound)
+                                                                            ++ (formatSimpleResult formatWithCorrectedPrecision errorBound)
    where errorBoundMagnitude = (floor (logBase 10.0 errorBound)) - 1
-          correctedPrecision = Just $ selectLowerPrecision (max (-errorBoundMagnitude) 0) mPrecision
+          formatWithCorrectedPrecision = selectLowerPrecision (max (-errorBoundMagnitude) 0) format
-formatSimpleResult :: Maybe Int -> MetricValue -> String
+formatSimpleResult :: FormattingOptions -> MetricValue -> String
-formatSimpleResult Nothing = show
+formatSimpleResult = \case
-formatSimpleResult (Just prec) = printf "%0.*f" prec
+  FormattingOptions (Just prec) True -> printf "%.*f" (prec-2) . (*100)
  FormattingOptions (Just prec) _    -> printf "0.*f" prec
  _                                  -> show
-selectLowerPrecision :: Int -> Maybe Int -> Int
+selectLowerPrecision :: Int -> FormattingOptions -> FormattingOptions
-selectLowerPrecision p Nothing = p
+selectLowerPrecision p = \case
-selectLowerPrecision p (Just p') = min p p'
+  a@(FormattingOptions _ True)    -> a
  FormattingOptions (Just prec) _ -> FormattingOptions (Just $ min prec p) False
  _                               -> FormattingOptions (Just p) False
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@ -6,7 +6,7 @@ module GEval.OptionsParser
        runGEvalGetOptions,
        getOptions,
        metricReader,
-        precisionArgParser
+        formatParser
        ) where
 import Paths_geval (version)
@ -138,12 +138,15 @@ optionsParser = GEvalOptions
       <> help "Mark worst features when in the line-by-line mode")
-precisionArgParser :: Parser Int
+formatParser :: Parser FormattingOptions
-precisionArgParser = option auto
+formatParser = FormattingOptions
-    ( long "precision"
+    <$> (optional $ option auto ( long "precision"
      <> short 'p'
      <> metavar "NUMBER-OF-FRACTIONAL-DIGITS"
-      <> help "Arithmetic precision, i.e. the number of fractional digits to be shown" )
+      <> help "Arithmetic precision, i.e. the number of fractional digits to be shown" ))
    <*> switch ( long "show-as-percentage"
      <> short '%'
      <> help "Returns the result as a percentage (i.e. maximum value of 100 instead of 1)" )
 specParser :: Parser GEvalSpecification
 specParser = GEvalSpecification
@ -191,7 +194,7 @@ specParser = GEvalSpecification
    <> metavar "INPUT"
    <> help "The name of the file with the input (applicable only for some metrics)" )
  <*> ((flip fromMaybe) <$> (singletonMaybe <$> altMetricReader) <*> metricReader)
-  <*> optional precisionArgParser
+  <*> formatParser
  <*> (optional $ option auto
       ( long "tokenizer"
         <> short 'T'
--- a/test/Spec.hs
+++ b/test/Spec.hs
@ -343,16 +343,6 @@ main = hspec $ do
      runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
    it "labels given with probs and numbers" $ do
      runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
    it "information extraction" $ do
      runGEvalTest "multilabel-f1-ie" `shouldReturnAlmost` 0.1111111111
    it "information extraction with flags" $ do
      runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
    it "information extraction with fuzzy matching" $ do
      runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
    it "information extraction with smart fuzzy matching" $ do
      runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444
    it "information extraction with smart fuzzy matching hardened" $ do
      runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555
  describe "Mean/MultiLabel-F" $ do
    it "simple" $ do
      runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
@ -478,7 +468,7 @@ main = hspec $ do
            gesExpectedFile = "expected.tsv",
            gesInputFile = "in.tsv",
            gesMetrics = [EvaluationScheme Likelihood []],
-            gesPrecision = Nothing,
+            gesFormatting = FormattingOptions Nothing False,
            gesTokenizer = Nothing,
            gesGonitoHost = Nothing,
            gesToken = Nothing,
@ -606,7 +596,7 @@ main = hspec $ do
            let spec = defaultGEvalSpecification {
                  gesExpectedDirectory = Just tempDir,
                  gesMetrics = [scheme],
-                  gesPrecision = Just 4 }
+                  gesFormatting = FormattingOptions (Just 4) False }
            createChallenge True tempDir spec
            validationChallenge tempDir spec
  describe "test sample outputs" $ do