pure percantage format feature commit

This commit is contained in:
welp 2020-07-13 16:20:36 +02:00
parent 0a484b9c92
commit 32290d3715
6 changed files with 85 additions and 83 deletions

View File

@ -15,6 +15,11 @@ type MetricValue = Double
data GraphSeries = GraphSeries [(Double, Double)] data GraphSeries = GraphSeries [(Double, Double)]
data FormattingOptions = FormattingOptions {
decimalPlaces :: Maybe Int,
asPercentage :: Bool
}
data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue] data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue]
instance Show MetricResult where instance Show MetricResult where

View File

@ -6,6 +6,7 @@
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE PackageImports #-} {-# LANGUAGE PackageImports #-}
module GEval.Core module GEval.Core
( geval, ( geval,
gevalCore, gevalCore,
@ -112,7 +113,6 @@ import GEval.Annotation
import GEval.BlackBoxDebugging import GEval.BlackBoxDebugging
import Data.Conduit.Bootstrap import Data.Conduit.Bootstrap
import GEval.DataSource import GEval.DataSource
import GEval.MatchingSpecification
import qualified Data.HashMap.Strict as M import qualified Data.HashMap.Strict as M
import qualified Data.Vector as V import qualified Data.Vector as V
@ -182,7 +182,7 @@ data GEvalSpecification = GEvalSpecification
gesExpectedFile :: String, gesExpectedFile :: String,
gesInputFile :: String, gesInputFile :: String,
gesMetrics :: [EvaluationScheme], gesMetrics :: [EvaluationScheme],
gesPrecision :: Maybe Int, gesFormatting :: FormattingOptions,
gesTokenizer :: Maybe Tokenizer, gesTokenizer :: Maybe Tokenizer,
gesGonitoHost :: Maybe String, gesGonitoHost :: Maybe String,
gesToken :: Maybe String, gesToken :: Maybe String,
@ -253,7 +253,7 @@ defaultGEvalSpecification = GEvalSpecification {
gesExpectedFile = defaultExpectedFile, gesExpectedFile = defaultExpectedFile,
gesInputFile = defaultInputFile, gesInputFile = defaultInputFile,
gesMetrics = [EvaluationScheme defaultMetric []], gesMetrics = [EvaluationScheme defaultMetric []],
gesPrecision = Nothing, gesFormatting = FormattingOptions Nothing False,
gesTokenizer = Nothing, gesTokenizer = Nothing,
gesGonitoHost = Nothing, gesGonitoHost = Nothing,
gesToken = Nothing, gesToken = Nothing,
@ -522,7 +522,7 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
-- some metrics are handled by Bootstrap due to legacy issues, -- some metrics are handled by Bootstrap due to legacy issues,
-- fix on the way -- fix on the way
handleBootstrap :: Metric -> Bool handleBootstrap :: Metric -> Bool
handleBootstrap (Mean (MultiLabelFMeasure _ _)) = True handleBootstrap (Mean (MultiLabelFMeasure _)) = True
handleBootstrap (Mean _) = False handleBootstrap (Mean _) = False
handleBootstrap CharMatch = False handleBootstrap CharMatch = False
handleBootstrap (LogLossHashed _) = False handleBootstrap (LogLossHashed _) = False
@ -567,16 +567,13 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
-> m (MetricOutput) -- ^ metric values for the output against the expected output -> m (MetricOutput) -- ^ metric values for the output against the expected output
-- for the time being hardcoded -- for the time being hardcoded
gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta matchingSpec)) lsSpec = do gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do
gevalRunPipeline parserSpec (trans step) finalPipeline context gevalRunPipeline parserSpec (trans step) finalPipeline context
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser)) where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
context = fromSpecificationToWithoutInput lsSpec context = fromSpecificationToWithoutInput lsSpec
step = case toSing matchingSpec of step = itemStep SAMultiLabelFMeasure
SomeSing s -> itemStep (SAMultiLabelFMeasure s) expParser = expectedParser SAMultiLabelFMeasure
expParser = case toSing matchingSpec of outParser = outputParser SAMultiLabelFMeasure
SomeSing s -> expectedParser (SAMultiLabelFMeasure s)
outParser = case toSing matchingSpec of
SomeSing s -> outputParser (SAMultiLabelFMeasure s)
finalPipeline = fixer ( finalPipeline = fixer (
CL.map (fMeasureOnCounts beta) CL.map (fMeasureOnCounts beta)
.| (bootstrapC numberOfSamples .| (bootstrapC numberOfSamples
@ -633,10 +630,10 @@ gevalCoreOnSources (LogLossHashed nbOfBits) = helperLogLossHashed nbOfBits id
gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood
gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec)) gevalCoreOnSources (Mean (MultiLabelFMeasure beta))
= gevalCoreWithoutInputOnItemTargets (Right . intoWords) = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
(Right . getWords) (Right . getWords)
((fMeasureOnCounts beta) . (getWeightedCounts (getMatchingFunctionForString matchingSpec))) ((fMeasureOnCounts beta) . (getCounts (==)))
averageC averageC
id id
noGraph noGraph
@ -664,10 +661,9 @@ gevalCoreOnSources (Mean WER)
gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being" gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being"
-- only MultiLabel-F1 handled for JSONs for the time being... -- only MultiLabel-F1 handled for JSONs for the time being...
gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) = gevalCoreOnSources (MultiLabelFMeasure beta) = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
gevalCoreWithoutInputOnItemTargets (Right . intoWords)
(Right . getWords) (Right . getWords)
(getWeightedCounts (getMatchingFunctionForString matchingSpec)) (getCounts (==))
countAgg countAgg
(fMeasureOnCounts beta) (fMeasureOnCounts beta)
noGraph noGraph
@ -748,9 +744,9 @@ countFragAgg :: (Num n, Num v, Monad m) => ConduitM (n, n, v, v) o m (n, n, v, v
countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0) countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0)
gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
(V.Vector (Double, Double) -> Double) -> -- ^ correlation function (V.Vector (Double, Double) -> Double) -- ^ correlation function
LineSourcesSpecification (ResourceT m) -> -> LineSourcesSpecification (ResourceT m)
m (MetricOutput) -- ^ metric values for the output against the expected output -> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreByCorrelationMeasure correlationFunction = gevalCoreByCorrelationMeasure correlationFunction =
gevalCoreWithoutInput SAPearson correlationC finalStep noGraph gevalCoreWithoutInput SAPearson correlationC finalStep noGraph
where correlationC = CC.foldl (flip (:)) [] where correlationC = CC.foldl (flip (:)) []
@ -850,13 +846,14 @@ gevalRunPipeline' parserSpec itemStep finalPipeline context = do
<$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..]) <$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
<*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| finalPipeline) <*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| finalPipeline)
continueGEvalCalculations :: forall m t . (MonadIO m) =>
continueGEvalCalculations :: (MonadIO m) =>
SAMetric t SAMetric t
-> Metric -> Metric
-> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput -> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput
continueGEvalCalculations (SAMultiLabelFMeasure matchingSpec) (MultiLabelFMeasure beta matchingSpec') continueGEvalCalculations SAMultiLabelFMeasure (MultiLabelFMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
= defineContinuation countAgg (fMeasureOnCounts beta) noGraph
continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph

View File

@ -7,10 +7,9 @@ module GEval.CreateChallenge
import GEval.Metric import GEval.Metric
import GEval.EvaluationScheme import GEval.EvaluationScheme
import GEval.Common (GEvalException(..)) import GEval.Common (GEvalException(..), FormattingOptions(..))
import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName) import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName)
import GEval.Submit (tokenFileName) import GEval.Submit (tokenFileName)
import GEval.MatchingSpecification (MatchingSpecification(ExactMatch))
import qualified System.Directory as D import qualified System.Directory as D
import Control.Conditional (whenM) import Control.Conditional (whenM)
@ -22,6 +21,9 @@ import Data.String.Here
import Data.List (intercalate) import Data.List (intercalate)
import Data.List.Split (splitOn) import Data.List.Split (splitOn)
import Data.Bool
import Text.Printf
createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO () createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
createChallenge withDataFiles expectedDirectory spec = do createChallenge withDataFiles expectedDirectory spec = do
@ -31,7 +33,7 @@ createChallenge withDataFiles expectedDirectory spec = do
D.createDirectoryIfMissing False testDirectory D.createDirectoryIfMissing False testDirectory
createFile (expectedDirectory </> ".gitignore") $ gitignoreContents createFile (expectedDirectory </> ".gitignore") $ gitignoreContents
createFile (expectedDirectory </> "README.md") $ readmeMDContents metric testName createFile (expectedDirectory </> "README.md") $ readmeMDContents metric testName
createFile (expectedDirectory </> configFileName) $ configContents metrics precision testName createFile (expectedDirectory </> configFileName) $ configContents metrics format testName
createHeaderFile expectedDirectory "in-header.tsv" $ inHeaderContents metric createHeaderFile expectedDirectory "in-header.tsv" $ inHeaderContents metric
createHeaderFile expectedDirectory "out-header.tsv" $ outHeaderContents metric createHeaderFile expectedDirectory "out-header.tsv" $ outHeaderContents metric
if withDataFiles if withDataFiles
@ -49,7 +51,7 @@ createChallenge withDataFiles expectedDirectory spec = do
return () return ()
where metric = gesMainMetric spec where metric = gesMainMetric spec
metrics = gesMetrics spec metrics = gesMetrics spec
precision = gesPrecision spec format = gesFormatting spec
testName = gesTestName spec testName = gesTestName spec
trainDirectory = expectedDirectory </> "train" trainDirectory = expectedDirectory </> "train"
devDirectory = expectedDirectory </> "dev-0" devDirectory = expectedDirectory </> "dev-0"
@ -332,8 +334,8 @@ character (inclusively).
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta ExactMatch) testName readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
readmeMDContents (MultiLabelFMeasure beta _) testName = [i| readmeMDContents (MultiLabelFMeasure beta) testName = [i|
Tag names and their component Tag names and their component
============================= =============================
@ -423,18 +425,17 @@ Directory structure
|] |]
configContents :: [EvaluationScheme] -> Maybe Int -> String -> String configContents :: [EvaluationScheme] -> FormattingOptions -> String -> String
configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++ configContents schemes format testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++
(if testName /= defaultTestName (if testName /= defaultTestName
then then
" --test-name " ++ testName " --test-name " ++ testName
else else
"") ++ "") ++
(precisionOpt precision) ++ (precisionOpt format) ++
inHeaderOpts ++ inHeaderOpts ++
outHeaderOpts outHeaderOpts
where precisionOpt Nothing = "" where precisionOpt (FormattingOptions m b) = maybe "" (printf "--precision %d ") m ++ bool "" "--show-as-percentage" b
precisionOpt (Just p) = " --precision " ++ (show p)
((EvaluationScheme mainMetric _):_) = schemes ((EvaluationScheme mainMetric _):_) = schemes
inHeaderOpts = getHeaderOpts "in-header" inHeaderContents inHeaderOpts = getHeaderOpts "in-header" inHeaderContents
outHeaderOpts = getHeaderOpts "out-header" outHeaderContents outHeaderOpts = getHeaderOpts "out-header" outHeaderContents
@ -534,8 +535,8 @@ trainContents TokenAccuracy = [hereLit|* V N I like cats
trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19 The student's smart trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19 The student's smart
N:1-6 N:8-10 V:12-13 A:15-18 Mary's dog is nice N:1-6 N:8-10 V:12-13 A:15-18 Mary's dog is nice
|] |]
trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta ExactMatch) trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
trainContents (MultiLabelFMeasure _ _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5 trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5
Steven bloody Brown person/1,3 first-name/1 surname/3 Steven bloody Brown person/1,3 first-name/1 surname/3
James and James first-name/1 firstname/3 James and James first-name/1 firstname/3
|] |]
@ -607,8 +608,8 @@ Ala has a cat
devInContents SegmentAccuracy = [hereLit|John is smart devInContents SegmentAccuracy = [hereLit|John is smart
Mary's intelligent Mary's intelligent
|] |]
devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta ExactMatch) devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
devInContents (MultiLabelFMeasure _ _) = [hereLit|Jan Kowalski is here devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
I see him I see him
Barbara Barbara
|] |]
@ -675,8 +676,8 @@ N V * N
devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13 devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13
N:1-4 V:6-7 A:9-19 N:1-4 V:6-7 A:9-19
|] |]
devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta ExactMatch) devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
devExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,2 first-name/1 surname/2 devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
first-name/1 first-name/1
|] |]
@ -748,8 +749,8 @@ I know
testInContents SegmentAccuracy = [hereLit|Mary's cat is old testInContents SegmentAccuracy = [hereLit|Mary's cat is old
John is young John is young
|] |]
testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta ExactMatch) testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
testInContents (MultiLabelFMeasure _ _) = [hereLit|John bloody Smith testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
Nobody is there Nobody is there
I saw Marketa I saw Marketa
|] |]
@ -817,8 +818,8 @@ testExpectedContents TokenAccuracy = [hereLit|* V N
testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17 testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17
N:1-4 V:6-7 A:9-13 N:1-4 V:6-7 A:9-13
|] |]
testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta ExactMatch) testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
testExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 surname/3 testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
first-name/3 first-name/3
|] |]
@ -876,8 +877,8 @@ inHeaderContents BIOF1Labels = inHeaderContents BIOF1
inHeaderContents BIOF1 = Just ["Text"] inHeaderContents BIOF1 = Just ["Text"]
inHeaderContents TokenAccuracy = Just ["TokenizedText"] inHeaderContents TokenAccuracy = Just ["TokenizedText"]
inHeaderContents SegmentAccuracy = Just ["Segment"] inHeaderContents SegmentAccuracy = Just ["Segment"]
inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta ExactMatch) inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta)
inHeaderContents (MultiLabelFMeasure _ _) = Just ["Text"] inHeaderContents (MultiLabelFMeasure _) = Just ["Text"]
inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss
inHeaderContents MultiLabelLogLoss = Just ["Utterance"] inHeaderContents MultiLabelLogLoss = Just ["Utterance"]
inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU
@ -904,8 +905,8 @@ outHeaderContents BIOF1Labels = outHeaderContents BIOF1
outHeaderContents BIOF1 = Just ["BIOOutput"] outHeaderContents BIOF1 = Just ["BIOOutput"]
outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"] outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"]
outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"] outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"]
outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta ExactMatch) outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta)
outHeaderContents (MultiLabelFMeasure _ _) = Just ["Entities"] outHeaderContents (MultiLabelFMeasure _) = Just ["Entities"]
outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss
outHeaderContents MultiLabelLogLoss = Just ["Emotion"] outHeaderContents MultiLabelLogLoss = Just ["Emotion"]
outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"] outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"]

View File

@ -1,3 +1,5 @@
{-# LANGUAGE LambdaCase #-}
module GEval.Formatting module GEval.Formatting
(formatTheResult, formatSimpleResult, formatTheResultWithErrorBounds) (formatTheResult, formatSimpleResult, formatTheResultWithErrorBounds)
where where
@ -7,25 +9,29 @@ import Data.Conduit.Bootstrap
import Text.Printf import Text.Printf
formatTheResult :: Maybe Int -> MetricResult -> String formatTheResult :: FormattingOptions -> MetricResult -> String
formatTheResult mPrecision (SimpleRun val) = formatSimpleResult mPrecision val formatTheResult format (SimpleRun val) = formatSimpleResult format val
formatTheResult mPrecision (BootstrapResampling vals) = formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound) formatTheResult format (BootstrapResampling vals) = formatTheResultWithErrorBounds format pointEstimate (Just errorBound)
where pointEstimate = (upperBound + lowerBound) / 2.0 where pointEstimate = (upperBound + lowerBound) / 2.0
errorBound = (upperBound - lowerBound) / 2.0 errorBound = (upperBound - lowerBound) / 2.0
(lowerBound, upperBound) = getConfidenceBounds defaultConfidenceLevel vals (lowerBound, upperBound) = getConfidenceBounds defaultConfidenceLevel vals
formatTheResultWithErrorBounds :: Maybe Int -> MetricValue -> Maybe MetricValue -> String formatTheResultWithErrorBounds :: FormattingOptions -> MetricValue -> Maybe MetricValue -> String
formatTheResultWithErrorBounds mPrecision pointEstimate Nothing = formatSimpleResult mPrecision pointEstimate formatTheResultWithErrorBounds format pointEstimate Nothing = formatSimpleResult format pointEstimate
formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound) = (formatSimpleResult correctedPrecision pointEstimate) formatTheResultWithErrorBounds format pointEstimate (Just errorBound) = (formatSimpleResult formatWithCorrectedPrecision pointEstimate)
++ "±" ++ "±"
++ (formatSimpleResult correctedPrecision errorBound) ++ (formatSimpleResult formatWithCorrectedPrecision errorBound)
where errorBoundMagnitude = (floor (logBase 10.0 errorBound)) - 1 where errorBoundMagnitude = (floor (logBase 10.0 errorBound)) - 1
correctedPrecision = Just $ selectLowerPrecision (max (-errorBoundMagnitude) 0) mPrecision formatWithCorrectedPrecision = selectLowerPrecision (max (-errorBoundMagnitude) 0) format
formatSimpleResult :: Maybe Int -> MetricValue -> String formatSimpleResult :: FormattingOptions -> MetricValue -> String
formatSimpleResult Nothing = show formatSimpleResult = \case
formatSimpleResult (Just prec) = printf "%0.*f" prec FormattingOptions (Just prec) True -> printf "%.*f" (prec-2) . (*100)
FormattingOptions (Just prec) _ -> printf "0.*f" prec
_ -> show
selectLowerPrecision :: Int -> Maybe Int -> Int selectLowerPrecision :: Int -> FormattingOptions -> FormattingOptions
selectLowerPrecision p Nothing = p selectLowerPrecision p = \case
selectLowerPrecision p (Just p') = min p p' a@(FormattingOptions _ True) -> a
FormattingOptions (Just prec) _ -> FormattingOptions (Just $ min prec p) False
_ -> FormattingOptions (Just p) False

View File

@ -6,7 +6,7 @@ module GEval.OptionsParser
runGEvalGetOptions, runGEvalGetOptions,
getOptions, getOptions,
metricReader, metricReader,
precisionArgParser formatParser
) where ) where
import Paths_geval (version) import Paths_geval (version)
@ -138,12 +138,15 @@ optionsParser = GEvalOptions
<> help "Mark worst features when in the line-by-line mode") <> help "Mark worst features when in the line-by-line mode")
precisionArgParser :: Parser Int formatParser :: Parser FormattingOptions
precisionArgParser = option auto formatParser = FormattingOptions
( long "precision" <$> (optional $ option auto ( long "precision"
<> short 'p' <> short 'p'
<> metavar "NUMBER-OF-FRACTIONAL-DIGITS" <> metavar "NUMBER-OF-FRACTIONAL-DIGITS"
<> help "Arithmetic precision, i.e. the number of fractional digits to be shown" ) <> help "Arithmetic precision, i.e. the number of fractional digits to be shown" ))
<*> switch ( long "show-as-percentage"
<> short '%'
<> help "Returns the result as a percentage (i.e. maximum value of 100 instead of 1)" )
specParser :: Parser GEvalSpecification specParser :: Parser GEvalSpecification
specParser = GEvalSpecification specParser = GEvalSpecification
@ -191,7 +194,7 @@ specParser = GEvalSpecification
<> metavar "INPUT" <> metavar "INPUT"
<> help "The name of the file with the input (applicable only for some metrics)" ) <> help "The name of the file with the input (applicable only for some metrics)" )
<*> ((flip fromMaybe) <$> (singletonMaybe <$> altMetricReader) <*> metricReader) <*> ((flip fromMaybe) <$> (singletonMaybe <$> altMetricReader) <*> metricReader)
<*> optional precisionArgParser <*> formatParser
<*> (optional $ option auto <*> (optional $ option auto
( long "tokenizer" ( long "tokenizer"
<> short 'T' <> short 'T'

View File

@ -343,16 +343,6 @@ main = hspec $ do
runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615 runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
it "labels given with probs and numbers" $ do it "labels given with probs and numbers" $ do
runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666 runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
it "information extraction" $ do
runGEvalTest "multilabel-f1-ie" `shouldReturnAlmost` 0.1111111111
it "information extraction with flags" $ do
runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
it "information extraction with fuzzy matching" $ do
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
it "information extraction with smart fuzzy matching" $ do
runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444
it "information extraction with smart fuzzy matching hardened" $ do
runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555
describe "Mean/MultiLabel-F" $ do describe "Mean/MultiLabel-F" $ do
it "simple" $ do it "simple" $ do
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5 runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
@ -478,7 +468,7 @@ main = hspec $ do
gesExpectedFile = "expected.tsv", gesExpectedFile = "expected.tsv",
gesInputFile = "in.tsv", gesInputFile = "in.tsv",
gesMetrics = [EvaluationScheme Likelihood []], gesMetrics = [EvaluationScheme Likelihood []],
gesPrecision = Nothing, gesFormatting = FormattingOptions Nothing False,
gesTokenizer = Nothing, gesTokenizer = Nothing,
gesGonitoHost = Nothing, gesGonitoHost = Nothing,
gesToken = Nothing, gesToken = Nothing,
@ -606,7 +596,7 @@ main = hspec $ do
let spec = defaultGEvalSpecification { let spec = defaultGEvalSpecification {
gesExpectedDirectory = Just tempDir, gesExpectedDirectory = Just tempDir,
gesMetrics = [scheme], gesMetrics = [scheme],
gesPrecision = Just 4 } gesFormatting = FormattingOptions (Just 4) False }
createChallenge True tempDir spec createChallenge True tempDir spec
validationChallenge tempDir spec validationChallenge tempDir spec
describe "test sample outputs" $ do describe "test sample outputs" $ do