pure percantage format feature commit
This commit is contained in:
parent
0a484b9c92
commit
32290d3715
@ -15,6 +15,11 @@ type MetricValue = Double
|
|||||||
|
|
||||||
data GraphSeries = GraphSeries [(Double, Double)]
|
data GraphSeries = GraphSeries [(Double, Double)]
|
||||||
|
|
||||||
|
data FormattingOptions = FormattingOptions {
|
||||||
|
decimalPlaces :: Maybe Int,
|
||||||
|
asPercentage :: Bool
|
||||||
|
}
|
||||||
|
|
||||||
data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue]
|
data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue]
|
||||||
|
|
||||||
instance Show MetricResult where
|
instance Show MetricResult where
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
{-# LANGUAGE PackageImports #-}
|
{-# LANGUAGE PackageImports #-}
|
||||||
|
|
||||||
|
|
||||||
module GEval.Core
|
module GEval.Core
|
||||||
( geval,
|
( geval,
|
||||||
gevalCore,
|
gevalCore,
|
||||||
@ -112,7 +113,6 @@ import GEval.Annotation
|
|||||||
import GEval.BlackBoxDebugging
|
import GEval.BlackBoxDebugging
|
||||||
import Data.Conduit.Bootstrap
|
import Data.Conduit.Bootstrap
|
||||||
import GEval.DataSource
|
import GEval.DataSource
|
||||||
import GEval.MatchingSpecification
|
|
||||||
|
|
||||||
import qualified Data.HashMap.Strict as M
|
import qualified Data.HashMap.Strict as M
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
@ -182,7 +182,7 @@ data GEvalSpecification = GEvalSpecification
|
|||||||
gesExpectedFile :: String,
|
gesExpectedFile :: String,
|
||||||
gesInputFile :: String,
|
gesInputFile :: String,
|
||||||
gesMetrics :: [EvaluationScheme],
|
gesMetrics :: [EvaluationScheme],
|
||||||
gesPrecision :: Maybe Int,
|
gesFormatting :: FormattingOptions,
|
||||||
gesTokenizer :: Maybe Tokenizer,
|
gesTokenizer :: Maybe Tokenizer,
|
||||||
gesGonitoHost :: Maybe String,
|
gesGonitoHost :: Maybe String,
|
||||||
gesToken :: Maybe String,
|
gesToken :: Maybe String,
|
||||||
@ -253,7 +253,7 @@ defaultGEvalSpecification = GEvalSpecification {
|
|||||||
gesExpectedFile = defaultExpectedFile,
|
gesExpectedFile = defaultExpectedFile,
|
||||||
gesInputFile = defaultInputFile,
|
gesInputFile = defaultInputFile,
|
||||||
gesMetrics = [EvaluationScheme defaultMetric []],
|
gesMetrics = [EvaluationScheme defaultMetric []],
|
||||||
gesPrecision = Nothing,
|
gesFormatting = FormattingOptions Nothing False,
|
||||||
gesTokenizer = Nothing,
|
gesTokenizer = Nothing,
|
||||||
gesGonitoHost = Nothing,
|
gesGonitoHost = Nothing,
|
||||||
gesToken = Nothing,
|
gesToken = Nothing,
|
||||||
@ -522,7 +522,7 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
|
|||||||
-- some metrics are handled by Bootstrap due to legacy issues,
|
-- some metrics are handled by Bootstrap due to legacy issues,
|
||||||
-- fix on the way
|
-- fix on the way
|
||||||
handleBootstrap :: Metric -> Bool
|
handleBootstrap :: Metric -> Bool
|
||||||
handleBootstrap (Mean (MultiLabelFMeasure _ _)) = True
|
handleBootstrap (Mean (MultiLabelFMeasure _)) = True
|
||||||
handleBootstrap (Mean _) = False
|
handleBootstrap (Mean _) = False
|
||||||
handleBootstrap CharMatch = False
|
handleBootstrap CharMatch = False
|
||||||
handleBootstrap (LogLossHashed _) = False
|
handleBootstrap (LogLossHashed _) = False
|
||||||
@ -567,16 +567,13 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
|
|||||||
-> m (MetricOutput) -- ^ metric values for the output against the expected output
|
-> m (MetricOutput) -- ^ metric values for the output against the expected output
|
||||||
|
|
||||||
-- for the time being hardcoded
|
-- for the time being hardcoded
|
||||||
gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta matchingSpec)) lsSpec = do
|
gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do
|
||||||
gevalRunPipeline parserSpec (trans step) finalPipeline context
|
gevalRunPipeline parserSpec (trans step) finalPipeline context
|
||||||
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
|
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
|
||||||
context = fromSpecificationToWithoutInput lsSpec
|
context = fromSpecificationToWithoutInput lsSpec
|
||||||
step = case toSing matchingSpec of
|
step = itemStep SAMultiLabelFMeasure
|
||||||
SomeSing s -> itemStep (SAMultiLabelFMeasure s)
|
expParser = expectedParser SAMultiLabelFMeasure
|
||||||
expParser = case toSing matchingSpec of
|
outParser = outputParser SAMultiLabelFMeasure
|
||||||
SomeSing s -> expectedParser (SAMultiLabelFMeasure s)
|
|
||||||
outParser = case toSing matchingSpec of
|
|
||||||
SomeSing s -> outputParser (SAMultiLabelFMeasure s)
|
|
||||||
finalPipeline = fixer (
|
finalPipeline = fixer (
|
||||||
CL.map (fMeasureOnCounts beta)
|
CL.map (fMeasureOnCounts beta)
|
||||||
.| (bootstrapC numberOfSamples
|
.| (bootstrapC numberOfSamples
|
||||||
@ -633,10 +630,10 @@ gevalCoreOnSources (LogLossHashed nbOfBits) = helperLogLossHashed nbOfBits id
|
|||||||
gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood
|
gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood
|
||||||
|
|
||||||
|
|
||||||
gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec))
|
gevalCoreOnSources (Mean (MultiLabelFMeasure beta))
|
||||||
= gevalCoreWithoutInputOnItemTargets (Right . intoWords)
|
= gevalCoreWithoutInputOnItemTargets (Right . intoWords)
|
||||||
(Right . getWords)
|
(Right . getWords)
|
||||||
((fMeasureOnCounts beta) . (getWeightedCounts (getMatchingFunctionForString matchingSpec)))
|
((fMeasureOnCounts beta) . (getCounts (==)))
|
||||||
averageC
|
averageC
|
||||||
id
|
id
|
||||||
noGraph
|
noGraph
|
||||||
@ -664,13 +661,12 @@ gevalCoreOnSources (Mean WER)
|
|||||||
gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being"
|
gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being"
|
||||||
|
|
||||||
-- only MultiLabel-F1 handled for JSONs for the time being...
|
-- only MultiLabel-F1 handled for JSONs for the time being...
|
||||||
gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) =
|
gevalCoreOnSources (MultiLabelFMeasure beta) = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
|
||||||
gevalCoreWithoutInputOnItemTargets (Right . intoWords)
|
(Right . getWords)
|
||||||
(Right . getWords)
|
(getCounts (==))
|
||||||
(getWeightedCounts (getMatchingFunctionForString matchingSpec))
|
countAgg
|
||||||
countAgg
|
(fMeasureOnCounts beta)
|
||||||
(fMeasureOnCounts beta)
|
noGraph
|
||||||
noGraph
|
|
||||||
where
|
where
|
||||||
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
|
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
|
||||||
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
|
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
|
||||||
@ -748,9 +744,9 @@ countFragAgg :: (Num n, Num v, Monad m) => ConduitM (n, n, v, v) o m (n, n, v, v
|
|||||||
countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0)
|
countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0)
|
||||||
|
|
||||||
gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
|
gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
|
||||||
(V.Vector (Double, Double) -> Double) -> -- ^ correlation function
|
(V.Vector (Double, Double) -> Double) -- ^ correlation function
|
||||||
LineSourcesSpecification (ResourceT m) ->
|
-> LineSourcesSpecification (ResourceT m)
|
||||||
m (MetricOutput) -- ^ metric values for the output against the expected output
|
-> m (MetricOutput) -- ^ metric values for the output against the expected output
|
||||||
gevalCoreByCorrelationMeasure correlationFunction =
|
gevalCoreByCorrelationMeasure correlationFunction =
|
||||||
gevalCoreWithoutInput SAPearson correlationC finalStep noGraph
|
gevalCoreWithoutInput SAPearson correlationC finalStep noGraph
|
||||||
where correlationC = CC.foldl (flip (:)) []
|
where correlationC = CC.foldl (flip (:)) []
|
||||||
@ -850,13 +846,14 @@ gevalRunPipeline' parserSpec itemStep finalPipeline context = do
|
|||||||
<$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
|
<$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
|
||||||
<*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| finalPipeline)
|
<*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| finalPipeline)
|
||||||
|
|
||||||
continueGEvalCalculations :: forall m t . (MonadIO m) =>
|
|
||||||
|
|
||||||
|
continueGEvalCalculations :: (MonadIO m) =>
|
||||||
SAMetric t
|
SAMetric t
|
||||||
-> Metric
|
-> Metric
|
||||||
-> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput
|
-> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput
|
||||||
|
|
||||||
continueGEvalCalculations (SAMultiLabelFMeasure matchingSpec) (MultiLabelFMeasure beta matchingSpec')
|
continueGEvalCalculations SAMultiLabelFMeasure (MultiLabelFMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph
|
||||||
= defineContinuation countAgg (fMeasureOnCounts beta) noGraph
|
|
||||||
|
|
||||||
continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph
|
continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph
|
||||||
|
|
||||||
|
@ -7,10 +7,9 @@ module GEval.CreateChallenge
|
|||||||
|
|
||||||
import GEval.Metric
|
import GEval.Metric
|
||||||
import GEval.EvaluationScheme
|
import GEval.EvaluationScheme
|
||||||
import GEval.Common (GEvalException(..))
|
import GEval.Common (GEvalException(..), FormattingOptions(..))
|
||||||
import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName)
|
import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName)
|
||||||
import GEval.Submit (tokenFileName)
|
import GEval.Submit (tokenFileName)
|
||||||
import GEval.MatchingSpecification (MatchingSpecification(ExactMatch))
|
|
||||||
import qualified System.Directory as D
|
import qualified System.Directory as D
|
||||||
import Control.Conditional (whenM)
|
import Control.Conditional (whenM)
|
||||||
|
|
||||||
@ -22,6 +21,9 @@ import Data.String.Here
|
|||||||
|
|
||||||
import Data.List (intercalate)
|
import Data.List (intercalate)
|
||||||
import Data.List.Split (splitOn)
|
import Data.List.Split (splitOn)
|
||||||
|
import Data.Bool
|
||||||
|
|
||||||
|
import Text.Printf
|
||||||
|
|
||||||
createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
|
createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
|
||||||
createChallenge withDataFiles expectedDirectory spec = do
|
createChallenge withDataFiles expectedDirectory spec = do
|
||||||
@ -31,7 +33,7 @@ createChallenge withDataFiles expectedDirectory spec = do
|
|||||||
D.createDirectoryIfMissing False testDirectory
|
D.createDirectoryIfMissing False testDirectory
|
||||||
createFile (expectedDirectory </> ".gitignore") $ gitignoreContents
|
createFile (expectedDirectory </> ".gitignore") $ gitignoreContents
|
||||||
createFile (expectedDirectory </> "README.md") $ readmeMDContents metric testName
|
createFile (expectedDirectory </> "README.md") $ readmeMDContents metric testName
|
||||||
createFile (expectedDirectory </> configFileName) $ configContents metrics precision testName
|
createFile (expectedDirectory </> configFileName) $ configContents metrics format testName
|
||||||
createHeaderFile expectedDirectory "in-header.tsv" $ inHeaderContents metric
|
createHeaderFile expectedDirectory "in-header.tsv" $ inHeaderContents metric
|
||||||
createHeaderFile expectedDirectory "out-header.tsv" $ outHeaderContents metric
|
createHeaderFile expectedDirectory "out-header.tsv" $ outHeaderContents metric
|
||||||
if withDataFiles
|
if withDataFiles
|
||||||
@ -49,7 +51,7 @@ createChallenge withDataFiles expectedDirectory spec = do
|
|||||||
return ()
|
return ()
|
||||||
where metric = gesMainMetric spec
|
where metric = gesMainMetric spec
|
||||||
metrics = gesMetrics spec
|
metrics = gesMetrics spec
|
||||||
precision = gesPrecision spec
|
format = gesFormatting spec
|
||||||
testName = gesTestName spec
|
testName = gesTestName spec
|
||||||
trainDirectory = expectedDirectory </> "train"
|
trainDirectory = expectedDirectory </> "train"
|
||||||
devDirectory = expectedDirectory </> "dev-0"
|
devDirectory = expectedDirectory </> "dev-0"
|
||||||
@ -332,8 +334,8 @@ character (inclusively).
|
|||||||
|
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta ExactMatch) testName
|
readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
|
||||||
readmeMDContents (MultiLabelFMeasure beta _) testName = [i|
|
readmeMDContents (MultiLabelFMeasure beta) testName = [i|
|
||||||
Tag names and their component
|
Tag names and their component
|
||||||
=============================
|
=============================
|
||||||
|
|
||||||
@ -423,18 +425,17 @@ Directory structure
|
|||||||
|]
|
|]
|
||||||
|
|
||||||
|
|
||||||
configContents :: [EvaluationScheme] -> Maybe Int -> String -> String
|
configContents :: [EvaluationScheme] -> FormattingOptions -> String -> String
|
||||||
configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++
|
configContents schemes format testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++
|
||||||
(if testName /= defaultTestName
|
(if testName /= defaultTestName
|
||||||
then
|
then
|
||||||
" --test-name " ++ testName
|
" --test-name " ++ testName
|
||||||
else
|
else
|
||||||
"") ++
|
"") ++
|
||||||
(precisionOpt precision) ++
|
(precisionOpt format) ++
|
||||||
inHeaderOpts ++
|
inHeaderOpts ++
|
||||||
outHeaderOpts
|
outHeaderOpts
|
||||||
where precisionOpt Nothing = ""
|
where precisionOpt (FormattingOptions m b) = maybe "" (printf "--precision %d ") m ++ bool "" "--show-as-percentage" b
|
||||||
precisionOpt (Just p) = " --precision " ++ (show p)
|
|
||||||
((EvaluationScheme mainMetric _):_) = schemes
|
((EvaluationScheme mainMetric _):_) = schemes
|
||||||
inHeaderOpts = getHeaderOpts "in-header" inHeaderContents
|
inHeaderOpts = getHeaderOpts "in-header" inHeaderContents
|
||||||
outHeaderOpts = getHeaderOpts "out-header" outHeaderContents
|
outHeaderOpts = getHeaderOpts "out-header" outHeaderContents
|
||||||
@ -534,8 +535,8 @@ trainContents TokenAccuracy = [hereLit|* V N I like cats
|
|||||||
trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19 The student's smart
|
trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19 The student's smart
|
||||||
N:1-6 N:8-10 V:12-13 A:15-18 Mary's dog is nice
|
N:1-6 N:8-10 V:12-13 A:15-18 Mary's dog is nice
|
||||||
|]
|
|]
|
||||||
trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta ExactMatch)
|
trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
|
||||||
trainContents (MultiLabelFMeasure _ _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5
|
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5
|
||||||
Steven bloody Brown person/1,3 first-name/1 surname/3
|
Steven bloody Brown person/1,3 first-name/1 surname/3
|
||||||
James and James first-name/1 firstname/3
|
James and James first-name/1 firstname/3
|
||||||
|]
|
|]
|
||||||
@ -607,8 +608,8 @@ Ala has a cat
|
|||||||
devInContents SegmentAccuracy = [hereLit|John is smart
|
devInContents SegmentAccuracy = [hereLit|John is smart
|
||||||
Mary's intelligent
|
Mary's intelligent
|
||||||
|]
|
|]
|
||||||
devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta ExactMatch)
|
devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
|
||||||
devInContents (MultiLabelFMeasure _ _) = [hereLit|Jan Kowalski is here
|
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
|
||||||
I see him
|
I see him
|
||||||
Barbara
|
Barbara
|
||||||
|]
|
|]
|
||||||
@ -675,8 +676,8 @@ N V * N
|
|||||||
devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13
|
devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13
|
||||||
N:1-4 V:6-7 A:9-19
|
N:1-4 V:6-7 A:9-19
|
||||||
|]
|
|]
|
||||||
devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta ExactMatch)
|
devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
|
||||||
devExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,2 first-name/1 surname/2
|
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
|
||||||
|
|
||||||
first-name/1
|
first-name/1
|
||||||
|]
|
|]
|
||||||
@ -748,8 +749,8 @@ I know
|
|||||||
testInContents SegmentAccuracy = [hereLit|Mary's cat is old
|
testInContents SegmentAccuracy = [hereLit|Mary's cat is old
|
||||||
John is young
|
John is young
|
||||||
|]
|
|]
|
||||||
testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta ExactMatch)
|
testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
|
||||||
testInContents (MultiLabelFMeasure _ _) = [hereLit|John bloody Smith
|
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
|
||||||
Nobody is there
|
Nobody is there
|
||||||
I saw Marketa
|
I saw Marketa
|
||||||
|]
|
|]
|
||||||
@ -817,8 +818,8 @@ testExpectedContents TokenAccuracy = [hereLit|* V N
|
|||||||
testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17
|
testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17
|
||||||
N:1-4 V:6-7 A:9-13
|
N:1-4 V:6-7 A:9-13
|
||||||
|]
|
|]
|
||||||
testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta ExactMatch)
|
testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
|
||||||
testExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 surname/3
|
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
|
||||||
|
|
||||||
first-name/3
|
first-name/3
|
||||||
|]
|
|]
|
||||||
@ -876,8 +877,8 @@ inHeaderContents BIOF1Labels = inHeaderContents BIOF1
|
|||||||
inHeaderContents BIOF1 = Just ["Text"]
|
inHeaderContents BIOF1 = Just ["Text"]
|
||||||
inHeaderContents TokenAccuracy = Just ["TokenizedText"]
|
inHeaderContents TokenAccuracy = Just ["TokenizedText"]
|
||||||
inHeaderContents SegmentAccuracy = Just ["Segment"]
|
inHeaderContents SegmentAccuracy = Just ["Segment"]
|
||||||
inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta ExactMatch)
|
inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta)
|
||||||
inHeaderContents (MultiLabelFMeasure _ _) = Just ["Text"]
|
inHeaderContents (MultiLabelFMeasure _) = Just ["Text"]
|
||||||
inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss
|
inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss
|
||||||
inHeaderContents MultiLabelLogLoss = Just ["Utterance"]
|
inHeaderContents MultiLabelLogLoss = Just ["Utterance"]
|
||||||
inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU
|
inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU
|
||||||
@ -904,8 +905,8 @@ outHeaderContents BIOF1Labels = outHeaderContents BIOF1
|
|||||||
outHeaderContents BIOF1 = Just ["BIOOutput"]
|
outHeaderContents BIOF1 = Just ["BIOOutput"]
|
||||||
outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"]
|
outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"]
|
||||||
outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"]
|
outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"]
|
||||||
outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta ExactMatch)
|
outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta)
|
||||||
outHeaderContents (MultiLabelFMeasure _ _) = Just ["Entities"]
|
outHeaderContents (MultiLabelFMeasure _) = Just ["Entities"]
|
||||||
outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss
|
outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss
|
||||||
outHeaderContents MultiLabelLogLoss = Just ["Emotion"]
|
outHeaderContents MultiLabelLogLoss = Just ["Emotion"]
|
||||||
outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"]
|
outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"]
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
{-# LANGUAGE LambdaCase #-}
|
||||||
|
|
||||||
module GEval.Formatting
|
module GEval.Formatting
|
||||||
(formatTheResult, formatSimpleResult, formatTheResultWithErrorBounds)
|
(formatTheResult, formatSimpleResult, formatTheResultWithErrorBounds)
|
||||||
where
|
where
|
||||||
@ -7,25 +9,29 @@ import Data.Conduit.Bootstrap
|
|||||||
import Text.Printf
|
import Text.Printf
|
||||||
|
|
||||||
|
|
||||||
formatTheResult :: Maybe Int -> MetricResult -> String
|
formatTheResult :: FormattingOptions -> MetricResult -> String
|
||||||
formatTheResult mPrecision (SimpleRun val) = formatSimpleResult mPrecision val
|
formatTheResult format (SimpleRun val) = formatSimpleResult format val
|
||||||
formatTheResult mPrecision (BootstrapResampling vals) = formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound)
|
formatTheResult format (BootstrapResampling vals) = formatTheResultWithErrorBounds format pointEstimate (Just errorBound)
|
||||||
where pointEstimate = (upperBound + lowerBound) / 2.0
|
where pointEstimate = (upperBound + lowerBound) / 2.0
|
||||||
errorBound = (upperBound - lowerBound) / 2.0
|
errorBound = (upperBound - lowerBound) / 2.0
|
||||||
(lowerBound, upperBound) = getConfidenceBounds defaultConfidenceLevel vals
|
(lowerBound, upperBound) = getConfidenceBounds defaultConfidenceLevel vals
|
||||||
|
|
||||||
formatTheResultWithErrorBounds :: Maybe Int -> MetricValue -> Maybe MetricValue -> String
|
formatTheResultWithErrorBounds :: FormattingOptions -> MetricValue -> Maybe MetricValue -> String
|
||||||
formatTheResultWithErrorBounds mPrecision pointEstimate Nothing = formatSimpleResult mPrecision pointEstimate
|
formatTheResultWithErrorBounds format pointEstimate Nothing = formatSimpleResult format pointEstimate
|
||||||
formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound) = (formatSimpleResult correctedPrecision pointEstimate)
|
formatTheResultWithErrorBounds format pointEstimate (Just errorBound) = (formatSimpleResult formatWithCorrectedPrecision pointEstimate)
|
||||||
++ "±"
|
++ "±"
|
||||||
++ (formatSimpleResult correctedPrecision errorBound)
|
++ (formatSimpleResult formatWithCorrectedPrecision errorBound)
|
||||||
where errorBoundMagnitude = (floor (logBase 10.0 errorBound)) - 1
|
where errorBoundMagnitude = (floor (logBase 10.0 errorBound)) - 1
|
||||||
correctedPrecision = Just $ selectLowerPrecision (max (-errorBoundMagnitude) 0) mPrecision
|
formatWithCorrectedPrecision = selectLowerPrecision (max (-errorBoundMagnitude) 0) format
|
||||||
|
|
||||||
formatSimpleResult :: Maybe Int -> MetricValue -> String
|
formatSimpleResult :: FormattingOptions -> MetricValue -> String
|
||||||
formatSimpleResult Nothing = show
|
formatSimpleResult = \case
|
||||||
formatSimpleResult (Just prec) = printf "%0.*f" prec
|
FormattingOptions (Just prec) True -> printf "%.*f" (prec-2) . (*100)
|
||||||
|
FormattingOptions (Just prec) _ -> printf "0.*f" prec
|
||||||
|
_ -> show
|
||||||
|
|
||||||
selectLowerPrecision :: Int -> Maybe Int -> Int
|
selectLowerPrecision :: Int -> FormattingOptions -> FormattingOptions
|
||||||
selectLowerPrecision p Nothing = p
|
selectLowerPrecision p = \case
|
||||||
selectLowerPrecision p (Just p') = min p p'
|
a@(FormattingOptions _ True) -> a
|
||||||
|
FormattingOptions (Just prec) _ -> FormattingOptions (Just $ min prec p) False
|
||||||
|
_ -> FormattingOptions (Just p) False
|
||||||
|
@ -6,7 +6,7 @@ module GEval.OptionsParser
|
|||||||
runGEvalGetOptions,
|
runGEvalGetOptions,
|
||||||
getOptions,
|
getOptions,
|
||||||
metricReader,
|
metricReader,
|
||||||
precisionArgParser
|
formatParser
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Paths_geval (version)
|
import Paths_geval (version)
|
||||||
@ -138,12 +138,15 @@ optionsParser = GEvalOptions
|
|||||||
<> help "Mark worst features when in the line-by-line mode")
|
<> help "Mark worst features when in the line-by-line mode")
|
||||||
|
|
||||||
|
|
||||||
precisionArgParser :: Parser Int
|
formatParser :: Parser FormattingOptions
|
||||||
precisionArgParser = option auto
|
formatParser = FormattingOptions
|
||||||
( long "precision"
|
<$> (optional $ option auto ( long "precision"
|
||||||
<> short 'p'
|
<> short 'p'
|
||||||
<> metavar "NUMBER-OF-FRACTIONAL-DIGITS"
|
<> metavar "NUMBER-OF-FRACTIONAL-DIGITS"
|
||||||
<> help "Arithmetic precision, i.e. the number of fractional digits to be shown" )
|
<> help "Arithmetic precision, i.e. the number of fractional digits to be shown" ))
|
||||||
|
<*> switch ( long "show-as-percentage"
|
||||||
|
<> short '%'
|
||||||
|
<> help "Returns the result as a percentage (i.e. maximum value of 100 instead of 1)" )
|
||||||
|
|
||||||
specParser :: Parser GEvalSpecification
|
specParser :: Parser GEvalSpecification
|
||||||
specParser = GEvalSpecification
|
specParser = GEvalSpecification
|
||||||
@ -191,7 +194,7 @@ specParser = GEvalSpecification
|
|||||||
<> metavar "INPUT"
|
<> metavar "INPUT"
|
||||||
<> help "The name of the file with the input (applicable only for some metrics)" )
|
<> help "The name of the file with the input (applicable only for some metrics)" )
|
||||||
<*> ((flip fromMaybe) <$> (singletonMaybe <$> altMetricReader) <*> metricReader)
|
<*> ((flip fromMaybe) <$> (singletonMaybe <$> altMetricReader) <*> metricReader)
|
||||||
<*> optional precisionArgParser
|
<*> formatParser
|
||||||
<*> (optional $ option auto
|
<*> (optional $ option auto
|
||||||
( long "tokenizer"
|
( long "tokenizer"
|
||||||
<> short 'T'
|
<> short 'T'
|
||||||
|
14
test/Spec.hs
14
test/Spec.hs
@ -343,16 +343,6 @@ main = hspec $ do
|
|||||||
runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
|
runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
|
||||||
it "labels given with probs and numbers" $ do
|
it "labels given with probs and numbers" $ do
|
||||||
runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
|
runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
|
||||||
it "information extraction" $ do
|
|
||||||
runGEvalTest "multilabel-f1-ie" `shouldReturnAlmost` 0.1111111111
|
|
||||||
it "information extraction with flags" $ do
|
|
||||||
runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
|
|
||||||
it "information extraction with fuzzy matching" $ do
|
|
||||||
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
|
|
||||||
it "information extraction with smart fuzzy matching" $ do
|
|
||||||
runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444
|
|
||||||
it "information extraction with smart fuzzy matching hardened" $ do
|
|
||||||
runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555
|
|
||||||
describe "Mean/MultiLabel-F" $ do
|
describe "Mean/MultiLabel-F" $ do
|
||||||
it "simple" $ do
|
it "simple" $ do
|
||||||
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
||||||
@ -478,7 +468,7 @@ main = hspec $ do
|
|||||||
gesExpectedFile = "expected.tsv",
|
gesExpectedFile = "expected.tsv",
|
||||||
gesInputFile = "in.tsv",
|
gesInputFile = "in.tsv",
|
||||||
gesMetrics = [EvaluationScheme Likelihood []],
|
gesMetrics = [EvaluationScheme Likelihood []],
|
||||||
gesPrecision = Nothing,
|
gesFormatting = FormattingOptions Nothing False,
|
||||||
gesTokenizer = Nothing,
|
gesTokenizer = Nothing,
|
||||||
gesGonitoHost = Nothing,
|
gesGonitoHost = Nothing,
|
||||||
gesToken = Nothing,
|
gesToken = Nothing,
|
||||||
@ -606,7 +596,7 @@ main = hspec $ do
|
|||||||
let spec = defaultGEvalSpecification {
|
let spec = defaultGEvalSpecification {
|
||||||
gesExpectedDirectory = Just tempDir,
|
gesExpectedDirectory = Just tempDir,
|
||||||
gesMetrics = [scheme],
|
gesMetrics = [scheme],
|
||||||
gesPrecision = Just 4 }
|
gesFormatting = FormattingOptions (Just 4) False }
|
||||||
createChallenge True tempDir spec
|
createChallenge True tempDir spec
|
||||||
validationChallenge tempDir spec
|
validationChallenge tempDir spec
|
||||||
describe "test sample outputs" $ do
|
describe "test sample outputs" $ do
|
||||||
|
Loading…
Reference in New Issue
Block a user