diff --git a/src/GEval/Common.hs b/src/GEval/Common.hs index c8b581d..9dd06d7 100644 --- a/src/GEval/Common.hs +++ b/src/GEval/Common.hs @@ -19,6 +19,7 @@ data FormattingOptions = FormattingOptions { decimalPlaces :: Maybe Int, asPercentage :: Bool } + deriving (Show) data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue] diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 0fb4b16..74ec429 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -56,8 +56,6 @@ module GEval.Core fromSpecificationToWithInput ) where -import Debug.Trace - import Data.Singletons.TH import GEval.Metric @@ -152,13 +150,39 @@ hasFiltering (_:ops) = hasFiltering ops -- | Could output be preprocessable isPreprocessable :: Metric -> Bool -isPreprocessable BLEU = True -isPreprocessable GLEU = True -isPreprocessable WER = True +isPreprocessable RMSE = False +isPreprocessable MSE = False +isPreprocessable Pearson = False +isPreprocessable Spearman = False +isPreprocessable BLEU = True +isPreprocessable GLEU = True +isPreprocessable WER = True isPreprocessable Accuracy = True +isPreprocessable ClippEU = False +isPreprocessable (FMeasure _) = False +isPreprocessable (MacroFMeasure _) = False +isPreprocessable (SoftFMeasure _) = False +isPreprocessable (ProbabilisticMultiLabelFMeasure _) = True +isPreprocessable (ProbabilisticSoftFMeasure _) = True +isPreprocessable (Soft2DFMeasure _) = False +isPreprocessable (FLCFMeasure _) = False +isPreprocessable NMI = False +isPreprocessable (LogLossHashed _) = False +isPreprocessable (LikelihoodHashed _) = False isPreprocessable CharMatch = True +isPreprocessable MAP = False +isPreprocessable LogLoss = False +isPreprocessable Likelihood = False +isPreprocessable BIOF1 = False +isPreprocessable BIOF1Labels = False isPreprocessable TokenAccuracy = True -isPreprocessable _ = False +isPreprocessable SegmentAccuracy = True +isPreprocessable MAE = False +isPreprocessable SMAPE = False +isPreprocessable (MultiLabelFMeasure _ _) = True +isPreprocessable MultiLabelLogLoss = False +isPreprocessable MultiLabelLikelihood = False +isPreprocessable (Mean metric) = isPreprocessable metric defaultOutDirectory = "." defaultTestName = "test-A" @@ -192,7 +216,7 @@ data GEvalSpecification = GEvalSpecification gesBootstrapResampling :: Maybe Int, gesInHeader :: Maybe String, gesOutHeader :: Maybe String } - + deriving (Show) gesMainMetric :: GEvalSpecification -> Metric gesMainMetric spec = case gesMetrics spec of @@ -560,6 +584,7 @@ isEmptyFileSource _ = return False logLossToLikehood logLoss = exp (-logLoss) data LineInFile = LineInFile SourceSpec Word32 Text + deriving Show gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Int -- ^ number of samples diff --git a/test/Spec.hs b/test/Spec.hs index 8a727c0..5d547a3 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -364,8 +364,7 @@ main = hspec $ do runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857 it "Regexp substition" $ do runGEvalTest "accuracy-with-flags" `shouldReturnAlmost` 0.8 - it "In line-by-line mode" $ do - let sampleChallenge = GEvalSpecification + let sampleChallenge = GEvalSpecification { gesOutDirectory = "test/accuracy-flags-line-by-line/accuracy-flags-line-by-line-solution", gesExpectedDirectory = Just "test/accuracy-flags-line-by-line/accuracy-flags-line-by-line", gesTestName = "test-A", @@ -384,6 +383,7 @@ main = hspec $ do gesBootstrapResampling = Nothing, gesInHeader = Nothing, gesOutHeader = Nothing } + it "In line-by-line mode Accuracy" $ do results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume) results `shouldBe` [ LineRecord "foo" @@ -396,6 +396,19 @@ main = hspec $ do "Fox bax 456 bax" 2 0.0] + it "In line-by-line mode F0" $ do + results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge { gesMetrics = [read "MultiLabel-F0:fs<\\d+><>"]} (const Data.Conduit.List.consume) + results `shouldBe` [ + LineRecord "foo" + "Ala 123 ma kota." + "Ala ma 2 kota ." + 1 + 1.0, + LineRecord "foo" + "Foo bar baz" + "Fox bax 456 bax" + 2 + 0.0] describe "Flag examples" $ do it "none" $ do runGEvalTest "flags-none" `shouldReturnAlmost` 0.2 diff --git a/test/multilabel-f1-simple/multilabel-f1-simple/test-A/expected.tsv b/test/multilabel-f1-simple/multilabel-f1-simple/test-A/expected.tsv index 64612c3..1dc07f2 100644 --- a/test/multilabel-f1-simple/multilabel-f1-simple/test-A/expected.tsv +++ b/test/multilabel-f1-simple/multilabel-f1-simple/test-A/expected.tsv @@ -1,4 +1,4 @@ -foo bar baz + foo bar baz foo -qqq qqq + qqq qqq