From 7503644bbe3af973efa9c3d3364c97ff99029aac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Grali=C5=84ski?= Date: Mon, 6 Aug 2018 12:09:31 +0200 Subject: [PATCH] sort in --worst-features --- src/GEval/LineByLine.hs | 3 +++ src/GEval/OptionsParser.hs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index ae86f42..a9db949 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -72,6 +72,7 @@ runWorstFeatures ordering spec = runLineByLineGeneralized ordering' spec consum where consum :: ConduitT LineRecord Void (ResourceT IO) () consum = (rank (lessByMetric $ gesMainMetric spec) .| evalStateC 0 extractFeaturesAndPValues + .| gobbleAndDo (sortBy featureOrder) .| CL.map (encodeUtf8 . formatFeatureWithPValue) .| CC.unlinesAscii .| CC.stdout) @@ -83,6 +84,8 @@ runWorstFeatures ordering spec = runLineByLineGeneralized ordering' spec consum formatScore :: MetricValue -> Text formatScore = Data.Text.pack . printf "%f" ordering' = forceSomeOrdering ordering + featureOrder (FeatureWithPValue _ p1 _ _) (FeatureWithPValue _ p2 _ _) = + p1 `compare` p2 -- for commands like --worst-features we need some ordering (KeepTheOriginalOrder -- does not make sense at all) diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 2ec66c0..d55335f 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -49,7 +49,7 @@ optionsParser = GEvalOptions (flag' WorstFeatures ( long "worst-features" <> short 'w' - <> help "Print a ranking of worst features, i.e. features that worsen the score significantly" )) + <> help "Print a ranking of worst features, i.e. features that worsen the score significantly. Features are sorted using p-value for Mann-Whitney U test comparing the items with a given feature and without it. For each feature the number of occurrences, average score and p-value is given." )) <|> (Diff <$> strOption ( long "diff"