sort in --worst-features

This commit is contained in:
Filip Graliński 2018-08-06 12:09:31 +02:00
parent bc1de4c3e6
commit 7503644bbe
2 changed files with 4 additions and 1 deletions

View File

@ -72,6 +72,7 @@ runWorstFeatures ordering spec = runLineByLineGeneralized ordering' spec consum
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
-- Sink that consumes the stream of line records and prints the feature ranking.
-- Stages, in order: `rank` with `lessByMetric` applied to the main metric of
-- `spec`, then `extractFeaturesAndPValues` run with its state initialised to 0,
-- then a reordering of the features via `sortBy featureOrder`, and finally
-- formatting, UTF-8 encoding, `CC.unlinesAscii` and output to stdout.
consum = (rank (lessByMetric $ gesMainMetric spec)
.| evalStateC 0 extractFeaturesAndPValues
-- NOTE(review): gobbleAndDo presumably buffers the whole stream before applying
-- the sort, since sorting needs every element -- confirm against its definition.
.| gobbleAndDo (sortBy featureOrder)
.| CL.map (encodeUtf8 . formatFeatureWithPValue)
.| CC.unlinesAscii
.| CC.stdout)
@ -83,6 +84,8 @@ runWorstFeatures ordering spec = runLineByLineGeneralized ordering' spec consum
-- Renders a metric value as Text using printf's "%f" format.
formatScore :: MetricValue -> Text
formatScore score = Data.Text.pack (printf "%f" score)
-- Ordering actually handed to runLineByLineGeneralized: the caller-supplied
-- `ordering` after it has been passed through forceSomeOrdering.
ordering' = forceSomeOrdering ordering
-- Comparator given to sortBy: compares two features on the second field of
-- FeatureWithPValue (the p-value), so the smaller value sorts first.
featureOrder (FeatureWithPValue _ pLeft _ _) (FeatureWithPValue _ pRight _ _) = compare pLeft pRight
-- for commands like --worst-features we need some ordering (KeepTheOriginalOrder
-- does not make sense at all)

View File

@ -49,7 +49,7 @@ optionsParser = GEvalOptions
(flag' WorstFeatures
( long "worst-features"
<> short 'w'
<> help "Print a ranking of worst features, i.e. features that worsen the score significantly" ))
<> help "Print a ranking of worst features, i.e. features that worsen the score significantly. Features are sorted using p-value for Mann-Whitney U test comparing the items with a given feature and without it. For each feature the number of occurrences, average score and p-value is given." ))
<|>
(Diff <$> strOption
( long "diff"