worst features show average score now
This commit is contained in:
parent
51abed6fa4
commit
bc1de4c3e6
@ -68,7 +68,7 @@ runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
|
|||||||
formatScore = Data.Text.pack . printf "%f"
|
formatScore = Data.Text.pack . printf "%f"
|
||||||
|
|
||||||
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> IO ()
|
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> IO ()
|
||||||
runWorstFeatures ordering spec = runLineByLineGeneralized ordering spec consum
|
runWorstFeatures ordering spec = runLineByLineGeneralized ordering' spec consum
|
||||||
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
|
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
|
||||||
consum = (rank (lessByMetric $ gesMainMetric spec)
|
consum = (rank (lessByMetric $ gesMainMetric spec)
|
||||||
.| evalStateC 0 extractFeaturesAndPValues
|
.| evalStateC 0 extractFeaturesAndPValues
|
||||||
@ -82,6 +82,13 @@ runWorstFeatures ordering spec = runLineByLineGeneralized ordering spec consum
|
|||||||
escapeTabs out]
|
escapeTabs out]
|
||||||
formatScore :: MetricValue -> Text
|
formatScore :: MetricValue -> Text
|
||||||
formatScore = Data.Text.pack . printf "%f"
|
formatScore = Data.Text.pack . printf "%f"
|
||||||
|
ordering' = forceSomeOrdering ordering
|
||||||
|
|
||||||
|
-- for commands like --worst-features we need some ordering (KeepTheOriginalOrder
|
||||||
|
-- does not make sense at all)
|
||||||
|
forceSomeOrdering :: ResultOrdering -> ResultOrdering
|
||||||
|
forceSomeOrdering FirstTheBest = FirstTheBest
|
||||||
|
forceSomeOrdering KeepTheOriginalOrder = FirstTheWorst
|
||||||
|
|
||||||
extractFeaturesAndPValues :: Monad m => ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
|
extractFeaturesAndPValues :: Monad m => ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
|
||||||
extractFeaturesAndPValues =
|
extractFeaturesAndPValues =
|
||||||
@ -90,34 +97,40 @@ extractFeaturesAndPValues =
|
|||||||
.| uScoresCounter
|
.| uScoresCounter
|
||||||
|
|
||||||
|
|
||||||
data RankedFeature = RankedFeature Text Double
|
data RankedFeature = RankedFeature Text Double MetricValue
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
data FeatureWithPValue = FeatureWithPValue Text Double Integer
|
data FeatureWithPValue = FeatureWithPValue Text -- ^ feature itself
|
||||||
|
Double -- ^ p-value
|
||||||
|
MetricValue -- ^ average metric value
|
||||||
|
Integer -- ^ count
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
formatFeatureWithPValue :: FeatureWithPValue -> Text
|
formatFeatureWithPValue :: FeatureWithPValue -> Text
|
||||||
formatFeatureWithPValue (FeatureWithPValue f p c) =
|
formatFeatureWithPValue (FeatureWithPValue f p avg c) =
|
||||||
f <> " " <> (pack $ show c) <> " " <> (pack $ printf "%0.20f" p)
|
Data.Text.intercalate "\t" [f,
|
||||||
|
(pack $ show c),
|
||||||
|
(pack $ printf "%0.8f" avg),
|
||||||
|
(pack $ printf "%0.20f" p)]
|
||||||
|
|
||||||
featureExtractor :: Monad m => ConduitT (Double, LineRecord) RankedFeature m ()
|
featureExtractor :: Monad m => ConduitT (Double, LineRecord) RankedFeature m ()
|
||||||
featureExtractor = CC.map extract .| CC.concat
|
featureExtractor = CC.map extract .| CC.concat
|
||||||
where extract (rank, LineRecord inLine expLine outLine _ _) =
|
where extract (rank, LineRecord inLine expLine outLine _ score) =
|
||||||
Prelude.map (\f -> RankedFeature f rank)
|
Prelude.map (\f -> RankedFeature f rank score)
|
||||||
$ Data.List.concat [
|
$ Data.List.concat [
|
||||||
extractUnigramFeatures "exp" expLine,
|
extractUnigramFeatures "exp" expLine,
|
||||||
extractUnigramFeaturesFromTabbed "in" inLine,
|
extractUnigramFeaturesFromTabbed "in" inLine,
|
||||||
extractUnigramFeatures "out" outLine]
|
extractUnigramFeatures "out" outLine]
|
||||||
|
|
||||||
uScoresCounter :: Monad m => ConduitT RankedFeature FeatureWithPValue (StateT Integer m) ()
|
uScoresCounter :: Monad m => ConduitT RankedFeature FeatureWithPValue (StateT Integer m) ()
|
||||||
uScoresCounter = CC.map (\(RankedFeature feature r) -> (feature, (r, 1)))
|
uScoresCounter = CC.map (\(RankedFeature feature r score) -> (feature, (r, score, 1)))
|
||||||
.| gobbleAndDo countUScores
|
.| gobbleAndDo countUScores
|
||||||
.| pValueCalculator
|
.| pValueCalculator
|
||||||
where countUScores l =
|
where countUScores l =
|
||||||
M.toList
|
M.toList
|
||||||
$ M.fromListWith (\(r1, c1) (r2, c2) -> ((r1 + r2), (c1 + c2))) l
|
$ M.fromListWith (\(r1, s1, c1) (r2, s2, c2) -> ((r1 + r2), (s1 + s2), (c1 + c2))) l
|
||||||
|
|
||||||
pValueCalculator :: Monad m => ConduitT (Text, (Double, Integer)) FeatureWithPValue (StateT Integer m) ()
|
pValueCalculator :: Monad m => ConduitT (Text, (Double, MetricValue, Integer)) FeatureWithPValue (StateT Integer m) ()
|
||||||
pValueCalculator = do
|
pValueCalculator = do
|
||||||
firstVal <- await
|
firstVal <- await
|
||||||
case firstVal of
|
case firstVal of
|
||||||
@ -127,8 +140,11 @@ pValueCalculator = do
|
|||||||
CC.map $ calculatePValue total
|
CC.map $ calculatePValue total
|
||||||
Nothing -> return ()
|
Nothing -> return ()
|
||||||
|
|
||||||
calculatePValue :: Integer -> (Text, (Double, Integer)) -> FeatureWithPValue
|
calculatePValue :: Integer -> (Text, (Double, MetricValue, Integer)) -> FeatureWithPValue
|
||||||
calculatePValue total (f, (r, c)) = FeatureWithPValue f (pvalue (r - minusR c) c (total - c)) c
|
calculatePValue total (f, (r, s, c)) = FeatureWithPValue f
|
||||||
|
(pvalue (r - minusR c) c (total - c))
|
||||||
|
(s / (fromIntegral c))
|
||||||
|
c
|
||||||
where minusR c = (c' * (c' + 1)) / 2.0
|
where minusR c = (c' * (c' + 1)) / 2.0
|
||||||
where c' = fromIntegral c
|
where c' = fromIntegral c
|
||||||
-- calulating p-value from Mann–Whitney U test
|
-- calulating p-value from Mann–Whitney U test
|
||||||
|
Loading…
Reference in New Issue
Block a user