clean up listing worst features
This commit is contained in:
parent
020b93ccf8
commit
8dac79fab2
@ -71,7 +71,7 @@ runWorstFeatures ordering spec = runLineByLineGeneralized ordering spec consum
|
|||||||
consum = (rank (lessByMetric $ gesMainMetric spec)
|
consum = (rank (lessByMetric $ gesMainMetric spec)
|
||||||
.| featureExtractor
|
.| featureExtractor
|
||||||
.| uScoresCounter
|
.| uScoresCounter
|
||||||
.| CL.map (encodeUtf8 . formatFeatureWithZScore)
|
.| CL.map (encodeUtf8 . formatFeatureWithPValue)
|
||||||
.| CC.unlinesAscii
|
.| CC.unlinesAscii
|
||||||
.| CC.stdout)
|
.| CC.stdout)
|
||||||
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
|
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
|
||||||
@ -85,12 +85,12 @@ runWorstFeatures ordering spec = runLineByLineGeneralized ordering spec consum
|
|||||||
data RankedFeature = RankedFeature Text Double
|
data RankedFeature = RankedFeature Text Double
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
data FeatureWithZScore = FeatureWithZScore Text Double Int
|
data FeatureWithPValue = FeatureWithPValue Text Double Int
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
formatFeatureWithZScore :: FeatureWithZScore -> Text
|
formatFeatureWithPValue :: FeatureWithPValue -> Text
|
||||||
formatFeatureWithZScore (FeatureWithZScore f z c) =
|
formatFeatureWithPValue (FeatureWithPValue f p c) =
|
||||||
f <> " " <> (pack $ show c) <> " " <> (pack $ printf "%0.20f" z)
|
f <> " " <> (pack $ show c) <> " " <> (pack $ printf "%0.20f" p)
|
||||||
|
|
||||||
featureExtractor :: Monad m => ConduitT (Double, LineRecord) RankedFeature m ()
|
featureExtractor :: Monad m => ConduitT (Double, LineRecord) RankedFeature m ()
|
||||||
featureExtractor = CC.map extract .| CC.concat
|
featureExtractor = CC.map extract .| CC.concat
|
||||||
@ -101,16 +101,19 @@ featureExtractor = CC.map extract .| CC.concat
|
|||||||
extractUnigramFeaturesFromTabbed "in" inLine,
|
extractUnigramFeaturesFromTabbed "in" inLine,
|
||||||
extractUnigramFeatures "out" outLine]
|
extractUnigramFeatures "out" outLine]
|
||||||
|
|
||||||
uScoresCounter :: Monad m => ConduitT RankedFeature FeatureWithZScore m ()
|
uScoresCounter :: Monad m => ConduitT RankedFeature FeatureWithPValue m ()
|
||||||
uScoresCounter = CC.map (\(RankedFeature feature r) -> (feature, (r, 1)))
|
uScoresCounter = CC.map (\(RankedFeature feature r) -> (feature, (r, 1)))
|
||||||
.| gobbleAndDo countUScores
|
.| gobbleDoAndContinueWithCount
|
||||||
.| CC.map (\(f, (r, c)) -> FeatureWithZScore f (zscore (r - minusR c) c (2942 - c)) c)
|
countUScores
|
||||||
|
(\total -> CC.map (\(f, (r, c)) -> FeatureWithPValue f (pvalue (r - minusR c) c (total - c)) c))
|
||||||
where countUScores l =
|
where countUScores l =
|
||||||
M.toList
|
M.toList
|
||||||
$ M.fromListWith (\(r1, c1) (r2, c2) -> ((r1 + r2), (c1 + c2))) l
|
$ M.fromListWith (\(r1, c1) (r2, c2) -> ((r1 + r2), (c1 + c2))) l
|
||||||
minusR c = (c' * (c' + 1)) / 2.0
|
minusR c = (c' * (c' + 1)) / 2.0
|
||||||
where c' = fromIntegral c
|
where c' = fromIntegral c
|
||||||
zscore u n1 n2 = let n1' = fromIntegral n1
|
-- calulating p-value from Mann–Whitney U test
|
||||||
|
-- (normal approximation is used)
|
||||||
|
pvalue u n1 n2 = let n1' = fromIntegral n1
|
||||||
n2' = fromIntegral n2
|
n2' = fromIntegral n2
|
||||||
mean = n1' * n2' / 2
|
mean = n1' * n2' / 2
|
||||||
sigma = sqrt $ n1' * n2' * (n1' + n2' + 1) / 12
|
sigma = sqrt $ n1' * n2' * (n1' + n2' + 1) / 12
|
||||||
@ -141,6 +144,13 @@ gobbleAndDo fun = do
|
|||||||
l <- CC.sinkList
|
l <- CC.sinkList
|
||||||
CC.yieldMany $ fun l
|
CC.yieldMany $ fun l
|
||||||
|
|
||||||
|
gobbleDoAndContinueWithCount :: Monad m => ([a] -> [b]) -> (Int -> ConduitT b c m ()) -> ConduitT a c m ()
|
||||||
|
gobbleDoAndContinueWithCount fun continuation = do
|
||||||
|
l <- CC.sinkList
|
||||||
|
let lc = Prelude.length l
|
||||||
|
(CC.yieldMany $ fun l) .| (continuation lc)
|
||||||
|
|
||||||
|
|
||||||
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
|
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
|
||||||
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
|
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
|
||||||
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
||||||
|
Loading…
Reference in New Issue
Block a user