geval/src/GEval/LineByLine.hs

351 lines
16 KiB
Haskell
Raw Normal View History

2018-01-09 11:17:11 +01:00
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
module GEval.LineByLine
2018-02-13 10:16:03 +01:00
(runLineByLine,
2018-08-02 12:50:13 +02:00
runWorstFeatures,
2018-05-26 14:40:26 +02:00
runLineByLineGeneralized,
runDiff,
2018-08-06 22:22:33 +02:00
runMostWorseningFeatures,
2018-05-26 21:10:22 +02:00
runDiffGeneralized,
LineRecord(..),
2018-08-17 16:57:47 +02:00
ResultOrdering(..),
justTokenize
2018-01-09 11:17:11 +01:00
) where
import GEval.Core
2019-01-10 22:53:43 +01:00
import GEval.Common
2018-08-17 16:57:47 +02:00
import Text.Tokenizer
2018-01-09 11:17:11 +01:00
import Data.Conduit.AutoDecompress (doNothing)
2018-01-09 11:17:11 +01:00
import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
2018-08-17 16:57:47 +02:00
import qualified Data.Conduit.Text as CT
2018-01-09 11:17:11 +01:00
import Data.Text
import Data.Text.Encoding
2018-08-02 12:50:13 +02:00
import Data.Conduit.Rank
import Data.Maybe (fromMaybe)
2018-01-09 11:17:11 +01:00
2018-08-02 12:50:13 +02:00
import Data.List (sortBy, sort, concat)
2018-01-09 11:17:11 +01:00
import Control.Monad.IO.Class
import Control.Monad.Trans.Resource
2018-08-03 11:16:28 +02:00
import Data.Conduit.Lift
import Control.Monad.State.Strict
2018-01-09 11:17:11 +01:00
2018-08-02 12:50:13 +02:00
import Data.Monoid ((<>))
import GEval.FeatureExtractor
import GEval.BlackBoxDebugging
2018-08-02 12:50:13 +02:00
2018-01-09 11:17:11 +01:00
import Data.Word
import Text.Printf
2018-06-02 20:24:34 +02:00
import Data.Conduit.SmartSource
import System.FilePath
2018-08-02 12:50:13 +02:00
import Statistics.Distribution (cumulative)
import Statistics.Distribution.Normal (normalDistr)
import qualified Data.Map.Strict as M
2019-01-10 22:53:43 +01:00
import qualified Data.Set as S
2018-08-02 12:50:13 +02:00
2018-01-09 11:17:11 +01:00
data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show)
runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
2018-05-26 14:40:26 +02:00
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
2018-05-26 13:09:06 +02:00
consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
2018-01-09 11:17:11 +01:00
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score,
escapeTabs inp,
escapeTabs exp,
escapeTabs out]
formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f"
2018-02-13 10:16:03 +01:00
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (worstFeaturesPipeline False spec bbdo)
2018-08-06 21:34:38 +02:00
where ordering' = forceSomeOrdering ordering
worstFeaturesPipeline :: Bool -> GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT LineRecord Void (ResourceT IO) ()
worstFeaturesPipeline reversed spec bbdo = rank (lessByMetric reversed $ gesMainMetric spec)
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo)
2018-08-07 15:55:04 +02:00
.| gobbleAndDo (sortBy featureOrder)
2019-01-11 08:47:11 +01:00
.| filtreCartesian (bbdoCartesian bbdo)
2018-08-07 15:55:04 +02:00
.| CL.map (encodeUtf8 . formatFeatureWithPValue)
.| CC.unlinesAscii
.| CC.stdout
2018-08-06 21:34:38 +02:00
where formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
2018-08-02 12:50:13 +02:00
formatScore score,
escapeTabs inp,
escapeTabs exp,
escapeTabs out]
formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f"
2018-08-06 12:09:31 +02:00
featureOrder (FeatureWithPValue _ p1 _ _) (FeatureWithPValue _ p2 _ _) =
p1 `compare` p2
2018-08-06 11:59:04 +02:00
-- for commands like --worst-features we need some ordering (KeepTheOriginalOrder
-- does not make sense at all)
forceSomeOrdering :: ResultOrdering -> ResultOrdering
forceSomeOrdering FirstTheBest = FirstTheBest
forceSomeOrdering KeepTheOriginalOrder = FirstTheWorst
2018-08-02 12:50:13 +02:00
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
extractFeaturesAndPValues spec bbdo =
2018-08-03 11:16:28 +02:00
totalCounter
.| featureExtractor spec bbdo
.| uScoresCounter (bbdoMinFrequency bbdo)
2018-08-03 11:16:28 +02:00
2019-01-09 17:45:06 +01:00
data RankedFeature = RankedFeature Feature Double MetricValue
2018-08-02 12:50:13 +02:00
deriving (Show)
2019-01-09 17:45:06 +01:00
data FeatureWithPValue = FeatureWithPValue Feature -- ^ feature itself
2018-08-06 11:59:04 +02:00
Double -- ^ p-value
MetricValue -- ^ average metric value
Integer -- ^ count
2018-08-02 12:50:13 +02:00
deriving (Show)
2018-08-02 22:09:25 +02:00
formatFeatureWithPValue :: FeatureWithPValue -> Text
2018-08-06 11:59:04 +02:00
formatFeatureWithPValue (FeatureWithPValue f p avg c) =
2019-01-09 17:45:06 +01:00
Data.Text.intercalate "\t" [pack $ show f,
2018-08-06 11:59:04 +02:00
(pack $ show c),
(pack $ printf "%0.8f" avg),
(pack $ printf "%0.20f" p)]
2018-08-02 12:50:13 +02:00
featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
2019-01-10 22:53:43 +01:00
featureExtractor spec bbdo = CC.map extract
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
2019-01-10 22:53:43 +01:00
.| CC.map unwrapFeatures
.| CC.concat
where extract (rank, line@(LineRecord _ _ _ _ score)) =
2019-01-10 22:53:43 +01:00
LineWithPeggedFeatures rank score $ getFeatures mTokenizer bbdo line
mTokenizer = gesTokenizer spec
2019-01-10 22:53:43 +01:00
unwrapFeatures (LineWithFeatures rank score fs) = Prelude.map (\f -> RankedFeature f rank score) fs
finalFeatures False _ = CC.map peggedToUnaryLine
finalFeatures True minFreq = do
ls <- CC.sinkList
let unaryFeaturesFrequentEnough = S.fromList
$ Prelude.map (\(f, c) -> f)
$ Prelude.filter (\(f, c) -> c >= minFreq)
$ M.toList
$ M.fromListWith (+)
$ Data.List.concat
$ Prelude.map (\(LineWithPeggedFeatures _ _ fs) -> Prelude.map (\f -> (f, 1)) fs) ls
(CC.yieldMany $ ls) .| CC.map (addCartesian unaryFeaturesFrequentEnough)
where addCartesian wanted (LineWithPeggedFeatures rank score fs) = LineWithFeatures rank score
$ ((Prelude.map UnaryFeature fs) ++
(cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs))
2019-01-11 08:47:11 +01:00
filtreCartesian False = CC.map id
filtreCartesian True = CC.concatMapAccum step S.empty
where step f@(FeatureWithPValue (UnaryFeature p) _ _ _) mp = (S.insert p mp, [f])
step f@(FeatureWithPValue (CartesianFeature pA pB) _ _ _) mp = (mp, if pA `S.member` mp || pB `S.member` mp
then []
else [f])
2019-01-10 22:53:43 +01:00
peggedToUnaryLine :: LineWithPeggedFeatures -> LineWithFeatures
peggedToUnaryLine (LineWithPeggedFeatures rank score fs) = LineWithFeatures rank score (Prelude.map UnaryFeature fs)
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> LineRecord -> [PeggedFeature]
getFeatures mTokenizer bbdo (LineRecord inLine expLine outLine _ _) =
Data.List.concat [
extractFeatures mTokenizer bbdo "exp" expLine,
extractFeaturesFromTabbed mTokenizer bbdo "in" inLine,
extractFeatures mTokenizer bbdo "out" outLine]
uScoresCounter :: Monad m => Integer -> ConduitT RankedFeature FeatureWithPValue (StateT Integer m) ()
uScoresCounter minFreq = CC.map (\(RankedFeature feature r score) -> (feature, (r, score, 1)))
.| gobbleAndDo countUScores
.| lowerFreqFiltre
.| pValueCalculator minFreq
2018-08-02 12:50:13 +02:00
where countUScores l =
M.toList
2018-08-06 11:59:04 +02:00
$ M.fromListWith (\(r1, s1, c1) (r2, s2, c2) -> ((r1 + r2), (s1 + s2), (c1 + c2))) l
lowerFreqFiltre = CC.filter (\(_, (_, _, c)) -> c >= minFreq)
2018-08-03 11:16:28 +02:00
pValueCalculator :: Monad m => Integer -> ConduitT (Feature, (Double, MetricValue, Integer)) FeatureWithPValue (StateT Integer m) ()
pValueCalculator minFreq = do
2018-08-03 11:16:28 +02:00
firstVal <- await
case firstVal of
Just i@(_, (_, _, c)) -> do
2018-08-03 11:16:28 +02:00
total <- lift get
if total - c >= minFreq
then yield $ calculatePValue total i
else return ()
CC.filter (\(_, (_, _, c)) -> total - c >= minFreq) .| CC.map (calculatePValue total)
2018-08-03 11:16:28 +02:00
Nothing -> return ()
2019-01-09 17:45:06 +01:00
calculatePValue :: Integer -> (Feature, (Double, MetricValue, Integer)) -> FeatureWithPValue
2018-08-06 11:59:04 +02:00
calculatePValue total (f, (r, s, c)) = FeatureWithPValue f
(pvalue (r - minusR c) c (total - c))
(s / (fromIntegral c))
c
2018-08-03 11:16:28 +02:00
where minusR c = (c' * (c' + 1)) / 2.0
2018-08-02 12:50:13 +02:00
where c' = fromIntegral c
2018-08-02 22:09:25 +02:00
-- calulating p-value from MannWhitney U test
-- (normal approximation is used)
pvalue u n1 n2 = let n1' = fromIntegral n1
2018-08-02 12:50:13 +02:00
n2' = fromIntegral n2
mean = n1' * n2' / 2
sigma = sqrt $ n1' * n2' * (n1' + n2' + 1) / 12
z = (u - mean) / sigma
in cumulative (normalDistr 0.0 1.0) z
2018-08-03 11:16:28 +02:00
totalCounter :: Monad m => ConduitT a a (StateT Integer m) ()
totalCounter = do
m <- await
case m of
Just x -> do
i <- lift get
lift $ put $ i + 1
yield x
totalCounter
Nothing -> return ()
2018-08-07 15:55:04 +02:00
lessByMetric :: Bool -> Metric -> (LineRecord -> LineRecord -> Bool)
lessByMetric reversed metric = lessByMetric' reversed (getMetricOrdering metric)
where lessByMetric' False TheHigherTheBetter =
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
scoreA < scoreB)
lessByMetric' False TheLowerTheBetter =
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
scoreA > scoreB)
lessByMetric' True TheHigherTheBetter =
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
scoreA > scoreB)
lessByMetric' True TheLowerTheBetter =
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
scoreA < scoreB)
2018-08-02 12:50:13 +02:00
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
runLineByLineGeneralized ordering spec consum = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec
2018-08-13 10:09:55 +02:00
gevalLineByLineCore metric preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
2018-06-08 12:38:45 +02:00
where metric = gesMainMetric spec
2018-08-13 10:09:55 +02:00
preprocess = gesPreprocess spec
sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
gobbleAndDo fun = do
l <- CC.sinkList
CC.yieldMany $ fun l
2018-05-26 14:40:26 +02:00
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
2018-05-26 14:40:26 +02:00
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
2018-05-26 13:09:06 +02:00
consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
2018-02-13 10:16:03 +01:00
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
outA /= outB && scoreA /= scoreB
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
formatScoreDiff (scoreB - scoreA),
escapeTabs inp,
escapeTabs exp,
escapeTabs outA,
escapeTabs outB]
formatScoreDiff :: Double -> Text
formatScoreDiff = Data.Text.pack . printf "%f"
runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized ordering' otherOut spec consum
2018-08-06 22:22:33 +02:00
where ordering' = forceSomeOrdering ordering
2018-08-07 15:55:04 +02:00
reversed = case ordering of
KeepTheOriginalOrder -> False
FirstTheWorst -> False
FirstTheBest -> True
2018-08-06 22:22:33 +02:00
consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = CC.map prepareFakeLineRecord
.| (worstFeaturesPipeline reversed spec bbdo)
2018-08-06 22:22:33 +02:00
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
LineRecord inp exp out c (score - scorePrev)
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
runDiffGeneralized ordering otherOut spec consum = do
(inputSource, expectedSource, outSource) <- checkAndGetFilesSingleOut True spec
2018-06-02 20:24:34 +02:00
ooss <- getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv" otherOut
case ooss of
Left NoSpecGiven -> throwM $ NoOutFile otherOut
Left (NoFile fp) -> throwM $ NoOutFile fp
Left (NoDirectory d) -> throwM $ NoOutFile otherOut
Right otherOutSource -> do
2018-08-13 10:09:55 +02:00
let sourceA = gevalLineByLineSource metric preprocess inputSource expectedSource otherOutSource
let sourceB = gevalLineByLineSource metric preprocess inputSource expectedSource outSource
2018-06-02 20:24:34 +02:00
runResourceT $ runConduit $
((getZipSource $ (,)
<$> ZipSource sourceA
<*> ZipSource sourceB) .| sorter ordering .| consum)
2018-06-08 12:38:45 +02:00
where metric = gesMainMetric spec
2018-08-13 10:09:55 +02:00
preprocess = gesPreprocess spec
2018-05-28 10:04:27 +02:00
sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores
compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
= (n1 - o1) `compare` (n2 - o2)
2018-05-26 14:40:26 +02:00
2018-02-13 10:16:03 +01:00
escapeTabs :: Text -> Text
escapeTabs = Data.Text.replace "\t" "<tab>"
2018-01-09 11:17:11 +01:00
2018-08-13 10:09:55 +02:00
gevalLineByLineCore :: Metric -> (Text -> Text) -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
gevalLineByLineCore metric preprocess inputSource expectedSource outSource consum =
2018-05-26 13:09:06 +02:00
runResourceT $ runConduit $
2018-08-13 10:09:55 +02:00
((gevalLineByLineSource metric preprocess inputSource expectedSource outSource) .| consum)
2018-02-13 08:44:27 +01:00
2018-08-13 10:09:55 +02:00
gevalLineByLineSource :: Metric -> (Text -> Text) -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT () LineRecord (ResourceT IO) ()
gevalLineByLineSource metric preprocess inputSource expectedSource outSource =
2018-02-13 08:44:27 +01:00
(getZipSource $ (,)
2018-01-09 11:17:11 +01:00
<$> ZipSource (CL.sourceList [1..])
2018-05-26 13:09:06 +02:00
<*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
2018-01-09 11:17:11 +01:00
context = (WithInput inputLineSource expectedLineSource outputLineSource)
2018-08-13 10:09:55 +02:00
inputLineSource = fileAsLineSource inputSource id
expectedLineSource = fileAsLineSource expectedSource id
outputLineSource = fileAsLineSource outSource id
2018-01-09 11:17:11 +01:00
justLine (LineInFile _ _ l) = l
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
2018-08-13 10:09:55 +02:00
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (LineInFile inputSource lineNo inp)
(LineInFile expectedSource lineNo exp)
(LineInFile outSource lineNo out)
2018-01-09 11:17:11 +01:00
return $ LineRecord inp exp out lineNo s
2018-08-17 16:57:47 +02:00
justTokenize :: Maybe Tokenizer -> IO ()
justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option"
justTokenize (Just tokenizer) =
runResourceT
$ runConduit
$ CC.stdin
.| CC.decodeUtf8Lenient
.| CT.lines
.| CC.map (tokenizeWithSpaces (Just tokenizer))
.| CC.unlines
.| CC.encodeUtf8
.| CC.stdout