geval/src/GEval/LineByLine.hs

351 lines
16 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
module GEval.LineByLine
(runLineByLine,
runWorstFeatures,
runLineByLineGeneralized,
runDiff,
runMostWorseningFeatures,
runDiffGeneralized,
LineRecord(..),
ResultOrdering(..),
justTokenize
) where
import GEval.Core
import GEval.Common
import Text.Tokenizer
import Data.Conduit.AutoDecompress (doNothing)
import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
import qualified Data.Conduit.Text as CT
import Data.Text
import Data.Text.Encoding
import Data.Conduit.Rank
import Data.Maybe (fromMaybe)
import Data.List (sortBy, sort, concat)
import Control.Monad.IO.Class
import Control.Monad.Trans.Resource
import Data.Conduit.Lift
import Control.Monad.State.Strict
import Data.Monoid ((<>))
import GEval.FeatureExtractor
import GEval.BlackBoxDebugging
import Data.Word
import Text.Printf
import Data.Conduit.SmartSource
import System.FilePath
import Statistics.Distribution (cumulative)
import Statistics.Distribution.Normal (normalDistr)
import qualified Data.Map.Strict as M
import qualified Data.Set as S
data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show)
runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score,
escapeTabs inp,
escapeTabs exp,
escapeTabs out]
formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f"
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (worstFeaturesPipeline False spec bbdo)
where ordering' = forceSomeOrdering ordering
worstFeaturesPipeline :: Bool -> GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT LineRecord Void (ResourceT IO) ()
worstFeaturesPipeline reversed spec bbdo = rank (lessByMetric reversed $ gesMainMetric spec)
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo)
.| gobbleAndDo (sortBy featureOrder)
.| filtreCartesian (bbdoCartesian bbdo)
.| CL.map (encodeUtf8 . formatFeatureWithPValue)
.| CC.unlinesAscii
.| CC.stdout
where formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score,
escapeTabs inp,
escapeTabs exp,
escapeTabs out]
formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f"
featureOrder (FeatureWithPValue _ p1 _ _) (FeatureWithPValue _ p2 _ _) =
p1 `compare` p2
-- for commands like --worst-features we need some ordering (KeepTheOriginalOrder
-- does not make sense at all)
forceSomeOrdering :: ResultOrdering -> ResultOrdering
forceSomeOrdering FirstTheBest = FirstTheBest
forceSomeOrdering KeepTheOriginalOrder = FirstTheWorst
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
extractFeaturesAndPValues spec bbdo =
totalCounter
.| featureExtractor spec bbdo
.| uScoresCounter (bbdoMinFrequency bbdo)
data RankedFeature = RankedFeature Feature Double MetricValue
deriving (Show)
data FeatureWithPValue = FeatureWithPValue Feature -- ^ feature itself
Double -- ^ p-value
MetricValue -- ^ average metric value
Integer -- ^ count
deriving (Show)
formatFeatureWithPValue :: FeatureWithPValue -> Text
formatFeatureWithPValue (FeatureWithPValue f p avg c) =
Data.Text.intercalate "\t" [pack $ show f,
(pack $ show c),
(pack $ printf "%0.8f" avg),
(pack $ printf "%0.20f" p)]
featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
featureExtractor spec bbdo = CC.map extract
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
.| CC.map unwrapFeatures
.| CC.concat
where extract (rank, line@(LineRecord _ _ _ _ score)) =
LineWithPeggedFeatures rank score $ getFeatures mTokenizer bbdo line
mTokenizer = gesTokenizer spec
unwrapFeatures (LineWithFeatures rank score fs) = Prelude.map (\f -> RankedFeature f rank score) fs
finalFeatures False _ = CC.map peggedToUnaryLine
finalFeatures True minFreq = do
ls <- CC.sinkList
let unaryFeaturesFrequentEnough = S.fromList
$ Prelude.map (\(f, c) -> f)
$ Prelude.filter (\(f, c) -> c >= minFreq)
$ M.toList
$ M.fromListWith (+)
$ Data.List.concat
$ Prelude.map (\(LineWithPeggedFeatures _ _ fs) -> Prelude.map (\f -> (f, 1)) fs) ls
(CC.yieldMany $ ls) .| CC.map (addCartesian unaryFeaturesFrequentEnough)
where addCartesian wanted (LineWithPeggedFeatures rank score fs) = LineWithFeatures rank score
$ ((Prelude.map UnaryFeature fs) ++
(cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs))
filtreCartesian False = CC.map id
filtreCartesian True = CC.concatMapAccum step S.empty
where step f@(FeatureWithPValue (UnaryFeature p) _ _ _) mp = (S.insert p mp, [f])
step f@(FeatureWithPValue (CartesianFeature pA pB) _ _ _) mp = (mp, if pA `S.member` mp || pB `S.member` mp
then []
else [f])
peggedToUnaryLine :: LineWithPeggedFeatures -> LineWithFeatures
peggedToUnaryLine (LineWithPeggedFeatures rank score fs) = LineWithFeatures rank score (Prelude.map UnaryFeature fs)
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> LineRecord -> [PeggedFeature]
getFeatures mTokenizer bbdo (LineRecord inLine expLine outLine _ _) =
Data.List.concat [
extractFeatures mTokenizer bbdo "exp" expLine,
extractFeaturesFromTabbed mTokenizer bbdo "in" inLine,
extractFeatures mTokenizer bbdo "out" outLine]
uScoresCounter :: Monad m => Integer -> ConduitT RankedFeature FeatureWithPValue (StateT Integer m) ()
uScoresCounter minFreq = CC.map (\(RankedFeature feature r score) -> (feature, (r, score, 1)))
.| gobbleAndDo countUScores
.| lowerFreqFiltre
.| pValueCalculator minFreq
where countUScores l =
M.toList
$ M.fromListWith (\(r1, s1, c1) (r2, s2, c2) -> ((r1 + r2), (s1 + s2), (c1 + c2))) l
lowerFreqFiltre = CC.filter (\(_, (_, _, c)) -> c >= minFreq)
pValueCalculator :: Monad m => Integer -> ConduitT (Feature, (Double, MetricValue, Integer)) FeatureWithPValue (StateT Integer m) ()
pValueCalculator minFreq = do
firstVal <- await
case firstVal of
Just i@(_, (_, _, c)) -> do
total <- lift get
if total - c >= minFreq
then yield $ calculatePValue total i
else return ()
CC.filter (\(_, (_, _, c)) -> total - c >= minFreq) .| CC.map (calculatePValue total)
Nothing -> return ()
calculatePValue :: Integer -> (Feature, (Double, MetricValue, Integer)) -> FeatureWithPValue
calculatePValue total (f, (r, s, c)) = FeatureWithPValue f
(pvalue (r - minusR c) c (total - c))
(s / (fromIntegral c))
c
where minusR c = (c' * (c' + 1)) / 2.0
where c' = fromIntegral c
-- calulating p-value from MannWhitney U test
-- (normal approximation is used)
pvalue u n1 n2 = let n1' = fromIntegral n1
n2' = fromIntegral n2
mean = n1' * n2' / 2
sigma = sqrt $ n1' * n2' * (n1' + n2' + 1) / 12
z = (u - mean) / sigma
in cumulative (normalDistr 0.0 1.0) z
totalCounter :: Monad m => ConduitT a a (StateT Integer m) ()
totalCounter = do
m <- await
case m of
Just x -> do
i <- lift get
lift $ put $ i + 1
yield x
totalCounter
Nothing -> return ()
lessByMetric :: Bool -> Metric -> (LineRecord -> LineRecord -> Bool)
lessByMetric reversed metric = lessByMetric' reversed (getMetricOrdering metric)
where lessByMetric' False TheHigherTheBetter =
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
scoreA < scoreB)
lessByMetric' False TheLowerTheBetter =
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
scoreA > scoreB)
lessByMetric' True TheHigherTheBetter =
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
scoreA > scoreB)
lessByMetric' True TheLowerTheBetter =
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
scoreA < scoreB)
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
runLineByLineGeneralized ordering spec consum = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec
gevalLineByLineCore metric preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
where metric = gesMainMetric spec
preprocess = gesPreprocess spec
sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
gobbleAndDo fun = do
l <- CC.sinkList
CC.yieldMany $ fun l
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
outA /= outB && scoreA /= scoreB
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
formatScoreDiff (scoreB - scoreA),
escapeTabs inp,
escapeTabs exp,
escapeTabs outA,
escapeTabs outB]
formatScoreDiff :: Double -> Text
formatScoreDiff = Data.Text.pack . printf "%f"
runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized ordering' otherOut spec consum
where ordering' = forceSomeOrdering ordering
reversed = case ordering of
KeepTheOriginalOrder -> False
FirstTheWorst -> False
FirstTheBest -> True
consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = CC.map prepareFakeLineRecord
.| (worstFeaturesPipeline reversed spec bbdo)
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
LineRecord inp exp out c (score - scorePrev)
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
runDiffGeneralized ordering otherOut spec consum = do
(inputSource, expectedSource, outSource) <- checkAndGetFilesSingleOut True spec
ooss <- getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv" otherOut
case ooss of
Left NoSpecGiven -> throwM $ NoOutFile otherOut
Left (NoFile fp) -> throwM $ NoOutFile fp
Left (NoDirectory d) -> throwM $ NoOutFile otherOut
Right otherOutSource -> do
let sourceA = gevalLineByLineSource metric preprocess inputSource expectedSource otherOutSource
let sourceB = gevalLineByLineSource metric preprocess inputSource expectedSource outSource
runResourceT $ runConduit $
((getZipSource $ (,)
<$> ZipSource sourceA
<*> ZipSource sourceB) .| sorter ordering .| consum)
where metric = gesMainMetric spec
preprocess = gesPreprocess spec
sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores
compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
= (n1 - o1) `compare` (n2 - o2)
escapeTabs :: Text -> Text
escapeTabs = Data.Text.replace "\t" "<tab>"
gevalLineByLineCore :: Metric -> (Text -> Text) -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
gevalLineByLineCore metric preprocess inputSource expectedSource outSource consum =
runResourceT $ runConduit $
((gevalLineByLineSource metric preprocess inputSource expectedSource outSource) .| consum)
gevalLineByLineSource :: Metric -> (Text -> Text) -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT () LineRecord (ResourceT IO) ()
gevalLineByLineSource metric preprocess inputSource expectedSource outSource =
(getZipSource $ (,)
<$> ZipSource (CL.sourceList [1..])
<*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
context = (WithInput inputLineSource expectedLineSource outputLineSource)
inputLineSource = fileAsLineSource inputSource id
expectedLineSource = fileAsLineSource expectedSource id
outputLineSource = fileAsLineSource outSource id
justLine (LineInFile _ _ l) = l
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (LineInFile inputSource lineNo inp)
(LineInFile expectedSource lineNo exp)
(LineInFile outSource lineNo out)
return $ LineRecord inp exp out lineNo s
justTokenize :: Maybe Tokenizer -> IO ()
justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option"
justTokenize (Just tokenizer) =
runResourceT
$ runConduit
$ CC.stdin
.| CC.decodeUtf8Lenient
.| CT.lines
.| CC.map (tokenizeWithSpaces (Just tokenizer))
.| CC.unlines
.| CC.encodeUtf8
.| CC.stdout