2018-01-09 11:17:11 +01:00
|
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
{-# LANGUAGE TypeFamilies #-}
|
|
|
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
|
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
|
|
|
{-# LANGUAGE AllowAmbiguousTypes #-}
|
|
|
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
|
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
module GEval.LineByLine
|
2018-02-13 10:16:03 +01:00
|
|
|
|
(runLineByLine,
|
2018-08-02 12:50:13 +02:00
|
|
|
|
runWorstFeatures,
|
2018-05-26 14:40:26 +02:00
|
|
|
|
runLineByLineGeneralized,
|
|
|
|
|
runDiff,
|
2018-08-06 22:22:33 +02:00
|
|
|
|
runMostWorseningFeatures,
|
2018-05-26 21:10:22 +02:00
|
|
|
|
runDiffGeneralized,
|
2018-05-28 09:45:08 +02:00
|
|
|
|
LineRecord(..),
|
2018-08-17 16:57:47 +02:00
|
|
|
|
ResultOrdering(..),
|
|
|
|
|
justTokenize
|
2018-01-09 11:17:11 +01:00
|
|
|
|
) where
|
|
|
|
|
|
|
|
|
|
import GEval.Core
|
2019-01-10 22:53:43 +01:00
|
|
|
|
import GEval.Common
|
2018-08-17 16:57:47 +02:00
|
|
|
|
import Text.Tokenizer
|
2018-01-09 11:17:11 +01:00
|
|
|
|
|
2018-05-28 09:45:08 +02:00
|
|
|
|
import Data.Conduit.AutoDecompress (doNothing)
|
|
|
|
|
|
2018-01-09 11:17:11 +01:00
|
|
|
|
import Data.Conduit
|
|
|
|
|
import qualified Data.Conduit.List as CL
|
|
|
|
|
import qualified Data.Conduit.Combinators as CC
|
2018-08-17 16:57:47 +02:00
|
|
|
|
import qualified Data.Conduit.Text as CT
|
2018-01-09 11:17:11 +01:00
|
|
|
|
import Data.Text
|
|
|
|
|
import Data.Text.Encoding
|
2018-08-02 12:50:13 +02:00
|
|
|
|
import Data.Conduit.Rank
|
2019-01-11 10:16:39 +01:00
|
|
|
|
import Data.Maybe (fromMaybe)
|
2018-01-09 11:17:11 +01:00
|
|
|
|
|
2018-08-02 12:50:13 +02:00
|
|
|
|
import Data.List (sortBy, sort, concat)
|
2018-05-28 09:45:08 +02:00
|
|
|
|
|
2018-01-09 11:17:11 +01:00
|
|
|
|
import Control.Monad.IO.Class
|
|
|
|
|
import Control.Monad.Trans.Resource
|
2018-08-03 11:16:28 +02:00
|
|
|
|
import Data.Conduit.Lift
|
|
|
|
|
import Control.Monad.State.Strict
|
2018-01-09 11:17:11 +01:00
|
|
|
|
|
2018-08-02 12:50:13 +02:00
|
|
|
|
import Data.Monoid ((<>))
|
|
|
|
|
|
|
|
|
|
import GEval.FeatureExtractor
|
2019-01-10 08:15:34 +01:00
|
|
|
|
import GEval.BlackBoxDebugging
|
2018-08-02 12:50:13 +02:00
|
|
|
|
|
2018-01-09 11:17:11 +01:00
|
|
|
|
import Data.Word
|
|
|
|
|
|
|
|
|
|
import Text.Printf
|
|
|
|
|
|
2018-06-02 20:24:34 +02:00
|
|
|
|
import Data.Conduit.SmartSource
|
|
|
|
|
|
|
|
|
|
import System.FilePath
|
|
|
|
|
|
2018-08-02 12:50:13 +02:00
|
|
|
|
import Statistics.Distribution (cumulative)
|
|
|
|
|
import Statistics.Distribution.Normal (normalDistr)
|
|
|
|
|
|
|
|
|
|
import qualified Data.Map.Strict as M
|
2019-01-10 22:53:43 +01:00
|
|
|
|
import qualified Data.Set as S
|
2018-08-02 12:50:13 +02:00
|
|
|
|
|
2018-01-09 11:17:11 +01:00
|
|
|
|
data LineRecord = LineRecord Text Text Text Word32 MetricValue
|
|
|
|
|
deriving (Eq, Show)
|
|
|
|
|
|
2018-05-28 09:45:08 +02:00
|
|
|
|
runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
|
|
|
|
|
runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
|
2018-05-26 14:40:26 +02:00
|
|
|
|
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
|
2018-05-26 13:09:06 +02:00
|
|
|
|
consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
|
2018-01-09 11:17:11 +01:00
|
|
|
|
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
|
|
|
|
|
formatScore score,
|
|
|
|
|
escapeTabs inp,
|
|
|
|
|
escapeTabs exp,
|
|
|
|
|
escapeTabs out]
|
|
|
|
|
formatScore :: MetricValue -> Text
|
|
|
|
|
formatScore = Data.Text.pack . printf "%f"
|
2018-02-13 10:16:03 +01:00
|
|
|
|
|
2019-01-10 08:15:34 +01:00
|
|
|
|
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
|
|
|
|
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (worstFeaturesPipeline False spec bbdo)
|
2018-08-06 21:34:38 +02:00
|
|
|
|
where ordering' = forceSomeOrdering ordering
|
|
|
|
|
|
|
|
|
|
|
2019-01-10 08:15:34 +01:00
|
|
|
|
|
|
|
|
|
worstFeaturesPipeline :: Bool -> GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT LineRecord Void (ResourceT IO) ()
|
|
|
|
|
worstFeaturesPipeline reversed spec bbdo = rank (lessByMetric reversed $ gesMainMetric spec)
|
|
|
|
|
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo)
|
2018-08-07 15:55:04 +02:00
|
|
|
|
.| gobbleAndDo (sortBy featureOrder)
|
2019-01-11 08:47:11 +01:00
|
|
|
|
.| filtreCartesian (bbdoCartesian bbdo)
|
2018-08-07 15:55:04 +02:00
|
|
|
|
.| CL.map (encodeUtf8 . formatFeatureWithPValue)
|
|
|
|
|
.| CC.unlinesAscii
|
|
|
|
|
.| CC.stdout
|
2018-08-06 21:34:38 +02:00
|
|
|
|
where formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
|
2018-08-02 12:50:13 +02:00
|
|
|
|
formatScore score,
|
|
|
|
|
escapeTabs inp,
|
|
|
|
|
escapeTabs exp,
|
|
|
|
|
escapeTabs out]
|
|
|
|
|
formatScore :: MetricValue -> Text
|
|
|
|
|
formatScore = Data.Text.pack . printf "%f"
|
2018-08-06 12:09:31 +02:00
|
|
|
|
featureOrder (FeatureWithPValue _ p1 _ _) (FeatureWithPValue _ p2 _ _) =
|
|
|
|
|
p1 `compare` p2
|
2018-08-06 11:59:04 +02:00
|
|
|
|
|
|
|
|
|
-- for commands like --worst-features we need some ordering (KeepTheOriginalOrder
|
|
|
|
|
-- does not make sense at all)
|
|
|
|
|
forceSomeOrdering :: ResultOrdering -> ResultOrdering
|
|
|
|
|
forceSomeOrdering FirstTheBest = FirstTheBest
|
|
|
|
|
forceSomeOrdering KeepTheOriginalOrder = FirstTheWorst
|
2018-08-02 12:50:13 +02:00
|
|
|
|
|
2019-01-10 08:15:34 +01:00
|
|
|
|
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
|
|
|
|
|
extractFeaturesAndPValues spec bbdo =
|
2018-08-03 11:16:28 +02:00
|
|
|
|
totalCounter
|
2019-01-10 09:58:04 +01:00
|
|
|
|
.| featureExtractor spec bbdo
|
2019-01-10 08:15:34 +01:00
|
|
|
|
.| uScoresCounter (bbdoMinFrequency bbdo)
|
2018-08-03 11:16:28 +02:00
|
|
|
|
|
|
|
|
|
|
2019-01-09 17:45:06 +01:00
|
|
|
|
data RankedFeature = RankedFeature Feature Double MetricValue
|
2018-08-02 12:50:13 +02:00
|
|
|
|
deriving (Show)
|
|
|
|
|
|
2019-01-09 17:45:06 +01:00
|
|
|
|
data FeatureWithPValue = FeatureWithPValue Feature -- ^ feature itself
|
2018-08-06 11:59:04 +02:00
|
|
|
|
Double -- ^ p-value
|
|
|
|
|
MetricValue -- ^ average metric value
|
|
|
|
|
Integer -- ^ count
|
2018-08-02 12:50:13 +02:00
|
|
|
|
deriving (Show)
|
|
|
|
|
|
2018-08-02 22:09:25 +02:00
|
|
|
|
formatFeatureWithPValue :: FeatureWithPValue -> Text
|
2018-08-06 11:59:04 +02:00
|
|
|
|
formatFeatureWithPValue (FeatureWithPValue f p avg c) =
|
2019-01-09 17:45:06 +01:00
|
|
|
|
Data.Text.intercalate "\t" [pack $ show f,
|
2018-08-06 11:59:04 +02:00
|
|
|
|
(pack $ show c),
|
|
|
|
|
(pack $ printf "%0.8f" avg),
|
|
|
|
|
(pack $ printf "%0.20f" p)]
|
2018-08-02 12:50:13 +02:00
|
|
|
|
|
2019-01-10 09:58:04 +01:00
|
|
|
|
featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
|
2019-01-10 22:53:43 +01:00
|
|
|
|
featureExtractor spec bbdo = CC.map extract
|
2019-01-11 10:16:39 +01:00
|
|
|
|
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
|
2019-01-10 22:53:43 +01:00
|
|
|
|
.| CC.map unwrapFeatures
|
|
|
|
|
.| CC.concat
|
2019-01-10 14:01:29 +01:00
|
|
|
|
where extract (rank, line@(LineRecord _ _ _ _ score)) =
|
2019-01-10 22:53:43 +01:00
|
|
|
|
LineWithPeggedFeatures rank score $ getFeatures mTokenizer bbdo line
|
2018-08-17 17:27:25 +02:00
|
|
|
|
mTokenizer = gesTokenizer spec
|
2019-01-10 22:53:43 +01:00
|
|
|
|
unwrapFeatures (LineWithFeatures rank score fs) = Prelude.map (\f -> RankedFeature f rank score) fs
|
|
|
|
|
|
|
|
|
|
finalFeatures False _ = CC.map peggedToUnaryLine
|
|
|
|
|
finalFeatures True minFreq = do
|
|
|
|
|
ls <- CC.sinkList
|
|
|
|
|
let unaryFeaturesFrequentEnough = S.fromList
|
|
|
|
|
$ Prelude.map (\(f, c) -> f)
|
|
|
|
|
$ Prelude.filter (\(f, c) -> c >= minFreq)
|
|
|
|
|
$ M.toList
|
|
|
|
|
$ M.fromListWith (+)
|
|
|
|
|
$ Data.List.concat
|
|
|
|
|
$ Prelude.map (\(LineWithPeggedFeatures _ _ fs) -> Prelude.map (\f -> (f, 1)) fs) ls
|
|
|
|
|
|
|
|
|
|
(CC.yieldMany $ ls) .| CC.map (addCartesian unaryFeaturesFrequentEnough)
|
|
|
|
|
where addCartesian wanted (LineWithPeggedFeatures rank score fs) = LineWithFeatures rank score
|
|
|
|
|
$ ((Prelude.map UnaryFeature fs) ++
|
|
|
|
|
(cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs))
|
|
|
|
|
|
2019-01-11 08:47:11 +01:00
|
|
|
|
filtreCartesian False = CC.map id
|
|
|
|
|
filtreCartesian True = CC.concatMapAccum step S.empty
|
|
|
|
|
where step f@(FeatureWithPValue (UnaryFeature p) _ _ _) mp = (S.insert p mp, [f])
|
|
|
|
|
step f@(FeatureWithPValue (CartesianFeature pA pB) _ _ _) mp = (mp, if pA `S.member` mp || pB `S.member` mp
|
|
|
|
|
then []
|
|
|
|
|
else [f])
|
|
|
|
|
|
2019-01-10 22:53:43 +01:00
|
|
|
|
peggedToUnaryLine :: LineWithPeggedFeatures -> LineWithFeatures
|
|
|
|
|
peggedToUnaryLine (LineWithPeggedFeatures rank score fs) = LineWithFeatures rank score (Prelude.map UnaryFeature fs)
|
|
|
|
|
|
|
|
|
|
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> LineRecord -> [PeggedFeature]
|
|
|
|
|
getFeatures mTokenizer bbdo (LineRecord inLine expLine outLine _ _) =
|
|
|
|
|
Data.List.concat [
|
|
|
|
|
extractFeatures mTokenizer bbdo "exp" expLine,
|
|
|
|
|
extractFeaturesFromTabbed mTokenizer bbdo "in" inLine,
|
|
|
|
|
extractFeatures mTokenizer bbdo "out" outLine]
|
2019-01-10 14:01:29 +01:00
|
|
|
|
|
2019-01-10 08:15:34 +01:00
|
|
|
|
uScoresCounter :: Monad m => Integer -> ConduitT RankedFeature FeatureWithPValue (StateT Integer m) ()
|
|
|
|
|
uScoresCounter minFreq = CC.map (\(RankedFeature feature r score) -> (feature, (r, score, 1)))
|
|
|
|
|
.| gobbleAndDo countUScores
|
|
|
|
|
.| lowerFreqFiltre
|
|
|
|
|
.| pValueCalculator minFreq
|
2018-08-02 12:50:13 +02:00
|
|
|
|
where countUScores l =
|
|
|
|
|
M.toList
|
2018-08-06 11:59:04 +02:00
|
|
|
|
$ M.fromListWith (\(r1, s1, c1) (r2, s2, c2) -> ((r1 + r2), (s1 + s2), (c1 + c2))) l
|
2019-01-10 08:15:34 +01:00
|
|
|
|
lowerFreqFiltre = CC.filter (\(_, (_, _, c)) -> c >= minFreq)
|
2018-08-03 11:16:28 +02:00
|
|
|
|
|
2019-01-10 08:15:34 +01:00
|
|
|
|
pValueCalculator :: Monad m => Integer -> ConduitT (Feature, (Double, MetricValue, Integer)) FeatureWithPValue (StateT Integer m) ()
|
|
|
|
|
pValueCalculator minFreq = do
|
2018-08-03 11:16:28 +02:00
|
|
|
|
firstVal <- await
|
|
|
|
|
case firstVal of
|
2019-01-10 08:15:34 +01:00
|
|
|
|
Just i@(_, (_, _, c)) -> do
|
2018-08-03 11:16:28 +02:00
|
|
|
|
total <- lift get
|
2019-01-10 08:15:34 +01:00
|
|
|
|
if total - c >= minFreq
|
|
|
|
|
then yield $ calculatePValue total i
|
|
|
|
|
else return ()
|
|
|
|
|
CC.filter (\(_, (_, _, c)) -> total - c >= minFreq) .| CC.map (calculatePValue total)
|
2018-08-03 11:16:28 +02:00
|
|
|
|
Nothing -> return ()
|
|
|
|
|
|
2019-01-09 17:45:06 +01:00
|
|
|
|
calculatePValue :: Integer -> (Feature, (Double, MetricValue, Integer)) -> FeatureWithPValue
|
2018-08-06 11:59:04 +02:00
|
|
|
|
calculatePValue total (f, (r, s, c)) = FeatureWithPValue f
|
|
|
|
|
(pvalue (r - minusR c) c (total - c))
|
|
|
|
|
(s / (fromIntegral c))
|
|
|
|
|
c
|
2018-08-03 11:16:28 +02:00
|
|
|
|
where minusR c = (c' * (c' + 1)) / 2.0
|
2018-08-02 12:50:13 +02:00
|
|
|
|
where c' = fromIntegral c
|
2018-08-02 22:09:25 +02:00
|
|
|
|
-- calulating p-value from Mann–Whitney U test
|
|
|
|
|
-- (normal approximation is used)
|
|
|
|
|
pvalue u n1 n2 = let n1' = fromIntegral n1
|
2018-08-02 12:50:13 +02:00
|
|
|
|
n2' = fromIntegral n2
|
|
|
|
|
mean = n1' * n2' / 2
|
|
|
|
|
sigma = sqrt $ n1' * n2' * (n1' + n2' + 1) / 12
|
|
|
|
|
z = (u - mean) / sigma
|
|
|
|
|
in cumulative (normalDistr 0.0 1.0) z
|
|
|
|
|
|
2018-08-03 11:16:28 +02:00
|
|
|
|
|
|
|
|
|
totalCounter :: Monad m => ConduitT a a (StateT Integer m) ()
|
|
|
|
|
totalCounter = do
|
|
|
|
|
m <- await
|
|
|
|
|
case m of
|
|
|
|
|
Just x -> do
|
|
|
|
|
i <- lift get
|
|
|
|
|
lift $ put $ i + 1
|
|
|
|
|
yield x
|
|
|
|
|
totalCounter
|
|
|
|
|
Nothing -> return ()
|
|
|
|
|
|
2018-08-07 15:55:04 +02:00
|
|
|
|
lessByMetric :: Bool -> Metric -> (LineRecord -> LineRecord -> Bool)
|
|
|
|
|
lessByMetric reversed metric = lessByMetric' reversed (getMetricOrdering metric)
|
|
|
|
|
where lessByMetric' False TheHigherTheBetter =
|
|
|
|
|
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
|
|
|
|
|
scoreA < scoreB)
|
|
|
|
|
lessByMetric' False TheLowerTheBetter =
|
|
|
|
|
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
|
|
|
|
|
scoreA > scoreB)
|
|
|
|
|
lessByMetric' True TheHigherTheBetter =
|
|
|
|
|
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
|
|
|
|
|
scoreA > scoreB)
|
|
|
|
|
lessByMetric' True TheLowerTheBetter =
|
|
|
|
|
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
|
|
|
|
|
scoreA < scoreB)
|
2018-08-02 12:50:13 +02:00
|
|
|
|
|
2018-05-28 09:45:08 +02:00
|
|
|
|
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
|
|
|
|
|
runLineByLineGeneralized ordering spec consum = do
|
2018-06-28 15:36:47 +02:00
|
|
|
|
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec
|
2018-08-13 10:09:55 +02:00
|
|
|
|
gevalLineByLineCore metric preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
|
2018-06-08 12:38:45 +02:00
|
|
|
|
where metric = gesMainMetric spec
|
2018-08-13 10:09:55 +02:00
|
|
|
|
preprocess = gesPreprocess spec
|
2018-05-28 09:45:08 +02:00
|
|
|
|
sorter KeepTheOriginalOrder = doNothing
|
|
|
|
|
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
|
|
|
|
|
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
|
|
|
|
|
sortOrder FirstTheBest TheLowerTheBetter = compareScores
|
|
|
|
|
sortOrder _ _ = flip compareScores
|
|
|
|
|
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
|
|
|
|
|
|
|
|
|
|
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
|
|
|
|
|
gobbleAndDo fun = do
|
|
|
|
|
l <- CC.sinkList
|
|
|
|
|
CC.yieldMany $ fun l
|
2018-05-26 14:40:26 +02:00
|
|
|
|
|
2018-05-28 09:45:08 +02:00
|
|
|
|
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
|
|
|
|
|
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
|
2018-05-26 14:40:26 +02:00
|
|
|
|
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
2018-05-26 13:09:06 +02:00
|
|
|
|
consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
|
2018-02-13 10:16:03 +01:00
|
|
|
|
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
|
|
|
|
|
outA /= outB && scoreA /= scoreB
|
|
|
|
|
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
|
|
|
|
|
formatScoreDiff (scoreB - scoreA),
|
|
|
|
|
escapeTabs inp,
|
|
|
|
|
escapeTabs exp,
|
|
|
|
|
escapeTabs outA,
|
|
|
|
|
escapeTabs outB]
|
|
|
|
|
formatScoreDiff :: Double -> Text
|
|
|
|
|
formatScoreDiff = Data.Text.pack . printf "%f"
|
|
|
|
|
|
2019-01-10 08:15:34 +01:00
|
|
|
|
runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
|
|
|
|
runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized ordering' otherOut spec consum
|
2018-08-06 22:22:33 +02:00
|
|
|
|
where ordering' = forceSomeOrdering ordering
|
2018-08-07 15:55:04 +02:00
|
|
|
|
reversed = case ordering of
|
|
|
|
|
KeepTheOriginalOrder -> False
|
|
|
|
|
FirstTheWorst -> False
|
|
|
|
|
FirstTheBest -> True
|
2018-08-06 22:22:33 +02:00
|
|
|
|
consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
|
|
|
|
consum = CC.map prepareFakeLineRecord
|
2019-01-10 08:15:34 +01:00
|
|
|
|
.| (worstFeaturesPipeline reversed spec bbdo)
|
2018-08-06 22:22:33 +02:00
|
|
|
|
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
|
|
|
|
|
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
|
|
|
|
|
LineRecord inp exp out c (score - scorePrev)
|
|
|
|
|
|
|
|
|
|
|
2018-05-28 09:45:08 +02:00
|
|
|
|
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
|
|
|
|
|
runDiffGeneralized ordering otherOut spec consum = do
|
2018-06-28 15:36:47 +02:00
|
|
|
|
(inputSource, expectedSource, outSource) <- checkAndGetFilesSingleOut True spec
|
2018-06-02 20:24:34 +02:00
|
|
|
|
ooss <- getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv" otherOut
|
|
|
|
|
case ooss of
|
|
|
|
|
Left NoSpecGiven -> throwM $ NoOutFile otherOut
|
|
|
|
|
Left (NoFile fp) -> throwM $ NoOutFile fp
|
|
|
|
|
Left (NoDirectory d) -> throwM $ NoOutFile otherOut
|
|
|
|
|
Right otherOutSource -> do
|
2018-08-13 10:09:55 +02:00
|
|
|
|
let sourceA = gevalLineByLineSource metric preprocess inputSource expectedSource otherOutSource
|
|
|
|
|
let sourceB = gevalLineByLineSource metric preprocess inputSource expectedSource outSource
|
2018-06-02 20:24:34 +02:00
|
|
|
|
runResourceT $ runConduit $
|
|
|
|
|
((getZipSource $ (,)
|
|
|
|
|
<$> ZipSource sourceA
|
|
|
|
|
<*> ZipSource sourceB) .| sorter ordering .| consum)
|
2018-06-08 12:38:45 +02:00
|
|
|
|
where metric = gesMainMetric spec
|
2018-08-13 10:09:55 +02:00
|
|
|
|
preprocess = gesPreprocess spec
|
2018-05-28 10:04:27 +02:00
|
|
|
|
sorter KeepTheOriginalOrder = doNothing
|
|
|
|
|
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
|
|
|
|
|
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
|
|
|
|
|
sortOrder FirstTheBest TheLowerTheBetter = compareScores
|
|
|
|
|
sortOrder _ _ = flip compareScores
|
|
|
|
|
compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
|
|
|
|
|
((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
|
|
|
|
|
= (n1 - o1) `compare` (n2 - o2)
|
|
|
|
|
|
2018-05-26 14:40:26 +02:00
|
|
|
|
|
2018-02-13 10:16:03 +01:00
|
|
|
|
escapeTabs :: Text -> Text
|
|
|
|
|
escapeTabs = Data.Text.replace "\t" "<tab>"
|
2018-01-09 11:17:11 +01:00
|
|
|
|
|
2018-08-13 10:09:55 +02:00
|
|
|
|
gevalLineByLineCore :: Metric -> (Text -> Text) -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
|
|
|
|
|
gevalLineByLineCore metric preprocess inputSource expectedSource outSource consum =
|
2018-05-26 13:09:06 +02:00
|
|
|
|
runResourceT $ runConduit $
|
2018-08-13 10:09:55 +02:00
|
|
|
|
((gevalLineByLineSource metric preprocess inputSource expectedSource outSource) .| consum)
|
2018-02-13 08:44:27 +01:00
|
|
|
|
|
2018-08-13 10:09:55 +02:00
|
|
|
|
gevalLineByLineSource :: Metric -> (Text -> Text) -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT () LineRecord (ResourceT IO) ()
|
|
|
|
|
gevalLineByLineSource metric preprocess inputSource expectedSource outSource =
|
2018-02-13 08:44:27 +01:00
|
|
|
|
(getZipSource $ (,)
|
2018-01-09 11:17:11 +01:00
|
|
|
|
<$> ZipSource (CL.sourceList [1..])
|
2018-05-26 13:09:06 +02:00
|
|
|
|
<*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
|
2018-01-13 14:39:11 +01:00
|
|
|
|
where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
|
2018-01-09 11:17:11 +01:00
|
|
|
|
context = (WithInput inputLineSource expectedLineSource outputLineSource)
|
2018-08-13 10:09:55 +02:00
|
|
|
|
inputLineSource = fileAsLineSource inputSource id
|
|
|
|
|
expectedLineSource = fileAsLineSource expectedSource id
|
|
|
|
|
outputLineSource = fileAsLineSource outSource id
|
2018-01-09 11:17:11 +01:00
|
|
|
|
justLine (LineInFile _ _ l) = l
|
|
|
|
|
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
|
2018-08-13 10:09:55 +02:00
|
|
|
|
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (LineInFile inputSource lineNo inp)
|
|
|
|
|
(LineInFile expectedSource lineNo exp)
|
|
|
|
|
(LineInFile outSource lineNo out)
|
2018-01-09 11:17:11 +01:00
|
|
|
|
return $ LineRecord inp exp out lineNo s
|
2018-08-17 16:57:47 +02:00
|
|
|
|
|
|
|
|
|
justTokenize :: Maybe Tokenizer -> IO ()
|
|
|
|
|
justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option"
|
|
|
|
|
justTokenize (Just tokenizer) =
|
|
|
|
|
runResourceT
|
|
|
|
|
$ runConduit
|
|
|
|
|
$ CC.stdin
|
|
|
|
|
.| CC.decodeUtf8Lenient
|
|
|
|
|
.| CT.lines
|
|
|
|
|
.| CC.map (tokenizeWithSpaces (Just tokenizer))
|
|
|
|
|
.| CC.unlines
|
|
|
|
|
.| CC.encodeUtf8
|
|
|
|
|
.| CC.stdout
|