add sorting for --line-by-line internally

This commit is contained in:
Filip Gralinski 2018-05-28 09:45:08 +02:00
parent f68223409e
commit ab1056301e
4 changed files with 39 additions and 16 deletions

View File

@ -1,7 +1,8 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
module Data.Conduit.AutoDecompress
(autoDecompress)
(autoDecompress,
doNothing)
where
import Data.Conduit
@ -34,11 +35,11 @@ autoDecompress = do
Nothing -> return ()
lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> Conduit ByteString m ByteString
lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> ConduitT ByteString ByteString m ()
lookAtMagicNumbers (31, 139) = ungzip
lookAtMagicNumbers (66, 90) = BZ.bunzip2
lookAtMagicNumbers (253, 55) = XZ.decompress Nothing
lookAtMagicNumbers _ = doNothing
doNothing :: Monad m => Conduit ByteString m ByteString
doNothing :: Monad m => ConduitT a a m ()
doNothing = Data.Conduit.Combinators.filter (const True)

View File

@ -12,17 +12,22 @@ module GEval.LineByLine
runLineByLineGeneralized,
runDiff,
runDiffGeneralized,
LineRecord(..)
LineRecord(..),
ResultOrdering(..)
) where
import GEval.Core
import Data.Conduit.AutoDecompress (doNothing)
import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
import Data.Text
import Data.Text.Encoding
import Data.List (sortBy, sort)
import Control.Monad.IO.Class
import Control.Monad.Trans.Resource
@ -33,8 +38,10 @@ import Text.Printf
data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show)
runLineByLine :: GEvalSpecification -> IO ()
runLineByLine spec = runLineByLineGeneralized spec consum
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
@ -45,14 +52,25 @@ runLineByLine spec = runLineByLineGeneralized spec consum
formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f"
runLineByLineGeneralized :: GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
runLineByLineGeneralized spec consum = do
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
runLineByLineGeneralized ordering spec consum = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
where metric = gesMetric spec
sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
runDiff :: FilePath -> GEvalSpecification -> IO ()
runDiff otherOut spec = runDiffGeneralized otherOut spec consum
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
gobbleAndDo fun = do
l <- CC.sinkList
CC.yieldMany $ fun l
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
@ -66,8 +84,8 @@ runDiff otherOut spec = runDiffGeneralized otherOut spec consum
formatScoreDiff :: Double -> Text
formatScoreDiff = Data.Text.pack . printf "%f"
runDiffGeneralized :: FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
runDiffGeneralized otherOut spec consum = do
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
runDiffGeneralized ordering otherOut spec consum = do
let otherOutFilePath = getOutFile spec otherOut
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath

View File

@ -155,10 +155,10 @@ runGEval''' (Just Init) spec = do
initChallenge spec
return Nothing
runGEval''' (Just LineByLine) spec = do
runLineByLine spec
runLineByLine KeepTheOriginalOrder spec
return Nothing
runGEval''' (Just (Diff otherOut)) spec = do
runDiff otherOut spec
runDiff KeepTheOriginalOrder otherOut spec
return Nothing
initChallenge :: GEvalSpecification -> IO ()

View File

@ -278,11 +278,15 @@ main = hspec $ do
gesMetric = Likelihood,
gesPrecision = Nothing }
it "simple test" $ do
results <- runLineByLineGeneralized sampleChallenge Data.Conduit.List.consume
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
"bar",
"baz",
"baq"]
it "test sorting" $ do
results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume
Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
neverMatch :: Char -> Int -> Bool
neverMatch _ _ = False