From ab1056301eccfd0bf4459297544eda6a8426a43e Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Mon, 28 May 2018 09:45:08 +0200 Subject: [PATCH] add sorting for --line-by-line internally --- src/Data/Conduit/AutoDecompress.hs | 7 +++--- src/GEval/LineByLine.hs | 38 ++++++++++++++++++++++-------- src/GEval/OptionsParser.hs | 4 ++-- test/Spec.hs | 6 ++++- 4 files changed, 39 insertions(+), 16 deletions(-) diff --git a/src/Data/Conduit/AutoDecompress.hs b/src/Data/Conduit/AutoDecompress.hs index 2e92402..b959d6f 100644 --- a/src/Data/Conduit/AutoDecompress.hs +++ b/src/Data/Conduit/AutoDecompress.hs @@ -1,7 +1,8 @@ {-# LANGUAGE AllowAmbiguousTypes #-} module Data.Conduit.AutoDecompress - (autoDecompress) + (autoDecompress, + doNothing) where import Data.Conduit @@ -34,11 +35,11 @@ autoDecompress = do Nothing -> return () -lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> Conduit ByteString m ByteString +lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> ConduitT ByteString ByteString m () lookAtMagicNumbers (31, 139) = ungzip lookAtMagicNumbers (66, 90) = BZ.bunzip2 lookAtMagicNumbers (253, 55) = XZ.decompress Nothing lookAtMagicNumbers _ = doNothing -doNothing :: Monad m => Conduit ByteString m ByteString +doNothing :: Monad m => ConduitT a a m () doNothing = Data.Conduit.Combinators.filter (const True) diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index fe6aa30..91848db 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -12,17 +12,22 @@ module GEval.LineByLine runLineByLineGeneralized, runDiff, runDiffGeneralized, - LineRecord(..) + LineRecord(..), + ResultOrdering(..) ) where import GEval.Core +import Data.Conduit.AutoDecompress (doNothing) + import Data.Conduit import qualified Data.Conduit.List as CL import qualified Data.Conduit.Combinators as CC import Data.Text import Data.Text.Encoding +import Data.List (sortBy, sort) + import Control.Monad.IO.Class import Control.Monad.Trans.Resource @@ -33,8 +38,10 @@ import Text.Printf data LineRecord = LineRecord Text Text Text Word32 MetricValue deriving (Eq, Show) -runLineByLine :: GEvalSpecification -> IO () -runLineByLine spec = runLineByLineGeneralized spec consum +data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest + +runLineByLine :: ResultOrdering -> GEvalSpecification -> IO () +runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum where consum :: ConduitT LineRecord Void (ResourceT IO) () consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [ @@ -45,14 +52,25 @@ runLineByLine spec = runLineByLineGeneralized spec consum formatScore :: MetricValue -> Text formatScore = Data.Text.pack . printf "%f" -runLineByLineGeneralized :: GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a -runLineByLineGeneralized spec consum = do +runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a +runLineByLineGeneralized ordering spec consum = do (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec - gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum + gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum) where metric = gesMetric spec + sorter KeepTheOriginalOrder = doNothing + sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric)) + sortOrder FirstTheWorst TheHigherTheBetter = compareScores + sortOrder FirstTheBest TheLowerTheBetter = compareScores + sortOrder _ _ = flip compareScores + compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2 -runDiff :: FilePath -> GEvalSpecification -> IO () -runDiff otherOut spec = runDiffGeneralized otherOut spec consum +gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m () +gobbleAndDo fun = do + l <- CC.sinkList + CC.yieldMany $ fun l + +runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO () +runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) = @@ -66,8 +84,8 @@ runDiff otherOut spec = runDiffGeneralized otherOut spec consum formatScoreDiff :: Double -> Text formatScoreDiff = Data.Text.pack . printf "%f" -runDiffGeneralized :: FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a -runDiffGeneralized otherOut spec consum = do +runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a +runDiffGeneralized ordering otherOut spec consum = do let otherOutFilePath = getOutFile spec otherOut (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index d8ce4e7..76d6808 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -155,10 +155,10 @@ runGEval''' (Just Init) spec = do initChallenge spec return Nothing runGEval''' (Just LineByLine) spec = do - runLineByLine spec + runLineByLine KeepTheOriginalOrder spec return Nothing runGEval''' (Just (Diff otherOut)) spec = do - runDiff otherOut spec + runDiff KeepTheOriginalOrder otherOut spec return Nothing initChallenge :: GEvalSpecification -> IO () diff --git a/test/Spec.hs b/test/Spec.hs index c1b24c8..b8dac23 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -278,11 +278,15 @@ main = hspec $ do gesMetric = Likelihood, gesPrecision = Nothing } it "simple test" $ do - results <- runLineByLineGeneralized sampleChallenge Data.Conduit.List.consume + results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo", "bar", "baz", "baq"] + it "test sorting" $ do + results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume + Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq" + neverMatch :: Char -> Int -> Bool neverMatch _ _ = False