add sorting for --line-by-line internally

This commit is contained in:
Filip Gralinski 2018-05-28 09:45:08 +02:00
parent f68223409e
commit ab1056301e
4 changed files with 39 additions and 16 deletions

View File

@ -1,7 +1,8 @@
{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE AllowAmbiguousTypes #-}
module Data.Conduit.AutoDecompress module Data.Conduit.AutoDecompress
(autoDecompress) (autoDecompress,
doNothing)
where where
import Data.Conduit import Data.Conduit
@ -34,11 +35,11 @@ autoDecompress = do
Nothing -> return () Nothing -> return ()
lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> Conduit ByteString m ByteString lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> ConduitT ByteString ByteString m ()
lookAtMagicNumbers (31, 139) = ungzip lookAtMagicNumbers (31, 139) = ungzip
lookAtMagicNumbers (66, 90) = BZ.bunzip2 lookAtMagicNumbers (66, 90) = BZ.bunzip2
lookAtMagicNumbers (253, 55) = XZ.decompress Nothing lookAtMagicNumbers (253, 55) = XZ.decompress Nothing
lookAtMagicNumbers _ = doNothing lookAtMagicNumbers _ = doNothing
doNothing :: Monad m => Conduit ByteString m ByteString doNothing :: Monad m => ConduitT a a m ()
doNothing = Data.Conduit.Combinators.filter (const True) doNothing = Data.Conduit.Combinators.filter (const True)

View File

@ -12,17 +12,22 @@ module GEval.LineByLine
runLineByLineGeneralized, runLineByLineGeneralized,
runDiff, runDiff,
runDiffGeneralized, runDiffGeneralized,
LineRecord(..) LineRecord(..),
ResultOrdering(..)
) where ) where
import GEval.Core import GEval.Core
import Data.Conduit.AutoDecompress (doNothing)
import Data.Conduit import Data.Conduit
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC import qualified Data.Conduit.Combinators as CC
import Data.Text import Data.Text
import Data.Text.Encoding import Data.Text.Encoding
import Data.List (sortBy, sort)
import Control.Monad.IO.Class import Control.Monad.IO.Class
import Control.Monad.Trans.Resource import Control.Monad.Trans.Resource
@ -33,8 +38,10 @@ import Text.Printf
data LineRecord = LineRecord Text Text Text Word32 MetricValue data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show) deriving (Eq, Show)
runLineByLine :: GEvalSpecification -> IO () data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
runLineByLine spec = runLineByLineGeneralized spec consum
runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
where consum :: ConduitT LineRecord Void (ResourceT IO) () where consum :: ConduitT LineRecord Void (ResourceT IO) ()
consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [ formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
@ -45,14 +52,25 @@ runLineByLine spec = runLineByLineGeneralized spec consum
formatScore :: MetricValue -> Text formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f" formatScore = Data.Text.pack . printf "%f"
runLineByLineGeneralized :: GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
runLineByLineGeneralized spec consum = do runLineByLineGeneralized ordering spec consum = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
where metric = gesMetric spec where metric = gesMetric spec
sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
runDiff :: FilePath -> GEvalSpecification -> IO () gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
runDiff otherOut spec = runDiffGeneralized otherOut spec consum gobbleAndDo fun = do
l <- CC.sinkList
CC.yieldMany $ fun l
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) = shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
@ -66,8 +84,8 @@ runDiff otherOut spec = runDiffGeneralized otherOut spec consum
formatScoreDiff :: Double -> Text formatScoreDiff :: Double -> Text
formatScoreDiff = Data.Text.pack . printf "%f" formatScoreDiff = Data.Text.pack . printf "%f"
runDiffGeneralized :: FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
runDiffGeneralized otherOut spec consum = do runDiffGeneralized ordering otherOut spec consum = do
let otherOutFilePath = getOutFile spec otherOut let otherOutFilePath = getOutFile spec otherOut
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath

View File

@ -155,10 +155,10 @@ runGEval''' (Just Init) spec = do
initChallenge spec initChallenge spec
return Nothing return Nothing
runGEval''' (Just LineByLine) spec = do runGEval''' (Just LineByLine) spec = do
runLineByLine spec runLineByLine KeepTheOriginalOrder spec
return Nothing return Nothing
runGEval''' (Just (Diff otherOut)) spec = do runGEval''' (Just (Diff otherOut)) spec = do
runDiff otherOut spec runDiff KeepTheOriginalOrder otherOut spec
return Nothing return Nothing
initChallenge :: GEvalSpecification -> IO () initChallenge :: GEvalSpecification -> IO ()

View File

@ -278,11 +278,15 @@ main = hspec $ do
gesMetric = Likelihood, gesMetric = Likelihood,
gesPrecision = Nothing } gesPrecision = Nothing }
it "simple test" $ do it "simple test" $ do
results <- runLineByLineGeneralized sampleChallenge Data.Conduit.List.consume results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo", Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
"bar", "bar",
"baz", "baz",
"baq"] "baq"]
it "test sorting" $ do
results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume
Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
neverMatch :: Char -> Int -> Bool neverMatch :: Char -> Int -> Bool
neverMatch _ _ = False neverMatch _ _ = False