From 2b1cf80601598c6076b65dc39a205e2ba1597a2b Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Wed, 1 Aug 2018 22:39:34 +0200 Subject: [PATCH] implement ranking conduit --- geval.cabal | 1 + src/Data/Conduit/Rank.hs | 37 +++++++++++++++++++++++++++++++++++++ test/Spec.hs | 25 +++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 src/Data/Conduit/Rank.hs diff --git a/geval.cabal b/geval.cabal index 362b770..bd6bbb5 100644 --- a/geval.cabal +++ b/geval.cabal @@ -30,6 +30,7 @@ library , GEval.ParseParams , Data.Conduit.AutoDecompress , Data.Conduit.SmartSource + , Data.Conduit.Rank , Paths_geval build-depends: base >= 4.7 && < 5 , cond diff --git a/src/Data/Conduit/Rank.hs b/src/Data/Conduit/Rank.hs new file mode 100644 index 0000000..6e4ed60 --- /dev/null +++ b/src/Data/Conduit/Rank.hs @@ -0,0 +1,37 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} + +module Data.Conduit.Rank + (rank) + where + +import Data.Conduit + +data PreviousStuff a = None | Cached [a] + +rank :: Monad m => (a -> a -> Bool) -> ConduitT a (Double, a) m () +rank less = rank' less 1.0 None + +rank' :: Monad m => (a -> a -> Bool) -> Double -> PreviousStuff a -> ConduitT a (Double, a) m () +rank' less r ps = do + mx <- await + case mx of + Just x -> + case ps of + None -> do + rank' less r $ Cached [x] + Cached s@(h:_) -> do + if h `less` x + then + do + yieldBatch r s + rank' less (r + (fromIntegral $ length s)) $ Cached [x] + else + rank' less r $ Cached (x:s) + Nothing -> + case ps of + None -> return () + Cached s -> yieldBatch r s + +yieldBatch :: Monad m => Double -> [a] -> ConduitT a (Double, a) m () +yieldBatch r s = mapM_ (\x -> yield (r', x)) $ reverse s + where r' = (r + (r + (fromIntegral $ (length s - 1)))) / 2.0 diff --git a/test/Spec.hs b/test/Spec.hs index 6a905b6..d8b3b8f 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} import Test.Hspec @@ -23,10 +24,12 @@ import Data.Conduit.List (consume) import qualified Test.HUnit as HU import Data.Conduit.SmartSource +import Data.Conduit.Rank import qualified Data.Conduit.Text as CT import Data.Conduit import Control.Monad.Trans.Resource import qualified Data.Conduit.List as CL +import qualified Data.Conduit.Combinators as CC informationRetrievalBookExample :: [(String, Int)] informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3), @@ -343,9 +346,27 @@ main = hspec $ do OutputFileParsed "out" (Data.Map.Strict.fromList [("nb_epochs", "1"), ("foo", ""), ("bar-baz", "8")]) + describe "ranking" $ do + it "simple case" $ do + checkConduitPure (rank (\(a,_) (b,_) -> a < b)) [(3.0::Double, "foo"::String), + (10.0, "bar"), + (12.0, "baz")] + [(1.0, (3.0::Double, "foo"::String)), + (2.0, (10.0, "bar")), + (3.0, (12.0, "baz"))] + it "one item" $ do + checkConduitPure (rank (\(a,_) (b,_) -> a < b)) [(5.0::Double, "foo"::String)] + [(1.0, (5.0::Double, "foo"::String))] + it "take between" $ do + checkConduitPure (rank (<)) [3.0::Double, 5.0, 5.0, 10.0] + [(1.0::Double, 3.0), + (2.5, 5.0), + (2.5, 5.0), + (4.0, 10.0)] - - +checkConduitPure conduit inList expList = do + let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList + mapM_ (\(o,e) -> (fst o) `shouldBeAlmost` (fst e)) $ Prelude.zip outList expList readFromSmartSource :: FilePath -> FilePath -> String -> IO [String] readFromSmartSource defaultDir defaultFile specS = do