implement ranking conduit

This commit is contained in:
Filip Gralinski 2018-08-01 22:39:34 +02:00
parent 4b3a4fa665
commit 2b1cf80601
3 changed files with 61 additions and 2 deletions

View File

@ -30,6 +30,7 @@ library
, GEval.ParseParams , GEval.ParseParams
, Data.Conduit.AutoDecompress , Data.Conduit.AutoDecompress
, Data.Conduit.SmartSource , Data.Conduit.SmartSource
, Data.Conduit.Rank
, Paths_geval , Paths_geval
build-depends: base >= 4.7 && < 5 build-depends: base >= 4.7 && < 5
, cond , cond

37
src/Data/Conduit/Rank.hs Normal file
View File

@ -0,0 +1,37 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
module Data.Conduit.Rank
(rank)
where
import Data.Conduit
data PreviousStuff a = None | Cached [a]
rank :: Monad m => (a -> a -> Bool) -> ConduitT a (Double, a) m ()
rank less = rank' less 1.0 None
rank' :: Monad m => (a -> a -> Bool) -> Double -> PreviousStuff a -> ConduitT a (Double, a) m ()
rank' less r ps = do
mx <- await
case mx of
Just x ->
case ps of
None -> do
rank' less r $ Cached [x]
Cached s@(h:_) -> do
if h `less` x
then
do
yieldBatch r s
rank' less (r + (fromIntegral $ length s)) $ Cached [x]
else
rank' less r $ Cached (x:s)
Nothing ->
case ps of
None -> return ()
Cached s -> yieldBatch r s
yieldBatch :: Monad m => Double -> [a] -> ConduitT a (Double, a) m ()
yieldBatch r s = mapM_ (\x -> yield (r', x)) $ reverse s
where r' = (r + (r + (fromIntegral $ (length s - 1)))) / 2.0

View File

@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
import Test.Hspec import Test.Hspec
@ -23,10 +24,12 @@ import Data.Conduit.List (consume)
import qualified Test.HUnit as HU import qualified Test.HUnit as HU
import Data.Conduit.SmartSource import Data.Conduit.SmartSource
import Data.Conduit.Rank
import qualified Data.Conduit.Text as CT import qualified Data.Conduit.Text as CT
import Data.Conduit import Data.Conduit
import Control.Monad.Trans.Resource import Control.Monad.Trans.Resource
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
informationRetrievalBookExample :: [(String, Int)] informationRetrievalBookExample :: [(String, Int)]
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3), informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
@ -343,9 +346,27 @@ main = hspec $ do
OutputFileParsed "out" (Data.Map.Strict.fromList [("nb_epochs", "1"), OutputFileParsed "out" (Data.Map.Strict.fromList [("nb_epochs", "1"),
("foo", ""), ("foo", ""),
("bar-baz", "8")]) ("bar-baz", "8")])
describe "ranking" $ do
it "simple case" $ do
checkConduitPure (rank (\(a,_) (b,_) -> a < b)) [(3.0::Double, "foo"::String),
(10.0, "bar"),
(12.0, "baz")]
[(1.0, (3.0::Double, "foo"::String)),
(2.0, (10.0, "bar")),
(3.0, (12.0, "baz"))]
it "one item" $ do
checkConduitPure (rank (\(a,_) (b,_) -> a < b)) [(5.0::Double, "foo"::String)]
[(1.0, (5.0::Double, "foo"::String))]
it "take between" $ do
checkConduitPure (rank (<)) [3.0::Double, 5.0, 5.0, 10.0]
[(1.0::Double, 3.0),
(2.5, 5.0),
(2.5, 5.0),
(4.0, 10.0)]
checkConduitPure conduit inList expList = do
let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList
mapM_ (\(o,e) -> (fst o) `shouldBeAlmost` (fst e)) $ Prelude.zip outList expList
readFromSmartSource :: FilePath -> FilePath -> String -> IO [String] readFromSmartSource :: FilePath -> FilePath -> String -> IO [String]
readFromSmartSource defaultDir defaultFile specS = do readFromSmartSource defaultDir defaultFile specS = do