Multiple metrics can be packed via "Cartesian" strings

This commit is contained in:
Filip Gralinski 2019-12-14 20:59:00 +01:00
parent 5f532c71c7
commit 2234efa107
4 changed files with 66 additions and 5 deletions

View File

@ -48,6 +48,7 @@ library
, GEval.Selector , GEval.Selector
, Data.Statistics.Loess , Data.Statistics.Loess
, Data.Statistics.Calibration , Data.Statistics.Calibration
, Data.CartesianStrings
, Paths_geval , Paths_geval
build-depends: base >= 4.7 && < 5 build-depends: base >= 4.7 && < 5
, cond , cond

View File

@ -0,0 +1,42 @@
{-# LANGUAGE OverloadedStrings #-}
module Data.CartesianStrings
(parseCartesianString,
concatCartesianStrings,
CartesianStrings(..))
where
import Data.List (findIndex)
import Data.List.Split (splitOn)
-- A helper library for parsing strings representing sets of strings
-- obtained via a Cartesian product, e.g.:
-- - "foo" represents just ["foo"]
-- - "a-{foo,bar,baz}-b" represents ["a-foo-b", "a-bar-b", "a-baz-b"]
-- - "{foo,bar,baz}-{x,y}-{0,1,2}" represents a set containing 18 strings
cartProd :: [a] -> [b] -> [(a, b)]
cartProd xs ys = [(x,y) | x <- xs, y <- ys]
parseCartesianString :: String -> [String]
parseCartesianString s =
case findIndex (=='{') s of
Just begIx ->
let pref = take begIx s
c = drop (begIx + 1) s
in case findIndex (=='}') c of
Just endIx ->
let inf = take endIx c
current = splitOn "," inf
rest = parseCartesianString $ drop (endIx + 1) c
in map (uncurry (++)) $ cartProd (map (pref++) current) rest
Nothing -> [s]
data CartesianStrings a = CartesianStrings [a]
deriving (Eq)
instance Read a => Read (CartesianStrings a) where
readsPrec _ s = [(CartesianStrings (map read $ parseCartesianString s), "")]
concatCartesianStrings :: [CartesianStrings a] -> [a]
concatCartesianStrings = concat . map (\(CartesianStrings ss) -> ss)

View File

@ -39,6 +39,7 @@ import GEval.Validation
import Data.List (intercalate) import Data.List (intercalate)
import Data.Conduit.SmartSource import Data.Conduit.SmartSource
import Data.CartesianStrings
fullOptionsParser = info (helper <*> optionsParser) fullOptionsParser = info (helper <*> optionsParser)
(fullDesc (fullDesc
@ -243,11 +244,12 @@ sel (Just m) _ = m
metricReader :: Parser [EvaluationScheme] metricReader :: Parser [EvaluationScheme]
metricReader = many $ option auto -- actually `some` should be used instead of `many`, the problem is that metricReader = concatCartesianStrings <$>
(many $ option auto -- actually `some` should be used instead of `many`, the problem is that
( long "metric" -- --metric might be in the config.txt file... ( long "metric" -- --metric might be in the config.txt file...
<> short 'm' <> short 'm'
<> metavar "METRIC" <> metavar "METRIC"
<> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList)) <> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList)))
altMetricReader :: Parser (Maybe EvaluationScheme) altMetricReader :: Parser (Maybe EvaluationScheme)
altMetricReader = optional $ option auto altMetricReader = optional $ option auto

View File

@ -64,6 +64,7 @@ import qualified Data.Vector.Unboxed as DVU
import qualified Statistics.Matrix.Types as SMT import qualified Statistics.Matrix.Types as SMT
import Data.Statistics.Loess (loess) import Data.Statistics.Loess (loess)
import Data.Statistics.Calibration (calibration) import Data.Statistics.Calibration (calibration)
import Data.CartesianStrings (parseCartesianString)
informationRetrievalBookExample :: [(String, Int)] informationRetrievalBookExample :: [(String, Int)]
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3), informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
@ -670,6 +671,21 @@ main = hspec $ do
calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0 calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0
calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0 calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0 calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
describe "Cartesian strings" $ do
it "singleton" $ do
(parseCartesianString "foo") `shouldBe` ["foo"]
it "simple" $ do
parseCartesianString "a-{foo,bar,baz}-b" `shouldBe` ["a-foo-b", "a-bar-b", "a-baz-b"]
it "3x2" $ do
parseCartesianString "a-{foo,bar,baz}-{b,c}" `shouldBe` ["a-foo-b", "a-foo-c", "a-bar-b",
"a-bar-c", "a-baz-b", "a-baz-c" ]
it "3x2x3" $ do
parseCartesianString "{foo,bar,ba}-{b,c}-{0,1,2}x" `shouldBe` ["foo-b-0x", "foo-b-1x", "foo-b-2x",
"foo-c-0x", "foo-c-1x", "foo-c-2x",
"bar-b-0x", "bar-b-1x", "bar-b-2x",
"bar-c-0x", "bar-c-1x", "bar-c-2x",
"ba-b-0x", "ba-b-1x", "ba-b-2x",
"ba-c-0x", "ba-c-1x", "ba-c-2x" ]
checkConduitPure conduit inList expList = do checkConduitPure conduit inList expList = do
let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList