Multiple metrics can be packed via "Cartesian" strings

This commit is contained in:
Filip Gralinski 2019-12-14 20:59:00 +01:00
parent 5f532c71c7
commit 2234efa107
4 changed files with 66 additions and 5 deletions

View File

@ -48,6 +48,7 @@ library
, GEval.Selector
, Data.Statistics.Loess
, Data.Statistics.Calibration
, Data.CartesianStrings
, Paths_geval
build-depends: base >= 4.7 && < 5
, cond

View File

@ -0,0 +1,42 @@
{-# LANGUAGE OverloadedStrings #-}
module Data.CartesianStrings
(parseCartesianString,
concatCartesianStrings,
CartesianStrings(..))
where
import Data.List (findIndex)
import Data.List.Split (splitOn)
-- A helper library for parsing strings representing sets of strings
-- obtained via a Cartesian product, e.g.:
-- - "foo" represents just ["foo"]
-- - "a-{foo,bar,baz}-b" represents ["a-foo-b", "a-bar-b", "a-baz-b"]
-- - "{foo,bar,baz}-{x,y}-{0,1,2}" represents a set containing 18 strings
cartProd :: [a] -> [b] -> [(a, b)]
cartProd xs ys = [(x,y) | x <- xs, y <- ys]
parseCartesianString :: String -> [String]
parseCartesianString s =
case findIndex (=='{') s of
Just begIx ->
let pref = take begIx s
c = drop (begIx + 1) s
in case findIndex (=='}') c of
Just endIx ->
let inf = take endIx c
current = splitOn "," inf
rest = parseCartesianString $ drop (endIx + 1) c
in map (uncurry (++)) $ cartProd (map (pref++) current) rest
Nothing -> [s]
data CartesianStrings a = CartesianStrings [a]
deriving (Eq)
instance Read a => Read (CartesianStrings a) where
readsPrec _ s = [(CartesianStrings (map read $ parseCartesianString s), "")]
concatCartesianStrings :: [CartesianStrings a] -> [a]
concatCartesianStrings = concat . map (\(CartesianStrings ss) -> ss)

View File

@ -39,6 +39,7 @@ import GEval.Validation
import Data.List (intercalate)
import Data.Conduit.SmartSource
import Data.CartesianStrings
fullOptionsParser = info (helper <*> optionsParser)
(fullDesc
@ -243,11 +244,12 @@ sel (Just m) _ = m
metricReader :: Parser [EvaluationScheme]
metricReader = many $ option auto -- actually `some` should be used instead of `many`, the problem is that
( long "metric" -- --metric might be in the config.txt file...
<> short 'm'
<> metavar "METRIC"
<> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList))
metricReader = concatCartesianStrings <$>
(many $ option auto -- actually `some` should be used instead of `many`, the problem is that
( long "metric" -- --metric might be in the config.txt file...
<> short 'm'
<> metavar "METRIC"
<> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList)))
altMetricReader :: Parser (Maybe EvaluationScheme)
altMetricReader = optional $ option auto

View File

@ -64,6 +64,7 @@ import qualified Data.Vector.Unboxed as DVU
import qualified Statistics.Matrix.Types as SMT
import Data.Statistics.Loess (loess)
import Data.Statistics.Calibration (calibration)
import Data.CartesianStrings (parseCartesianString)
informationRetrievalBookExample :: [(String, Int)]
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
@ -670,6 +671,21 @@ main = hspec $ do
calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0
calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
describe "Cartesian strings" $ do
it "singleton" $ do
(parseCartesianString "foo") `shouldBe` ["foo"]
it "simple" $ do
parseCartesianString "a-{foo,bar,baz}-b" `shouldBe` ["a-foo-b", "a-bar-b", "a-baz-b"]
it "3x2" $ do
parseCartesianString "a-{foo,bar,baz}-{b,c}" `shouldBe` ["a-foo-b", "a-foo-c", "a-bar-b",
"a-bar-c", "a-baz-b", "a-baz-c" ]
it "3x2x3" $ do
parseCartesianString "{foo,bar,ba}-{b,c}-{0,1,2}x" `shouldBe` ["foo-b-0x", "foo-b-1x", "foo-b-2x",
"foo-c-0x", "foo-c-1x", "foo-c-2x",
"bar-b-0x", "bar-b-1x", "bar-b-2x",
"bar-c-0x", "bar-c-1x", "bar-c-2x",
"ba-b-0x", "ba-b-1x", "ba-b-2x",
"ba-c-0x", "ba-c-1x", "ba-c-2x" ]
checkConduitPure conduit inList expList = do
let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList