diff --git a/geval.cabal b/geval.cabal index c76d561..ab9d753 100644 --- a/geval.cabal +++ b/geval.cabal @@ -48,6 +48,7 @@ library , GEval.Selector , Data.Statistics.Loess , Data.Statistics.Calibration + , Data.CartesianStrings , Paths_geval build-depends: base >= 4.7 && < 5 , cond diff --git a/src/Data/CartesianStrings.hs b/src/Data/CartesianStrings.hs new file mode 100644 index 0000000..70f2418 --- /dev/null +++ b/src/Data/CartesianStrings.hs @@ -0,0 +1,42 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Data.CartesianStrings + (parseCartesianString, + concatCartesianStrings, + CartesianStrings(..)) + where + +import Data.List (findIndex) +import Data.List.Split (splitOn) + +-- A helper library for parsing strings representing sets of strings +-- obtained via a Cartesian product, e.g.: +-- - "foo" represents just ["foo"] +-- - "a-{foo,bar,baz}-b" represents ["a-foo-b", "a-bar-b", "a-baz-b"] +-- - "{foo,bar,baz}-{x,y}-{0,1,2}" represents a set containing 18 strings + +cartProd :: [a] -> [b] -> [(a, b)] +cartProd xs ys = [(x,y) | x <- xs, y <- ys] + +parseCartesianString :: String -> [String] +parseCartesianString s = + case findIndex (=='{') s of + Just begIx -> + let pref = take begIx s + c = drop (begIx + 1) s + in case findIndex (=='}') c of + Just endIx -> + let inf = take endIx c + current = splitOn "," inf + rest = parseCartesianString $ drop (endIx + 1) c + in map (uncurry (++)) $ cartProd (map (pref++) current) rest + Nothing -> [s] + +data CartesianStrings a = CartesianStrings [a] + deriving (Eq) + +instance Read a => Read (CartesianStrings a) where + readsPrec _ s = [(CartesianStrings (map read $ parseCartesianString s), "")] + +concatCartesianStrings :: [CartesianStrings a] -> [a] +concatCartesianStrings = concat . map (\(CartesianStrings ss) -> ss) diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 61c2952..d0767d8 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -39,6 +39,7 @@ import GEval.Validation import Data.List (intercalate) import Data.Conduit.SmartSource +import Data.CartesianStrings fullOptionsParser = info (helper <*> optionsParser) (fullDesc @@ -243,11 +244,12 @@ sel (Just m) _ = m metricReader :: Parser [EvaluationScheme] -metricReader = many $ option auto -- actually `some` should be used instead of `many`, the problem is that - ( long "metric" -- --metric might be in the config.txt file... - <> short 'm' - <> metavar "METRIC" - <> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList)) +metricReader = concatCartesianStrings <$> + (many $ option auto -- actually `some` should be used instead of `many`, the problem is that + ( long "metric" -- --metric might be in the config.txt file... + <> short 'm' + <> metavar "METRIC" + <> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList))) altMetricReader :: Parser (Maybe EvaluationScheme) altMetricReader = optional $ option auto diff --git a/test/Spec.hs b/test/Spec.hs index c350775..3348a14 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -64,6 +64,7 @@ import qualified Data.Vector.Unboxed as DVU import qualified Statistics.Matrix.Types as SMT import Data.Statistics.Loess (loess) import Data.Statistics.Calibration (calibration) +import Data.CartesianStrings (parseCartesianString) informationRetrievalBookExample :: [(String, Int)] informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3), @@ -670,6 +671,21 @@ main = hspec $ do calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0 calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0 calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0 + describe "Cartesian strings" $ do + it "singleton" $ do + (parseCartesianString "foo") `shouldBe` ["foo"] + it "simple" $ do + parseCartesianString "a-{foo,bar,baz}-b" `shouldBe` ["a-foo-b", "a-bar-b", "a-baz-b"] + it "3x2" $ do + parseCartesianString "a-{foo,bar,baz}-{b,c}" `shouldBe` ["a-foo-b", "a-foo-c", "a-bar-b", + "a-bar-c", "a-baz-b", "a-baz-c" ] + it "3x2x3" $ do + parseCartesianString "{foo,bar,ba}-{b,c}-{0,1,2}x" `shouldBe` ["foo-b-0x", "foo-b-1x", "foo-b-2x", + "foo-c-0x", "foo-c-1x", "foo-c-2x", + "bar-b-0x", "bar-b-1x", "bar-b-2x", + "bar-c-0x", "bar-c-1x", "bar-c-2x", + "ba-b-0x", "ba-b-1x", "ba-b-2x", + "ba-c-0x", "ba-c-1x", "ba-c-2x" ] checkConduitPure conduit inList expList = do let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList