diff --git a/geval.cabal b/geval.cabal index 40dfead..42ceba1 100644 --- a/geval.cabal +++ b/geval.cabal @@ -27,6 +27,7 @@ library , GEval.CharMatch , GEval.LineByLine , GEval.BIO + , GEval.ParseParams , Data.Conduit.AutoDecompress , Data.Conduit.SmartSource , Paths_geval @@ -62,6 +63,7 @@ library , lzma-conduit , Glob , naturalcomp + , containers default-language: Haskell2010 executable geval @@ -91,6 +93,7 @@ test-suite geval-test , conduit , conduit-extra , conduit + , containers ghc-options: -threaded -rtsopts -with-rtsopts=-N default-language: Haskell2010 diff --git a/src/GEval/ParseParams.hs b/src/GEval/ParseParams.hs new file mode 100644 index 0000000..036e568 --- /dev/null +++ b/src/GEval/ParseParams.hs @@ -0,0 +1,40 @@ +{-# LANGUAGE OverloadedStrings #-} + +module GEval.ParseParams(parseParamsFromFilePath,OutputFileParsed(..)) + where + +import Data.Map.Strict as M +import Data.Text +import Data.Attoparsec.Text + +import System.FilePath + +data OutputFileParsed = OutputFileParsed String (M.Map Text Text) + deriving (Eq, Show) + +parseParamsFromFilePath :: FilePath -> OutputFileParsed +parseParamsFromFilePath filePath = parseParamsFromBaseName fileBaseName + where fileBaseName = dropExtensions $ takeBaseName filePath + +parseParamsFromBaseName :: FilePath -> OutputFileParsed +parseParamsFromBaseName baseName = case parseOnly (parser <* endOfInput) (pack baseName) of + (Right v) -> v + (Left _) -> OutputFileParsed baseName M.empty + +parser :: Parser OutputFileParsed +parser = do + coreName <- many1 $ notChar '-' + "-" + paramList <- parseParamList + return $ OutputFileParsed coreName (M.fromList paramList) + + +parseParamList :: Parser [(Text, Text)] +parseParamList = parseParam `sepBy` (char ',') + +parseParam :: Parser (Text, Text) +parseParam = do + param <- many1 $ satisfy (\c -> c /= '=' && c /= ',') + "=" + val <- many1 $ notChar ',' + pure $ (strip $ pack param, strip $ pack val) diff --git a/test/Spec.hs b/test/Spec.hs index 7a71977..061602f 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -10,11 +10,14 @@ import GEval.PrecisionRecall import GEval.ClusteringMetrics import GEval.BIO import GEval.LineByLine +import GEval.ParseParams import Data.Attoparsec.Text import Options.Applicative import Data.Text import Text.EditDistance +import Data.Map.Strict + import Data.Conduit.List (consume) import qualified Test.HUnit as HU @@ -315,6 +318,24 @@ main = hspec $ do readFromSmartSource "baz" "out.tsv" "test/files/foo.txt" `shouldReturn` ["foo\n"] readFromSmartSource "" "" "https://httpbin.org/robots.txt" `shouldReturn` ["User-agent: *\nDisallow: /deny\n"] + describe "parse model params from filenames" $ do + it "no params 1" $ do + parseParamsFromFilePath "out.tsv" `shouldBe` OutputFileParsed "out" Data.Map.Strict.empty + it "no params 2" $ do + parseParamsFromFilePath "out.tsv.xz" `shouldBe` OutputFileParsed "out" Data.Map.Strict.empty + it "no params 3" $ do + parseParamsFromFilePath "out-test-foo_bar.tsv" `shouldBe` OutputFileParsed "out-test-foo_bar" Data.Map.Strict.empty + it "one parameter" $ do + parseParamsFromFilePath "out-nb_epochs=123.tsv" `shouldBe` + OutputFileParsed "out" (Data.Map.Strict.fromList [("nb_epochs", "123")]) + it "complex" $ do + parseParamsFromFilePath "out-nb_epochs = 12,foo=off, bar-baz =10.tsv" `shouldBe` + OutputFileParsed "out" (Data.Map.Strict.fromList [("nb_epochs", "12"), + ("foo", "off"), + ("bar-baz", "10")]) + + + readFromSmartSource :: FilePath -> FilePath -> String -> IO [String] readFromSmartSource defaultDir defaultFile specS = do