diff --git a/src/Data/Conduit/SmartSource.hs b/src/Data/Conduit/SmartSource.hs index e75449b..5fa34c2 100644 --- a/src/Data/Conduit/SmartSource.hs +++ b/src/Data/Conduit/SmartSource.hs @@ -11,32 +11,66 @@ import System.FilePath import Control.Monad.Trans.Resource (MonadResource) import Data.Conduit import qualified Data.ByteString as S -import Data.Conduit.Binary (sourceFile) +import Data.Conduit.Binary (sourceFile, sourceHandle) import Network.HTTP.Conduit import Control.Monad.IO.Class import Control.Monad.Trans.Class (lift) +import qualified System.Directory as D +import System.Posix +import System.FilePath +import System.IO (stdin) +import Control.Monad ((<=<), filterM) -data SmartSpec = NoSpec - | Stdin - | FileNameSpec FilePath +data SourceSpec =Stdin | FilePathSpec FilePath | Http String | Https String - | GitSpec String (Maybe FilePath) - | PossiblyGitSpec String - deriving (Eq, Show) + | GitSpec String FilePath + | NoSource + deriving (Eq, Show) ---smartSource :: (MonadIO m, MonadResource m) => [FilePath] -> Maybe FilePath -> SmartSpec -> Producer m S.ByteString -smartSource defaultDirs defaultFile spec = pureSmartSource defaultDirs spec +recoverPath :: SourceSpec -> String +recoverPath Stdin = "-" +recoverPath (FilePathSpec filePath) = filePath +recoverPath (Http url) = url +recoverPath (Https url) = url +recoverPath (GitSpec branch filePath) = branch ++ ":" ++ filePath ---pureSmartSource :: (MonadIO m, MonadResource m) => [FilePath] -> SmartSpec -> Producer m S.ByteString -pureSmartSource _ NoSpec = error "No source specification given" -pureSmartSource _ (FileNameSpec fileName) = sourceFile fileName -pureSmartSource _ (FilePathSpec fileName) = sourceFile fileName -pureSmartSource [] (PossiblyGitSpec spec) = sourceFile spec -pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir spec) ---pureSmartSource _ (Https url) = httpSource url ---pureSmartSource _ (Http url) = httpSource url +data SmartSourceError = NoFile FilePath + | NoDirectory FilePath + | NoSpecGiven + deriving (Eq, Show) + +getSmartSourceSpec :: FilePath -> FilePath -> String -> IO (Either SmartSourceError SourceSpec) +getSmartSourceSpec _ "" "" = return $ Left NoSpecGiven +getSmartSourceSpec _ _ "-" = return $ Right Stdin +getSmartSourceSpec defaultDirectory defaultFile spec + | "http://" `isPrefixOf` spec = return $ Right $ Http spec + | "https://" `isPrefixOf` spec = return $ Right $ Https spec + | otherwise = do + inDefaultDirectory <- lookForCompressedFiles (defaultDirectory spec) + isInDefaultDirectory <- D.doesFileExist inDefaultDirectory + if isInDefaultDirectory + then + return $ Right $ FilePathSpec inDefaultDirectory + else + do + isThere <- D.doesFileExist spec + if isThere + then + return $ Right $ FilePathSpec spec + else + do + isDirectoryThere <- D.doesDirectoryExist defaultDirectory + if isDirectoryThere + then + return $ Left $ NoFile inDefaultDirectory + else + return $ Left $ NoDirectory spec + +smartSource (FilePathSpec filePath) = sourceFile filePath +smartSource Stdin = sourceHandle stdin +smartSource NoSource = error $ "should not be here" -- httpSource :: MonadResource m => String -> ConduitM () S.ByteString m () -- httpSource url = do @@ -47,45 +81,6 @@ pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir s -- httpsource -- lift finalizer -parseSmartSpec :: FilePath -> SmartSpec -parseSmartSpec "" = NoSpec -parseSmartSpec "-" = Stdin -parseSmartSpec spec - | "http://" `isPrefixOf` spec = Http spec - | "https://" `isPrefixOf` spec = Https spec - | otherwise = case elemIndex ':' spec of - Just ix -> let ref = take ix spec in - if checkRefFormat ref - then - GitSpec ref (if ix == length spec - 1 - then - Nothing - else - Just $ drop (ix+1) spec) - else - fileSpec - Nothing -> if checkRefFormat spec && not ('/' `elem` spec) && not ('.' `elem` spec) - then - PossiblyGitSpec spec - else - fileSpec - where fileSpec = (if '/' `elem` spec then FilePathSpec else FileNameSpec) spec - -parseSmartSpecInContext :: [FilePath] -> Maybe FilePath -> String -> Maybe SmartSpec -parseSmartSpecInContext defaultDirs defaultFile spec = parseSmartSpecInContext' defaultDirs defaultFile $ parseSmartSpec spec - where parseSmartSpecInContext' _ Nothing NoSpec = Nothing - parseSmartSpecInContext' [] (Just defaultFile) NoSpec = Just $ FileNameSpec defaultFile - parseSmartSpecInContext' (firstDir:_) (Just defaultFile) NoSpec = Just $ FilePathSpec (firstDir defaultFile) - - parseSmartSpecInContext' (firstDir:_) _ (FileNameSpec fileName) = Just $ FilePathSpec (firstDir fileName) - - parseSmartSpecInContext' _ Nothing (GitSpec branch Nothing) = Nothing - parseSmartSpecInContext' [] (Just defaultFile) (GitSpec branch Nothing) = Just $ GitSpec branch $ Just defaultFile - parseSmartSpecInContext' (firstDir:_) (Just defaultFile) (GitSpec branch Nothing) - = Just $ GitSpec branch $ Just (firstDir defaultFile) - - parseSmartSpecInContext' _ _ parsedSpec = Just parsedSpec - checkRefFormat :: String -> Bool checkRefFormat ref = not ("./" `isInfixOf` ref) && @@ -106,3 +101,21 @@ checkRefFormat ref = isUnwantedChar '\\' = True isUnwantedChar '\177' = True isUnwantedChar c = ord c < 32 + +lookForCompressedFiles :: FilePath -> IO FilePath +lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"] + +lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath +lookForAlternativeFiles suffixes filePath + | takeExtension filePath `Prelude.elem` suffixes = return filePath + | otherwise = do + fileIsThere <- D.doesFileExist filePath + if fileIsThere + then + return filePath + else + do + found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes + return $ case found of + [fp] -> fp + _ -> filePath diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 5d4719c..2c2aecb 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -34,8 +34,7 @@ module GEval.Core EvaluationContext(..), ParserSpec(..), fileAsLineSource, - checkAndGetFiles, - getOutFile + checkAndGetFiles ) where import Data.Conduit @@ -158,6 +157,10 @@ getMetricOrdering Likelihood = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter getMetricOrdering BIOF1Labels = TheHigherTheBetter +isInputNeeded :: Metric -> Bool +isInputNeeded CharMatch = True +isInputNeeded _ = False + defaultOutDirectory = "." defaultTestName = "test-A" defaultOutFile = "out.tsv" @@ -247,67 +250,66 @@ isEmptyFile path = do return ((fileSize stat) == 0) -data LineSource m = LineSource (Source m Text) FilePath Word32 +data LineSource m = LineSource (Source m Text) SourceSpec Word32 geval :: GEvalSpecification -> IO (MetricValue) geval gevalSpec = do - (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles gevalSpec - gevalCore metric inputFilePath expectedFilePath outFilePath + (inputSource, expectedSource, outSource) <- checkAndGetFiles False gevalSpec + gevalCore metric inputSource expectedSource outSource where metric = gesMetric gevalSpec -checkAndGetFiles :: GEvalSpecification -> IO (FilePath, FilePath, FilePath) -checkAndGetFiles gevalSpec = do - unlessM (D.doesDirectoryExist outDirectory) $ throwM $ NoOutDirectory outDirectory - unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory - unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory - unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory - inputFilePath <- lookForCompressedFiles inputFilePath' - expectedFilePath <- lookForCompressedFiles expectedFilePath' - outFilePath <- lookForCompressedFiles outFilePath' - checkInputFileIfNeeded metric inputFilePath - return (inputFilePath, expectedFilePath, outFilePath) - where expectedFilePath' = expectedTestDirectory (gesExpectedFile gevalSpec) - outFilePath' = getOutFile gevalSpec (gesOutFile gevalSpec) - inputFilePath' = expectedTestDirectory (gesInputFile gevalSpec) - expectedTestDirectory = expectedDirectory testName +checkAndGetFiles :: Bool -> GEvalSpecification -> IO (SourceSpec, SourceSpec, SourceSpec) +checkAndGetFiles forceInput gevalSpec = do + oss <- getSmartSourceSpec outTestDirectory "out.tsv" outFile + case oss of + Left NoSpecGiven -> throwM $ NoOutFile outFile + Left (NoFile fp) -> throwM $ NoOutFile fp + Left (NoDirectory d) -> do + unlessM (D.doesDirectoryExist outDirectory) $ throwM $ NoOutDirectory outDirectory + unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory + throwM $ NoOutFile outFile + Right outSource -> do + ess <- getSmartSourceSpec expectedTestDirectory "expected.tsv" expectedFile + case ess of + Left NoSpecGiven -> throwM $ NoExpectedFile expectedFile + Left (NoFile fp) -> throwM $ NoExpectedFile fp + Left (NoDirectory d) -> do + unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory + unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory + throwM $ NoExpectedDirectory d + Right expectedSource -> do + -- in most cases inputSource is NoSource (unless needed by a metric or in the line-by-line mode) + inputSource <- getInputSourceIfNeeded forceInput metric expectedTestDirectory inputFile + return (inputSource, expectedSource, outSource) + where expectedTestDirectory = expectedDirectory testName outTestDirectory = outDirectory testName expectedDirectory = getExpectedDirectory gevalSpec outDirectory = gesOutDirectory gevalSpec testName = gesTestName gevalSpec + outFile = gesOutFile gevalSpec + expectedFile = gesExpectedFile gevalSpec + inputFile = gesInputFile gevalSpec metric = gesMetric gevalSpec -lookForCompressedFiles :: FilePath -> IO FilePath -lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"] - -lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath -lookForAlternativeFiles suffixes filePath - | takeExtension filePath `Prelude.elem` suffixes = return filePath - | otherwise = do - fileIsThere <- D.doesFileExist filePath - if fileIsThere - then - return filePath - else - do - found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes - return $ case found of - [fp] -> fp - _ -> filePath - getOutFile :: GEvalSpecification -> FilePath -> FilePath getOutFile gevalSpec out = outDirectory testName out where outDirectory = gesOutDirectory gevalSpec testName = gesTestName gevalSpec -checkInputFileIfNeeded :: Metric -> FilePath -> IO () -checkInputFileIfNeeded CharMatch inputFilePath = do - unlessM (D.doesFileExist inputFilePath) $ throwM $ NoInputFile inputFilePath - return () -checkInputFileIfNeeded _ _ = return () +getInputSourceIfNeeded :: Bool -> Metric -> FilePath -> FilePath -> IO SourceSpec +getInputSourceIfNeeded forced metric directory inputFilePath + | forced || (isInputNeeded metric) = do + iss <- getSmartSourceSpec directory "in.tsv" inputFilePath + case iss of + Left NoSpecGiven -> throwM $ NoInputFile inputFilePath + Left (NoFile fp) -> throwM $ NoInputFile fp + Left (NoDirectory _) -> throwM $ NoInputFile inputFilePath + Right sourceSpec -> return sourceSpec + | otherwise = return NoSource -fileAsLineSource :: FilePath -> LineSource (ResourceT IO) -fileAsLineSource filePath = - LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= autoDecompress $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1 +fileAsLineSource :: SourceSpec -> LineSource (ResourceT IO) +fileAsLineSource spec = + LineSource ((smartSource spec) $= autoDecompress $= CT.decodeUtf8Lenient =$= CT.lines) spec 1 gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue) gevalCoreOnSingleLines metric inpLine expLine outLine = @@ -316,18 +318,20 @@ gevalCoreOnSingleLines metric inpLine expLine outLine = (singleLineAsLineSource outLine) singleLineAsLineSource :: LineInFile -> LineSource (ResourceT IO) -singleLineAsLineSource (LineInFile filePath lineNo line) = - LineSource (CL.sourceList [line]) filePath lineNo +singleLineAsLineSource (LineInFile sourceSpec lineNo line) = + LineSource (CL.sourceList [line]) sourceSpec lineNo -gevalCore :: Metric -> String -> String -> String -> IO (MetricValue) -gevalCore metric inputFilePath expectedFilePath outFilePath = do - unlessM (D.doesFileExist expectedFilePath) $ throwM $ NoExpectedFile expectedFilePath - unlessM (D.doesFileExist outFilePath) $ throwM $ NoOutFile outFilePath - whenM (isEmptyFile outFilePath) $ throwM $ EmptyOutput +gevalCore :: Metric -> SourceSpec -> SourceSpec -> SourceSpec -> IO (MetricValue) +gevalCore metric inputSource expectedSource outSource = do + whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput gevalCoreOnSources metric - (fileAsLineSource inputFilePath) - (fileAsLineSource expectedFilePath) - (fileAsLineSource outFilePath) + (fileAsLineSource inputSource) + (fileAsLineSource expectedSource) + (fileAsLineSource outSource) + +isEmptyFileSource :: SourceSpec -> IO Bool +isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath +isEmptyFileSource _ = return False logLossToLikehood logLoss = exp (-logLoss) @@ -351,7 +355,7 @@ gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLi gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do gevalCore' metric inputLineSource expectedLineSource outLineSource -data LineInFile = LineInFile FilePath Word32 Text +data LineInFile = LineInFile SourceSpec Word32 Text gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue) gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id @@ -501,8 +505,8 @@ class EvaluationContext ctxt m where data ParsedRecord ctxt :: * recordSource :: ctxt -> ParserSpec ctxt -> Source (ResourceT m) (WrappedParsedRecord ctxt) getFirstLineNo :: Proxy m -> ctxt -> Word32 - getExpectedFilePath :: ctxt -> String - getOutFilePath :: ctxt -> String + getExpectedSource :: ctxt -> SourceSpec + getOutSource :: ctxt -> SourceSpec checkStep :: Proxy m -> ((Word32, ParsedRecord ctxt) -> c) -> (Word32, WrappedParsedRecord ctxt) -> Maybe c checkStepM :: ((Word32, ParsedRecord ctxt) -> (ResourceT m) c) -> (Word32, WrappedParsedRecord ctxt) -> (ResourceT m) (Maybe c) @@ -513,8 +517,8 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (Withou data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o) data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o getFirstLineNo _ (WithoutInput _ (LineSource _ _ lineNo)) = lineNo - getExpectedFilePath (WithoutInput (LineSource _ expectedFilePath _) _) = expectedFilePath - getOutFilePath (WithoutInput _ (LineSource _ outFilePath _)) = outFilePath + getExpectedSource (WithoutInput (LineSource _ expectedSource _) _) = expectedSource + getOutSource (WithoutInput _ (LineSource _ outSource _)) = outSource recordSource (WithoutInput expectedLineSource outLineSource) (ParserSpecWithoutInput expParser outParser) = getZipSource $ WrappedParsedRecordWithoutInput <$> ZipSource (items expectedLineSource expParser) <*> ZipSource (items outLineSource outParser) @@ -542,8 +546,8 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithIn data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o) data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o getFirstLineNo _ (WithInput _ _ (LineSource _ _ lineNo)) = lineNo - getExpectedFilePath (WithInput _ (LineSource _ expectedFilePath _) _) = expectedFilePath - getOutFilePath (WithInput _ _ (LineSource _ outFilePath _)) = outFilePath + getExpectedSource (WithInput _ (LineSource _ expectedSource _) _) = expectedSource + getOutSource (WithInput _ _ (LineSource _ outSource _)) = outSource recordSource (WithInput inputLineSource expectedLineSource outLineSource) (ParserSpecWithInput inpParser expParser outParser) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z) <$> ZipSource (items inputLineSource inpParser) <*> (ZipSource $ getZipSource $ (,) <$> ZipSource (items expectedLineSource expParser) diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index 74f053b..08667d5 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -35,6 +35,10 @@ import Data.Word import Text.Printf +import Data.Conduit.SmartSource + +import System.FilePath + data LineRecord = LineRecord Text Text Text Word32 MetricValue deriving (Eq, Show) @@ -52,7 +56,7 @@ runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a runLineByLineGeneralized ordering spec consum = do - (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec + (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles True spec gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum) where metric = gesMetric spec sorter KeepTheOriginalOrder = doNothing @@ -84,14 +88,19 @@ runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consu runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a runDiffGeneralized ordering otherOut spec consum = do - let otherOutFilePath = getOutFile spec otherOut - (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec - let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath - let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath - runResourceT $ runConduit $ - ((getZipSource $ (,) - <$> ZipSource sourceA - <*> ZipSource sourceB) .| sorter ordering .| consum) + (inputSource, expectedSource, outSource) <- checkAndGetFiles True spec + ooss <- getSmartSourceSpec ((gesOutDirectory spec) (gesTestName spec)) "out.tsv" otherOut + case ooss of + Left NoSpecGiven -> throwM $ NoOutFile otherOut + Left (NoFile fp) -> throwM $ NoOutFile fp + Left (NoDirectory d) -> throwM $ NoOutFile otherOut + Right otherOutSource -> do + let sourceA = gevalLineByLineSource metric inputSource expectedSource outSource + let sourceB = gevalLineByLineSource metric inputSource expectedSource otherOutSource + runResourceT $ runConduit $ + ((getZipSource $ (,) + <$> ZipSource sourceA + <*> ZipSource sourceB) .| sorter ordering .| consum) where metric = gesMetric spec sorter KeepTheOriginalOrder = doNothing sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric)) @@ -106,24 +115,24 @@ runDiffGeneralized ordering otherOut spec consum = do escapeTabs :: Text -> Text escapeTabs = Data.Text.replace "\t" "" -gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) a -> IO a -gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum = +gevalLineByLineCore :: Metric -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT LineRecord Void (ResourceT IO) a -> IO a +gevalLineByLineCore metric inputSource expectedSource outSource consum = runResourceT $ runConduit $ - ((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum) + ((gevalLineByLineSource metric inputSource expectedSource outSource) .| consum) -gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT () LineRecord (ResourceT IO) () -gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath = +gevalLineByLineSource :: Metric -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT () LineRecord (ResourceT IO) () +gevalLineByLineSource metric inputSource expectedSource outSource = (getZipSource $ (,) <$> ZipSource (CL.sourceList [1..]) <*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id)) context = (WithInput inputLineSource expectedLineSource outputLineSource) - inputLineSource = fileAsLineSource inputFilePath - expectedLineSource = fileAsLineSource expectedFilePath - outputLineSource = fileAsLineSource outFilePath + inputLineSource = fileAsLineSource inputSource + expectedLineSource = fileAsLineSource expectedSource + outputLineSource = fileAsLineSource outSource justLine (LineInFile _ _ l) = l evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do - s <- liftIO $ gevalCoreOnSingleLines metric (LineInFile inputFilePath lineNo inp) - (LineInFile expectedFilePath lineNo exp) - (LineInFile outFilePath lineNo out) + s <- liftIO $ gevalCoreOnSingleLines metric (LineInFile inputSource lineNo inp) + (LineInFile expectedSource lineNo exp) + (LineInFile outSource lineNo out) return $ LineRecord inp exp out lineNo s diff --git a/test/Spec.hs b/test/Spec.hs index 6ddf120..9658a4e 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -207,9 +207,9 @@ main = hspec $ do runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866 describe "evaluating single lines" $ do it "RMSE" $ do - gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla") - (LineInFile "stub2" 1 "3.4") - (LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8 + gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla") + (LineInFile (FilePathSpec "stub2") 1 "3.4") + (LineInFile (FilePathSpec "stub3") 1 "2.6") `shouldReturnAlmost` 0.8 describe "BIO format" $ do it "just parse" $ do let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name" @@ -302,36 +302,20 @@ main = hspec $ do it "accuracy instead of log loss" $ do runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4 describe "smart sources" $ do - it "smart specs are parsed" $ do - parseSmartSpec "" `shouldBe` NoSpec - parseSmartSpec "-" `shouldBe` Stdin - parseSmartSpec "http://gonito.net/foo" `shouldBe` Http "http://gonito.net/foo" - parseSmartSpec "https://gonito.net" `shouldBe` Https "https://gonito.net" - parseSmartSpec "branch:" `shouldBe` GitSpec "branch" Nothing - parseSmartSpec "37be:foo/bar.tsv" `shouldBe` GitSpec "37be" (Just "foo/bar.tsv") - parseSmartSpec "bla/xyz:foo/bar.tsv" `shouldBe` GitSpec "bla/xyz" (Just "foo/bar.tsv") - parseSmartSpec "out.tsv" `shouldBe` FileNameSpec "out.tsv" - parseSmartSpec "dev-1/out.tsv" `shouldBe` FilePathSpec "dev-1/out.tsv" - parseSmartSpec "../out.tsv" `shouldBe` FilePathSpec "../out.tsv" - parseSmartSpec "4a5f" `shouldBe` PossiblyGitSpec "4a5f" - parseSmartSpec "!!" `shouldBe` PossiblyGitSpec "!!" - parseSmartSpec "branch" `shouldBe` PossiblyGitSpec "branch" - it "smart specs are parsed in context" $ do - parseSmartSpecInContext [] Nothing "xyz" `shouldBe` Just (PossiblyGitSpec "xyz") - parseSmartSpecInContext ["foo", "bar"] Nothing "out.tsv" `shouldBe` Just (FilePathSpec "foo/out.tsv") - parseSmartSpecInContext [] (Just "default") "" `shouldBe` Just (FileNameSpec "default") - parseSmartSpecInContext ["foo"] (Just "default") "" `shouldBe` Just (FilePathSpec "foo/default") - parseSmartSpecInContext ["foo/bar"] (Just "default") "http://gonito.net" `shouldBe` Just (Http "http://gonito.net") - parseSmartSpecInContext ["foo/bar"] Nothing "" `shouldBe` Nothing + it "smart specs are obtained" $ do + getSmartSourceSpec "foo" "" "" `shouldReturn` Left NoSpecGiven + getSmartSourceSpec "foo" "out.tsv" "-" `shouldReturn` Right Stdin + getSmartSourceSpec "foo" "out.sv" "http://gonito.net/foo" `shouldReturn` (Right $ Http "http://gonito.net/foo") + getSmartSourceSpec "foo" "in.tsv" "https://gonito.net" `shouldReturn` (Right $ Https "https://gonito.net") it "sources are accessed" $ do - readFromSmartSource [] Nothing "test/files/foo.txt" `shouldReturn` ["foo\n"] + readFromSmartSource "baz" "out.tsv" "test/files/foo.txt" `shouldReturn` ["foo\n"] -- readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn` -- ["User-agent: *\nDisallow: /deny\n"] -readFromSmartSource :: [FilePath] -> Maybe FilePath -> String -> IO [String] -readFromSmartSource defaultDirs defaultFile specS = do - let (Just spec) = parseSmartSpecInContext defaultDirs defaultFile specS - let source = smartSource defaultDirs defaultFile spec +readFromSmartSource :: FilePath -> FilePath -> String -> IO [String] +readFromSmartSource defaultDir defaultFile specS = do + (Right spec) <- getSmartSourceSpec defaultDir defaultFile specS + let source = smartSource spec contents <- runResourceT (source $$ CT.decodeUtf8Lenient =$ CL.consume) return $ Prelude.map unpack contents