switch to smart sources
This commit is contained in:
parent
18ed47322e
commit
57ee8a1296
@ -11,32 +11,66 @@ import System.FilePath
|
|||||||
import Control.Monad.Trans.Resource (MonadResource)
|
import Control.Monad.Trans.Resource (MonadResource)
|
||||||
import Data.Conduit
|
import Data.Conduit
|
||||||
import qualified Data.ByteString as S
|
import qualified Data.ByteString as S
|
||||||
import Data.Conduit.Binary (sourceFile)
|
import Data.Conduit.Binary (sourceFile, sourceHandle)
|
||||||
import Network.HTTP.Conduit
|
import Network.HTTP.Conduit
|
||||||
import Control.Monad.IO.Class
|
import Control.Monad.IO.Class
|
||||||
import Control.Monad.Trans.Class (lift)
|
import Control.Monad.Trans.Class (lift)
|
||||||
|
import qualified System.Directory as D
|
||||||
|
import System.Posix
|
||||||
|
import System.FilePath
|
||||||
|
import System.IO (stdin)
|
||||||
|
import Control.Monad ((<=<), filterM)
|
||||||
|
|
||||||
data SmartSpec = NoSpec
|
data SourceSpec =Stdin
|
||||||
| Stdin
|
|
||||||
| FileNameSpec FilePath
|
|
||||||
| FilePathSpec FilePath
|
| FilePathSpec FilePath
|
||||||
| Http String
|
| Http String
|
||||||
| Https String
|
| Https String
|
||||||
| GitSpec String (Maybe FilePath)
|
| GitSpec String FilePath
|
||||||
| PossiblyGitSpec String
|
| NoSource
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show)
|
||||||
|
|
||||||
--smartSource :: (MonadIO m, MonadResource m) => [FilePath] -> Maybe FilePath -> SmartSpec -> Producer m S.ByteString
|
recoverPath :: SourceSpec -> String
|
||||||
smartSource defaultDirs defaultFile spec = pureSmartSource defaultDirs spec
|
recoverPath Stdin = "-"
|
||||||
|
recoverPath (FilePathSpec filePath) = filePath
|
||||||
|
recoverPath (Http url) = url
|
||||||
|
recoverPath (Https url) = url
|
||||||
|
recoverPath (GitSpec branch filePath) = branch ++ ":" ++ filePath
|
||||||
|
|
||||||
--pureSmartSource :: (MonadIO m, MonadResource m) => [FilePath] -> SmartSpec -> Producer m S.ByteString
|
data SmartSourceError = NoFile FilePath
|
||||||
pureSmartSource _ NoSpec = error "No source specification given"
|
| NoDirectory FilePath
|
||||||
pureSmartSource _ (FileNameSpec fileName) = sourceFile fileName
|
| NoSpecGiven
|
||||||
pureSmartSource _ (FilePathSpec fileName) = sourceFile fileName
|
deriving (Eq, Show)
|
||||||
pureSmartSource [] (PossiblyGitSpec spec) = sourceFile spec
|
|
||||||
pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir </> spec)
|
getSmartSourceSpec :: FilePath -> FilePath -> String -> IO (Either SmartSourceError SourceSpec)
|
||||||
--pureSmartSource _ (Https url) = httpSource url
|
getSmartSourceSpec _ "" "" = return $ Left NoSpecGiven
|
||||||
--pureSmartSource _ (Http url) = httpSource url
|
getSmartSourceSpec _ _ "-" = return $ Right Stdin
|
||||||
|
getSmartSourceSpec defaultDirectory defaultFile spec
|
||||||
|
| "http://" `isPrefixOf` spec = return $ Right $ Http spec
|
||||||
|
| "https://" `isPrefixOf` spec = return $ Right $ Https spec
|
||||||
|
| otherwise = do
|
||||||
|
inDefaultDirectory <- lookForCompressedFiles (defaultDirectory </> spec)
|
||||||
|
isInDefaultDirectory <- D.doesFileExist inDefaultDirectory
|
||||||
|
if isInDefaultDirectory
|
||||||
|
then
|
||||||
|
return $ Right $ FilePathSpec inDefaultDirectory
|
||||||
|
else
|
||||||
|
do
|
||||||
|
isThere <- D.doesFileExist spec
|
||||||
|
if isThere
|
||||||
|
then
|
||||||
|
return $ Right $ FilePathSpec spec
|
||||||
|
else
|
||||||
|
do
|
||||||
|
isDirectoryThere <- D.doesDirectoryExist defaultDirectory
|
||||||
|
if isDirectoryThere
|
||||||
|
then
|
||||||
|
return $ Left $ NoFile inDefaultDirectory
|
||||||
|
else
|
||||||
|
return $ Left $ NoDirectory spec
|
||||||
|
|
||||||
|
smartSource (FilePathSpec filePath) = sourceFile filePath
|
||||||
|
smartSource Stdin = sourceHandle stdin
|
||||||
|
smartSource NoSource = error $ "should not be here"
|
||||||
|
|
||||||
-- httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
|
-- httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
|
||||||
-- httpSource url = do
|
-- httpSource url = do
|
||||||
@ -47,45 +81,6 @@ pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir </> s
|
|||||||
-- httpsource
|
-- httpsource
|
||||||
-- lift finalizer
|
-- lift finalizer
|
||||||
|
|
||||||
parseSmartSpec :: FilePath -> SmartSpec
|
|
||||||
parseSmartSpec "" = NoSpec
|
|
||||||
parseSmartSpec "-" = Stdin
|
|
||||||
parseSmartSpec spec
|
|
||||||
| "http://" `isPrefixOf` spec = Http spec
|
|
||||||
| "https://" `isPrefixOf` spec = Https spec
|
|
||||||
| otherwise = case elemIndex ':' spec of
|
|
||||||
Just ix -> let ref = take ix spec in
|
|
||||||
if checkRefFormat ref
|
|
||||||
then
|
|
||||||
GitSpec ref (if ix == length spec - 1
|
|
||||||
then
|
|
||||||
Nothing
|
|
||||||
else
|
|
||||||
Just $ drop (ix+1) spec)
|
|
||||||
else
|
|
||||||
fileSpec
|
|
||||||
Nothing -> if checkRefFormat spec && not ('/' `elem` spec) && not ('.' `elem` spec)
|
|
||||||
then
|
|
||||||
PossiblyGitSpec spec
|
|
||||||
else
|
|
||||||
fileSpec
|
|
||||||
where fileSpec = (if '/' `elem` spec then FilePathSpec else FileNameSpec) spec
|
|
||||||
|
|
||||||
parseSmartSpecInContext :: [FilePath] -> Maybe FilePath -> String -> Maybe SmartSpec
|
|
||||||
parseSmartSpecInContext defaultDirs defaultFile spec = parseSmartSpecInContext' defaultDirs defaultFile $ parseSmartSpec spec
|
|
||||||
where parseSmartSpecInContext' _ Nothing NoSpec = Nothing
|
|
||||||
parseSmartSpecInContext' [] (Just defaultFile) NoSpec = Just $ FileNameSpec defaultFile
|
|
||||||
parseSmartSpecInContext' (firstDir:_) (Just defaultFile) NoSpec = Just $ FilePathSpec (firstDir </> defaultFile)
|
|
||||||
|
|
||||||
parseSmartSpecInContext' (firstDir:_) _ (FileNameSpec fileName) = Just $ FilePathSpec (firstDir </> fileName)
|
|
||||||
|
|
||||||
parseSmartSpecInContext' _ Nothing (GitSpec branch Nothing) = Nothing
|
|
||||||
parseSmartSpecInContext' [] (Just defaultFile) (GitSpec branch Nothing) = Just $ GitSpec branch $ Just defaultFile
|
|
||||||
parseSmartSpecInContext' (firstDir:_) (Just defaultFile) (GitSpec branch Nothing)
|
|
||||||
= Just $ GitSpec branch $ Just (firstDir </> defaultFile)
|
|
||||||
|
|
||||||
parseSmartSpecInContext' _ _ parsedSpec = Just parsedSpec
|
|
||||||
|
|
||||||
checkRefFormat :: String -> Bool
|
checkRefFormat :: String -> Bool
|
||||||
checkRefFormat ref =
|
checkRefFormat ref =
|
||||||
not ("./" `isInfixOf` ref) &&
|
not ("./" `isInfixOf` ref) &&
|
||||||
@ -106,3 +101,21 @@ checkRefFormat ref =
|
|||||||
isUnwantedChar '\\' = True
|
isUnwantedChar '\\' = True
|
||||||
isUnwantedChar '\177' = True
|
isUnwantedChar '\177' = True
|
||||||
isUnwantedChar c = ord c < 32
|
isUnwantedChar c = ord c < 32
|
||||||
|
|
||||||
|
lookForCompressedFiles :: FilePath -> IO FilePath
|
||||||
|
lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"]
|
||||||
|
|
||||||
|
lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath
|
||||||
|
lookForAlternativeFiles suffixes filePath
|
||||||
|
| takeExtension filePath `Prelude.elem` suffixes = return filePath
|
||||||
|
| otherwise = do
|
||||||
|
fileIsThere <- D.doesFileExist filePath
|
||||||
|
if fileIsThere
|
||||||
|
then
|
||||||
|
return filePath
|
||||||
|
else
|
||||||
|
do
|
||||||
|
found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes
|
||||||
|
return $ case found of
|
||||||
|
[fp] -> fp
|
||||||
|
_ -> filePath
|
||||||
|
@ -34,8 +34,7 @@ module GEval.Core
|
|||||||
EvaluationContext(..),
|
EvaluationContext(..),
|
||||||
ParserSpec(..),
|
ParserSpec(..),
|
||||||
fileAsLineSource,
|
fileAsLineSource,
|
||||||
checkAndGetFiles,
|
checkAndGetFiles
|
||||||
getOutFile
|
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.Conduit
|
import Data.Conduit
|
||||||
@ -158,6 +157,10 @@ getMetricOrdering Likelihood = TheHigherTheBetter
|
|||||||
getMetricOrdering BIOF1 = TheHigherTheBetter
|
getMetricOrdering BIOF1 = TheHigherTheBetter
|
||||||
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
||||||
|
|
||||||
|
isInputNeeded :: Metric -> Bool
|
||||||
|
isInputNeeded CharMatch = True
|
||||||
|
isInputNeeded _ = False
|
||||||
|
|
||||||
defaultOutDirectory = "."
|
defaultOutDirectory = "."
|
||||||
defaultTestName = "test-A"
|
defaultTestName = "test-A"
|
||||||
defaultOutFile = "out.tsv"
|
defaultOutFile = "out.tsv"
|
||||||
@ -247,67 +250,66 @@ isEmptyFile path = do
|
|||||||
return ((fileSize stat) == 0)
|
return ((fileSize stat) == 0)
|
||||||
|
|
||||||
|
|
||||||
data LineSource m = LineSource (Source m Text) FilePath Word32
|
data LineSource m = LineSource (Source m Text) SourceSpec Word32
|
||||||
|
|
||||||
geval :: GEvalSpecification -> IO (MetricValue)
|
geval :: GEvalSpecification -> IO (MetricValue)
|
||||||
geval gevalSpec = do
|
geval gevalSpec = do
|
||||||
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles gevalSpec
|
(inputSource, expectedSource, outSource) <- checkAndGetFiles False gevalSpec
|
||||||
gevalCore metric inputFilePath expectedFilePath outFilePath
|
gevalCore metric inputSource expectedSource outSource
|
||||||
where metric = gesMetric gevalSpec
|
where metric = gesMetric gevalSpec
|
||||||
|
|
||||||
checkAndGetFiles :: GEvalSpecification -> IO (FilePath, FilePath, FilePath)
|
checkAndGetFiles :: Bool -> GEvalSpecification -> IO (SourceSpec, SourceSpec, SourceSpec)
|
||||||
checkAndGetFiles gevalSpec = do
|
checkAndGetFiles forceInput gevalSpec = do
|
||||||
|
oss <- getSmartSourceSpec outTestDirectory "out.tsv" outFile
|
||||||
|
case oss of
|
||||||
|
Left NoSpecGiven -> throwM $ NoOutFile outFile
|
||||||
|
Left (NoFile fp) -> throwM $ NoOutFile fp
|
||||||
|
Left (NoDirectory d) -> do
|
||||||
unlessM (D.doesDirectoryExist outDirectory) $ throwM $ NoOutDirectory outDirectory
|
unlessM (D.doesDirectoryExist outDirectory) $ throwM $ NoOutDirectory outDirectory
|
||||||
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
|
|
||||||
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
|
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
|
||||||
|
throwM $ NoOutFile outFile
|
||||||
|
Right outSource -> do
|
||||||
|
ess <- getSmartSourceSpec expectedTestDirectory "expected.tsv" expectedFile
|
||||||
|
case ess of
|
||||||
|
Left NoSpecGiven -> throwM $ NoExpectedFile expectedFile
|
||||||
|
Left (NoFile fp) -> throwM $ NoExpectedFile fp
|
||||||
|
Left (NoDirectory d) -> do
|
||||||
|
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
|
||||||
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
|
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
|
||||||
inputFilePath <- lookForCompressedFiles inputFilePath'
|
throwM $ NoExpectedDirectory d
|
||||||
expectedFilePath <- lookForCompressedFiles expectedFilePath'
|
Right expectedSource -> do
|
||||||
outFilePath <- lookForCompressedFiles outFilePath'
|
-- in most cases inputSource is NoSource (unless needed by a metric or in the line-by-line mode)
|
||||||
checkInputFileIfNeeded metric inputFilePath
|
inputSource <- getInputSourceIfNeeded forceInput metric expectedTestDirectory inputFile
|
||||||
return (inputFilePath, expectedFilePath, outFilePath)
|
return (inputSource, expectedSource, outSource)
|
||||||
where expectedFilePath' = expectedTestDirectory </> (gesExpectedFile gevalSpec)
|
where expectedTestDirectory = expectedDirectory </> testName
|
||||||
outFilePath' = getOutFile gevalSpec (gesOutFile gevalSpec)
|
|
||||||
inputFilePath' = expectedTestDirectory </> (gesInputFile gevalSpec)
|
|
||||||
expectedTestDirectory = expectedDirectory </> testName
|
|
||||||
outTestDirectory = outDirectory </> testName
|
outTestDirectory = outDirectory </> testName
|
||||||
expectedDirectory = getExpectedDirectory gevalSpec
|
expectedDirectory = getExpectedDirectory gevalSpec
|
||||||
outDirectory = gesOutDirectory gevalSpec
|
outDirectory = gesOutDirectory gevalSpec
|
||||||
testName = gesTestName gevalSpec
|
testName = gesTestName gevalSpec
|
||||||
|
outFile = gesOutFile gevalSpec
|
||||||
|
expectedFile = gesExpectedFile gevalSpec
|
||||||
|
inputFile = gesInputFile gevalSpec
|
||||||
metric = gesMetric gevalSpec
|
metric = gesMetric gevalSpec
|
||||||
|
|
||||||
lookForCompressedFiles :: FilePath -> IO FilePath
|
|
||||||
lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"]
|
|
||||||
|
|
||||||
lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath
|
|
||||||
lookForAlternativeFiles suffixes filePath
|
|
||||||
| takeExtension filePath `Prelude.elem` suffixes = return filePath
|
|
||||||
| otherwise = do
|
|
||||||
fileIsThere <- D.doesFileExist filePath
|
|
||||||
if fileIsThere
|
|
||||||
then
|
|
||||||
return filePath
|
|
||||||
else
|
|
||||||
do
|
|
||||||
found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes
|
|
||||||
return $ case found of
|
|
||||||
[fp] -> fp
|
|
||||||
_ -> filePath
|
|
||||||
|
|
||||||
getOutFile :: GEvalSpecification -> FilePath -> FilePath
|
getOutFile :: GEvalSpecification -> FilePath -> FilePath
|
||||||
getOutFile gevalSpec out = outDirectory </> testName </> out
|
getOutFile gevalSpec out = outDirectory </> testName </> out
|
||||||
where outDirectory = gesOutDirectory gevalSpec
|
where outDirectory = gesOutDirectory gevalSpec
|
||||||
testName = gesTestName gevalSpec
|
testName = gesTestName gevalSpec
|
||||||
|
|
||||||
checkInputFileIfNeeded :: Metric -> FilePath -> IO ()
|
getInputSourceIfNeeded :: Bool -> Metric -> FilePath -> FilePath -> IO SourceSpec
|
||||||
checkInputFileIfNeeded CharMatch inputFilePath = do
|
getInputSourceIfNeeded forced metric directory inputFilePath
|
||||||
unlessM (D.doesFileExist inputFilePath) $ throwM $ NoInputFile inputFilePath
|
| forced || (isInputNeeded metric) = do
|
||||||
return ()
|
iss <- getSmartSourceSpec directory "in.tsv" inputFilePath
|
||||||
checkInputFileIfNeeded _ _ = return ()
|
case iss of
|
||||||
|
Left NoSpecGiven -> throwM $ NoInputFile inputFilePath
|
||||||
|
Left (NoFile fp) -> throwM $ NoInputFile fp
|
||||||
|
Left (NoDirectory _) -> throwM $ NoInputFile inputFilePath
|
||||||
|
Right sourceSpec -> return sourceSpec
|
||||||
|
| otherwise = return NoSource
|
||||||
|
|
||||||
fileAsLineSource :: FilePath -> LineSource (ResourceT IO)
|
fileAsLineSource :: SourceSpec -> LineSource (ResourceT IO)
|
||||||
fileAsLineSource filePath =
|
fileAsLineSource spec =
|
||||||
LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= autoDecompress $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
|
LineSource ((smartSource spec) $= autoDecompress $= CT.decodeUtf8Lenient =$= CT.lines) spec 1
|
||||||
|
|
||||||
gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
|
gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
|
||||||
gevalCoreOnSingleLines metric inpLine expLine outLine =
|
gevalCoreOnSingleLines metric inpLine expLine outLine =
|
||||||
@ -316,18 +318,20 @@ gevalCoreOnSingleLines metric inpLine expLine outLine =
|
|||||||
(singleLineAsLineSource outLine)
|
(singleLineAsLineSource outLine)
|
||||||
|
|
||||||
singleLineAsLineSource :: LineInFile -> LineSource (ResourceT IO)
|
singleLineAsLineSource :: LineInFile -> LineSource (ResourceT IO)
|
||||||
singleLineAsLineSource (LineInFile filePath lineNo line) =
|
singleLineAsLineSource (LineInFile sourceSpec lineNo line) =
|
||||||
LineSource (CL.sourceList [line]) filePath lineNo
|
LineSource (CL.sourceList [line]) sourceSpec lineNo
|
||||||
|
|
||||||
gevalCore :: Metric -> String -> String -> String -> IO (MetricValue)
|
gevalCore :: Metric -> SourceSpec -> SourceSpec -> SourceSpec -> IO (MetricValue)
|
||||||
gevalCore metric inputFilePath expectedFilePath outFilePath = do
|
gevalCore metric inputSource expectedSource outSource = do
|
||||||
unlessM (D.doesFileExist expectedFilePath) $ throwM $ NoExpectedFile expectedFilePath
|
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
|
||||||
unlessM (D.doesFileExist outFilePath) $ throwM $ NoOutFile outFilePath
|
|
||||||
whenM (isEmptyFile outFilePath) $ throwM $ EmptyOutput
|
|
||||||
gevalCoreOnSources metric
|
gevalCoreOnSources metric
|
||||||
(fileAsLineSource inputFilePath)
|
(fileAsLineSource inputSource)
|
||||||
(fileAsLineSource expectedFilePath)
|
(fileAsLineSource expectedSource)
|
||||||
(fileAsLineSource outFilePath)
|
(fileAsLineSource outSource)
|
||||||
|
|
||||||
|
isEmptyFileSource :: SourceSpec -> IO Bool
|
||||||
|
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
|
||||||
|
isEmptyFileSource _ = return False
|
||||||
|
|
||||||
logLossToLikehood logLoss = exp (-logLoss)
|
logLossToLikehood logLoss = exp (-logLoss)
|
||||||
|
|
||||||
@ -351,7 +355,7 @@ gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLi
|
|||||||
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
|
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
|
||||||
gevalCore' metric inputLineSource expectedLineSource outLineSource
|
gevalCore' metric inputLineSource expectedLineSource outLineSource
|
||||||
|
|
||||||
data LineInFile = LineInFile FilePath Word32 Text
|
data LineInFile = LineInFile SourceSpec Word32 Text
|
||||||
|
|
||||||
gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
|
gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
|
||||||
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
|
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
|
||||||
@ -501,8 +505,8 @@ class EvaluationContext ctxt m where
|
|||||||
data ParsedRecord ctxt :: *
|
data ParsedRecord ctxt :: *
|
||||||
recordSource :: ctxt -> ParserSpec ctxt -> Source (ResourceT m) (WrappedParsedRecord ctxt)
|
recordSource :: ctxt -> ParserSpec ctxt -> Source (ResourceT m) (WrappedParsedRecord ctxt)
|
||||||
getFirstLineNo :: Proxy m -> ctxt -> Word32
|
getFirstLineNo :: Proxy m -> ctxt -> Word32
|
||||||
getExpectedFilePath :: ctxt -> String
|
getExpectedSource :: ctxt -> SourceSpec
|
||||||
getOutFilePath :: ctxt -> String
|
getOutSource :: ctxt -> SourceSpec
|
||||||
checkStep :: Proxy m -> ((Word32, ParsedRecord ctxt) -> c) -> (Word32, WrappedParsedRecord ctxt) -> Maybe c
|
checkStep :: Proxy m -> ((Word32, ParsedRecord ctxt) -> c) -> (Word32, WrappedParsedRecord ctxt) -> Maybe c
|
||||||
checkStepM :: ((Word32, ParsedRecord ctxt) -> (ResourceT m) c) -> (Word32, WrappedParsedRecord ctxt) -> (ResourceT m) (Maybe c)
|
checkStepM :: ((Word32, ParsedRecord ctxt) -> (ResourceT m) c) -> (Word32, WrappedParsedRecord ctxt) -> (ResourceT m) (Maybe c)
|
||||||
|
|
||||||
@ -513,8 +517,8 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (Withou
|
|||||||
data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o)
|
data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o)
|
||||||
data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o
|
data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o
|
||||||
getFirstLineNo _ (WithoutInput _ (LineSource _ _ lineNo)) = lineNo
|
getFirstLineNo _ (WithoutInput _ (LineSource _ _ lineNo)) = lineNo
|
||||||
getExpectedFilePath (WithoutInput (LineSource _ expectedFilePath _) _) = expectedFilePath
|
getExpectedSource (WithoutInput (LineSource _ expectedSource _) _) = expectedSource
|
||||||
getOutFilePath (WithoutInput _ (LineSource _ outFilePath _)) = outFilePath
|
getOutSource (WithoutInput _ (LineSource _ outSource _)) = outSource
|
||||||
recordSource (WithoutInput expectedLineSource outLineSource) (ParserSpecWithoutInput expParser outParser) = getZipSource $ WrappedParsedRecordWithoutInput
|
recordSource (WithoutInput expectedLineSource outLineSource) (ParserSpecWithoutInput expParser outParser) = getZipSource $ WrappedParsedRecordWithoutInput
|
||||||
<$> ZipSource (items expectedLineSource expParser)
|
<$> ZipSource (items expectedLineSource expParser)
|
||||||
<*> ZipSource (items outLineSource outParser)
|
<*> ZipSource (items outLineSource outParser)
|
||||||
@ -542,8 +546,8 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithIn
|
|||||||
data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
|
data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
|
||||||
data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o
|
data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o
|
||||||
getFirstLineNo _ (WithInput _ _ (LineSource _ _ lineNo)) = lineNo
|
getFirstLineNo _ (WithInput _ _ (LineSource _ _ lineNo)) = lineNo
|
||||||
getExpectedFilePath (WithInput _ (LineSource _ expectedFilePath _) _) = expectedFilePath
|
getExpectedSource (WithInput _ (LineSource _ expectedSource _) _) = expectedSource
|
||||||
getOutFilePath (WithInput _ _ (LineSource _ outFilePath _)) = outFilePath
|
getOutSource (WithInput _ _ (LineSource _ outSource _)) = outSource
|
||||||
recordSource (WithInput inputLineSource expectedLineSource outLineSource) (ParserSpecWithInput inpParser expParser outParser) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z)
|
recordSource (WithInput inputLineSource expectedLineSource outLineSource) (ParserSpecWithInput inpParser expParser outParser) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z)
|
||||||
<$> ZipSource (items inputLineSource inpParser) <*> (ZipSource $ getZipSource $ (,)
|
<$> ZipSource (items inputLineSource inpParser) <*> (ZipSource $ getZipSource $ (,)
|
||||||
<$> ZipSource (items expectedLineSource expParser)
|
<$> ZipSource (items expectedLineSource expParser)
|
||||||
|
@ -35,6 +35,10 @@ import Data.Word
|
|||||||
|
|
||||||
import Text.Printf
|
import Text.Printf
|
||||||
|
|
||||||
|
import Data.Conduit.SmartSource
|
||||||
|
|
||||||
|
import System.FilePath
|
||||||
|
|
||||||
data LineRecord = LineRecord Text Text Text Word32 MetricValue
|
data LineRecord = LineRecord Text Text Text Word32 MetricValue
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show)
|
||||||
|
|
||||||
@ -52,7 +56,7 @@ runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
|
|||||||
|
|
||||||
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
|
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
|
||||||
runLineByLineGeneralized ordering spec consum = do
|
runLineByLineGeneralized ordering spec consum = do
|
||||||
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
|
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles True spec
|
||||||
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
|
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
|
||||||
where metric = gesMetric spec
|
where metric = gesMetric spec
|
||||||
sorter KeepTheOriginalOrder = doNothing
|
sorter KeepTheOriginalOrder = doNothing
|
||||||
@ -84,10 +88,15 @@ runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consu
|
|||||||
|
|
||||||
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
|
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
|
||||||
runDiffGeneralized ordering otherOut spec consum = do
|
runDiffGeneralized ordering otherOut spec consum = do
|
||||||
let otherOutFilePath = getOutFile spec otherOut
|
(inputSource, expectedSource, outSource) <- checkAndGetFiles True spec
|
||||||
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
|
ooss <- getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv" otherOut
|
||||||
let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
|
case ooss of
|
||||||
let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
|
Left NoSpecGiven -> throwM $ NoOutFile otherOut
|
||||||
|
Left (NoFile fp) -> throwM $ NoOutFile fp
|
||||||
|
Left (NoDirectory d) -> throwM $ NoOutFile otherOut
|
||||||
|
Right otherOutSource -> do
|
||||||
|
let sourceA = gevalLineByLineSource metric inputSource expectedSource outSource
|
||||||
|
let sourceB = gevalLineByLineSource metric inputSource expectedSource otherOutSource
|
||||||
runResourceT $ runConduit $
|
runResourceT $ runConduit $
|
||||||
((getZipSource $ (,)
|
((getZipSource $ (,)
|
||||||
<$> ZipSource sourceA
|
<$> ZipSource sourceA
|
||||||
@ -106,24 +115,24 @@ runDiffGeneralized ordering otherOut spec consum = do
|
|||||||
escapeTabs :: Text -> Text
|
escapeTabs :: Text -> Text
|
||||||
escapeTabs = Data.Text.replace "\t" "<tab>"
|
escapeTabs = Data.Text.replace "\t" "<tab>"
|
||||||
|
|
||||||
gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
|
gevalLineByLineCore :: Metric -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
|
||||||
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum =
|
gevalLineByLineCore metric inputSource expectedSource outSource consum =
|
||||||
runResourceT $ runConduit $
|
runResourceT $ runConduit $
|
||||||
((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum)
|
((gevalLineByLineSource metric inputSource expectedSource outSource) .| consum)
|
||||||
|
|
||||||
gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT () LineRecord (ResourceT IO) ()
|
gevalLineByLineSource :: Metric -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT () LineRecord (ResourceT IO) ()
|
||||||
gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath =
|
gevalLineByLineSource metric inputSource expectedSource outSource =
|
||||||
(getZipSource $ (,)
|
(getZipSource $ (,)
|
||||||
<$> ZipSource (CL.sourceList [1..])
|
<$> ZipSource (CL.sourceList [1..])
|
||||||
<*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
|
<*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
|
||||||
where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
|
where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
|
||||||
context = (WithInput inputLineSource expectedLineSource outputLineSource)
|
context = (WithInput inputLineSource expectedLineSource outputLineSource)
|
||||||
inputLineSource = fileAsLineSource inputFilePath
|
inputLineSource = fileAsLineSource inputSource
|
||||||
expectedLineSource = fileAsLineSource expectedFilePath
|
expectedLineSource = fileAsLineSource expectedSource
|
||||||
outputLineSource = fileAsLineSource outFilePath
|
outputLineSource = fileAsLineSource outSource
|
||||||
justLine (LineInFile _ _ l) = l
|
justLine (LineInFile _ _ l) = l
|
||||||
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
|
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
|
||||||
s <- liftIO $ gevalCoreOnSingleLines metric (LineInFile inputFilePath lineNo inp)
|
s <- liftIO $ gevalCoreOnSingleLines metric (LineInFile inputSource lineNo inp)
|
||||||
(LineInFile expectedFilePath lineNo exp)
|
(LineInFile expectedSource lineNo exp)
|
||||||
(LineInFile outFilePath lineNo out)
|
(LineInFile outSource lineNo out)
|
||||||
return $ LineRecord inp exp out lineNo s
|
return $ LineRecord inp exp out lineNo s
|
||||||
|
42
test/Spec.hs
42
test/Spec.hs
@ -207,9 +207,9 @@ main = hspec $ do
|
|||||||
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
|
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
|
||||||
describe "evaluating single lines" $ do
|
describe "evaluating single lines" $ do
|
||||||
it "RMSE" $ do
|
it "RMSE" $ do
|
||||||
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
|
gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")
|
||||||
(LineInFile "stub2" 1 "3.4")
|
(LineInFile (FilePathSpec "stub2") 1 "3.4")
|
||||||
(LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8
|
(LineInFile (FilePathSpec "stub3") 1 "2.6") `shouldReturnAlmost` 0.8
|
||||||
describe "BIO format" $ do
|
describe "BIO format" $ do
|
||||||
it "just parse" $ do
|
it "just parse" $ do
|
||||||
let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"
|
let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"
|
||||||
@ -302,36 +302,20 @@ main = hspec $ do
|
|||||||
it "accuracy instead of log loss" $ do
|
it "accuracy instead of log loss" $ do
|
||||||
runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4
|
runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4
|
||||||
describe "smart sources" $ do
|
describe "smart sources" $ do
|
||||||
it "smart specs are parsed" $ do
|
it "smart specs are obtained" $ do
|
||||||
parseSmartSpec "" `shouldBe` NoSpec
|
getSmartSourceSpec "foo" "" "" `shouldReturn` Left NoSpecGiven
|
||||||
parseSmartSpec "-" `shouldBe` Stdin
|
getSmartSourceSpec "foo" "out.tsv" "-" `shouldReturn` Right Stdin
|
||||||
parseSmartSpec "http://gonito.net/foo" `shouldBe` Http "http://gonito.net/foo"
|
getSmartSourceSpec "foo" "out.sv" "http://gonito.net/foo" `shouldReturn` (Right $ Http "http://gonito.net/foo")
|
||||||
parseSmartSpec "https://gonito.net" `shouldBe` Https "https://gonito.net"
|
getSmartSourceSpec "foo" "in.tsv" "https://gonito.net" `shouldReturn` (Right $ Https "https://gonito.net")
|
||||||
parseSmartSpec "branch:" `shouldBe` GitSpec "branch" Nothing
|
|
||||||
parseSmartSpec "37be:foo/bar.tsv" `shouldBe` GitSpec "37be" (Just "foo/bar.tsv")
|
|
||||||
parseSmartSpec "bla/xyz:foo/bar.tsv" `shouldBe` GitSpec "bla/xyz" (Just "foo/bar.tsv")
|
|
||||||
parseSmartSpec "out.tsv" `shouldBe` FileNameSpec "out.tsv"
|
|
||||||
parseSmartSpec "dev-1/out.tsv" `shouldBe` FilePathSpec "dev-1/out.tsv"
|
|
||||||
parseSmartSpec "../out.tsv" `shouldBe` FilePathSpec "../out.tsv"
|
|
||||||
parseSmartSpec "4a5f" `shouldBe` PossiblyGitSpec "4a5f"
|
|
||||||
parseSmartSpec "!!" `shouldBe` PossiblyGitSpec "!!"
|
|
||||||
parseSmartSpec "branch" `shouldBe` PossiblyGitSpec "branch"
|
|
||||||
it "smart specs are parsed in context" $ do
|
|
||||||
parseSmartSpecInContext [] Nothing "xyz" `shouldBe` Just (PossiblyGitSpec "xyz")
|
|
||||||
parseSmartSpecInContext ["foo", "bar"] Nothing "out.tsv" `shouldBe` Just (FilePathSpec "foo/out.tsv")
|
|
||||||
parseSmartSpecInContext [] (Just "default") "" `shouldBe` Just (FileNameSpec "default")
|
|
||||||
parseSmartSpecInContext ["foo"] (Just "default") "" `shouldBe` Just (FilePathSpec "foo/default")
|
|
||||||
parseSmartSpecInContext ["foo/bar"] (Just "default") "http://gonito.net" `shouldBe` Just (Http "http://gonito.net")
|
|
||||||
parseSmartSpecInContext ["foo/bar"] Nothing "" `shouldBe` Nothing
|
|
||||||
it "sources are accessed" $ do
|
it "sources are accessed" $ do
|
||||||
readFromSmartSource [] Nothing "test/files/foo.txt" `shouldReturn` ["foo\n"]
|
readFromSmartSource "baz" "out.tsv" "test/files/foo.txt" `shouldReturn` ["foo\n"]
|
||||||
-- readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
|
-- readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
|
||||||
-- ["User-agent: *\nDisallow: /deny\n"]
|
-- ["User-agent: *\nDisallow: /deny\n"]
|
||||||
|
|
||||||
readFromSmartSource :: [FilePath] -> Maybe FilePath -> String -> IO [String]
|
readFromSmartSource :: FilePath -> FilePath -> String -> IO [String]
|
||||||
readFromSmartSource defaultDirs defaultFile specS = do
|
readFromSmartSource defaultDir defaultFile specS = do
|
||||||
let (Just spec) = parseSmartSpecInContext defaultDirs defaultFile specS
|
(Right spec) <- getSmartSourceSpec defaultDir defaultFile specS
|
||||||
let source = smartSource defaultDirs defaultFile spec
|
let source = smartSource spec
|
||||||
contents <- runResourceT (source $$ CT.decodeUtf8Lenient =$ CL.consume)
|
contents <- runResourceT (source $$ CT.decodeUtf8Lenient =$ CL.consume)
|
||||||
return $ Prelude.map unpack contents
|
return $ Prelude.map unpack contents
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user