switch to smart sources

This commit is contained in:
Filip Gralinski 2018-06-02 20:24:34 +02:00
parent 18ed47322e
commit 57ee8a1296
4 changed files with 178 additions and 168 deletions

View File

@ -11,32 +11,66 @@ import System.FilePath
import Control.Monad.Trans.Resource (MonadResource) import Control.Monad.Trans.Resource (MonadResource)
import Data.Conduit import Data.Conduit
import qualified Data.ByteString as S import qualified Data.ByteString as S
import Data.Conduit.Binary (sourceFile) import Data.Conduit.Binary (sourceFile, sourceHandle)
import Network.HTTP.Conduit import Network.HTTP.Conduit
import Control.Monad.IO.Class import Control.Monad.IO.Class
import Control.Monad.Trans.Class (lift) import Control.Monad.Trans.Class (lift)
import qualified System.Directory as D
import System.Posix
import System.FilePath
import System.IO (stdin)
import Control.Monad ((<=<), filterM)
data SmartSpec = NoSpec data SourceSpec =Stdin
| Stdin
| FileNameSpec FilePath
| FilePathSpec FilePath | FilePathSpec FilePath
| Http String | Http String
| Https String | Https String
| GitSpec String (Maybe FilePath) | GitSpec String FilePath
| PossiblyGitSpec String | NoSource
deriving (Eq, Show) deriving (Eq, Show)
--smartSource :: (MonadIO m, MonadResource m) => [FilePath] -> Maybe FilePath -> SmartSpec -> Producer m S.ByteString recoverPath :: SourceSpec -> String
smartSource defaultDirs defaultFile spec = pureSmartSource defaultDirs spec recoverPath Stdin = "-"
recoverPath (FilePathSpec filePath) = filePath
recoverPath (Http url) = url
recoverPath (Https url) = url
recoverPath (GitSpec branch filePath) = branch ++ ":" ++ filePath
--pureSmartSource :: (MonadIO m, MonadResource m) => [FilePath] -> SmartSpec -> Producer m S.ByteString data SmartSourceError = NoFile FilePath
pureSmartSource _ NoSpec = error "No source specification given" | NoDirectory FilePath
pureSmartSource _ (FileNameSpec fileName) = sourceFile fileName | NoSpecGiven
pureSmartSource _ (FilePathSpec fileName) = sourceFile fileName deriving (Eq, Show)
pureSmartSource [] (PossiblyGitSpec spec) = sourceFile spec
pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir </> spec) getSmartSourceSpec :: FilePath -> FilePath -> String -> IO (Either SmartSourceError SourceSpec)
--pureSmartSource _ (Https url) = httpSource url getSmartSourceSpec _ "" "" = return $ Left NoSpecGiven
--pureSmartSource _ (Http url) = httpSource url getSmartSourceSpec _ _ "-" = return $ Right Stdin
getSmartSourceSpec defaultDirectory defaultFile spec
| "http://" `isPrefixOf` spec = return $ Right $ Http spec
| "https://" `isPrefixOf` spec = return $ Right $ Https spec
| otherwise = do
inDefaultDirectory <- lookForCompressedFiles (defaultDirectory </> spec)
isInDefaultDirectory <- D.doesFileExist inDefaultDirectory
if isInDefaultDirectory
then
return $ Right $ FilePathSpec inDefaultDirectory
else
do
isThere <- D.doesFileExist spec
if isThere
then
return $ Right $ FilePathSpec spec
else
do
isDirectoryThere <- D.doesDirectoryExist defaultDirectory
if isDirectoryThere
then
return $ Left $ NoFile inDefaultDirectory
else
return $ Left $ NoDirectory spec
smartSource (FilePathSpec filePath) = sourceFile filePath
smartSource Stdin = sourceHandle stdin
smartSource NoSource = error $ "should not be here"
-- httpSource :: MonadResource m => String -> ConduitM () S.ByteString m () -- httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
-- httpSource url = do -- httpSource url = do
@ -47,45 +81,6 @@ pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir </> s
-- httpsource -- httpsource
-- lift finalizer -- lift finalizer
parseSmartSpec :: FilePath -> SmartSpec
parseSmartSpec "" = NoSpec
parseSmartSpec "-" = Stdin
parseSmartSpec spec
| "http://" `isPrefixOf` spec = Http spec
| "https://" `isPrefixOf` spec = Https spec
| otherwise = case elemIndex ':' spec of
Just ix -> let ref = take ix spec in
if checkRefFormat ref
then
GitSpec ref (if ix == length spec - 1
then
Nothing
else
Just $ drop (ix+1) spec)
else
fileSpec
Nothing -> if checkRefFormat spec && not ('/' `elem` spec) && not ('.' `elem` spec)
then
PossiblyGitSpec spec
else
fileSpec
where fileSpec = (if '/' `elem` spec then FilePathSpec else FileNameSpec) spec
parseSmartSpecInContext :: [FilePath] -> Maybe FilePath -> String -> Maybe SmartSpec
parseSmartSpecInContext defaultDirs defaultFile spec = parseSmartSpecInContext' defaultDirs defaultFile $ parseSmartSpec spec
where parseSmartSpecInContext' _ Nothing NoSpec = Nothing
parseSmartSpecInContext' [] (Just defaultFile) NoSpec = Just $ FileNameSpec defaultFile
parseSmartSpecInContext' (firstDir:_) (Just defaultFile) NoSpec = Just $ FilePathSpec (firstDir </> defaultFile)
parseSmartSpecInContext' (firstDir:_) _ (FileNameSpec fileName) = Just $ FilePathSpec (firstDir </> fileName)
parseSmartSpecInContext' _ Nothing (GitSpec branch Nothing) = Nothing
parseSmartSpecInContext' [] (Just defaultFile) (GitSpec branch Nothing) = Just $ GitSpec branch $ Just defaultFile
parseSmartSpecInContext' (firstDir:_) (Just defaultFile) (GitSpec branch Nothing)
= Just $ GitSpec branch $ Just (firstDir </> defaultFile)
parseSmartSpecInContext' _ _ parsedSpec = Just parsedSpec
checkRefFormat :: String -> Bool checkRefFormat :: String -> Bool
checkRefFormat ref = checkRefFormat ref =
not ("./" `isInfixOf` ref) && not ("./" `isInfixOf` ref) &&
@ -106,3 +101,21 @@ checkRefFormat ref =
isUnwantedChar '\\' = True isUnwantedChar '\\' = True
isUnwantedChar '\177' = True isUnwantedChar '\177' = True
isUnwantedChar c = ord c < 32 isUnwantedChar c = ord c < 32
lookForCompressedFiles :: FilePath -> IO FilePath
lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"]
lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath
lookForAlternativeFiles suffixes filePath
| takeExtension filePath `Prelude.elem` suffixes = return filePath
| otherwise = do
fileIsThere <- D.doesFileExist filePath
if fileIsThere
then
return filePath
else
do
found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes
return $ case found of
[fp] -> fp
_ -> filePath

View File

@ -34,8 +34,7 @@ module GEval.Core
EvaluationContext(..), EvaluationContext(..),
ParserSpec(..), ParserSpec(..),
fileAsLineSource, fileAsLineSource,
checkAndGetFiles, checkAndGetFiles
getOutFile
) where ) where
import Data.Conduit import Data.Conduit
@ -158,6 +157,10 @@ getMetricOrdering Likelihood = TheHigherTheBetter
getMetricOrdering BIOF1 = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter
getMetricOrdering BIOF1Labels = TheHigherTheBetter getMetricOrdering BIOF1Labels = TheHigherTheBetter
isInputNeeded :: Metric -> Bool
isInputNeeded CharMatch = True
isInputNeeded _ = False
defaultOutDirectory = "." defaultOutDirectory = "."
defaultTestName = "test-A" defaultTestName = "test-A"
defaultOutFile = "out.tsv" defaultOutFile = "out.tsv"
@ -247,67 +250,66 @@ isEmptyFile path = do
return ((fileSize stat) == 0) return ((fileSize stat) == 0)
data LineSource m = LineSource (Source m Text) FilePath Word32 data LineSource m = LineSource (Source m Text) SourceSpec Word32
geval :: GEvalSpecification -> IO (MetricValue) geval :: GEvalSpecification -> IO (MetricValue)
geval gevalSpec = do geval gevalSpec = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles gevalSpec (inputSource, expectedSource, outSource) <- checkAndGetFiles False gevalSpec
gevalCore metric inputFilePath expectedFilePath outFilePath gevalCore metric inputSource expectedSource outSource
where metric = gesMetric gevalSpec where metric = gesMetric gevalSpec
checkAndGetFiles :: GEvalSpecification -> IO (FilePath, FilePath, FilePath) checkAndGetFiles :: Bool -> GEvalSpecification -> IO (SourceSpec, SourceSpec, SourceSpec)
checkAndGetFiles gevalSpec = do checkAndGetFiles forceInput gevalSpec = do
unlessM (D.doesDirectoryExist outDirectory) $ throwM $ NoOutDirectory outDirectory oss <- getSmartSourceSpec outTestDirectory "out.tsv" outFile
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory case oss of
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory Left NoSpecGiven -> throwM $ NoOutFile outFile
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory Left (NoFile fp) -> throwM $ NoOutFile fp
inputFilePath <- lookForCompressedFiles inputFilePath' Left (NoDirectory d) -> do
expectedFilePath <- lookForCompressedFiles expectedFilePath' unlessM (D.doesDirectoryExist outDirectory) $ throwM $ NoOutDirectory outDirectory
outFilePath <- lookForCompressedFiles outFilePath' unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
checkInputFileIfNeeded metric inputFilePath throwM $ NoOutFile outFile
return (inputFilePath, expectedFilePath, outFilePath) Right outSource -> do
where expectedFilePath' = expectedTestDirectory </> (gesExpectedFile gevalSpec) ess <- getSmartSourceSpec expectedTestDirectory "expected.tsv" expectedFile
outFilePath' = getOutFile gevalSpec (gesOutFile gevalSpec) case ess of
inputFilePath' = expectedTestDirectory </> (gesInputFile gevalSpec) Left NoSpecGiven -> throwM $ NoExpectedFile expectedFile
expectedTestDirectory = expectedDirectory </> testName Left (NoFile fp) -> throwM $ NoExpectedFile fp
Left (NoDirectory d) -> do
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
throwM $ NoExpectedDirectory d
Right expectedSource -> do
-- in most cases inputSource is NoSource (unless needed by a metric or in the line-by-line mode)
inputSource <- getInputSourceIfNeeded forceInput metric expectedTestDirectory inputFile
return (inputSource, expectedSource, outSource)
where expectedTestDirectory = expectedDirectory </> testName
outTestDirectory = outDirectory </> testName outTestDirectory = outDirectory </> testName
expectedDirectory = getExpectedDirectory gevalSpec expectedDirectory = getExpectedDirectory gevalSpec
outDirectory = gesOutDirectory gevalSpec outDirectory = gesOutDirectory gevalSpec
testName = gesTestName gevalSpec testName = gesTestName gevalSpec
outFile = gesOutFile gevalSpec
expectedFile = gesExpectedFile gevalSpec
inputFile = gesInputFile gevalSpec
metric = gesMetric gevalSpec metric = gesMetric gevalSpec
lookForCompressedFiles :: FilePath -> IO FilePath
lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"]
lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath
lookForAlternativeFiles suffixes filePath
| takeExtension filePath `Prelude.elem` suffixes = return filePath
| otherwise = do
fileIsThere <- D.doesFileExist filePath
if fileIsThere
then
return filePath
else
do
found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes
return $ case found of
[fp] -> fp
_ -> filePath
getOutFile :: GEvalSpecification -> FilePath -> FilePath getOutFile :: GEvalSpecification -> FilePath -> FilePath
getOutFile gevalSpec out = outDirectory </> testName </> out getOutFile gevalSpec out = outDirectory </> testName </> out
where outDirectory = gesOutDirectory gevalSpec where outDirectory = gesOutDirectory gevalSpec
testName = gesTestName gevalSpec testName = gesTestName gevalSpec
checkInputFileIfNeeded :: Metric -> FilePath -> IO () getInputSourceIfNeeded :: Bool -> Metric -> FilePath -> FilePath -> IO SourceSpec
checkInputFileIfNeeded CharMatch inputFilePath = do getInputSourceIfNeeded forced metric directory inputFilePath
unlessM (D.doesFileExist inputFilePath) $ throwM $ NoInputFile inputFilePath | forced || (isInputNeeded metric) = do
return () iss <- getSmartSourceSpec directory "in.tsv" inputFilePath
checkInputFileIfNeeded _ _ = return () case iss of
Left NoSpecGiven -> throwM $ NoInputFile inputFilePath
Left (NoFile fp) -> throwM $ NoInputFile fp
Left (NoDirectory _) -> throwM $ NoInputFile inputFilePath
Right sourceSpec -> return sourceSpec
| otherwise = return NoSource
fileAsLineSource :: FilePath -> LineSource (ResourceT IO) fileAsLineSource :: SourceSpec -> LineSource (ResourceT IO)
fileAsLineSource filePath = fileAsLineSource spec =
LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= autoDecompress $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1 LineSource ((smartSource spec) $= autoDecompress $= CT.decodeUtf8Lenient =$= CT.lines) spec 1
gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue) gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
gevalCoreOnSingleLines metric inpLine expLine outLine = gevalCoreOnSingleLines metric inpLine expLine outLine =
@ -316,18 +318,20 @@ gevalCoreOnSingleLines metric inpLine expLine outLine =
(singleLineAsLineSource outLine) (singleLineAsLineSource outLine)
singleLineAsLineSource :: LineInFile -> LineSource (ResourceT IO) singleLineAsLineSource :: LineInFile -> LineSource (ResourceT IO)
singleLineAsLineSource (LineInFile filePath lineNo line) = singleLineAsLineSource (LineInFile sourceSpec lineNo line) =
LineSource (CL.sourceList [line]) filePath lineNo LineSource (CL.sourceList [line]) sourceSpec lineNo
gevalCore :: Metric -> String -> String -> String -> IO (MetricValue) gevalCore :: Metric -> SourceSpec -> SourceSpec -> SourceSpec -> IO (MetricValue)
gevalCore metric inputFilePath expectedFilePath outFilePath = do gevalCore metric inputSource expectedSource outSource = do
unlessM (D.doesFileExist expectedFilePath) $ throwM $ NoExpectedFile expectedFilePath whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
unlessM (D.doesFileExist outFilePath) $ throwM $ NoOutFile outFilePath
whenM (isEmptyFile outFilePath) $ throwM $ EmptyOutput
gevalCoreOnSources metric gevalCoreOnSources metric
(fileAsLineSource inputFilePath) (fileAsLineSource inputSource)
(fileAsLineSource expectedFilePath) (fileAsLineSource expectedSource)
(fileAsLineSource outFilePath) (fileAsLineSource outSource)
isEmptyFileSource :: SourceSpec -> IO Bool
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
isEmptyFileSource _ = return False
logLossToLikehood logLoss = exp (-logLoss) logLossToLikehood logLoss = exp (-logLoss)
@ -351,7 +355,7 @@ gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLi
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
gevalCore' metric inputLineSource expectedLineSource outLineSource gevalCore' metric inputLineSource expectedLineSource outLineSource
data LineInFile = LineInFile FilePath Word32 Text data LineInFile = LineInFile SourceSpec Word32 Text
gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue) gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
@ -501,8 +505,8 @@ class EvaluationContext ctxt m where
data ParsedRecord ctxt :: * data ParsedRecord ctxt :: *
recordSource :: ctxt -> ParserSpec ctxt -> Source (ResourceT m) (WrappedParsedRecord ctxt) recordSource :: ctxt -> ParserSpec ctxt -> Source (ResourceT m) (WrappedParsedRecord ctxt)
getFirstLineNo :: Proxy m -> ctxt -> Word32 getFirstLineNo :: Proxy m -> ctxt -> Word32
getExpectedFilePath :: ctxt -> String getExpectedSource :: ctxt -> SourceSpec
getOutFilePath :: ctxt -> String getOutSource :: ctxt -> SourceSpec
checkStep :: Proxy m -> ((Word32, ParsedRecord ctxt) -> c) -> (Word32, WrappedParsedRecord ctxt) -> Maybe c checkStep :: Proxy m -> ((Word32, ParsedRecord ctxt) -> c) -> (Word32, WrappedParsedRecord ctxt) -> Maybe c
checkStepM :: ((Word32, ParsedRecord ctxt) -> (ResourceT m) c) -> (Word32, WrappedParsedRecord ctxt) -> (ResourceT m) (Maybe c) checkStepM :: ((Word32, ParsedRecord ctxt) -> (ResourceT m) c) -> (Word32, WrappedParsedRecord ctxt) -> (ResourceT m) (Maybe c)
@ -513,8 +517,8 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (Withou
data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o) data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o)
data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o
getFirstLineNo _ (WithoutInput _ (LineSource _ _ lineNo)) = lineNo getFirstLineNo _ (WithoutInput _ (LineSource _ _ lineNo)) = lineNo
getExpectedFilePath (WithoutInput (LineSource _ expectedFilePath _) _) = expectedFilePath getExpectedSource (WithoutInput (LineSource _ expectedSource _) _) = expectedSource
getOutFilePath (WithoutInput _ (LineSource _ outFilePath _)) = outFilePath getOutSource (WithoutInput _ (LineSource _ outSource _)) = outSource
recordSource (WithoutInput expectedLineSource outLineSource) (ParserSpecWithoutInput expParser outParser) = getZipSource $ WrappedParsedRecordWithoutInput recordSource (WithoutInput expectedLineSource outLineSource) (ParserSpecWithoutInput expParser outParser) = getZipSource $ WrappedParsedRecordWithoutInput
<$> ZipSource (items expectedLineSource expParser) <$> ZipSource (items expectedLineSource expParser)
<*> ZipSource (items outLineSource outParser) <*> ZipSource (items outLineSource outParser)
@ -542,8 +546,8 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithIn
data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o) data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o
getFirstLineNo _ (WithInput _ _ (LineSource _ _ lineNo)) = lineNo getFirstLineNo _ (WithInput _ _ (LineSource _ _ lineNo)) = lineNo
getExpectedFilePath (WithInput _ (LineSource _ expectedFilePath _) _) = expectedFilePath getExpectedSource (WithInput _ (LineSource _ expectedSource _) _) = expectedSource
getOutFilePath (WithInput _ _ (LineSource _ outFilePath _)) = outFilePath getOutSource (WithInput _ _ (LineSource _ outSource _)) = outSource
recordSource (WithInput inputLineSource expectedLineSource outLineSource) (ParserSpecWithInput inpParser expParser outParser) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z) recordSource (WithInput inputLineSource expectedLineSource outLineSource) (ParserSpecWithInput inpParser expParser outParser) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z)
<$> ZipSource (items inputLineSource inpParser) <*> (ZipSource $ getZipSource $ (,) <$> ZipSource (items inputLineSource inpParser) <*> (ZipSource $ getZipSource $ (,)
<$> ZipSource (items expectedLineSource expParser) <$> ZipSource (items expectedLineSource expParser)

View File

@ -35,6 +35,10 @@ import Data.Word
import Text.Printf import Text.Printf
import Data.Conduit.SmartSource
import System.FilePath
data LineRecord = LineRecord Text Text Text Word32 MetricValue data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show) deriving (Eq, Show)
@ -52,7 +56,7 @@ runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
runLineByLineGeneralized ordering spec consum = do runLineByLineGeneralized ordering spec consum = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles True spec
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum) gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
where metric = gesMetric spec where metric = gesMetric spec
sorter KeepTheOriginalOrder = doNothing sorter KeepTheOriginalOrder = doNothing
@ -84,14 +88,19 @@ runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consu
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
runDiffGeneralized ordering otherOut spec consum = do runDiffGeneralized ordering otherOut spec consum = do
let otherOutFilePath = getOutFile spec otherOut (inputSource, expectedSource, outSource) <- checkAndGetFiles True spec
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec ooss <- getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv" otherOut
let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath case ooss of
let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath Left NoSpecGiven -> throwM $ NoOutFile otherOut
runResourceT $ runConduit $ Left (NoFile fp) -> throwM $ NoOutFile fp
((getZipSource $ (,) Left (NoDirectory d) -> throwM $ NoOutFile otherOut
<$> ZipSource sourceA Right otherOutSource -> do
<*> ZipSource sourceB) .| sorter ordering .| consum) let sourceA = gevalLineByLineSource metric inputSource expectedSource outSource
let sourceB = gevalLineByLineSource metric inputSource expectedSource otherOutSource
runResourceT $ runConduit $
((getZipSource $ (,)
<$> ZipSource sourceA
<*> ZipSource sourceB) .| sorter ordering .| consum)
where metric = gesMetric spec where metric = gesMetric spec
sorter KeepTheOriginalOrder = doNothing sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric)) sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
@ -106,24 +115,24 @@ runDiffGeneralized ordering otherOut spec consum = do
escapeTabs :: Text -> Text escapeTabs :: Text -> Text
escapeTabs = Data.Text.replace "\t" "<tab>" escapeTabs = Data.Text.replace "\t" "<tab>"
gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) a -> IO a gevalLineByLineCore :: Metric -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum = gevalLineByLineCore metric inputSource expectedSource outSource consum =
runResourceT $ runConduit $ runResourceT $ runConduit $
((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum) ((gevalLineByLineSource metric inputSource expectedSource outSource) .| consum)
gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT () LineRecord (ResourceT IO) () gevalLineByLineSource :: Metric -> SourceSpec -> SourceSpec -> SourceSpec -> ConduitT () LineRecord (ResourceT IO) ()
gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath = gevalLineByLineSource metric inputSource expectedSource outSource =
(getZipSource $ (,) (getZipSource $ (,)
<$> ZipSource (CL.sourceList [1..]) <$> ZipSource (CL.sourceList [1..])
<*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes <*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id)) where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
context = (WithInput inputLineSource expectedLineSource outputLineSource) context = (WithInput inputLineSource expectedLineSource outputLineSource)
inputLineSource = fileAsLineSource inputFilePath inputLineSource = fileAsLineSource inputSource
expectedLineSource = fileAsLineSource expectedFilePath expectedLineSource = fileAsLineSource expectedSource
outputLineSource = fileAsLineSource outFilePath outputLineSource = fileAsLineSource outSource
justLine (LineInFile _ _ l) = l justLine (LineInFile _ _ l) = l
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
s <- liftIO $ gevalCoreOnSingleLines metric (LineInFile inputFilePath lineNo inp) s <- liftIO $ gevalCoreOnSingleLines metric (LineInFile inputSource lineNo inp)
(LineInFile expectedFilePath lineNo exp) (LineInFile expectedSource lineNo exp)
(LineInFile outFilePath lineNo out) (LineInFile outSource lineNo out)
return $ LineRecord inp exp out lineNo s return $ LineRecord inp exp out lineNo s

View File

@ -207,9 +207,9 @@ main = hspec $ do
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866 runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
describe "evaluating single lines" $ do describe "evaluating single lines" $ do
it "RMSE" $ do it "RMSE" $ do
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla") gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")
(LineInFile "stub2" 1 "3.4") (LineInFile (FilePathSpec "stub2") 1 "3.4")
(LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8 (LineInFile (FilePathSpec "stub3") 1 "2.6") `shouldReturnAlmost` 0.8
describe "BIO format" $ do describe "BIO format" $ do
it "just parse" $ do it "just parse" $ do
let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name" let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"
@ -302,36 +302,20 @@ main = hspec $ do
it "accuracy instead of log loss" $ do it "accuracy instead of log loss" $ do
runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4 runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4
describe "smart sources" $ do describe "smart sources" $ do
it "smart specs are parsed" $ do it "smart specs are obtained" $ do
parseSmartSpec "" `shouldBe` NoSpec getSmartSourceSpec "foo" "" "" `shouldReturn` Left NoSpecGiven
parseSmartSpec "-" `shouldBe` Stdin getSmartSourceSpec "foo" "out.tsv" "-" `shouldReturn` Right Stdin
parseSmartSpec "http://gonito.net/foo" `shouldBe` Http "http://gonito.net/foo" getSmartSourceSpec "foo" "out.sv" "http://gonito.net/foo" `shouldReturn` (Right $ Http "http://gonito.net/foo")
parseSmartSpec "https://gonito.net" `shouldBe` Https "https://gonito.net" getSmartSourceSpec "foo" "in.tsv" "https://gonito.net" `shouldReturn` (Right $ Https "https://gonito.net")
parseSmartSpec "branch:" `shouldBe` GitSpec "branch" Nothing
parseSmartSpec "37be:foo/bar.tsv" `shouldBe` GitSpec "37be" (Just "foo/bar.tsv")
parseSmartSpec "bla/xyz:foo/bar.tsv" `shouldBe` GitSpec "bla/xyz" (Just "foo/bar.tsv")
parseSmartSpec "out.tsv" `shouldBe` FileNameSpec "out.tsv"
parseSmartSpec "dev-1/out.tsv" `shouldBe` FilePathSpec "dev-1/out.tsv"
parseSmartSpec "../out.tsv" `shouldBe` FilePathSpec "../out.tsv"
parseSmartSpec "4a5f" `shouldBe` PossiblyGitSpec "4a5f"
parseSmartSpec "!!" `shouldBe` PossiblyGitSpec "!!"
parseSmartSpec "branch" `shouldBe` PossiblyGitSpec "branch"
it "smart specs are parsed in context" $ do
parseSmartSpecInContext [] Nothing "xyz" `shouldBe` Just (PossiblyGitSpec "xyz")
parseSmartSpecInContext ["foo", "bar"] Nothing "out.tsv" `shouldBe` Just (FilePathSpec "foo/out.tsv")
parseSmartSpecInContext [] (Just "default") "" `shouldBe` Just (FileNameSpec "default")
parseSmartSpecInContext ["foo"] (Just "default") "" `shouldBe` Just (FilePathSpec "foo/default")
parseSmartSpecInContext ["foo/bar"] (Just "default") "http://gonito.net" `shouldBe` Just (Http "http://gonito.net")
parseSmartSpecInContext ["foo/bar"] Nothing "" `shouldBe` Nothing
it "sources are accessed" $ do it "sources are accessed" $ do
readFromSmartSource [] Nothing "test/files/foo.txt" `shouldReturn` ["foo\n"] readFromSmartSource "baz" "out.tsv" "test/files/foo.txt" `shouldReturn` ["foo\n"]
-- readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn` -- readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
-- ["User-agent: *\nDisallow: /deny\n"] -- ["User-agent: *\nDisallow: /deny\n"]
readFromSmartSource :: [FilePath] -> Maybe FilePath -> String -> IO [String] readFromSmartSource :: FilePath -> FilePath -> String -> IO [String]
readFromSmartSource defaultDirs defaultFile specS = do readFromSmartSource defaultDir defaultFile specS = do
let (Just spec) = parseSmartSpecInContext defaultDirs defaultFile specS (Right spec) <- getSmartSourceSpec defaultDir defaultFile specS
let source = smartSource defaultDirs defaultFile spec let source = smartSource spec
contents <- runResourceT (source $$ CT.decodeUtf8Lenient =$ CL.consume) contents <- runResourceT (source $$ CT.decodeUtf8Lenient =$ CL.consume)
return $ Prelude.map unpack contents return $ Prelude.map unpack contents