Complete move to the new style of train files

This commit is contained in:
Filip Gralinski 2019-12-13 20:31:40 +01:00
parent 26f20ba466
commit 0826d457b2
2 changed files with 34 additions and 18 deletions

View File

@ -33,8 +33,7 @@ createChallenge withDataFiles expectedDirectory spec = do
if withDataFiles
then
do
createFile (trainDirectory </> "in.tsv") $ trainInContents metric
createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
createTrainFiles metric trainDirectory expectedFile
createFile (devDirectory </> "in.tsv") $ devInContents metric
createFile (devDirectory </> expectedFile) $ devExpectedContents metric
@ -53,6 +52,16 @@ createChallenge withDataFiles expectedDirectory spec = do
testDirectory = expectedDirectory </> testName
expectedFile = gesExpectedFile spec
createTrainFiles :: Metric -> FilePath -> FilePath -> IO ()
createTrainFiles metric@(LogLossHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory
createTrainFiles metric@(LikelihoodHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory
createTrainFiles metric trainDirectory expectedFile = do
createFile (trainDirectory </> "in.tsv") $ trainInContents metric
createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
createSingleTrainFile metric trainDirectory =
createFile (trainDirectory </> "train.tsv") $ trainContents metric
createFile :: FilePath -> String -> IO ()
createFile filePath contents = do
whenM (D.doesFileExist filePath) $ throwM $ FileAlreadyThere filePath

View File

@ -45,7 +45,6 @@ data ValidationException = NoChallengeDirectory FilePath
| SpaceSuffixDetect FilePath
| VaryingNumberOfColumns FilePath
| BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue
| OldStyleTrainFile
instance Exception ValidationException
@ -66,7 +65,6 @@ instance Show ValidationException where
show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths
show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths
show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got)
show OldStyleTrainFile = "Found old-style train file `train.tsv`, whereas the same convention as in test directories should be used (`in.tsv` and `expected.tsv`)"
validationChallenge :: FilePath -> GEvalSpecification -> IO ()
validationChallenge challengeDirectory spec = do
@ -78,16 +76,13 @@ validationChallenge challengeDirectory spec = do
checkCorrectFile gitignoreFile
checkCorrectFile readmeFile
testDirectories <- findTestDirs challengeDirectory
checkTestDirectories mainMetric testDirectories
checkTrainDirectory mainMetric challengeDirectory
mapM_ (runOnTest spec) testDirectories
checkTestDirectories spec testDirectories
checkTrainDirectory spec challengeDirectory
where
configFile = challengeDirectory </> "config.txt"
gitignoreFile = challengeDirectory </> ".gitignore"
readmeFile = challengeDirectory </> "README.md"
mainMetric = evaluationSchemeMetric $ head $ gesMetrics spec
checkCorrectFile :: FilePath -> IO ()
checkCorrectFile filePath = do
@ -150,8 +145,7 @@ never = depth ==? 0
testDirFilter :: FindClause Bool
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*"
||? SFF.fileName ~~? "test-*"
||? SFF.fileName ==? "train")
||? SFF.fileName ~~? "test-*")
fileFilter :: String -> FindClause Bool
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
@ -159,12 +153,12 @@ fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileN
exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ]
checkTestDirectories :: Metric -> [FilePath] -> IO ()
checkTestDirectories :: GEvalSpecification -> [FilePath] -> IO ()
checkTestDirectories _ [] = throwM NoTestDirectories
checkTestDirectories metric directories = mapM_ (checkTestDirectory metric) directories
checkTestDirectories spec directories = mapM_ (checkTestDirectory spec) directories
checkTestDirectory :: Metric -> FilePath -> IO ()
checkTestDirectory metric directoryPath = do
checkTestDirectory :: GEvalSpecification -> FilePath -> IO ()
checkTestDirectory spec directoryPath = do
inputFiles <- findInputFiles directoryPath
when (null inputFiles) $ throw $ NoInputFile inputFile
when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles
@ -184,16 +178,29 @@ checkTestDirectory metric directoryPath = do
outputFiles <- findOutputFiles directoryPath
unless (null outputFiles) $ throw $ OutputFileDetected outputFiles
runOnTest spec directoryPath
where
metric = evaluationSchemeMetric $ head $ gesMetrics spec
inputFile = directoryPath </> defaultInputFile
expectedFile = directoryPath </> defaultExpectedFile
checkTrainDirectory :: Metric -> FilePath -> IO ()
checkTrainDirectory metric challengeDirectory = do
checkTrainDirectory :: GEvalSpecification -> FilePath -> IO ()
checkTrainDirectory spec challengeDirectory = do
let trainDirectory = challengeDirectory </> "train"
whenM (doesDirectoryExist trainDirectory) $ do
trainFiles <- findTrainFiles trainDirectory
when (not $ null trainFiles) $ throw $ OldStyleTrainFile
if (not $ null trainFiles)
then
do
putStrLn "WARNING: Found old-style train file `train.tsv`, whereas the same convention as in"
putStrLn "WARNING: test directories if preferred (`in.tsv` and `expected.tsv`)."
putStrLn "WARNING: (Though, there might still be some cases when `train.tsv` is needed, e.g. for training LMs.)"
else
do
runOnTest spec trainDirectory
checkColumns :: FilePath -> IO ()
checkColumns filePath = do