Complete move to the new style of train files
This commit is contained in:
parent
26f20ba466
commit
0826d457b2
@ -33,8 +33,7 @@ createChallenge withDataFiles expectedDirectory spec = do
|
|||||||
if withDataFiles
|
if withDataFiles
|
||||||
then
|
then
|
||||||
do
|
do
|
||||||
createFile (trainDirectory </> "in.tsv") $ trainInContents metric
|
createTrainFiles metric trainDirectory expectedFile
|
||||||
createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
|
|
||||||
|
|
||||||
createFile (devDirectory </> "in.tsv") $ devInContents metric
|
createFile (devDirectory </> "in.tsv") $ devInContents metric
|
||||||
createFile (devDirectory </> expectedFile) $ devExpectedContents metric
|
createFile (devDirectory </> expectedFile) $ devExpectedContents metric
|
||||||
@ -53,6 +52,16 @@ createChallenge withDataFiles expectedDirectory spec = do
|
|||||||
testDirectory = expectedDirectory </> testName
|
testDirectory = expectedDirectory </> testName
|
||||||
expectedFile = gesExpectedFile spec
|
expectedFile = gesExpectedFile spec
|
||||||
|
|
||||||
|
createTrainFiles :: Metric -> FilePath -> FilePath -> IO ()
|
||||||
|
createTrainFiles metric@(LogLossHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory
|
||||||
|
createTrainFiles metric@(LikelihoodHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory
|
||||||
|
createTrainFiles metric trainDirectory expectedFile = do
|
||||||
|
createFile (trainDirectory </> "in.tsv") $ trainInContents metric
|
||||||
|
createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
|
||||||
|
|
||||||
|
createSingleTrainFile metric trainDirectory =
|
||||||
|
createFile (trainDirectory </> "train.tsv") $ trainContents metric
|
||||||
|
|
||||||
createFile :: FilePath -> String -> IO ()
|
createFile :: FilePath -> String -> IO ()
|
||||||
createFile filePath contents = do
|
createFile filePath contents = do
|
||||||
whenM (D.doesFileExist filePath) $ throwM $ FileAlreadyThere filePath
|
whenM (D.doesFileExist filePath) $ throwM $ FileAlreadyThere filePath
|
||||||
|
@ -45,7 +45,6 @@ data ValidationException = NoChallengeDirectory FilePath
|
|||||||
| SpaceSuffixDetect FilePath
|
| SpaceSuffixDetect FilePath
|
||||||
| VaryingNumberOfColumns FilePath
|
| VaryingNumberOfColumns FilePath
|
||||||
| BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue
|
| BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue
|
||||||
| OldStyleTrainFile
|
|
||||||
|
|
||||||
instance Exception ValidationException
|
instance Exception ValidationException
|
||||||
|
|
||||||
@ -66,7 +65,6 @@ instance Show ValidationException where
|
|||||||
show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths
|
show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths
|
||||||
show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths
|
show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths
|
||||||
show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got)
|
show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got)
|
||||||
show OldStyleTrainFile = "Found old-style train file `train.tsv`, whereas the same convention as in test directories should be used (`in.tsv` and `expected.tsv`)"
|
|
||||||
|
|
||||||
validationChallenge :: FilePath -> GEvalSpecification -> IO ()
|
validationChallenge :: FilePath -> GEvalSpecification -> IO ()
|
||||||
validationChallenge challengeDirectory spec = do
|
validationChallenge challengeDirectory spec = do
|
||||||
@ -78,16 +76,13 @@ validationChallenge challengeDirectory spec = do
|
|||||||
checkCorrectFile gitignoreFile
|
checkCorrectFile gitignoreFile
|
||||||
checkCorrectFile readmeFile
|
checkCorrectFile readmeFile
|
||||||
testDirectories <- findTestDirs challengeDirectory
|
testDirectories <- findTestDirs challengeDirectory
|
||||||
checkTestDirectories mainMetric testDirectories
|
checkTestDirectories spec testDirectories
|
||||||
checkTrainDirectory mainMetric challengeDirectory
|
checkTrainDirectory spec challengeDirectory
|
||||||
|
|
||||||
mapM_ (runOnTest spec) testDirectories
|
|
||||||
|
|
||||||
where
|
where
|
||||||
configFile = challengeDirectory </> "config.txt"
|
configFile = challengeDirectory </> "config.txt"
|
||||||
gitignoreFile = challengeDirectory </> ".gitignore"
|
gitignoreFile = challengeDirectory </> ".gitignore"
|
||||||
readmeFile = challengeDirectory </> "README.md"
|
readmeFile = challengeDirectory </> "README.md"
|
||||||
mainMetric = evaluationSchemeMetric $ head $ gesMetrics spec
|
|
||||||
|
|
||||||
checkCorrectFile :: FilePath -> IO ()
|
checkCorrectFile :: FilePath -> IO ()
|
||||||
checkCorrectFile filePath = do
|
checkCorrectFile filePath = do
|
||||||
@ -150,8 +145,7 @@ never = depth ==? 0
|
|||||||
|
|
||||||
testDirFilter :: FindClause Bool
|
testDirFilter :: FindClause Bool
|
||||||
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*"
|
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*"
|
||||||
||? SFF.fileName ~~? "test-*"
|
||? SFF.fileName ~~? "test-*")
|
||||||
||? SFF.fileName ==? "train")
|
|
||||||
|
|
||||||
fileFilter :: String -> FindClause Bool
|
fileFilter :: String -> FindClause Bool
|
||||||
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
|
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
|
||||||
@ -159,12 +153,12 @@ fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileN
|
|||||||
exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ]
|
exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ]
|
||||||
|
|
||||||
|
|
||||||
checkTestDirectories :: Metric -> [FilePath] -> IO ()
|
checkTestDirectories :: GEvalSpecification -> [FilePath] -> IO ()
|
||||||
checkTestDirectories _ [] = throwM NoTestDirectories
|
checkTestDirectories _ [] = throwM NoTestDirectories
|
||||||
checkTestDirectories metric directories = mapM_ (checkTestDirectory metric) directories
|
checkTestDirectories spec directories = mapM_ (checkTestDirectory spec) directories
|
||||||
|
|
||||||
checkTestDirectory :: Metric -> FilePath -> IO ()
|
checkTestDirectory :: GEvalSpecification -> FilePath -> IO ()
|
||||||
checkTestDirectory metric directoryPath = do
|
checkTestDirectory spec directoryPath = do
|
||||||
inputFiles <- findInputFiles directoryPath
|
inputFiles <- findInputFiles directoryPath
|
||||||
when (null inputFiles) $ throw $ NoInputFile inputFile
|
when (null inputFiles) $ throw $ NoInputFile inputFile
|
||||||
when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles
|
when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles
|
||||||
@ -184,16 +178,29 @@ checkTestDirectory metric directoryPath = do
|
|||||||
|
|
||||||
outputFiles <- findOutputFiles directoryPath
|
outputFiles <- findOutputFiles directoryPath
|
||||||
unless (null outputFiles) $ throw $ OutputFileDetected outputFiles
|
unless (null outputFiles) $ throw $ OutputFileDetected outputFiles
|
||||||
|
|
||||||
|
runOnTest spec directoryPath
|
||||||
|
|
||||||
where
|
where
|
||||||
|
metric = evaluationSchemeMetric $ head $ gesMetrics spec
|
||||||
inputFile = directoryPath </> defaultInputFile
|
inputFile = directoryPath </> defaultInputFile
|
||||||
|
|
||||||
expectedFile = directoryPath </> defaultExpectedFile
|
expectedFile = directoryPath </> defaultExpectedFile
|
||||||
|
|
||||||
checkTrainDirectory :: Metric -> FilePath -> IO ()
|
checkTrainDirectory :: GEvalSpecification -> FilePath -> IO ()
|
||||||
checkTrainDirectory metric challengeDirectory = do
|
checkTrainDirectory spec challengeDirectory = do
|
||||||
let trainDirectory = challengeDirectory </> "train"
|
let trainDirectory = challengeDirectory </> "train"
|
||||||
whenM (doesDirectoryExist trainDirectory) $ do
|
whenM (doesDirectoryExist trainDirectory) $ do
|
||||||
trainFiles <- findTrainFiles trainDirectory
|
trainFiles <- findTrainFiles trainDirectory
|
||||||
when (not $ null trainFiles) $ throw $ OldStyleTrainFile
|
if (not $ null trainFiles)
|
||||||
|
then
|
||||||
|
do
|
||||||
|
putStrLn "WARNING: Found old-style train file `train.tsv`, whereas the same convention as in"
|
||||||
|
putStrLn "WARNING: test directories if preferred (`in.tsv` and `expected.tsv`)."
|
||||||
|
putStrLn "WARNING: (Though, there might still be some cases when `train.tsv` is needed, e.g. for training LMs.)"
|
||||||
|
else
|
||||||
|
do
|
||||||
|
runOnTest spec trainDirectory
|
||||||
|
|
||||||
checkColumns :: FilePath -> IO ()
|
checkColumns :: FilePath -> IO ()
|
||||||
checkColumns filePath = do
|
checkColumns filePath = do
|
||||||
|
Loading…
Reference in New Issue
Block a user