From 0826d457b2544e198ffa551f14b723ef132dbe2b Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Fri, 13 Dec 2019 20:31:40 +0100 Subject: [PATCH] Complete move to the new style of train files --- src/GEval/CreateChallenge.hs | 13 ++++++++++-- src/GEval/Validation.hs | 39 +++++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 44095ea..de087af 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -33,8 +33,7 @@ createChallenge withDataFiles expectedDirectory spec = do if withDataFiles then do - createFile (trainDirectory "in.tsv") $ trainInContents metric - createFile (trainDirectory expectedFile) $ trainExpectedContents metric + createTrainFiles metric trainDirectory expectedFile createFile (devDirectory "in.tsv") $ devInContents metric createFile (devDirectory expectedFile) $ devExpectedContents metric @@ -53,6 +52,16 @@ createChallenge withDataFiles expectedDirectory spec = do testDirectory = expectedDirectory testName expectedFile = gesExpectedFile spec +createTrainFiles :: Metric -> FilePath -> FilePath -> IO () +createTrainFiles metric@(LogLossHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory +createTrainFiles metric@(LikelihoodHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory +createTrainFiles metric trainDirectory expectedFile = do + createFile (trainDirectory "in.tsv") $ trainInContents metric + createFile (trainDirectory expectedFile) $ trainExpectedContents metric + +createSingleTrainFile metric trainDirectory = + createFile (trainDirectory "train.tsv") $ trainContents metric + createFile :: FilePath -> String -> IO () createFile filePath contents = do whenM (D.doesFileExist filePath) $ throwM $ FileAlreadyThere filePath diff --git a/src/GEval/Validation.hs b/src/GEval/Validation.hs index 235d356..91f9de1 100644 --- a/src/GEval/Validation.hs +++ b/src/GEval/Validation.hs @@ -45,7 +45,6 @@ data ValidationException = NoChallengeDirectory FilePath | SpaceSuffixDetect FilePath | VaryingNumberOfColumns FilePath | BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue - | OldStyleTrainFile instance Exception ValidationException @@ -66,7 +65,6 @@ instance Show ValidationException where show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got) - show OldStyleTrainFile = "Found old-style train file `train.tsv`, whereas the same convention as in test directories should be used (`in.tsv` and `expected.tsv`)" validationChallenge :: FilePath -> GEvalSpecification -> IO () validationChallenge challengeDirectory spec = do @@ -78,16 +76,13 @@ validationChallenge challengeDirectory spec = do checkCorrectFile gitignoreFile checkCorrectFile readmeFile testDirectories <- findTestDirs challengeDirectory - checkTestDirectories mainMetric testDirectories - checkTrainDirectory mainMetric challengeDirectory - - mapM_ (runOnTest spec) testDirectories + checkTestDirectories spec testDirectories + checkTrainDirectory spec challengeDirectory where configFile = challengeDirectory "config.txt" gitignoreFile = challengeDirectory ".gitignore" readmeFile = challengeDirectory "README.md" - mainMetric = evaluationSchemeMetric $ head $ gesMetrics spec checkCorrectFile :: FilePath -> IO () checkCorrectFile filePath = do @@ -150,8 +145,7 @@ never = depth ==? 0 testDirFilter :: FindClause Bool testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" - ||? SFF.fileName ~~? "test-*" - ||? SFF.fileName ==? "train") + ||? SFF.fileName ~~? "test-*") fileFilter :: String -> FindClause Bool fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts) @@ -159,12 +153,12 @@ fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileN exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ] -checkTestDirectories :: Metric -> [FilePath] -> IO () +checkTestDirectories :: GEvalSpecification -> [FilePath] -> IO () checkTestDirectories _ [] = throwM NoTestDirectories -checkTestDirectories metric directories = mapM_ (checkTestDirectory metric) directories +checkTestDirectories spec directories = mapM_ (checkTestDirectory spec) directories -checkTestDirectory :: Metric -> FilePath -> IO () -checkTestDirectory metric directoryPath = do +checkTestDirectory :: GEvalSpecification -> FilePath -> IO () +checkTestDirectory spec directoryPath = do inputFiles <- findInputFiles directoryPath when (null inputFiles) $ throw $ NoInputFile inputFile when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles @@ -184,16 +178,29 @@ checkTestDirectory metric directoryPath = do outputFiles <- findOutputFiles directoryPath unless (null outputFiles) $ throw $ OutputFileDetected outputFiles + + runOnTest spec directoryPath + where + metric = evaluationSchemeMetric $ head $ gesMetrics spec inputFile = directoryPath defaultInputFile + expectedFile = directoryPath defaultExpectedFile -checkTrainDirectory :: Metric -> FilePath -> IO () -checkTrainDirectory metric challengeDirectory = do +checkTrainDirectory :: GEvalSpecification -> FilePath -> IO () +checkTrainDirectory spec challengeDirectory = do let trainDirectory = challengeDirectory "train" whenM (doesDirectoryExist trainDirectory) $ do trainFiles <- findTrainFiles trainDirectory - when (not $ null trainFiles) $ throw $ OldStyleTrainFile + if (not $ null trainFiles) + then + do + putStrLn "WARNING: Found old-style train file `train.tsv`, whereas the same convention as in" + putStrLn "WARNING: test directories if preferred (`in.tsv` and `expected.tsv`)." + putStrLn "WARNING: (Though, there might still be some cases when `train.tsv` is needed, e.g. for training LMs.)" + else + do + runOnTest spec trainDirectory checkColumns :: FilePath -> IO () checkColumns filePath = do