diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 325aed4..410d657 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -18,6 +18,9 @@ import Control.Exception import Control.Monad.Trans.Resource import Data.String.Here +import Data.List (intercalate) +import Data.List.Split (splitOn) + createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO () createChallenge withDataFiles expectedDirectory spec = do D.createDirectoryIfMissing False expectedDirectory @@ -30,7 +33,8 @@ createChallenge withDataFiles expectedDirectory spec = do if withDataFiles then do - createFile (trainDirectory "train.tsv") $ trainContents metric + createFile (trainDirectory "in.tsv") $ trainInContents metric + createFile (trainDirectory expectedFile) $ trainExpectedContents metric createFile (devDirectory "in.tsv") $ devInContents metric createFile (devDirectory expectedFile) $ devExpectedContents metric @@ -413,6 +417,20 @@ configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("- where precisionOpt Nothing = "" precisionOpt (Just p) = " --precision " ++ (show p) +-- Originally train content was in one file, to avoid large changes +-- for the time being we are using the original function. + +trainInContents :: Metric -> String +trainInContents metric = unlines + $ map (intercalate "\t") + $ map tail + $ map (splitOn "\t") + $ lines + $ trainContents metric + +trainExpectedContents :: Metric -> String +trainExpectedContents metric = unlines $ map head $ map (splitOn "\t") $ lines $ trainContents metric + trainContents :: Metric -> String trainContents (Mean metric) = trainContents metric trainContents GLEU = trainContents BLEU @@ -502,13 +520,13 @@ I am sad SADNESS I am so sad and hateful SADNESS HATE |] trainContents (Soft2DFMeasure _) = trainContents ClippEU -trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu -1/30,40,100,1000 bar.djvu +trainContents ClippEU = [hereLit|2/0,0,10,150/10 foo.djvu +1/30,40,100,1000/10 bar.djvu |] -trainContents _ = [hereLit|0.06 0.39 0 0.206 -1.00 1.00 1 0.017 -317.8 5.20 67 0.048 -14.6 19.22 27 0.047 +trainContents _ = [hereLit|0.06 0.39 0 0.206 +1.00 1.00 1 0.017 +317.8 5.20 67 0.048 +14.6 19.22 27 0.047 |] devInContents :: Metric -> String diff --git a/src/GEval/Validation.hs b/src/GEval/Validation.hs index ef2e981..235d356 100644 --- a/src/GEval/Validation.hs +++ b/src/GEval/Validation.hs @@ -45,6 +45,7 @@ data ValidationException = NoChallengeDirectory FilePath | SpaceSuffixDetect FilePath | VaryingNumberOfColumns FilePath | BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue + | OldStyleTrainFile instance Exception ValidationException @@ -65,6 +66,7 @@ instance Show ValidationException where show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got) + show OldStyleTrainFile = "Found old-style train file `train.tsv`, whereas the same convention as in test directories should be used (`in.tsv` and `expected.tsv`)" validationChallenge :: FilePath -> GEvalSpecification -> IO () validationChallenge challengeDirectory spec = do @@ -147,7 +149,9 @@ never :: FindClause Bool never = depth ==? 0 testDirFilter :: FindClause Bool -testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" ||? SFF.fileName ~~? "test-*") +testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" + ||? SFF.fileName ~~? "test-*" + ||? SFF.fileName ==? "train") fileFilter :: String -> FindClause Bool fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts) @@ -189,12 +193,7 @@ checkTrainDirectory metric challengeDirectory = do let trainDirectory = challengeDirectory "train" whenM (doesDirectoryExist trainDirectory) $ do trainFiles <- findTrainFiles trainDirectory - when (null trainFiles) $ throw $ NoInputFile "train.tsv" - when (length trainFiles > 1) $ throw $ TooManyTrainFiles trainFiles - let [trainFile] = trainFiles - checkCorrectFile trainFile - when (fixedNumberOfColumnsInInput metric && fixedNumberOfColumnsInExpected metric) $ do - checkColumns trainFile + when (not $ null trainFiles) $ throw $ OldStyleTrainFile checkColumns :: FilePath -> IO () checkColumns filePath = do