Towards new-style of train

This commit is contained in:
Filip Graliński 2019-11-26 16:41:33 +01:00
parent ef8945af11
commit 74d999d4bf
2 changed files with 31 additions and 14 deletions

View File

@ -18,6 +18,9 @@ import Control.Exception
import Control.Monad.Trans.Resource import Control.Monad.Trans.Resource
import Data.String.Here import Data.String.Here
import Data.List (intercalate)
import Data.List.Split (splitOn)
createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO () createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
createChallenge withDataFiles expectedDirectory spec = do createChallenge withDataFiles expectedDirectory spec = do
D.createDirectoryIfMissing False expectedDirectory D.createDirectoryIfMissing False expectedDirectory
@ -30,7 +33,8 @@ createChallenge withDataFiles expectedDirectory spec = do
if withDataFiles if withDataFiles
then then
do do
createFile (trainDirectory </> "train.tsv") $ trainContents metric createFile (trainDirectory </> "in.tsv") $ trainInContents metric
createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
createFile (devDirectory </> "in.tsv") $ devInContents metric createFile (devDirectory </> "in.tsv") $ devInContents metric
createFile (devDirectory </> expectedFile) $ devExpectedContents metric createFile (devDirectory </> expectedFile) $ devExpectedContents metric
@ -413,6 +417,20 @@ configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("-
where precisionOpt Nothing = "" where precisionOpt Nothing = ""
precisionOpt (Just p) = " --precision " ++ (show p) precisionOpt (Just p) = " --precision " ++ (show p)
-- Originally train content was in one file, to avoid large changes
-- for the time being we are using the original function.
trainInContents :: Metric -> String
trainInContents metric = unlines
$ map (intercalate "\t")
$ map tail
$ map (splitOn "\t")
$ lines
$ trainContents metric
trainExpectedContents :: Metric -> String
trainExpectedContents metric = unlines $ map head $ map (splitOn "\t") $ lines $ trainContents metric
trainContents :: Metric -> String trainContents :: Metric -> String
trainContents (Mean metric) = trainContents metric trainContents (Mean metric) = trainContents metric
trainContents GLEU = trainContents BLEU trainContents GLEU = trainContents BLEU
@ -502,13 +520,13 @@ I am sad SADNESS
I am so sad and hateful SADNESS HATE I am so sad and hateful SADNESS HATE
|] |]
trainContents (Soft2DFMeasure _) = trainContents ClippEU trainContents (Soft2DFMeasure _) = trainContents ClippEU
trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu trainContents ClippEU = [hereLit|2/0,0,10,150/10 foo.djvu
1/30,40,100,1000 bar.djvu 1/30,40,100,1000/10 bar.djvu
|] |]
trainContents _ = [hereLit|0.06 0.39 0 0.206 trainContents _ = [hereLit|0.06 0.39 0 0.206
1.00 1.00 1 0.017 1.00 1.00 1 0.017
317.8 5.20 67 0.048 317.8 5.20 67 0.048
14.6 19.22 27 0.047 14.6 19.22 27 0.047
|] |]
devInContents :: Metric -> String devInContents :: Metric -> String

View File

@ -45,6 +45,7 @@ data ValidationException = NoChallengeDirectory FilePath
| SpaceSuffixDetect FilePath | SpaceSuffixDetect FilePath
| VaryingNumberOfColumns FilePath | VaryingNumberOfColumns FilePath
| BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue | BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue
| OldStyleTrainFile
instance Exception ValidationException instance Exception ValidationException
@ -65,6 +66,7 @@ instance Show ValidationException where
show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths
show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths
show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got) show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got)
show OldStyleTrainFile = "Found old-style train file `train.tsv`, whereas the same convention as in test directories should be used (`in.tsv` and `expected.tsv`)"
validationChallenge :: FilePath -> GEvalSpecification -> IO () validationChallenge :: FilePath -> GEvalSpecification -> IO ()
validationChallenge challengeDirectory spec = do validationChallenge challengeDirectory spec = do
@ -147,7 +149,9 @@ never :: FindClause Bool
never = depth ==? 0 never = depth ==? 0
testDirFilter :: FindClause Bool testDirFilter :: FindClause Bool
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" ||? SFF.fileName ~~? "test-*") testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*"
||? SFF.fileName ~~? "test-*"
||? SFF.fileName ==? "train")
fileFilter :: String -> FindClause Bool fileFilter :: String -> FindClause Bool
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts) fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
@ -189,12 +193,7 @@ checkTrainDirectory metric challengeDirectory = do
let trainDirectory = challengeDirectory </> "train" let trainDirectory = challengeDirectory </> "train"
whenM (doesDirectoryExist trainDirectory) $ do whenM (doesDirectoryExist trainDirectory) $ do
trainFiles <- findTrainFiles trainDirectory trainFiles <- findTrainFiles trainDirectory
when (null trainFiles) $ throw $ NoInputFile "train.tsv" when (not $ null trainFiles) $ throw $ OldStyleTrainFile
when (length trainFiles > 1) $ throw $ TooManyTrainFiles trainFiles
let [trainFile] = trainFiles
checkCorrectFile trainFile
when (fixedNumberOfColumnsInInput metric && fixedNumberOfColumnsInExpected metric) $ do
checkColumns trainFile
checkColumns :: FilePath -> IO () checkColumns :: FilePath -> IO ()
checkColumns filePath = do checkColumns filePath = do