Towards new-style of train
This commit is contained in:
parent
ef8945af11
commit
74d999d4bf
@ -18,6 +18,9 @@ import Control.Exception
|
|||||||
import Control.Monad.Trans.Resource
|
import Control.Monad.Trans.Resource
|
||||||
import Data.String.Here
|
import Data.String.Here
|
||||||
|
|
||||||
|
import Data.List (intercalate)
|
||||||
|
import Data.List.Split (splitOn)
|
||||||
|
|
||||||
createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
|
createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
|
||||||
createChallenge withDataFiles expectedDirectory spec = do
|
createChallenge withDataFiles expectedDirectory spec = do
|
||||||
D.createDirectoryIfMissing False expectedDirectory
|
D.createDirectoryIfMissing False expectedDirectory
|
||||||
@ -30,7 +33,8 @@ createChallenge withDataFiles expectedDirectory spec = do
|
|||||||
if withDataFiles
|
if withDataFiles
|
||||||
then
|
then
|
||||||
do
|
do
|
||||||
createFile (trainDirectory </> "train.tsv") $ trainContents metric
|
createFile (trainDirectory </> "in.tsv") $ trainInContents metric
|
||||||
|
createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
|
||||||
|
|
||||||
createFile (devDirectory </> "in.tsv") $ devInContents metric
|
createFile (devDirectory </> "in.tsv") $ devInContents metric
|
||||||
createFile (devDirectory </> expectedFile) $ devExpectedContents metric
|
createFile (devDirectory </> expectedFile) $ devExpectedContents metric
|
||||||
@ -413,6 +417,20 @@ configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("-
|
|||||||
where precisionOpt Nothing = ""
|
where precisionOpt Nothing = ""
|
||||||
precisionOpt (Just p) = " --precision " ++ (show p)
|
precisionOpt (Just p) = " --precision " ++ (show p)
|
||||||
|
|
||||||
|
-- Originally train content was in one file, to avoid large changes
|
||||||
|
-- for the time being we are using the original function.
|
||||||
|
|
||||||
|
trainInContents :: Metric -> String
|
||||||
|
trainInContents metric = unlines
|
||||||
|
$ map (intercalate "\t")
|
||||||
|
$ map tail
|
||||||
|
$ map (splitOn "\t")
|
||||||
|
$ lines
|
||||||
|
$ trainContents metric
|
||||||
|
|
||||||
|
trainExpectedContents :: Metric -> String
|
||||||
|
trainExpectedContents metric = unlines $ map head $ map (splitOn "\t") $ lines $ trainContents metric
|
||||||
|
|
||||||
trainContents :: Metric -> String
|
trainContents :: Metric -> String
|
||||||
trainContents (Mean metric) = trainContents metric
|
trainContents (Mean metric) = trainContents metric
|
||||||
trainContents GLEU = trainContents BLEU
|
trainContents GLEU = trainContents BLEU
|
||||||
@ -502,8 +520,8 @@ I am sad SADNESS
|
|||||||
I am so sad and hateful SADNESS HATE
|
I am so sad and hateful SADNESS HATE
|
||||||
|]
|
|]
|
||||||
trainContents (Soft2DFMeasure _) = trainContents ClippEU
|
trainContents (Soft2DFMeasure _) = trainContents ClippEU
|
||||||
trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu
|
trainContents ClippEU = [hereLit|2/0,0,10,150/10 foo.djvu
|
||||||
1/30,40,100,1000 bar.djvu
|
1/30,40,100,1000/10 bar.djvu
|
||||||
|]
|
|]
|
||||||
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
||||||
1.00 1.00 1 0.017
|
1.00 1.00 1 0.017
|
||||||
|
@ -45,6 +45,7 @@ data ValidationException = NoChallengeDirectory FilePath
|
|||||||
| SpaceSuffixDetect FilePath
|
| SpaceSuffixDetect FilePath
|
||||||
| VaryingNumberOfColumns FilePath
|
| VaryingNumberOfColumns FilePath
|
||||||
| BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue
|
| BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue
|
||||||
|
| OldStyleTrainFile
|
||||||
|
|
||||||
instance Exception ValidationException
|
instance Exception ValidationException
|
||||||
|
|
||||||
@ -65,6 +66,7 @@ instance Show ValidationException where
|
|||||||
show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths
|
show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths
|
||||||
show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths
|
show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths
|
||||||
show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got)
|
show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got)
|
||||||
|
show OldStyleTrainFile = "Found old-style train file `train.tsv`, whereas the same convention as in test directories should be used (`in.tsv` and `expected.tsv`)"
|
||||||
|
|
||||||
validationChallenge :: FilePath -> GEvalSpecification -> IO ()
|
validationChallenge :: FilePath -> GEvalSpecification -> IO ()
|
||||||
validationChallenge challengeDirectory spec = do
|
validationChallenge challengeDirectory spec = do
|
||||||
@ -147,7 +149,9 @@ never :: FindClause Bool
|
|||||||
never = depth ==? 0
|
never = depth ==? 0
|
||||||
|
|
||||||
testDirFilter :: FindClause Bool
|
testDirFilter :: FindClause Bool
|
||||||
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" ||? SFF.fileName ~~? "test-*")
|
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*"
|
||||||
|
||? SFF.fileName ~~? "test-*"
|
||||||
|
||? SFF.fileName ==? "train")
|
||||||
|
|
||||||
fileFilter :: String -> FindClause Bool
|
fileFilter :: String -> FindClause Bool
|
||||||
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
|
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
|
||||||
@ -189,12 +193,7 @@ checkTrainDirectory metric challengeDirectory = do
|
|||||||
let trainDirectory = challengeDirectory </> "train"
|
let trainDirectory = challengeDirectory </> "train"
|
||||||
whenM (doesDirectoryExist trainDirectory) $ do
|
whenM (doesDirectoryExist trainDirectory) $ do
|
||||||
trainFiles <- findTrainFiles trainDirectory
|
trainFiles <- findTrainFiles trainDirectory
|
||||||
when (null trainFiles) $ throw $ NoInputFile "train.tsv"
|
when (not $ null trainFiles) $ throw $ OldStyleTrainFile
|
||||||
when (length trainFiles > 1) $ throw $ TooManyTrainFiles trainFiles
|
|
||||||
let [trainFile] = trainFiles
|
|
||||||
checkCorrectFile trainFile
|
|
||||||
when (fixedNumberOfColumnsInInput metric && fixedNumberOfColumnsInExpected metric) $ do
|
|
||||||
checkColumns trainFile
|
|
||||||
|
|
||||||
checkColumns :: FilePath -> IO ()
|
checkColumns :: FilePath -> IO ()
|
||||||
checkColumns filePath = do
|
checkColumns filePath = do
|
||||||
|
Loading…
Reference in New Issue
Block a user