Add simple validation of a challenge (--validate option)
This commit is contained in:
parent
b4ad774623
commit
19d231e140
@ -698,3 +698,9 @@ Filip Graliński
|
|||||||
## Contributors
|
## Contributors
|
||||||
|
|
||||||
Piotr Halama
|
Piotr Halama
|
||||||
|
Karol Kaczmarek
|
||||||
|
|
||||||
|
## Copyright
|
||||||
|
|
||||||
|
2015-2019 Filip Graliński
|
||||||
|
2019 Applica.ai
|
||||||
|
@ -30,6 +30,7 @@ library
|
|||||||
, GEval.ParseParams
|
, GEval.ParseParams
|
||||||
, GEval.ProbList
|
, GEval.ProbList
|
||||||
, GEval.Submit
|
, GEval.Submit
|
||||||
|
, GEval.Validation
|
||||||
, Data.Conduit.AutoDecompress
|
, Data.Conduit.AutoDecompress
|
||||||
, Data.Conduit.SmartSource
|
, Data.Conduit.SmartSource
|
||||||
, Data.Conduit.Rank
|
, Data.Conduit.Rank
|
||||||
@ -91,6 +92,7 @@ library
|
|||||||
, Chart
|
, Chart
|
||||||
, Chart-cairo
|
, Chart-cairo
|
||||||
, errors
|
, errors
|
||||||
|
, filemanip
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable geval
|
executable geval
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
{-# LANGUAGE AllowAmbiguousTypes #-}
|
{-# LANGUAGE AllowAmbiguousTypes #-}
|
||||||
{-# LANGUAGE FlexibleInstances #-}
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE PackageImports #-}
|
||||||
|
|
||||||
|
|
||||||
module GEval.Core
|
module GEval.Core
|
||||||
@ -44,7 +45,9 @@ module GEval.Core
|
|||||||
gesPreprocess,
|
gesPreprocess,
|
||||||
getDataDecoder,
|
getDataDecoder,
|
||||||
threeLineSource,
|
threeLineSource,
|
||||||
extensionsHandled
|
extensionsHandled,
|
||||||
|
isEmptyFile,
|
||||||
|
somethingWrongWithFilesMessage
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.Conduit
|
import Data.Conduit
|
||||||
@ -109,7 +112,7 @@ import Data.Proxy
|
|||||||
|
|
||||||
import Data.Word
|
import Data.Word
|
||||||
|
|
||||||
import System.FilePath.Glob
|
import "Glob" System.FilePath.Glob
|
||||||
|
|
||||||
defaultLogLossHashedSize :: Word32
|
defaultLogLossHashedSize :: Word32
|
||||||
defaultLogLossHashedSize = 10
|
defaultLogLossHashedSize = 10
|
||||||
@ -313,6 +316,7 @@ data GEvalSpecialCommand = Init
|
|||||||
| LineByLine | WorstFeatures
|
| LineByLine | WorstFeatures
|
||||||
| Diff FilePath | MostWorseningFeatures FilePath
|
| Diff FilePath | MostWorseningFeatures FilePath
|
||||||
| PrintVersion | JustTokenize | Submit
|
| PrintVersion | JustTokenize | Submit
|
||||||
|
| Validate
|
||||||
|
|
||||||
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
|
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
|
||||||
|
|
||||||
|
@ -32,6 +32,7 @@ import GEval.LineByLine
|
|||||||
import GEval.Submit (submit)
|
import GEval.Submit (submit)
|
||||||
import GEval.BlackBoxDebugging
|
import GEval.BlackBoxDebugging
|
||||||
import GEval.Selector
|
import GEval.Selector
|
||||||
|
import GEval.Validation
|
||||||
|
|
||||||
import Data.Conduit.SmartSource
|
import Data.Conduit.SmartSource
|
||||||
|
|
||||||
@ -81,6 +82,10 @@ optionsParser = GEvalOptions
|
|||||||
( long "submit"
|
( long "submit"
|
||||||
<> short 'S'
|
<> short 'S'
|
||||||
<> help "Submit current solution for evaluation to an external Gonito instance specified with --gonito-host option. Optionally, specify --token."))
|
<> help "Submit current solution for evaluation to an external Gonito instance specified with --gonito-host option. Optionally, specify --token."))
|
||||||
|
<|>
|
||||||
|
(flag' Validate
|
||||||
|
( long "validate"
|
||||||
|
<> help "Validate challenge, it searches for potential errors in the given challenge path, like missing columns, files or format data."))
|
||||||
)
|
)
|
||||||
|
|
||||||
<*> ((flag' FirstTheWorst
|
<*> ((flag' FirstTheWorst
|
||||||
@ -332,6 +337,9 @@ runGEval''' (Just JustTokenize) _ _ spec _ _ = do
|
|||||||
runGEval''' (Just Submit) _ _ spec _ _ = do
|
runGEval''' (Just Submit) _ _ spec _ _ = do
|
||||||
submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec)
|
submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec)
|
||||||
return Nothing
|
return Nothing
|
||||||
|
runGEval''' (Just Validate) _ _ spec _ _ = do
|
||||||
|
validateChallenge spec
|
||||||
|
return Nothing
|
||||||
|
|
||||||
getGraphFilename :: Int -> FilePath -> FilePath
|
getGraphFilename :: Int -> FilePath -> FilePath
|
||||||
getGraphFilename 0 fp = fp
|
getGraphFilename 0 fp = fp
|
||||||
@ -378,3 +386,17 @@ to create a directory CHALLENGE representing a Gonito challenge.
|
|||||||
(Note that `--out-directory` option is not taken into account with `--init` option.)
|
(Note that `--out-directory` option is not taken into account with `--init` option.)
|
||||||
|]
|
|]
|
||||||
exitFailure
|
exitFailure
|
||||||
|
|
||||||
|
|
||||||
|
validateChallenge :: GEvalSpecification -> IO ()
|
||||||
|
validateChallenge spec = case gesExpectedDirectory spec of
|
||||||
|
Nothing -> showValidateInstructions
|
||||||
|
Just expectedDirectory -> validationChallenge expectedDirectory spec
|
||||||
|
|
||||||
|
showValidateInstructions = do
|
||||||
|
putStrLn [here|
|
||||||
|
Run:
|
||||||
|
geval --validate --expected-directory CHALLENGE
|
||||||
|
to validate a directory CHALLENGE representing a Gonito challenge.
|
||||||
|
|]
|
||||||
|
exitFailure
|
||||||
|
134
src/GEval/Validation.hs
Normal file
134
src/GEval/Validation.hs
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module GEval.Validation
|
||||||
|
( validationChallenge
|
||||||
|
) where
|
||||||
|
|
||||||
|
import GEval.Core
|
||||||
|
import qualified System.Directory as D
|
||||||
|
|
||||||
|
import System.FilePath.Find as SFF
|
||||||
|
import System.FilePath
|
||||||
|
import Control.Exception
|
||||||
|
import Control.Monad.Trans.Resource
|
||||||
|
import Control.Monad.IO.Class
|
||||||
|
import Control.Conditional (unlessM, whenM, unless, when)
|
||||||
|
import Data.Conduit
|
||||||
|
import qualified Data.Conduit.List as CL
|
||||||
|
import qualified Data.Conduit.Combinators as CC
|
||||||
|
import qualified Data.Conduit.Text as CT
|
||||||
|
import Data.Conduit.Binary (sourceFile)
|
||||||
|
import Data.Conduit.AutoDecompress (autoDecompress)
|
||||||
|
import Data.Conduit.SmartSource (compressedFilesHandled)
|
||||||
|
import Data.List (intercalate)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
|
||||||
|
data ValidationException = NoChallengeDirectory FilePath
|
||||||
|
| NoFoundFile FilePath
|
||||||
|
| NoConfigFile FilePath
|
||||||
|
| NoReadmeFile FilePath
|
||||||
|
| NoGitignoreFile FilePath
|
||||||
|
| EmptyFile FilePath
|
||||||
|
| NoTestDirectories
|
||||||
|
| TooManyInputFiles [FilePath]
|
||||||
|
| TooManyExpectedFiles [FilePath]
|
||||||
|
| OutputFileDetected [FilePath]
|
||||||
|
| CharacterCRDetected FilePath
|
||||||
|
| SpaceSuffixDetect FilePath
|
||||||
|
|
||||||
|
instance Exception ValidationException
|
||||||
|
|
||||||
|
instance Show ValidationException where
|
||||||
|
show (NoChallengeDirectory filePath) = somethingWrongWithFilesMessage "No challenge directory" filePath
|
||||||
|
show (NoFoundFile filePath) = somethingWrongWithFilesMessage "No file found" filePath
|
||||||
|
show (NoConfigFile filePath) = somethingWrongWithFilesMessage "No config.txt file" filePath
|
||||||
|
show (NoReadmeFile filePath) = somethingWrongWithFilesMessage "No README.md file" filePath
|
||||||
|
show (NoGitignoreFile filePath) = somethingWrongWithFilesMessage "No .gitignore file" filePath
|
||||||
|
show (EmptyFile filePath) = somethingWrongWithFilesMessage "Empty file" filePath
|
||||||
|
show NoTestDirectories = "No directories with test data, expected `dev-0` and/or `test-A` directory"
|
||||||
|
show (TooManyInputFiles filePaths) = somethingWrongWithFilesMessage "Too many input files" $ intercalate "`, `" filePaths
|
||||||
|
show (TooManyExpectedFiles filePaths) = somethingWrongWithFilesMessage "Too many expected files" $ intercalate "`, `" filePaths
|
||||||
|
show (OutputFileDetected filePaths) = somethingWrongWithFilesMessage "Output file/s detected" $ intercalate "`, `" filePaths
|
||||||
|
show (CharacterCRDetected filePaths) = somethingWrongWithFilesMessage "Found CR (Carriage Return, 0x0D) character" filePaths
|
||||||
|
show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths
|
||||||
|
|
||||||
|
|
||||||
|
validationChallenge :: FilePath -> GEvalSpecification -> IO ()
|
||||||
|
validationChallenge challengeDirectory spec = do
|
||||||
|
unlessM (D.doesDirectoryExist challengeDirectory) $ throwM $ NoChallengeDirectory challengeDirectory
|
||||||
|
unlessM (D.doesFileExist configFile) $ throwM $ NoConfigFile configFile
|
||||||
|
unlessM (D.doesFileExist gitignoreFile) $ throwM $ NoGitignoreFile gitignoreFile
|
||||||
|
unlessM (D.doesFileExist readmeFile) $ throwM $ NoReadmeFile readmeFile
|
||||||
|
checkCorrectFile configFile
|
||||||
|
checkCorrectFile gitignoreFile
|
||||||
|
checkCorrectFile readmeFile
|
||||||
|
testDirectories <- findTestDirs challengeDirectory
|
||||||
|
checkTestDirectories testDirectories
|
||||||
|
where
|
||||||
|
configFile = challengeDirectory </> "config.txt"
|
||||||
|
gitignoreFile = challengeDirectory </> ".gitignore"
|
||||||
|
readmeFile = challengeDirectory </> "README.md"
|
||||||
|
|
||||||
|
|
||||||
|
checkCorrectFile :: FilePath -> IO ()
|
||||||
|
checkCorrectFile filePath = do
|
||||||
|
whenM (isEmptyFile filePath) $ throwM $ EmptyFile filePath
|
||||||
|
lines' <- getFileLines filePath
|
||||||
|
let lines = map T.pack lines'
|
||||||
|
when (any (T.isInfixOf "\r") lines) $ throw $ CharacterCRDetected filePath
|
||||||
|
when (any (T.isSuffixOf " ") lines) $ throw $ SpaceSuffixDetect filePath
|
||||||
|
|
||||||
|
getFileLines :: FilePath -> IO [String]
|
||||||
|
getFileLines file = runResourceT $ runConduit (sourceFile file
|
||||||
|
.| autoDecompress
|
||||||
|
.| CC.decodeUtf8Lenient
|
||||||
|
.| CT.lines
|
||||||
|
.| CC.map T.unpack
|
||||||
|
.| CL.consume)
|
||||||
|
|
||||||
|
|
||||||
|
findTestDirs :: FilePath -> IO [FilePath]
|
||||||
|
findTestDirs = SFF.find never testDirFilter
|
||||||
|
|
||||||
|
findInputFiles :: FilePath -> IO [FilePath]
|
||||||
|
findInputFiles = SFF.find never $ fileFilter "in.tsv"
|
||||||
|
|
||||||
|
findOutputFiles :: FilePath -> IO [FilePath]
|
||||||
|
findOutputFiles = SFF.find never $ fileFilter "out*.tsv"
|
||||||
|
|
||||||
|
findExpectedFiles :: FilePath -> IO [FilePath]
|
||||||
|
findExpectedFiles = SFF.find never $ fileFilter "expected.tsv"
|
||||||
|
|
||||||
|
never :: FindClause Bool
|
||||||
|
never = depth ==? 0
|
||||||
|
|
||||||
|
testDirFilter :: FindClause Bool
|
||||||
|
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" ||? SFF.fileName ~~? "test-*")
|
||||||
|
|
||||||
|
fileFilter :: String -> FindClause Bool
|
||||||
|
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
|
||||||
|
where
|
||||||
|
exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ]
|
||||||
|
|
||||||
|
|
||||||
|
checkTestDirectories :: [FilePath] -> IO ()
|
||||||
|
checkTestDirectories [] = throwM NoTestDirectories
|
||||||
|
checkTestDirectories directories = mapM_ checkTestDirectory directories
|
||||||
|
|
||||||
|
checkTestDirectory :: FilePath -> IO ()
|
||||||
|
checkTestDirectory directoryPath = do
|
||||||
|
inputFiles <- findInputFiles directoryPath
|
||||||
|
when (null inputFiles) $ throw $ NoInputFile inputFile
|
||||||
|
when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles
|
||||||
|
checkCorrectFile $ head inputFiles
|
||||||
|
|
||||||
|
expectedFiles <- findExpectedFiles directoryPath
|
||||||
|
when (null expectedFiles) $ throw $ NoExpectedFile expectedFile
|
||||||
|
when (length expectedFiles > 1) $ throw $ TooManyExpectedFiles expectedFiles
|
||||||
|
checkCorrectFile $ head expectedFiles
|
||||||
|
|
||||||
|
outputFiles <- findOutputFiles directoryPath
|
||||||
|
unless (null outputFiles) $ throw $ OutputFileDetected outputFiles
|
||||||
|
where
|
||||||
|
inputFile = directoryPath </> "in.tsv"
|
||||||
|
expectedFile = directoryPath </> "expected.tsv"
|
Loading…
Reference in New Issue
Block a user