Add simple validation of a challenge (--validate option)

This commit is contained in:
Karol Kaczmarek 2019-07-30 08:14:10 +00:00 committed by Filip Graliński
parent b4ad774623
commit 19d231e140
5 changed files with 170 additions and 2 deletions

View File

@ -698,3 +698,9 @@ Filip Graliński
## Contributors
Piotr Halama
Karol Kaczmarek
## Copyright
2015-2019 Filip Graliński
2019 Applica.ai

View File

@ -30,6 +30,7 @@ library
, GEval.ParseParams
, GEval.ProbList
, GEval.Submit
, GEval.Validation
, Data.Conduit.AutoDecompress
, Data.Conduit.SmartSource
, Data.Conduit.Rank
@ -91,6 +92,7 @@ library
, Chart
, Chart-cairo
, errors
, filemanip
default-language: Haskell2010
executable geval

View File

@ -4,6 +4,7 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE PackageImports #-}
module GEval.Core
@ -44,7 +45,9 @@ module GEval.Core
gesPreprocess,
getDataDecoder,
threeLineSource,
extensionsHandled
extensionsHandled,
isEmptyFile,
somethingWrongWithFilesMessage
) where
import Data.Conduit
@ -109,7 +112,7 @@ import Data.Proxy
import Data.Word
import System.FilePath.Glob
import "Glob" System.FilePath.Glob
defaultLogLossHashedSize :: Word32
defaultLogLossHashedSize = 10
@ -313,6 +316,7 @@ data GEvalSpecialCommand = Init
| LineByLine | WorstFeatures
| Diff FilePath | MostWorseningFeatures FilePath
| PrintVersion | JustTokenize | Submit
| Validate
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest

View File

@ -32,6 +32,7 @@ import GEval.LineByLine
import GEval.Submit (submit)
import GEval.BlackBoxDebugging
import GEval.Selector
import GEval.Validation
import Data.Conduit.SmartSource
@ -81,6 +82,10 @@ optionsParser = GEvalOptions
( long "submit"
<> short 'S'
<> help "Submit current solution for evaluation to an external Gonito instance specified with --gonito-host option. Optionally, specify --token."))
<|>
(flag' Validate
( long "validate"
<> help "Validate challenge, it searches for potential errors in the given challenge path, like missing columns, files or format data."))
)
<*> ((flag' FirstTheWorst
@ -332,6 +337,9 @@ runGEval''' (Just JustTokenize) _ _ spec _ _ = do
runGEval''' (Just Submit) _ _ spec _ _ = do
submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec)
return Nothing
runGEval''' (Just Validate) _ _ spec _ _ = do
validateChallenge spec
return Nothing
getGraphFilename :: Int -> FilePath -> FilePath
getGraphFilename 0 fp = fp
@ -378,3 +386,17 @@ to create a directory CHALLENGE representing a Gonito challenge.
(Note that `--out-directory` option is not taken into account with `--init` option.)
|]
exitFailure
validateChallenge :: GEvalSpecification -> IO ()
validateChallenge spec = case gesExpectedDirectory spec of
Nothing -> showValidateInstructions
Just expectedDirectory -> validationChallenge expectedDirectory spec
showValidateInstructions = do
putStrLn [here|
Run:
geval --validate --expected-directory CHALLENGE
to validate a directory CHALLENGE representing a Gonito challenge.
|]
exitFailure

134
src/GEval/Validation.hs Normal file
View File

@ -0,0 +1,134 @@
{-# LANGUAGE OverloadedStrings #-}
module GEval.Validation
( validationChallenge
) where
import GEval.Core
import qualified System.Directory as D
import System.FilePath.Find as SFF
import System.FilePath
import Control.Exception
import Control.Monad.Trans.Resource
import Control.Monad.IO.Class
import Control.Conditional (unlessM, whenM, unless, when)
import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
import qualified Data.Conduit.Text as CT
import Data.Conduit.Binary (sourceFile)
import Data.Conduit.AutoDecompress (autoDecompress)
import Data.Conduit.SmartSource (compressedFilesHandled)
import Data.List (intercalate)
import qualified Data.Text as T
data ValidationException = NoChallengeDirectory FilePath
| NoFoundFile FilePath
| NoConfigFile FilePath
| NoReadmeFile FilePath
| NoGitignoreFile FilePath
| EmptyFile FilePath
| NoTestDirectories
| TooManyInputFiles [FilePath]
| TooManyExpectedFiles [FilePath]
| OutputFileDetected [FilePath]
| CharacterCRDetected FilePath
| SpaceSuffixDetect FilePath
instance Exception ValidationException
instance Show ValidationException where
show (NoChallengeDirectory filePath) = somethingWrongWithFilesMessage "No challenge directory" filePath
show (NoFoundFile filePath) = somethingWrongWithFilesMessage "No file found" filePath
show (NoConfigFile filePath) = somethingWrongWithFilesMessage "No config.txt file" filePath
show (NoReadmeFile filePath) = somethingWrongWithFilesMessage "No README.md file" filePath
show (NoGitignoreFile filePath) = somethingWrongWithFilesMessage "No .gitignore file" filePath
show (EmptyFile filePath) = somethingWrongWithFilesMessage "Empty file" filePath
show NoTestDirectories = "No directories with test data, expected `dev-0` and/or `test-A` directory"
show (TooManyInputFiles filePaths) = somethingWrongWithFilesMessage "Too many input files" $ intercalate "`, `" filePaths
show (TooManyExpectedFiles filePaths) = somethingWrongWithFilesMessage "Too many expected files" $ intercalate "`, `" filePaths
show (OutputFileDetected filePaths) = somethingWrongWithFilesMessage "Output file/s detected" $ intercalate "`, `" filePaths
show (CharacterCRDetected filePaths) = somethingWrongWithFilesMessage "Found CR (Carriage Return, 0x0D) character" filePaths
show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths
validationChallenge :: FilePath -> GEvalSpecification -> IO ()
validationChallenge challengeDirectory spec = do
unlessM (D.doesDirectoryExist challengeDirectory) $ throwM $ NoChallengeDirectory challengeDirectory
unlessM (D.doesFileExist configFile) $ throwM $ NoConfigFile configFile
unlessM (D.doesFileExist gitignoreFile) $ throwM $ NoGitignoreFile gitignoreFile
unlessM (D.doesFileExist readmeFile) $ throwM $ NoReadmeFile readmeFile
checkCorrectFile configFile
checkCorrectFile gitignoreFile
checkCorrectFile readmeFile
testDirectories <- findTestDirs challengeDirectory
checkTestDirectories testDirectories
where
configFile = challengeDirectory </> "config.txt"
gitignoreFile = challengeDirectory </> ".gitignore"
readmeFile = challengeDirectory </> "README.md"
checkCorrectFile :: FilePath -> IO ()
checkCorrectFile filePath = do
whenM (isEmptyFile filePath) $ throwM $ EmptyFile filePath
lines' <- getFileLines filePath
let lines = map T.pack lines'
when (any (T.isInfixOf "\r") lines) $ throw $ CharacterCRDetected filePath
when (any (T.isSuffixOf " ") lines) $ throw $ SpaceSuffixDetect filePath
getFileLines :: FilePath -> IO [String]
getFileLines file = runResourceT $ runConduit (sourceFile file
.| autoDecompress
.| CC.decodeUtf8Lenient
.| CT.lines
.| CC.map T.unpack
.| CL.consume)
findTestDirs :: FilePath -> IO [FilePath]
findTestDirs = SFF.find never testDirFilter
findInputFiles :: FilePath -> IO [FilePath]
findInputFiles = SFF.find never $ fileFilter "in.tsv"
findOutputFiles :: FilePath -> IO [FilePath]
findOutputFiles = SFF.find never $ fileFilter "out*.tsv"
findExpectedFiles :: FilePath -> IO [FilePath]
findExpectedFiles = SFF.find never $ fileFilter "expected.tsv"
never :: FindClause Bool
never = depth ==? 0
testDirFilter :: FindClause Bool
testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" ||? SFF.fileName ~~? "test-*")
fileFilter :: String -> FindClause Bool
fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
where
exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ]
checkTestDirectories :: [FilePath] -> IO ()
checkTestDirectories [] = throwM NoTestDirectories
checkTestDirectories directories = mapM_ checkTestDirectory directories
checkTestDirectory :: FilePath -> IO ()
checkTestDirectory directoryPath = do
inputFiles <- findInputFiles directoryPath
when (null inputFiles) $ throw $ NoInputFile inputFile
when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles
checkCorrectFile $ head inputFiles
expectedFiles <- findExpectedFiles directoryPath
when (null expectedFiles) $ throw $ NoExpectedFile expectedFile
when (length expectedFiles > 1) $ throw $ TooManyExpectedFiles expectedFiles
checkCorrectFile $ head expectedFiles
outputFiles <- findOutputFiles directoryPath
unless (null outputFiles) $ throw $ OutputFileDetected outputFiles
where
inputFile = directoryPath </> "in.tsv"
expectedFile = directoryPath </> "expected.tsv"