diff --git a/README.md b/README.md index 80c15d9..1a0fd11 100644 --- a/README.md +++ b/README.md @@ -698,3 +698,9 @@ Filip Graliński ## Contributors Piotr Halama +Karol Kaczmarek + +## Copyright + +2015-2019 Filip Graliński +2019 Applica.ai diff --git a/geval.cabal b/geval.cabal index 1219dfd..1ab66df 100644 --- a/geval.cabal +++ b/geval.cabal @@ -30,6 +30,7 @@ library , GEval.ParseParams , GEval.ProbList , GEval.Submit + , GEval.Validation , Data.Conduit.AutoDecompress , Data.Conduit.SmartSource , Data.Conduit.Rank @@ -91,6 +92,7 @@ library , Chart , Chart-cairo , errors + , filemanip default-language: Haskell2010 executable geval diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 7c9841a..a3cc017 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -4,6 +4,7 @@ {-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE PackageImports #-} module GEval.Core @@ -44,7 +45,9 @@ module GEval.Core gesPreprocess, getDataDecoder, threeLineSource, - extensionsHandled + extensionsHandled, + isEmptyFile, + somethingWrongWithFilesMessage ) where import Data.Conduit @@ -109,7 +112,7 @@ import Data.Proxy import Data.Word -import System.FilePath.Glob +import "Glob" System.FilePath.Glob defaultLogLossHashedSize :: Word32 defaultLogLossHashedSize = 10 @@ -313,6 +316,7 @@ data GEvalSpecialCommand = Init | LineByLine | WorstFeatures | Diff FilePath | MostWorseningFeatures FilePath | PrintVersion | JustTokenize | Submit + | Validate data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 1692897..d68b0b7 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -32,6 +32,7 @@ import GEval.LineByLine import GEval.Submit (submit) import GEval.BlackBoxDebugging import GEval.Selector +import GEval.Validation import Data.Conduit.SmartSource @@ -81,6 +82,10 @@ optionsParser = GEvalOptions ( long "submit" <> short 'S' <> help "Submit current solution for evaluation to an external Gonito instance specified with --gonito-host option. Optionally, specify --token.")) + <|> + (flag' Validate + ( long "validate" + <> help "Validate challenge, it searches for potential errors in the given challenge path, like missing columns, files or format data.")) ) <*> ((flag' FirstTheWorst @@ -332,6 +337,9 @@ runGEval''' (Just JustTokenize) _ _ spec _ _ = do runGEval''' (Just Submit) _ _ spec _ _ = do submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec) return Nothing +runGEval''' (Just Validate) _ _ spec _ _ = do + validateChallenge spec + return Nothing getGraphFilename :: Int -> FilePath -> FilePath getGraphFilename 0 fp = fp @@ -378,3 +386,17 @@ to create a directory CHALLENGE representing a Gonito challenge. (Note that `--out-directory` option is not taken into account with `--init` option.) |] exitFailure + + +validateChallenge :: GEvalSpecification -> IO () +validateChallenge spec = case gesExpectedDirectory spec of + Nothing -> showValidateInstructions + Just expectedDirectory -> validationChallenge expectedDirectory spec + +showValidateInstructions = do + putStrLn [here| +Run: + geval --validate --expected-directory CHALLENGE +to validate a directory CHALLENGE representing a Gonito challenge. +|] + exitFailure diff --git a/src/GEval/Validation.hs b/src/GEval/Validation.hs new file mode 100644 index 0000000..47a5aa0 --- /dev/null +++ b/src/GEval/Validation.hs @@ -0,0 +1,134 @@ +{-# LANGUAGE OverloadedStrings #-} + +module GEval.Validation + ( validationChallenge + ) where + +import GEval.Core +import qualified System.Directory as D + +import System.FilePath.Find as SFF +import System.FilePath +import Control.Exception +import Control.Monad.Trans.Resource +import Control.Monad.IO.Class +import Control.Conditional (unlessM, whenM, unless, when) +import Data.Conduit +import qualified Data.Conduit.List as CL +import qualified Data.Conduit.Combinators as CC +import qualified Data.Conduit.Text as CT +import Data.Conduit.Binary (sourceFile) +import Data.Conduit.AutoDecompress (autoDecompress) +import Data.Conduit.SmartSource (compressedFilesHandled) +import Data.List (intercalate) +import qualified Data.Text as T + +data ValidationException = NoChallengeDirectory FilePath + | NoFoundFile FilePath + | NoConfigFile FilePath + | NoReadmeFile FilePath + | NoGitignoreFile FilePath + | EmptyFile FilePath + | NoTestDirectories + | TooManyInputFiles [FilePath] + | TooManyExpectedFiles [FilePath] + | OutputFileDetected [FilePath] + | CharacterCRDetected FilePath + | SpaceSuffixDetect FilePath + +instance Exception ValidationException + +instance Show ValidationException where + show (NoChallengeDirectory filePath) = somethingWrongWithFilesMessage "No challenge directory" filePath + show (NoFoundFile filePath) = somethingWrongWithFilesMessage "No file found" filePath + show (NoConfigFile filePath) = somethingWrongWithFilesMessage "No config.txt file" filePath + show (NoReadmeFile filePath) = somethingWrongWithFilesMessage "No README.md file" filePath + show (NoGitignoreFile filePath) = somethingWrongWithFilesMessage "No .gitignore file" filePath + show (EmptyFile filePath) = somethingWrongWithFilesMessage "Empty file" filePath + show NoTestDirectories = "No directories with test data, expected `dev-0` and/or `test-A` directory" + show (TooManyInputFiles filePaths) = somethingWrongWithFilesMessage "Too many input files" $ intercalate "`, `" filePaths + show (TooManyExpectedFiles filePaths) = somethingWrongWithFilesMessage "Too many expected files" $ intercalate "`, `" filePaths + show (OutputFileDetected filePaths) = somethingWrongWithFilesMessage "Output file/s detected" $ intercalate "`, `" filePaths + show (CharacterCRDetected filePaths) = somethingWrongWithFilesMessage "Found CR (Carriage Return, 0x0D) character" filePaths + show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths + + +validationChallenge :: FilePath -> GEvalSpecification -> IO () +validationChallenge challengeDirectory spec = do + unlessM (D.doesDirectoryExist challengeDirectory) $ throwM $ NoChallengeDirectory challengeDirectory + unlessM (D.doesFileExist configFile) $ throwM $ NoConfigFile configFile + unlessM (D.doesFileExist gitignoreFile) $ throwM $ NoGitignoreFile gitignoreFile + unlessM (D.doesFileExist readmeFile) $ throwM $ NoReadmeFile readmeFile + checkCorrectFile configFile + checkCorrectFile gitignoreFile + checkCorrectFile readmeFile + testDirectories <- findTestDirs challengeDirectory + checkTestDirectories testDirectories + where + configFile = challengeDirectory "config.txt" + gitignoreFile = challengeDirectory ".gitignore" + readmeFile = challengeDirectory "README.md" + + +checkCorrectFile :: FilePath -> IO () +checkCorrectFile filePath = do + whenM (isEmptyFile filePath) $ throwM $ EmptyFile filePath + lines' <- getFileLines filePath + let lines = map T.pack lines' + when (any (T.isInfixOf "\r") lines) $ throw $ CharacterCRDetected filePath + when (any (T.isSuffixOf " ") lines) $ throw $ SpaceSuffixDetect filePath + +getFileLines :: FilePath -> IO [String] +getFileLines file = runResourceT $ runConduit (sourceFile file + .| autoDecompress + .| CC.decodeUtf8Lenient + .| CT.lines + .| CC.map T.unpack + .| CL.consume) + + +findTestDirs :: FilePath -> IO [FilePath] +findTestDirs = SFF.find never testDirFilter + +findInputFiles :: FilePath -> IO [FilePath] +findInputFiles = SFF.find never $ fileFilter "in.tsv" + +findOutputFiles :: FilePath -> IO [FilePath] +findOutputFiles = SFF.find never $ fileFilter "out*.tsv" + +findExpectedFiles :: FilePath -> IO [FilePath] +findExpectedFiles = SFF.find never $ fileFilter "expected.tsv" + +never :: FindClause Bool +never = depth ==? 0 + +testDirFilter :: FindClause Bool +testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" ||? SFF.fileName ~~? "test-*") + +fileFilter :: String -> FindClause Bool +fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts) + where + exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ] + + +checkTestDirectories :: [FilePath] -> IO () +checkTestDirectories [] = throwM NoTestDirectories +checkTestDirectories directories = mapM_ checkTestDirectory directories + +checkTestDirectory :: FilePath -> IO () +checkTestDirectory directoryPath = do + inputFiles <- findInputFiles directoryPath + when (null inputFiles) $ throw $ NoInputFile inputFile + when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles + checkCorrectFile $ head inputFiles + + expectedFiles <- findExpectedFiles directoryPath + when (null expectedFiles) $ throw $ NoExpectedFile expectedFile + when (length expectedFiles > 1) $ throw $ TooManyExpectedFiles expectedFiles + checkCorrectFile $ head expectedFiles + + outputFiles <- findOutputFiles directoryPath + unless (null outputFiles) $ throw $ OutputFileDetected outputFiles + where + inputFile = directoryPath "in.tsv" + expectedFile = directoryPath "expected.tsv"