diff --git a/geval.cabal b/geval.cabal index 9a54aa8..b4d7842 100644 --- a/geval.cabal +++ b/geval.cabal @@ -1,5 +1,5 @@ name: geval -version: 1.18.0.1 +version: 1.18.1.0 synopsis: Machine learning evaluation tools description: Please see README.md homepage: http://github.com/name/project @@ -94,6 +94,7 @@ library , Chart-cairo , errors , filemanip + , temporary default-language: Haskell2010 executable geval diff --git a/src/GEval/Common.hs b/src/GEval/Common.hs index e46fb17..33d7486 100644 --- a/src/GEval/Common.hs +++ b/src/GEval/Common.hs @@ -70,3 +70,9 @@ bigrams :: [a] -> [(a, a)] bigrams [] = [] bigrams [_] = [] bigrams u = zip u $ tail u + +class AEq a where + (=~) :: a -> a -> Bool + +instance AEq Double where + x =~ y = abs ( x - y ) < (1.0e-4 :: Double) diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index f849ad9..696ae7d 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -295,7 +295,7 @@ checkAndGetFilesSingleOut forceInput gevalSpec = do checkAndGetFiles :: Bool -> GEvalSpecification -> IO (SourceSpec, SourceSpec, [SourceSpec]) checkAndGetFiles forceInput gevalSpec = do - ess <- getSmartSourceSpec expectedTestDirectory "expected.tsv" expectedFile + ess <- getSmartSourceSpec expectedTestDirectory defaultExpectedFile expectedFile case ess of Left NoSpecGiven -> throwM $ NoExpectedFile expectedFile Left (NoFile fp) -> throwM $ NoExpectedFile fp @@ -380,7 +380,7 @@ getOutFile gevalSpec out = outDirectory testName out getInputSourceIfNeeded :: Bool -> [Metric] -> FilePath -> FilePath -> IO SourceSpec getInputSourceIfNeeded forced metrics directory inputFilePath | forced || (Prelude.any isInputNeeded metrics) = do - iss <- getSmartSourceSpec directory "in.tsv" inputFilePath + iss <- getSmartSourceSpec directory defaultInputFile inputFilePath case iss of Left NoSpecGiven -> throwM $ NoInputFile inputFilePath Left (NoFile fp) -> throwM $ NoInputFile fp diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 4510ade..b26e55d 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -313,6 +313,35 @@ given at all, probability 0.0 is assumed. (But note that returning in an infinite manner). |] ++ (commonReadmeMDContents testName) +readmeMDContents ClippEU testName = [i| +Sample challenge for clipping rectangles +======================================== + +The metric is ClippEU, i.e. F2-score (F-measure with preference for recall). + +Reference format +---------------- + +(For expected.tsv files.) + +Each line describes expected clippings to be found in a corresponding PDF/DjVu file. Each expected clipping is specified as P/X0,Y0,X1,Y1/M, where: + + P — DjVu page number (starting from 1) + X0, Y0, X1, Y1 — clipping coordinates (in pixels) + M — margin of error for each direction (in pixels) + +Output format +------------- + +(for out.tsv files.) + +Similar to the reference format, each line describes clippings found in a corresponding PDF/DjVu file. Each clipping should be given as P/X0,Y0,X1,Y1, where: + + P — DjVu page number (starting from 1) + X0, Y0, X1, Y1 — clipping coordinates (in pixels) + +|] ++ (commonReadmeMDContents testName) + readmeMDContents _ testName = [i| GEval sample challenge ====================== @@ -435,10 +464,13 @@ Love and hate LOVE HATE I am sad SADNESS I am so sad and hateful SADNESS HATE |] -trainContents _ = [hereLit|0.06 0.39 0 0.206 -1.00 1.00 1 0.017 -317.8 5.20 67 0.048 -14.6 19.22 27 0.047 +trainContents ClippEU = [hereLit|2/0,0,10,150 foo.djvu +1/30,40,100,1000 bar.djvu +|] +trainContents _ = [hereLit|0.06 0.39 0 0.206 +1.00 1.00 1 0.017 +317.8 5.20 67 0.048 +14.6 19.22 27 0.047 |] devInContents :: Metric -> String @@ -496,6 +528,9 @@ devInContents MultiLabelLikelihood = devInContents MultiLabelLogLoss devInContents MultiLabelLogLoss = [hereLit|I am in love I am a sad hater |] +devInContents ClippEU = [hereLit|file1.djvu +file2.djvu +|] devInContents _ = [hereLit|0.72 0 0.007 9.54 62 0.054 |] @@ -555,6 +590,9 @@ devExpectedContents MultiLabelLikelihood = devExpectedContents MultiLabelLogLoss devExpectedContents MultiLabelLogLoss = [hereLit|LOVE SADNESS LOVE |] +devExpectedContents ClippEU = [hereLit| +10/10,20,30,100/5 3/0,50,500,500/5 +|] devExpectedContents _ = [hereLit|0.82 95.2 |] @@ -616,6 +654,9 @@ testInContents MultiLabelLikelihood = testInContents MultiLabelLogLoss testInContents MultiLabelLogLoss = [hereLit|I am very sad I hate |] +testInContents ClippEU = [hereLit|file3.djvu +file4.djvu +|] testInContents _ = [hereLit|0.72 0 0.007 9.54 62 0.054 |] @@ -677,6 +718,9 @@ testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLo testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS HATE |] +testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10 +1/10,10,1000,1000/10 +|] testExpectedContents _ = [hereLit|0.11 17.2 |] diff --git a/src/GEval/Metric.hs b/src/GEval/Metric.hs index 4550a92..6776eef 100644 --- a/src/GEval/Metric.hs +++ b/src/GEval/Metric.hs @@ -1,12 +1,22 @@ +{-# LANGUAGE OverloadedStrings #-} + module GEval.Metric (Metric(..), MetricOrdering(..), defaultLogLossHashedSize, getMetricOrdering, - listOfAvailableMetrics) + listOfAvailableMetrics, + bestPossibleValue, + perfectOutLineFromExpectedLine) where import Data.Word +import Data.Text +import Data.Monoid ((<>)) + +import GEval.Common +import GEval.ClippEU +import Data.Attoparsec.Text (parseOnly) -- here metrics and their basic properties are listed, -- the evaluation procedures are defined in GEval.Core @@ -180,5 +190,30 @@ getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter +bestPossibleValue :: Metric -> MetricValue +bestPossibleValue metric = case getMetricOrdering metric of + TheLowerTheBetter -> 0.0 + TheHigherTheBetter -> 1.0 + +perfectOutLineFromExpectedLine :: Metric -> Text -> Text +perfectOutLineFromExpectedLine (LogLossHashed _) t = t <> ":1.0" +perfectOutLineFromExpectedLine (LikelihoodHashed _) t = t <> ":1.0" +perfectOutLineFromExpectedLine BLEU t = getFirstColumn t +perfectOutLineFromExpectedLine GLEU t = getFirstColumn t +perfectOutLineFromExpectedLine ClippEU t = cleanMarginFromClippEU t +perfectOutLineFromExpectedLine _ t = t + +getFirstColumn :: Text -> Text +getFirstColumn t = case splitOn "\t" t of + [] -> "" + (h:_) -> h + +cleanMarginFromClippEU :: Text -> Text +cleanMarginFromClippEU t = Data.Text.unwords outs + where outs = Prelude.map toOut specs + (Right specs) = parseOnly lineClippingSpecsParser t + toOut (ClippingSpec (PageNumber pageNumber) (Rectangle (Point x0 y0) (Point x1 y1)) _) = + pack ((show pageNumber) ++ "/" ++ (show x0) ++ "," ++ (show y0) ++ "," ++ (show x1) ++ "," ++ (show y1)) + defaultLogLossHashedSize :: Word32 defaultLogLossHashedSize = 10 diff --git a/src/GEval/Validation.hs b/src/GEval/Validation.hs index c34f3ff..00ec475 100644 --- a/src/GEval/Validation.hs +++ b/src/GEval/Validation.hs @@ -5,11 +5,13 @@ module GEval.Validation ) where import GEval.Metric -import GEval.Core (GEvalSpecification(..), GEvalException(..), somethingWrongWithFilesMessage, isEmptyFile) +import GEval.Core (GEvalSpecification(..), GEvalException(..), somethingWrongWithFilesMessage, isEmptyFile, geval, defaultInputFile, defaultExpectedFile, defaultOutFile) +import GEval.Common import qualified System.Directory as D import System.FilePath.Find as SFF import System.FilePath +import System.Directory import Control.Exception import Control.Monad.Trans.Resource import Control.Monad.IO.Class @@ -18,12 +20,14 @@ import Data.Conduit import qualified Data.Conduit.List as CL import qualified Data.Conduit.Combinators as CC import qualified Data.Conduit.Text as CT -import Data.Conduit.Binary (sourceFile) +import Data.Conduit.Binary (sourceFile, sinkFile) import Data.Conduit.AutoDecompress (autoDecompress) import Data.Conduit.SmartSource (compressedFilesHandled) import Data.List (intercalate) import qualified Data.Text as T +import System.IO.Temp + data ValidationException = NoChallengeDirectory FilePath | NoFoundFile FilePath | NoConfigFile FilePath @@ -36,6 +40,7 @@ data ValidationException = NoChallengeDirectory FilePath | OutputFileDetected [FilePath] | CharacterCRDetected FilePath | SpaceSuffixDetect FilePath + | BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue instance Exception ValidationException @@ -52,7 +57,7 @@ instance Show ValidationException where show (OutputFileDetected filePaths) = somethingWrongWithFilesMessage "Output file/s detected" $ intercalate "`, `" filePaths show (CharacterCRDetected filePaths) = somethingWrongWithFilesMessage "Found CR (Carriage Return, 0x0D) character" filePaths show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths - + show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got) validationChallenge :: FilePath -> GEvalSpecification -> IO () validationChallenge challengeDirectory spec = do @@ -65,6 +70,9 @@ validationChallenge challengeDirectory spec = do checkCorrectFile readmeFile testDirectories <- findTestDirs challengeDirectory checkTestDirectories testDirectories + + mapM_ (runOnTest spec) testDirectories + where configFile = challengeDirectory "config.txt" gitignoreFile = challengeDirectory ".gitignore" @@ -82,23 +90,34 @@ checkCorrectFile filePath = do getFileLines :: FilePath -> IO [String] getFileLines file = runResourceT $ runConduit (sourceFile file .| autoDecompress - .| CC.decodeUtf8Lenient + .| CC.decodeUtf8 .| CT.lines .| CC.map T.unpack .| CL.consume) +createPerfectOutputFromExpected :: Metric -> FilePath -> FilePath -> IO () +createPerfectOutputFromExpected metric expectedFile outFile = do + runResourceT $ runConduit $ (sourceFile expectedFile + .| autoDecompress + .| CC.decodeUtf8 + .| CT.lines + .| CC.map (perfectOutLineFromExpectedLine metric) + .| CC.unlines + .| CC.encodeUtf8 + .| sinkFile outFile) + findTestDirs :: FilePath -> IO [FilePath] findTestDirs = SFF.find never testDirFilter findInputFiles :: FilePath -> IO [FilePath] -findInputFiles = SFF.find never $ fileFilter "in.tsv" +findInputFiles = SFF.find never $ fileFilter defaultInputFile findOutputFiles :: FilePath -> IO [FilePath] findOutputFiles = SFF.find never $ fileFilter "out*.tsv" findExpectedFiles :: FilePath -> IO [FilePath] -findExpectedFiles = SFF.find never $ fileFilter "expected.tsv" +findExpectedFiles = SFF.find never $ fileFilter defaultExpectedFile never :: FindClause Bool never = depth ==? 0 @@ -131,5 +150,28 @@ checkTestDirectory directoryPath = do outputFiles <- findOutputFiles directoryPath unless (null outputFiles) $ throw $ OutputFileDetected outputFiles where - inputFile = directoryPath "in.tsv" - expectedFile = directoryPath "expected.tsv" + inputFile = directoryPath defaultInputFile + expectedFile = directoryPath defaultExpectedFile + +runOnTest :: GEvalSpecification -> FilePath -> IO () +runOnTest spec testPath = do + [expectedFile] <- findExpectedFiles testPath + let testName = takeFileName testPath + let modifiedSpec = spec { + gesExpectedDirectory = Just (takeDirectory testPath), + gesTestName = testName + } + + (flip mapM_) (gesMetrics spec) $ \metric -> do + withSystemTempDirectory "geval-validation" $ \tmpDir -> do + let tmpOutDir = tmpDir testName + let tmpOutFile = tmpOutDir defaultOutFile + createDirectory tmpOutDir + let specificSpec = modifiedSpec { + gesMetrics = [metric], + gesOutDirectory = tmpDir } + createPerfectOutputFromExpected metric expectedFile tmpOutFile + [(_, [MetricOutput value _])] <- geval specificSpec + let bestValue = bestPossibleValue metric + unless (bestValue =~ value) $ throw $ BestPossibleValueNotObtainedWithExpectedData bestValue value + return () diff --git a/test/Spec.hs b/test/Spec.hs index 2760452..2a4ff26 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -509,7 +509,7 @@ main = hspec $ do withSystemTempDirectory "geval-validation-test" $ \tempDir -> do let spec = defaultGEvalSpecification { gesExpectedDirectory = Just tempDir, - gesMetrics = [BLEU], + gesMetrics = [metric], gesPrecision = Just 4 } createChallenge True tempDir spec validationChallenge tempDir spec @@ -660,12 +660,6 @@ extractMetric testName = do Left _ -> Nothing Right opts -> Just $ gesMainMetric $ geoSpec opts -class AEq a where - (=~) :: a -> a -> Bool - -instance AEq Double where - x =~ y = abs ( x - y ) < (1.0e-4 :: Double) - (@=~?) :: (Show a, AEq a) => a -> a -> HU.Assertion (@=~?) actual expected = expected =~ actual HU.@? assertionMsg where diff --git a/validate-gonito-net-challenges.sh b/validate-gonito-net-challenges.sh new file mode 100755 index 0000000..479897d --- /dev/null +++ b/validate-gonito-net-challenges.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +ARENA=$1 + +wget --quiet 'https://gonito.net/list-challenges' -O - | perl -ne 'print "$1\n" if m{}' | while read challenge +do + echo "---------------- $challenge ---------------------" + + challenge_dir="$ARENA/${challenge}-dont-peek" + + if [[ ! -d "${challenge_dir}" ]] + then + (cd $ARENA && git clone "ssh://gitolite@gonito.net/${challenge}-dont-peek") + fi + + geval --validate --expected-directory "${challenge_dir}" + +done