2015-08-23 08:14:47 +02:00
|
|
|
module GEval.Core
|
2015-08-21 22:56:32 +02:00
|
|
|
( geval,
|
|
|
|
gevalCore,
|
|
|
|
Metric(..),
|
2015-08-22 20:29:56 +02:00
|
|
|
MetricValue,
|
2015-08-21 22:56:32 +02:00
|
|
|
GEvalSpecification(..),
|
2015-08-22 00:00:46 +02:00
|
|
|
GEvalOptions(..),
|
|
|
|
defaultGEvalSpecification,
|
|
|
|
defaultOutDirectory,
|
|
|
|
defaultTestName,
|
|
|
|
defaultOutFile,
|
|
|
|
defaultExpectedFile,
|
2015-08-23 07:40:37 +02:00
|
|
|
defaultMetric,
|
|
|
|
getExpectedDirectory
|
2015-08-17 23:32:00 +02:00
|
|
|
) where
|
|
|
|
|
2015-08-19 22:14:34 +02:00
|
|
|
import Data.Conduit
|
|
|
|
import Data.Conduit.Combinators as CC
|
|
|
|
import qualified Data.Conduit.Binary as CB
|
|
|
|
import qualified Data.Conduit.Text as CT
|
|
|
|
import Control.Monad.Trans.Resource
|
|
|
|
import qualified Data.Conduit.List as CL
|
|
|
|
import Data.Text
|
|
|
|
import Data.Text.Read as TR
|
2015-08-19 23:24:19 +02:00
|
|
|
import Control.Applicative
|
2015-08-22 19:29:39 +02:00
|
|
|
import Control.Exception
|
|
|
|
import Control.Conditional (unlessM)
|
|
|
|
import qualified System.Directory as D
|
2015-08-19 22:14:34 +02:00
|
|
|
|
2015-08-21 22:56:32 +02:00
|
|
|
import System.FilePath
|
|
|
|
import Data.Maybe
|
|
|
|
|
2015-08-22 20:29:56 +02:00
|
|
|
type MetricValue = Double
|
|
|
|
|
2015-08-22 19:35:19 +02:00
|
|
|
data Metric = RMSE | MSE | BLEU
|
2015-08-22 00:00:46 +02:00
|
|
|
deriving (Show, Read)
|
2015-08-21 22:56:32 +02:00
|
|
|
|
|
|
|
defaultOutDirectory = "."
|
|
|
|
defaultTestName = "test-A"
|
|
|
|
defaultOutFile = "out.tsv"
|
|
|
|
defaultExpectedFile = "expected.tsv"
|
|
|
|
|
|
|
|
defaultMetric :: Metric
|
2015-08-22 19:35:19 +02:00
|
|
|
defaultMetric = RMSE
|
2015-08-21 22:56:32 +02:00
|
|
|
|
2015-08-22 00:00:46 +02:00
|
|
|
|
2015-08-21 22:56:32 +02:00
|
|
|
data GEvalSpecification = GEvalSpecification
|
|
|
|
{ gesOutDirectory :: String,
|
|
|
|
gesExpectedDirectory :: Maybe String,
|
|
|
|
gesTestName :: String,
|
|
|
|
gesOutFile :: String,
|
|
|
|
gesExpectedFile :: String,
|
|
|
|
gesMetric :: Metric }
|
|
|
|
|
2015-08-23 07:40:37 +02:00
|
|
|
getExpectedDirectory :: GEvalSpecification -> FilePath
|
|
|
|
getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec
|
|
|
|
where outDirectory = gesOutDirectory spec
|
|
|
|
|
2015-08-22 00:00:46 +02:00
|
|
|
data GEvalOptions = GEvalOptions
|
|
|
|
{ geoInit :: Bool,
|
|
|
|
geoSpec :: GEvalSpecification }
|
|
|
|
|
2015-08-21 22:56:32 +02:00
|
|
|
|
2015-08-22 19:29:39 +02:00
|
|
|
data GEvalException = NoExpectedFile FilePath
|
|
|
|
| NoOutFile FilePath
|
|
|
|
| NoExpectedDirectory FilePath
|
|
|
|
| NoOutDirectory FilePath
|
|
|
|
| NoExpectedTestDirectory FilePath
|
|
|
|
| NoOutTestDirectory FilePath
|
|
|
|
|
|
|
|
instance Exception GEvalException
|
|
|
|
|
|
|
|
instance Show GEvalException where
|
|
|
|
show (NoExpectedFile filePath) = somethingWrongWithFilesMessage "No file with the expected results" filePath
|
|
|
|
show (NoOutFile filePath) = somethingWrongWithFilesMessage "No file with the test results" filePath
|
|
|
|
show (NoExpectedDirectory filePath) = somethingWrongWithFilesMessage "No directory with the expected results" filePath
|
|
|
|
show (NoOutDirectory filePath) = somethingWrongWithFilesMessage "No directory with the test results" filePath
|
|
|
|
show (NoExpectedTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the expected results" filePath
|
|
|
|
show (NoOutTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the results obtained" filePath
|
|
|
|
|
|
|
|
|
|
|
|
somethingWrongWithFilesMessage :: String -> FilePath -> String
|
|
|
|
somethingWrongWithFilesMessage msg filePath = Prelude.concat
|
|
|
|
[ msg, ": `", filePath, "`" ]
|
|
|
|
|
2015-08-21 22:56:32 +02:00
|
|
|
defaultGEvalSpecification = GEvalSpecification {
|
|
|
|
gesOutDirectory = defaultOutDirectory,
|
|
|
|
gesExpectedDirectory = Nothing,
|
|
|
|
gesTestName = defaultTestName,
|
|
|
|
gesOutFile = defaultOutFile,
|
|
|
|
gesExpectedFile = defaultExpectedFile,
|
|
|
|
gesMetric = defaultMetric }
|
|
|
|
|
|
|
|
|
2015-08-22 20:29:56 +02:00
|
|
|
geval :: GEvalSpecification -> IO (MetricValue)
|
2015-08-22 19:29:39 +02:00
|
|
|
geval gevalSpec = do
|
|
|
|
unlessM (D.doesDirectoryExist outDirectory) $ throwM $ NoOutDirectory outDirectory
|
|
|
|
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
|
|
|
|
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
|
|
|
|
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
|
|
|
|
gevalCore metric expectedFilePath outFilePath
|
|
|
|
where expectedFilePath = expectedTestDirectory </> (gesExpectedFile gevalSpec)
|
|
|
|
outFilePath = outTestDirectory </> (gesOutFile gevalSpec)
|
|
|
|
expectedTestDirectory = expectedDirectory </> testName
|
|
|
|
outTestDirectory = outDirectory </> testName
|
2015-08-23 07:40:37 +02:00
|
|
|
expectedDirectory = getExpectedDirectory gevalSpec
|
2015-08-21 22:56:32 +02:00
|
|
|
outDirectory = gesOutDirectory gevalSpec
|
|
|
|
testName = gesTestName gevalSpec
|
|
|
|
metric = gesMetric gevalSpec
|
|
|
|
|
2015-08-22 20:29:56 +02:00
|
|
|
gevalCore :: Metric -> String -> String -> IO (MetricValue)
|
2015-08-21 22:56:32 +02:00
|
|
|
gevalCore MSE expectedFilePath outFilePath = do
|
2015-08-22 19:29:39 +02:00
|
|
|
unlessM (D.doesFileExist expectedFilePath) $ throwM $ NoExpectedFile expectedFilePath
|
|
|
|
unlessM (D.doesFileExist outFilePath) $ throwM $ NoOutFile outFilePath
|
2015-08-22 19:35:19 +02:00
|
|
|
runResourceT $
|
2015-08-19 23:24:19 +02:00
|
|
|
(getZipSource $ (,)
|
|
|
|
<$> ZipSource (items expectedFilePath)
|
|
|
|
<*> ZipSource (items outFilePath))
|
|
|
|
$$ (CL.map itemError
|
|
|
|
=$ averageC)
|
2015-08-22 19:35:19 +02:00
|
|
|
|
|
|
|
gevalCore RMSE expectedFilePath outFilePath = do
|
|
|
|
mse <- gevalCore MSE expectedFilePath outFilePath
|
2015-08-19 23:24:19 +02:00
|
|
|
return $ mse ** 0.5
|
|
|
|
|
|
|
|
averageC :: MonadResource m => Sink Double m Double
|
|
|
|
averageC = getZipSink
|
|
|
|
$ (\total count -> total / fromIntegral count)
|
|
|
|
<$> ZipSink CC.sum
|
|
|
|
<*> ZipSink CC.length
|
|
|
|
|
|
|
|
items :: MonadResource m => String -> Source m Double
|
|
|
|
items filePath =
|
|
|
|
CB.sourceFile filePath
|
|
|
|
$= (CT.decode CT.utf8
|
|
|
|
=$= CT.lines
|
|
|
|
=$= CL.map TR.double
|
|
|
|
=$= CC.map getValue)
|
|
|
|
|
|
|
|
|
|
|
|
itemError :: (Double, Double) -> Double
|
|
|
|
itemError (exp, out) = (exp-out)**2
|
2015-08-19 22:14:34 +02:00
|
|
|
|
|
|
|
getValue :: Either String (Double, Text) -> Double
|
|
|
|
getValue (Right (x, _)) = x
|