geval/src/GEval/Core.hs

151 lines
5.4 KiB
Haskell
Raw Normal View History

2015-08-23 08:14:47 +02:00
module GEval.Core
2015-08-21 22:56:32 +02:00
( geval,
gevalCore,
Metric(..),
MetricValue,
2015-08-21 22:56:32 +02:00
GEvalSpecification(..),
2015-08-22 00:00:46 +02:00
GEvalOptions(..),
2015-08-23 17:47:40 +02:00
GEvalException(..),
2015-08-22 00:00:46 +02:00
defaultGEvalSpecification,
defaultOutDirectory,
defaultTestName,
defaultOutFile,
defaultExpectedFile,
defaultMetric,
2015-08-23 20:53:43 +02:00
getExpectedDirectory,
configFileName
2015-08-17 23:32:00 +02:00
) where
2015-08-19 22:14:34 +02:00
import Data.Conduit
import Data.Conduit.Combinators as CC
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Text as CT
import Control.Monad.Trans.Resource
import qualified Data.Conduit.List as CL
import Data.Text
import Data.Text.Read as TR
2015-08-19 23:24:19 +02:00
import Control.Applicative
2015-08-22 19:29:39 +02:00
import Control.Exception
import Control.Conditional (unlessM)
import qualified System.Directory as D
2015-08-19 22:14:34 +02:00
2015-08-21 22:56:32 +02:00
import System.FilePath
import Data.Maybe
type MetricValue = Double
2015-08-22 19:35:19 +02:00
data Metric = RMSE | MSE | BLEU
2015-08-22 00:00:46 +02:00
deriving (Show, Read)
2015-08-21 22:56:32 +02:00
defaultOutDirectory = "."
defaultTestName = "test-A"
defaultOutFile = "out.tsv"
defaultExpectedFile = "expected.tsv"
defaultMetric :: Metric
2015-08-22 19:35:19 +02:00
defaultMetric = RMSE
2015-08-21 22:56:32 +02:00
2015-08-23 20:53:43 +02:00
configFileName :: FilePath
configFileName = "config.txt"
2015-08-22 00:00:46 +02:00
2015-08-21 22:56:32 +02:00
data GEvalSpecification = GEvalSpecification
2015-08-23 20:53:43 +02:00
{ gesOutDirectory :: FilePath,
gesExpectedDirectory :: Maybe FilePath,
2015-08-21 22:56:32 +02:00
gesTestName :: String,
gesOutFile :: String,
gesExpectedFile :: String,
gesMetric :: Metric }
getExpectedDirectory :: GEvalSpecification -> FilePath
getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec
where outDirectory = gesOutDirectory spec
2015-08-22 00:00:46 +02:00
data GEvalOptions = GEvalOptions
{ geoInit :: Bool,
geoSpec :: GEvalSpecification }
2015-08-21 22:56:32 +02:00
2015-08-22 19:29:39 +02:00
data GEvalException = NoExpectedFile FilePath
| NoOutFile FilePath
| NoExpectedDirectory FilePath
| NoOutDirectory FilePath
| NoExpectedTestDirectory FilePath
| NoOutTestDirectory FilePath
2015-08-23 17:47:40 +02:00
| FileAlreadyThere FilePath
2015-08-22 19:29:39 +02:00
instance Exception GEvalException
instance Show GEvalException where
show (NoExpectedFile filePath) = somethingWrongWithFilesMessage "No file with the expected results" filePath
show (NoOutFile filePath) = somethingWrongWithFilesMessage "No file with the test results" filePath
show (NoExpectedDirectory filePath) = somethingWrongWithFilesMessage "No directory with the expected results" filePath
show (NoOutDirectory filePath) = somethingWrongWithFilesMessage "No directory with the test results" filePath
show (NoExpectedTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the expected results" filePath
show (NoOutTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the results obtained" filePath
2015-08-23 17:47:40 +02:00
show (FileAlreadyThere filePath) = somethingWrongWithFilesMessage "File already there" filePath
2015-08-22 19:29:39 +02:00
somethingWrongWithFilesMessage :: String -> FilePath -> String
somethingWrongWithFilesMessage msg filePath = Prelude.concat
[ msg, ": `", filePath, "`" ]
2015-08-21 22:56:32 +02:00
defaultGEvalSpecification = GEvalSpecification {
gesOutDirectory = defaultOutDirectory,
gesExpectedDirectory = Nothing,
gesTestName = defaultTestName,
gesOutFile = defaultOutFile,
gesExpectedFile = defaultExpectedFile,
gesMetric = defaultMetric }
geval :: GEvalSpecification -> IO (MetricValue)
2015-08-22 19:29:39 +02:00
geval gevalSpec = do
unlessM (D.doesDirectoryExist outDirectory) $ throwM $ NoOutDirectory outDirectory
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
gevalCore metric expectedFilePath outFilePath
where expectedFilePath = expectedTestDirectory </> (gesExpectedFile gevalSpec)
outFilePath = outTestDirectory </> (gesOutFile gevalSpec)
expectedTestDirectory = expectedDirectory </> testName
outTestDirectory = outDirectory </> testName
expectedDirectory = getExpectedDirectory gevalSpec
2015-08-21 22:56:32 +02:00
outDirectory = gesOutDirectory gevalSpec
testName = gesTestName gevalSpec
metric = gesMetric gevalSpec
gevalCore :: Metric -> String -> String -> IO (MetricValue)
2015-08-21 22:56:32 +02:00
gevalCore MSE expectedFilePath outFilePath = do
2015-08-22 19:29:39 +02:00
unlessM (D.doesFileExist expectedFilePath) $ throwM $ NoExpectedFile expectedFilePath
unlessM (D.doesFileExist outFilePath) $ throwM $ NoOutFile outFilePath
2015-08-22 19:35:19 +02:00
runResourceT $
2015-08-19 23:24:19 +02:00
(getZipSource $ (,)
<$> ZipSource (items expectedFilePath)
<*> ZipSource (items outFilePath))
$$ (CL.map itemError
=$ averageC)
2015-08-22 19:35:19 +02:00
gevalCore RMSE expectedFilePath outFilePath = do
mse <- gevalCore MSE expectedFilePath outFilePath
2015-08-19 23:24:19 +02:00
return $ mse ** 0.5
averageC :: MonadResource m => Sink Double m Double
averageC = getZipSink
$ (\total count -> total / fromIntegral count)
<$> ZipSink CC.sum
<*> ZipSink CC.length
items :: MonadResource m => String -> Source m Double
items filePath =
CB.sourceFile filePath
$= (CT.decode CT.utf8
=$= CT.lines
=$= CL.map TR.double
=$= CC.map getValue)
itemError :: (Double, Double) -> Double
itemError (exp, out) = (exp-out)**2
2015-08-19 22:14:34 +02:00
getValue :: Either String (Double, Text) -> Double
getValue (Right (x, _)) = x