2015-08-23 08:14:47 +02:00
|
|
|
module GEval.Core
|
2015-08-21 22:56:32 +02:00
|
|
|
( geval,
|
|
|
|
gevalCore,
|
|
|
|
Metric(..),
|
2015-08-22 20:29:56 +02:00
|
|
|
MetricValue,
|
2015-08-21 22:56:32 +02:00
|
|
|
GEvalSpecification(..),
|
2015-08-22 00:00:46 +02:00
|
|
|
GEvalOptions(..),
|
2015-08-23 17:47:40 +02:00
|
|
|
GEvalException(..),
|
2015-08-22 00:00:46 +02:00
|
|
|
defaultGEvalSpecification,
|
|
|
|
defaultOutDirectory,
|
|
|
|
defaultTestName,
|
|
|
|
defaultOutFile,
|
|
|
|
defaultExpectedFile,
|
2015-08-23 07:40:37 +02:00
|
|
|
defaultMetric,
|
2015-08-23 20:53:43 +02:00
|
|
|
getExpectedDirectory,
|
|
|
|
configFileName
|
2015-08-17 23:32:00 +02:00
|
|
|
) where
|
|
|
|
|
2015-08-19 22:14:34 +02:00
|
|
|
import Data.Conduit
|
|
|
|
import Data.Conduit.Combinators as CC
|
|
|
|
import qualified Data.Conduit.Binary as CB
|
|
|
|
import qualified Data.Conduit.Text as CT
|
|
|
|
import Control.Monad.Trans.Resource
|
|
|
|
import qualified Data.Conduit.List as CL
|
|
|
|
import Data.Text
|
|
|
|
import Data.Text.Read as TR
|
2015-08-19 23:24:19 +02:00
|
|
|
import Control.Applicative
|
2015-08-22 19:29:39 +02:00
|
|
|
import Control.Exception
|
|
|
|
import Control.Conditional (unlessM)
|
|
|
|
import qualified System.Directory as D
|
2015-08-19 22:14:34 +02:00
|
|
|
|
2015-08-21 22:56:32 +02:00
|
|
|
import System.FilePath
|
|
|
|
import Data.Maybe
|
|
|
|
|
2015-08-24 23:40:40 +02:00
|
|
|
import qualified Data.List.Split as DLS
|
|
|
|
|
|
|
|
import GEval.BLEU
|
|
|
|
|
2015-08-22 20:29:56 +02:00
|
|
|
type MetricValue = Double
|
|
|
|
|
2015-08-22 19:35:19 +02:00
|
|
|
data Metric = RMSE | MSE | BLEU
|
2015-08-22 00:00:46 +02:00
|
|
|
deriving (Show, Read)
|
2015-08-21 22:56:32 +02:00
|
|
|
|
|
|
|
defaultOutDirectory = "."
|
|
|
|
defaultTestName = "test-A"
|
|
|
|
defaultOutFile = "out.tsv"
|
|
|
|
defaultExpectedFile = "expected.tsv"
|
|
|
|
|
|
|
|
defaultMetric :: Metric
|
2015-08-22 19:35:19 +02:00
|
|
|
defaultMetric = RMSE
|
2015-08-21 22:56:32 +02:00
|
|
|
|
2015-08-23 20:53:43 +02:00
|
|
|
configFileName :: FilePath
|
|
|
|
configFileName = "config.txt"
|
2015-08-22 00:00:46 +02:00
|
|
|
|
2015-08-21 22:56:32 +02:00
|
|
|
data GEvalSpecification = GEvalSpecification
|
2015-08-23 20:53:43 +02:00
|
|
|
{ gesOutDirectory :: FilePath,
|
|
|
|
gesExpectedDirectory :: Maybe FilePath,
|
2015-08-21 22:56:32 +02:00
|
|
|
gesTestName :: String,
|
|
|
|
gesOutFile :: String,
|
|
|
|
gesExpectedFile :: String,
|
|
|
|
gesMetric :: Metric }
|
|
|
|
|
2015-08-23 07:40:37 +02:00
|
|
|
getExpectedDirectory :: GEvalSpecification -> FilePath
|
|
|
|
getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec
|
|
|
|
where outDirectory = gesOutDirectory spec
|
|
|
|
|
2015-08-22 00:00:46 +02:00
|
|
|
data GEvalOptions = GEvalOptions
|
|
|
|
{ geoInit :: Bool,
|
|
|
|
geoSpec :: GEvalSpecification }
|
|
|
|
|
2015-08-21 22:56:32 +02:00
|
|
|
|
2015-08-22 19:29:39 +02:00
|
|
|
data GEvalException = NoExpectedFile FilePath
|
|
|
|
| NoOutFile FilePath
|
|
|
|
| NoExpectedDirectory FilePath
|
|
|
|
| NoOutDirectory FilePath
|
|
|
|
| NoExpectedTestDirectory FilePath
|
|
|
|
| NoOutTestDirectory FilePath
|
2015-08-23 17:47:40 +02:00
|
|
|
| FileAlreadyThere FilePath
|
2015-08-22 19:29:39 +02:00
|
|
|
|
|
|
|
instance Exception GEvalException
|
|
|
|
|
|
|
|
instance Show GEvalException where
|
|
|
|
show (NoExpectedFile filePath) = somethingWrongWithFilesMessage "No file with the expected results" filePath
|
|
|
|
show (NoOutFile filePath) = somethingWrongWithFilesMessage "No file with the test results" filePath
|
|
|
|
show (NoExpectedDirectory filePath) = somethingWrongWithFilesMessage "No directory with the expected results" filePath
|
|
|
|
show (NoOutDirectory filePath) = somethingWrongWithFilesMessage "No directory with the test results" filePath
|
|
|
|
show (NoExpectedTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the expected results" filePath
|
|
|
|
show (NoOutTestDirectory filePath) = somethingWrongWithFilesMessage "No test subdirectory with the results obtained" filePath
|
2015-08-23 17:47:40 +02:00
|
|
|
show (FileAlreadyThere filePath) = somethingWrongWithFilesMessage "File already there" filePath
|
2015-08-22 19:29:39 +02:00
|
|
|
|
|
|
|
somethingWrongWithFilesMessage :: String -> FilePath -> String
|
|
|
|
somethingWrongWithFilesMessage msg filePath = Prelude.concat
|
|
|
|
[ msg, ": `", filePath, "`" ]
|
|
|
|
|
2015-08-21 22:56:32 +02:00
|
|
|
defaultGEvalSpecification = GEvalSpecification {
|
|
|
|
gesOutDirectory = defaultOutDirectory,
|
|
|
|
gesExpectedDirectory = Nothing,
|
|
|
|
gesTestName = defaultTestName,
|
|
|
|
gesOutFile = defaultOutFile,
|
|
|
|
gesExpectedFile = defaultExpectedFile,
|
|
|
|
gesMetric = defaultMetric }
|
|
|
|
|
|
|
|
|
2015-08-22 20:29:56 +02:00
|
|
|
geval :: GEvalSpecification -> IO (MetricValue)
|
2015-08-22 19:29:39 +02:00
|
|
|
geval gevalSpec = do
|
|
|
|
unlessM (D.doesDirectoryExist outDirectory) $ throwM $ NoOutDirectory outDirectory
|
|
|
|
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
|
|
|
|
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
|
|
|
|
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
|
|
|
|
gevalCore metric expectedFilePath outFilePath
|
|
|
|
where expectedFilePath = expectedTestDirectory </> (gesExpectedFile gevalSpec)
|
|
|
|
outFilePath = outTestDirectory </> (gesOutFile gevalSpec)
|
|
|
|
expectedTestDirectory = expectedDirectory </> testName
|
|
|
|
outTestDirectory = outDirectory </> testName
|
2015-08-23 07:40:37 +02:00
|
|
|
expectedDirectory = getExpectedDirectory gevalSpec
|
2015-08-21 22:56:32 +02:00
|
|
|
outDirectory = gesOutDirectory gevalSpec
|
|
|
|
testName = gesTestName gevalSpec
|
|
|
|
metric = gesMetric gevalSpec
|
|
|
|
|
2015-08-22 20:29:56 +02:00
|
|
|
gevalCore :: Metric -> String -> String -> IO (MetricValue)
|
2015-08-24 22:32:09 +02:00
|
|
|
gevalCore RMSE expectedFilePath outFilePath = do
|
|
|
|
mse <- gevalCore MSE expectedFilePath outFilePath
|
|
|
|
return $ mse ** 0.5
|
|
|
|
|
|
|
|
gevalCore metric expectedFilePath outFilePath = do
|
2015-08-22 19:29:39 +02:00
|
|
|
unlessM (D.doesFileExist expectedFilePath) $ throwM $ NoExpectedFile expectedFilePath
|
|
|
|
unlessM (D.doesFileExist outFilePath) $ throwM $ NoOutFile outFilePath
|
2015-08-24 22:32:09 +02:00
|
|
|
gevalCore' metric expectedFilePath outFilePath
|
|
|
|
|
|
|
|
gevalCore' :: Metric -> String -> String -> IO (MetricValue)
|
2015-08-24 23:40:40 +02:00
|
|
|
gevalCore' MSE = gevalCore'' outParser outParser itemError averageC id
|
2015-08-24 22:51:03 +02:00
|
|
|
where outParser = getValue . TR.double
|
|
|
|
|
2015-08-24 23:40:40 +02:00
|
|
|
gevalCore' BLEU = gevalCore'' (DLS.splitOn "\t" . unpack) unpack bleuCombine bleuAgg bleuFinal
|
|
|
|
where bleuFinal (p1, p2, p3, p4, cl, l1, l2, l3, l4) = p1 /. l1
|
|
|
|
bleuCombine (refs, sen) = bleuStep refs sen
|
|
|
|
bleuAgg = CC.foldl bleuFuse (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
|
|
|
bleuFuse (a1, a2, a3, a4, a5, a6, a7, a8, a9) (b1, b2, b3, b4, b5, b6, b7, b8, b9) = (a1+b1, a2+b2, a3+b3, a4+b4, a5+b5, a6+b6, a7+b7, a8+b8, a9+b9)
|
|
|
|
|
|
|
|
(/.) :: Int -> Int -> Double
|
|
|
|
x /. y = (fromIntegral x) / (fromIntegral y)
|
|
|
|
|
|
|
|
gevalCore'' :: (Text -> a) -> (Text -> b) -> ((a, b) -> c) -> (Sink c (ResourceT IO) d) -> (d -> Double ) -> String -> String -> IO (MetricValue)
|
|
|
|
gevalCore'' expParser outParser itemStep aggregator finalStep expectedFilePath outFilePath = do
|
|
|
|
v <- runResourceT $
|
2015-08-19 23:24:19 +02:00
|
|
|
(getZipSource $ (,)
|
2015-08-24 22:51:03 +02:00
|
|
|
<$> ZipSource (items expectedFilePath expParser)
|
|
|
|
<*> ZipSource (items outFilePath outParser))
|
|
|
|
$$ (CL.map itemStep
|
|
|
|
=$ aggregator)
|
2015-08-24 23:40:40 +02:00
|
|
|
return $ finalStep v
|
2015-08-19 23:24:19 +02:00
|
|
|
|
|
|
|
averageC :: MonadResource m => Sink Double m Double
|
|
|
|
averageC = getZipSink
|
|
|
|
$ (\total count -> total / fromIntegral count)
|
|
|
|
<$> ZipSink CC.sum
|
|
|
|
<*> ZipSink CC.length
|
|
|
|
|
2015-08-24 22:51:03 +02:00
|
|
|
items :: MonadResource m => String -> (Text -> a) -> Source m a
|
|
|
|
items filePath parser =
|
2015-08-19 23:24:19 +02:00
|
|
|
CB.sourceFile filePath
|
|
|
|
$= (CT.decode CT.utf8
|
|
|
|
=$= CT.lines
|
2015-08-24 22:51:03 +02:00
|
|
|
=$= CL.map parser)
|
2015-08-19 23:24:19 +02:00
|
|
|
|
|
|
|
itemError :: (Double, Double) -> Double
|
|
|
|
itemError (exp, out) = (exp-out)**2
|
2015-08-19 22:14:34 +02:00
|
|
|
|
|
|
|
getValue :: Either String (Double, Text) -> Double
|
|
|
|
getValue (Right (x, _)) = x
|