Refactor (introduce GEval.Metric)

This commit is contained in:
Filip Gralinski 2019-08-10 12:30:17 +02:00
parent 1606fe1fbb
commit 2236899c3d
5 changed files with 13 additions and 136 deletions

View File

@ -1,3 +1,7 @@
## 1.18.0.0
* Add --validate option
## 1.17.0.0
* Add Probabilistic-Soft-F-score

View File

@ -16,7 +16,8 @@ cabal-version: >=1.10
library
hs-source-dirs: src
exposed-modules: GEval.Core
GEval.CreateChallenge
, GEval.Metric
, GEval.CreateChallenge
, GEval.OptionsParser
, GEval.BLEU
, GEval.ClippEU

View File

@ -11,10 +11,8 @@ module GEval.Core
( geval,
gevalCore,
gevalCoreOnSingleLines,
module GEval.Metric,
LineInFile(..),
Metric(..),
MetricOrdering(..),
getMetricOrdering,
isBetter,
isBetterOrEqual,
GEvalSpecialCommand(..),
@ -50,6 +48,8 @@ module GEval.Core
somethingWrongWithFilesMessage
) where
import GEval.Metric
import Data.Conduit
import Data.Conduit.Combinators as CC
import qualified Data.Conduit.Binary as CB
@ -114,136 +114,6 @@ import Data.Word
import "Glob" System.FilePath.Glob
-- | Hash size used by the hashed metrics ('LogLossHashed',
-- 'LikelihoodHashed') when a metric name carries no explicit size.
-- The 'Show' instance of 'Metric' omits the size when it equals this value.
defaultLogLossHashedSize :: Word32
defaultLogLossHashedSize = 10
-- | Evaluation metric.
--
-- Constructors carrying a 'Double' are F-measure variants parametrised
-- by beta; constructors carrying a 'Word32' are the hashed variants
-- parametrised by a size (called @nbOfBits@ in the 'Show' and 'Read'
-- instances).
data Metric
  = RMSE
  | MSE
  | Pearson
  | Spearman
  | BLEU
  | GLEU
  | WER
  | Accuracy
  | ClippEU
  | FMeasure Double
  | MacroFMeasure Double
  | NMI
  | LogLossHashed Word32
  | CharMatch
  | MAP
  | LogLoss
  | Likelihood
  | BIOF1
  | BIOF1Labels
  | TokenAccuracy
  | LikelihoodHashed Word32
  | MAE
  | SMAPE
  | MultiLabelFMeasure Double
  | MultiLabelLogLoss
  | MultiLabelLikelihood
  | SoftFMeasure Double
  | ProbabilisticSoftFMeasure Double
  deriving (Eq)
-- | Renders a metric under its textual name.
--
-- Parametrised F-measures append the shown beta value.  The hashed
-- metrics append their size only when it differs from
-- 'defaultLogLossHashedSize'.
instance Show Metric where
  show metric = case metric of
    RMSE -> "RMSE"
    MSE -> "MSE"
    Pearson -> "Pearson"
    Spearman -> "Spearman"
    BLEU -> "BLEU"
    GLEU -> "GLEU"
    WER -> "WER"
    Accuracy -> "Accuracy"
    ClippEU -> "ClippEU"
    FMeasure beta -> "F" ++ show beta
    MacroFMeasure beta -> "Macro-F" ++ show beta
    SoftFMeasure beta -> "Soft-F" ++ show beta
    ProbabilisticSoftFMeasure beta -> "Probabilistic-Soft-F" ++ show beta
    NMI -> "NMI"
    LogLossHashed nbOfBits -> "LogLossHashed" ++ hashedSizeSuffix nbOfBits
    LikelihoodHashed nbOfBits -> "LikelihoodHashed" ++ hashedSizeSuffix nbOfBits
    CharMatch -> "CharMatch"
    MAP -> "MAP"
    LogLoss -> "LogLoss"
    Likelihood -> "Likelihood"
    BIOF1 -> "BIO-F1"
    BIOF1Labels -> "BIO-F1-Labels"
    TokenAccuracy -> "TokenAccuracy"
    MAE -> "MAE"
    SMAPE -> "SMAPE"
    MultiLabelFMeasure beta -> "MultiLabel-F" ++ show beta
    MultiLabelLogLoss -> "MultiLabel-Logloss"
    MultiLabelLikelihood -> "MultiLabel-Likelihood"
    where
      -- The default size is left implicit in the rendered name.
      hashedSizeSuffix size
        | size == defaultLogLossHashedSize = ""
        | otherwise = show size
-- | Parses a metric from its textual name.
--
-- Matching is by prefix on the input string, so the order of the
-- equations matters: a longer name must come before any name that is
-- its prefix (e.g. @LogLossHashed@ before @LogLoss@, @BIO-F1-Labels@
-- before @BIO-F1@).
--
-- F-measure variants require a beta value right after the name and
-- yield no parse when it is missing.  The hashed variants fall back to
-- 'defaultLogLossHashedSize' when no size follows the name.
--
-- Input that matches no metric name yields an empty parse list rather
-- than a pattern-match failure, so 'reads' and @readMaybe@ behave as
-- expected on unknown names.
instance Read Metric where
  readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
  readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
  readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
  readsPrec _ ('S':'p':'e':'a':'r':'m':'a':'n':theRest) = [(Spearman, theRest)]
  readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)]
  readsPrec _ ('G':'L':'E':'U':theRest) = [(GLEU, theRest)]
  readsPrec _ ('W':'E':'R':theRest) = [(WER, theRest)]
  readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
  readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
  readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
  readsPrec p ('F':theRest) = case readsPrec p theRest of
    [(beta, remainder)] -> [(FMeasure beta, remainder)]
    _ -> []
  readsPrec p ('M':'a':'c':'r':'o':'-':'F':theRest) = case readsPrec p theRest of
    [(beta, remainder)] -> [(MacroFMeasure beta, remainder)]
    _ -> []
  readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
    [(beta, remainder)] -> [(MultiLabelFMeasure beta, remainder)]
    _ -> []
  readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
    [(beta, remainder)] -> [(SoftFMeasure beta, remainder)]
    _ -> []
  readsPrec p ('P':'r':'o':'b':'a':'b':'i':'l':'i':'s':'t':'i':'c':'-':'S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
    [(beta, remainder)] -> [(ProbabilisticSoftFMeasure beta, remainder)]
    _ -> []
  -- The hashed names must precede the plain LogLoss / Likelihood names,
  -- which are their prefixes.
  readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
    [(nbOfBits, remainder)] -> [(LogLossHashed nbOfBits, remainder)]
    _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
  readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
    [(nbOfBits, remainder)] -> [(LikelihoodHashed nbOfBits, remainder)]
    _ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)]
  readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
  readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
  readsPrec _ ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
  readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
  -- BIO-F1-Labels must precede BIO-F1, which is its prefix.
  readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
  readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
  readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)]
  readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
  readsPrec _ ('S':'M':'A':'P':'E':theRest) = [(SMAPE, theRest)]
  readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
  -- The Show instance spells this metric "MultiLabel-Logloss" (lower-case
  -- second "l"); accept that spelling too so that a shown value can be
  -- read back.
  readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'l':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
  readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)]
  -- Unknown metric name: report "no parse" instead of crashing with a
  -- non-exhaustive pattern error.
  readsPrec _ _ = []
-- | Direction in which a metric improves: either smaller values or
-- larger values count as better.
data MetricOrdering
  = TheLowerTheBetter
  | TheHigherTheBetter
-- | Tells, for a given metric, whether low values or high values are
-- the preferred ones.
getMetricOrdering :: Metric -> MetricOrdering
getMetricOrdering metric = case metric of
  -- Metrics for which a smaller value is better.
  RMSE -> TheLowerTheBetter
  MSE -> TheLowerTheBetter
  WER -> TheLowerTheBetter
  LogLossHashed _ -> TheLowerTheBetter
  LogLoss -> TheLowerTheBetter
  MAE -> TheLowerTheBetter
  SMAPE -> TheLowerTheBetter
  MultiLabelLogLoss -> TheLowerTheBetter
  -- Metrics for which a larger value is better.
  Pearson -> TheHigherTheBetter
  Spearman -> TheHigherTheBetter
  BLEU -> TheHigherTheBetter
  GLEU -> TheHigherTheBetter
  Accuracy -> TheHigherTheBetter
  ClippEU -> TheHigherTheBetter
  FMeasure _ -> TheHigherTheBetter
  MacroFMeasure _ -> TheHigherTheBetter
  SoftFMeasure _ -> TheHigherTheBetter
  ProbabilisticSoftFMeasure _ -> TheHigherTheBetter
  NMI -> TheHigherTheBetter
  LikelihoodHashed _ -> TheHigherTheBetter
  CharMatch -> TheHigherTheBetter
  MAP -> TheHigherTheBetter
  Likelihood -> TheHigherTheBetter
  BIOF1 -> TheHigherTheBetter
  BIOF1Labels -> TheHigherTheBetter
  TokenAccuracy -> TheHigherTheBetter
  MultiLabelFMeasure _ -> TheHigherTheBetter
  MultiLabelLikelihood -> TheHigherTheBetter
-- | @isBetterOrEqual metric valA valB@ holds exactly when @valB@ is not
-- better than @valA@ according to 'isBetter' for the given metric.
isBetterOrEqual :: Metric -> MetricValue -> MetricValue -> Bool
isBetterOrEqual metric valA valB =
  not $ isBetter metric valB valA

View File

@ -4,7 +4,8 @@ module GEval.CreateChallenge
(createChallenge)
where
import GEval.Core
import GEval.Metric
import GEval.Core (GEvalSpecification(..), GEvalException(..), configFileName, gesMainMetric, defaultTestName)
import GEval.Submit (tokenFileName)
import qualified System.Directory as D
import Control.Conditional (whenM)

View File

@ -4,7 +4,8 @@ module GEval.Validation
( validationChallenge
) where
import GEval.Core
import GEval.Metric
import GEval.Core (GEvalSpecification(..), GEvalException(..), somethingWrongWithFilesMessage, isEmptyFile)
import qualified System.Directory as D
import System.FilePath.Find as SFF