Add PerplexityHashed metric

2021-08-20 13:08:12 +02:00 · 2021-08-20 13:08:12 +02:00 · 612792799a
commit 612792799a
parent 2cd31bd613
9 changed files with 75 additions and 29 deletions
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@ -171,6 +171,7 @@ isPreprocessable (FLCFMeasure _) = False
 isPreprocessable NMI = False
 isPreprocessable (LogLossHashed _) = False
 isPreprocessable (LikelihoodHashed _) = False
 isPreprocessable (PerplexityHashed _) = False
 isPreprocessable CharMatch = True
 isPreprocessable MAP = False
 isPreprocessable LogLoss = False
@ -572,10 +573,11 @@ handleBootstrap (Mean _) = False
 handleBootstrap CharMatch = False
 handleBootstrap (LogLossHashed _) = False
 handleBootstrap (LikelihoodHashed _ ) = False
 handleBootstrap (PerplexityHashed _ ) = False
 handleBootstrap Pearson = False
 handleBootstrap Spearman = False
-handleBootstrap (ProbabilisticMultiLabelFMeasure beta) = False
+handleBootstrap (ProbabilisticMultiLabelFMeasure _) = False
-handleBootstrap (ProbabilisticSoftFMeasure beta) = False
+handleBootstrap (ProbabilisticSoftFMeasure _) = False
 handleBootstrap _ = True
 -- | Runs evaluation for a given metric using the sources specified
@ -601,8 +603,12 @@ isEmptyFileSource :: SourceSpec -> IO Bool
 isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
 isEmptyFileSource _ = return False
 -- | Turn a log-loss value into a likelihood by exponentiating its negation,
 -- i.e. @exp (-logLoss)@.
 logLossToLikehood :: Floating a => a -> a
 logLossToLikehood = exp . negate
 -- | Turn a log-loss value into a perplexity, computed as the reciprocal of
 -- the likelihood returned by 'logLossToLikehood'.
 logLossToPerplexity :: Floating a => a -> a
 logLossToPerplexity = recip . logLossToLikehood
 -- | Bundles a 'SourceSpec' with a 'Word32' and a 'Text'.
 -- NOTE(review): judging by the name, these are presumably the source a line
 -- came from, its line number and its contents -- confirm against the callers.
 data LineInFile
   = LineInFile SourceSpec Word32 Text
   deriving (Show)
@ -677,6 +683,7 @@ gevalCoreOnSources CharMatch = helper
 gevalCoreOnSources (LogLossHashed nbOfBits) = helperLogLossHashed nbOfBits id
 gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood
 gevalCoreOnSources (PerplexityHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToPerplexity
 gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec))
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@ -15,9 +15,7 @@ import qualified System.Directory as D
 import Control.Conditional (whenM)
 import Data.Maybe (catMaybes)
 import System.IO
 import System.FilePath
 import Control.Exception
 import Control.Monad.Trans.Resource
 import Data.String.Here
@ -60,6 +58,7 @@ createChallenge withDataFiles expectedDirectory spec = do
        testDirectory = expectedDirectory </> testName
        expectedFile = gesExpectedFile spec
 -- | Write a header file made of the given field names.
 --
 -- When the field list is 'Nothing' nothing is done. Otherwise the fields are
 -- joined with tab characters, terminated with a newline, and handed to
 -- 'createFile' together with the path @expectedDirectory </> headerFile@.
 createHeaderFile :: FilePath -> FilePath -> Maybe [[Char]] -> IO ()
 createHeaderFile expectedDirectory headerFile = maybe (return ()) writeHeader
   where
     headerPath = expectedDirectory </> headerFile
     writeHeader fields = createFile headerPath (intercalate "\t" fields ++ "\n")
@ -67,10 +66,12 @@ createHeaderFile expectedDirectory headerFile (Just fields) = do
 -- | Create the training file(s) for a metric inside @trainDirectory@.
 --
 -- For 'LogLossHashed', 'LikelihoodHashed' and 'PerplexityHashed' the work is
 -- delegated to 'createSingleTrainFile' and the expected-file name is ignored.
 -- Every other metric gets an @in.tsv@ (from 'trainInContents') plus a
 -- separate expected file (from 'trainExpectedContents').
 createTrainFiles :: Metric -> FilePath -> FilePath -> IO ()
 createTrainFiles metric trainDirectory expectedFile
   | usesSingleTrainFile = createSingleTrainFile metric trainDirectory
   | otherwise = do
       createFile (trainDirectory </> "in.tsv") (trainInContents metric)
       createFile (trainDirectory </> expectedFile) (trainExpectedContents metric)
   where
     -- True exactly for the metrics that keep all training data in one file.
     usesSingleTrainFile = case metric of
       LogLossHashed _ -> True
       LikelihoodHashed _ -> True
       PerplexityHashed _ -> True
       _ -> False
 -- | Create @train.tsv@ in @trainDirectory@, passing 'createFile' the contents
 -- that 'trainContents' yields for the metric.
 createSingleTrainFile :: Metric -> FilePath -> IO ()
 createSingleTrainFile metric trainDirectory = createFile trainPath (trainContents metric)
   where
     trainPath = trainDirectory </> "train.tsv"
@ -199,6 +200,7 @@ This is a sample challenge for flat clustering (unsupervised learning challenge)
 |] ++ (commonReadmeMDContents testName)
 readmeMDContents (LikelihoodHashed b) testname = readmeMDContents (LogLossHashed b) testname
 readmeMDContents (PerplexityHashed b) testname = readmeMDContents (LogLossHashed b) testname
 readmeMDContents (LogLossHashed _) testName = [i|
 GEval sample challenge — language model evaluation
@ -356,7 +358,7 @@ character (inclusively).
 |] ++ (commonReadmeMDContents testName)
 readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta ExactMatch) testName
-readmeMDContents (MultiLabelFMeasure beta _) testName = [i|
+readmeMDContents (MultiLabelFMeasure _ _) testName = [i|
 Tag names and their component
 =============================
@ -547,6 +549,7 @@ en	The pen is mightier than the sword.
 pl	Baba z wozu, koniom lżej.
 |]
 trainContents (LikelihoodHashed b) = trainContents (LogLossHashed b)
 trainContents (PerplexityHashed b) = trainContents (LogLossHashed b)
 trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
 Basia ma psa
 Nie kupujemy kota w worku
@ -631,6 +634,7 @@ devInContents (SoftFMeasure _) = [hereLit|I have two kids
 7 April 2003
 |]
 devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
 devInContents (PerplexityHashed b) = devInContents (LogLossHashed b)
 devInContents (LogLossHashed _) = [hereLit|Nie kupuj	w worku
 Ona	psa
 |]
@ -706,6 +710,7 @@ pl
 en
 |]
 devExpectedContents (LikelihoodHashed b) = devExpectedContents (LogLossHashed b)
 devExpectedContents (PerplexityHashed b) = devExpectedContents (LogLossHashed b)
 devExpectedContents (LogLossHashed _) = [hereLit|kota
 ma
 |]
@ -786,6 +791,7 @@ A cada necio agrada su porrada.
 Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
 |]
 testInContents (LikelihoodHashed b) = testInContents (LogLossHashed b)
 testInContents (PerplexityHashed b) = testInContents (LogLossHashed b)
 testInContents (LogLossHashed _) = [hereLit|Ala	ma
 Ona ma kota	worku
 |]
@ -864,6 +870,7 @@ es
 pl
 |]
 testExpectedContents (LikelihoodHashed b) = testExpectedContents (LogLossHashed b)
 testExpectedContents (PerplexityHashed b) = testExpectedContents (LogLossHashed b)
 testExpectedContents (LogLossHashed _) = [hereLit|ma
 w
 |]
@ -947,6 +954,7 @@ inHeaderContents (ProbabilisticSoftFMeasure b) = inHeaderContents (SoftFMeasure
 inHeaderContents (SoftFMeasure _) = Just ["Text"]
 inHeaderContents NMI = Just ["Utterance"]
 inHeaderContents (LikelihoodHashed b) = inHeaderContents (LogLossHashed b)
 inHeaderContents (PerplexityHashed b) = inHeaderContents (LogLossHashed b)
 inHeaderContents (LogLossHashed _) = Just ["LeftContext", "RightContext"]
 inHeaderContents CharMatch = Just ["Text"]
 inHeaderContents MAP = Just ["Dialect", "PolishPhrase"]
@ -979,6 +987,7 @@ outHeaderContents (ProbabilisticSoftFMeasure b) = outHeaderContents (SoftFMeasur
 outHeaderContents (SoftFMeasure _) = Just ["NamesFound"]
 outHeaderContents NMI = Just ["LanguageCode"]
 outHeaderContents (LikelihoodHashed b) = outHeaderContents (LogLossHashed b)
 outHeaderContents (PerplexityHashed b) = outHeaderContents (LogLossHashed b)
 outHeaderContents (LogLossHashed _) = Just ["GuessedWord"]
 outHeaderContents CharMatch = Just ["NormalizedText"]
 outHeaderContents MAP = Nothing
--- a/src/GEval/Metric.hs
+++ b/src/GEval/Metric.hs
@ -29,7 +29,8 @@ import Data.Attoparsec.Text (parseOnly)
 data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy MatchingSpecification | ClippEU
              | FMeasure Double | MacroFMeasure Double | NMI
              | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
-              | BIOF1 | BIOWeightedF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
+              | BIOF1 | BIOWeightedF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | PerplexityHashed Word32
              | MAE | SMAPE
              | MultiLabelFMeasure Double MatchingSpecification
              | MultiLabelLogLoss | MultiLabelLikelihood
              | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double
@ -80,6 +81,12 @@ instance Show Metric where
                                                              ""
                                                            else
                                                              (show nbOfBits))
  show (PerplexityHashed nbOfBits) = "PerplexityHashed" ++ (if
                                                               nbOfBits == defaultLogLossHashedSize
                                                            then
                                                              ""
                                                            else
                                                              (show nbOfBits))
  show CharMatch = "CharMatch"
  show MAP = "MAP"
  show LogLoss = "LogLoss"
@ -115,28 +122,28 @@ applyMatchingSpecification _ metric = error $ "Matching specification cannot be
 instance Read Metric where
  readsPrec p ('M':'e':'a':'n':'/':theRest) = case readsPrec p theRest of
-    [(metric, theRest)] -> [(Mean metric, theRest)]
+    [(metric, theRest')] -> [(Mean metric, theRest')]
    _ -> []
  readsPrec p ('F':'u':'z':'z':'y':'/':theRest) = case readsPrec p theRest of
-    [(metric, theRest)] -> [(applyMatchingSpecification (const FuzzyMatch) metric, theRest)]
+    [(metric, theRest')] -> [(applyMatchingSpecification (const FuzzyMatch) metric, theRest')]
    _ -> []
  readsPrec p ('C':'u':'t':'L':'a':'b':'e':'l':'/':theRest) = case readsPrec p theRest of
-    [(metric, theRest)] -> [(applyMatchingSpecification CutLabel metric, theRest)]
+    [(metric, theRest')] -> [(applyMatchingSpecification CutLabel metric, theRest')]
    _ -> []
  readsPrec p ('S':'m':'a':'r':'t':'/':theRest) = case readsPrec p theRest of
-    [(metric, theRest)] -> [(applyMatchingSpecification SmartMatch metric, theRest)]
+    [(metric, theRest')] -> [(applyMatchingSpecification SmartMatch metric, theRest')]
    _ -> []
  readsPrec p ('H':'a':'r':'d':'e':'n':'/':theRest) = case readsPrec p theRest of
-    [(metric, theRest)] -> [(applyMatchingSpecification Harden metric, theRest)]
+    [(metric, theRest')] -> [(applyMatchingSpecification Harden metric, theRest')]
    _ -> []
  readsPrec p ('L':'e':'n':'i':'e':'n':'t':'H':'a':'r':'d':'e':'n':'/':theRest) = case readsPrec p theRest of
-    [(metric, theRest)] -> [(applyMatchingSpecification LenientHarden metric, theRest)]
+    [(metric, theRest')] -> [(applyMatchingSpecification LenientHarden metric, theRest')]
    _ -> []
  readsPrec p ('L':'o':'w':'e':'r':'/':theRest) = case readsPrec p theRest of
-    [(metric, theRest)] -> [(applyMatchingSpecification Lower metric, theRest)]
+    [(metric, theRest')] -> [(applyMatchingSpecification Lower metric, theRest')]
    _ -> []
  readsPrec p ('E':'x':'t':'r':'a':'c':'t':'N':'u':'m':'b':'e':'r':'/':theRest) = case readsPrec p theRest of
-    [(metric, theRest)] -> [(applyMatchingSpecification ExtractNumber metric, theRest)]
+    [(metric, theRest')] -> [(applyMatchingSpecification ExtractNumber metric, theRest')]
    _ -> []
  readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
  readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
@ -150,38 +157,41 @@ instance Read Metric where
  readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
  readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
  readsPrec p ('F':'L':'C':'-':'F':theRest) = case readsPrec p theRest of
-    [(beta, theRest)] -> [(FLCFMeasure beta, theRest)]
+    [(beta, theRest')] -> [(FLCFMeasure beta, theRest')]
    _ -> []
  readsPrec p ('F':theRest) = case readsPrec p theRest of
-    [(beta, theRest)] -> [(FMeasure beta, theRest)]
+    [(beta, theRest')] -> [(FMeasure beta, theRest')]
    _ -> []
  readsPrec p ('M':'a':'c':'r':'o':'-':'F':theRest) = case readsPrec p theRest of
-    [(beta, theRest)] -> [(MacroFMeasure beta, theRest)]
+    [(beta, theRest')] -> [(MacroFMeasure beta, theRest')]
    _ -> []
  readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
-    [(beta, theRest)] -> [(MultiLabelFMeasure beta ExactMatch, theRest)]
+    [(beta, theRest')] -> [(MultiLabelFMeasure beta ExactMatch, theRest')]
    _ -> []
  readsPrec p ('S':'o':'f':'t':'2':'D':'-':'F':theRest) = case readsPrec p theRest of
-    [(beta, theRest)] -> [(Soft2DFMeasure beta, theRest)]
+    [(beta, theRest')] -> [(Soft2DFMeasure beta, theRest')]
    _ -> []
  readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
-    [(beta, theRest)] -> [(SoftFMeasure beta, theRest)]
+    [(beta, theRest')] -> [(SoftFMeasure beta, theRest')]
    _ -> []
  readsPrec p ('P':'r':'o':'b':'a':'b':'i':'l':'i':'s':'t':'i':'c':'-':'M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
-    [(beta, theRest)] -> [(ProbabilisticMultiLabelFMeasure beta, theRest)]
+    [(beta, theRest')] -> [(ProbabilisticMultiLabelFMeasure beta, theRest')]
    _ -> []
  readsPrec p ('P':'r':'o':'b':'a':'b':'i':'l':'i':'s':'t':'i':'c':'-':'S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
-    [(beta, theRest)] -> [(ProbabilisticSoftFMeasure beta, theRest)]
+    [(beta, theRest')] -> [(ProbabilisticSoftFMeasure beta, theRest')]
    _ -> []
  readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
-    [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
+    [(nbOfBits, theRest')] -> [(LogLossHashed nbOfBits, theRest')]
    _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
  readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
-    [(nbOfBits, theRest)] -> [(LikelihoodHashed nbOfBits, theRest)]
+    [(nbOfBits, theRest')] -> [(LikelihoodHashed nbOfBits, theRest')]
    _ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)]
  readsPrec p ('P':'e':'r':'p':'l':'e':'x':'i':'t':'y':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
    [(nbOfBits, theRest')] -> [(PerplexityHashed nbOfBits, theRest')]
    _ -> [(PerplexityHashed defaultLogLossHashedSize, theRest)]
  readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
  readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
-  readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
+  readsPrec _ ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
  readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
  readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
  readsPrec _ ('B':'I':'O':'-':'W':'e':'i':'g':'h':'t':'e':'d':'-':'F':'1': theRest) = [(BIOWeightedF1, theRest)]
@ -220,6 +230,7 @@ getMetricOrdering (FLCFMeasure _) = TheHigherTheBetter
 getMetricOrdering NMI = TheHigherTheBetter
 getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
 getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
 getMetricOrdering (PerplexityHashed _) = TheLowerTheBetter
 getMetricOrdering CharMatch = TheHigherTheBetter
 getMetricOrdering MAP = TheHigherTheBetter
 getMetricOrdering LogLoss = TheLowerTheBetter
@ -239,10 +250,11 @@ getMetricOrdering (Mean metric) = getMetricOrdering metric
 -- | Compare two metric values by quality rather than by magnitude.
 --
 -- The direction comes from 'getMetricOrdering': for 'TheHigherTheBetter'
 -- metrics the values are compared as they are, for 'TheLowerTheBetter'
 -- metrics the arguments are swapped, so in both cases the better value
 -- compares as the greater one.
 metricCompare :: Metric -> MetricValue -> MetricValue -> Ordering
 metricCompare metric a b = metricCompare' (getMetricOrdering metric) a b
-  where metricCompare' TheHigherTheBetter a b = a `compare` b
+  where metricCompare' TheHigherTheBetter a' b' = a' `compare` b'
-        metricCompare' TheLowerTheBetter a b = b `compare` a
+        metricCompare' TheLowerTheBetter a' b' = b' `compare` a'
 -- | The best score attainable for a metric.
 --
 -- 'PerplexityHashed' is lower-is-better but its optimum is 1.0, not 0.0, so
 -- it is special-cased ahead of the ordering-based default. 'Mean' wrappers
 -- are unwrapped first so that a wrapped 'PerplexityHashed' reaches that
 -- special case instead of falling through to the default and yielding 0.0.
 -- For every other wrapped metric the result is unchanged, because
 -- 'getMetricOrdering' already looks through 'Mean'.
 bestPossibleValue :: Metric -> MetricValue
 bestPossibleValue (Mean metric) = bestPossibleValue metric
 bestPossibleValue (PerplexityHashed _) = 1.0
 bestPossibleValue metric = case getMetricOrdering metric of
  TheLowerTheBetter -> 0.0
  TheHigherTheBetter -> 1.0
@ -268,6 +280,7 @@ perfectOutLineFromExpectedLine :: Metric -> Text -> Text
 perfectOutLineFromExpectedLine (Mean metric) t = perfectOutLineFromExpectedLine metric t
 perfectOutLineFromExpectedLine (LogLossHashed _) t = addProbOne t
 perfectOutLineFromExpectedLine (LikelihoodHashed _) t = addProbOne t
 perfectOutLineFromExpectedLine (PerplexityHashed _) t = addProbOne t
 perfectOutLineFromExpectedLine BLEU t = getFirstColumn t
 perfectOutLineFromExpectedLine GLEU t = getFirstColumn t
 perfectOutLineFromExpectedLine ClippEU t = cleanMarginFromClippEU t
--- a/src/GEval/MetricsMechanics.hs
+++ b/src/GEval/MetricsMechanics.hs
@ -54,7 +54,8 @@ import qualified Data.HashMap.Strict as M
 singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy MatchingSpecification | AClippEU
                             | AFMeasure | AMacroFMeasure | ANMI
                             | ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood
-                             | ABIOF1 | ABIOWeightedF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
+                             | ABIOF1 | ABIOWeightedF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | APerplexityHashed
                             | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
                             | AMultiLabelLogLoss | AMultiLabelLikelihood
                             | ASoftFMeasure | AProbabilisticMultiLabelFMeasure | AProbabilisticSoftFMeasure | ASoft2DFMeasure
                             | AFLCFMeasure | AHaversine
@ -87,6 +88,7 @@ toHelper BIOF1Labels = ABIOF1Labels
 toHelper TokenAccuracy = ATokenAccuracy
 toHelper SegmentAccuracy = ASegmentAccuracy
 toHelper (LikelihoodHashed _) = ALikelihoodHashed
 toHelper (PerplexityHashed _) = APerplexityHashed
 toHelper MAE = AMAE
 toHelper SMAPE = ASMAPE
 toHelper (MultiLabelFMeasure _ matchingSpec) = AMultiLabelFMeasure matchingSpec
@ -124,6 +126,7 @@ type family ParsedExpectedType (t :: AMetric) :: * where
  ParsedExpectedType ANMI = Text
  ParsedExpectedType ALogLossHashed = Text
  ParsedExpectedType ALikelihoodHashed = Text
  ParsedExpectedType APerplexityHashed = Text
  ParsedExpectedType ACharMatch = Text
  ParsedExpectedType AMAP = [String]
  ParsedExpectedType ALogLoss = Double
@ -161,6 +164,7 @@ expectedParser SASoft2DFMeasure = controlledParse lineLabeledClippingsParser
 expectedParser SANMI = Right . id
 expectedParser SALogLossHashed = onlyStrip
 expectedParser SALikelihoodHashed = onlyStrip
 expectedParser SAPerplexityHashed = onlyStrip
 expectedParser SACharMatch = Right
 expectedParser SAMAP = splitByTabs
 expectedParser SALogLoss = doubleParser
@ -212,6 +216,7 @@ outputParser SASoft2DFMeasure = expectedParser SASoft2DFMeasure
 outputParser SANMI = expectedParser SANMI
 outputParser SALogLossHashed = onlyStrip
 outputParser SALikelihoodHashed = onlyStrip
 outputParser SAPerplexityHashed = onlyStrip
 outputParser SACharMatch = Right
 outputParser SAMAP = splitByTabs
 outputParser SALogLoss = doubleParser
@ -252,6 +257,7 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
  ItemIntermediateRepresentationType (AMultiLabelFMeasure _) = (Double, Int, Int)
  ItemIntermediateRepresentationType ALogLossHashed = (Text, Text)
  ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
  ItemIntermediateRepresentationType APerplexityHashed = (Text, Text)
  ItemIntermediateRepresentationType ACharMatch = (Text, Text)
  ItemIntermediateRepresentationType AWER = (Int, Int)
  ItemIntermediateRepresentationType ACER = (Int, Int)
@ -288,6 +294,7 @@ itemStep SASoft2DFMeasure = getSoft2DCounts
 itemStep SANMI = id
 itemStep SALogLossHashed = id
 itemStep SALikelihoodHashed = id
 itemStep SAPerplexityHashed = id
 itemStep SACharMatch = id
 itemStep SAMAP = uncurry calculateMAPForOneResult
 itemStep SALogLoss = itemLogLossError
--- a/src/GEval/MetricsMeta.hs
+++ b/src/GEval/MetricsMeta.hs
@ -23,7 +23,6 @@ import GEval.MatchingSpecification (MatchingSpecification(ExactMatch))
 import Text.Regex.PCRE.Heavy
 import Data.Either (fromRight)
 import Data.String.Here
 import Data.Maybe (fromMaybe)
 import Data.List (intercalate)
 import Text.Printf
@ -63,6 +62,7 @@ listOfAvailableMetrics = [RMSE,
                          ClippEU,
                          LogLossHashed defaultLogLossHashedSize,
                          LikelihoodHashed defaultLogLossHashedSize,
                          PerplexityHashed defaultLogLossHashedSize,
                          BIOF1,
                          BIOWeightedF1,
                          BIOF1Labels,
--- a/test/Spec.hs
+++ b/test/Spec.hs
@ -209,6 +209,9 @@ main = hspec $ do
      runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
    it "with log probs whose probs are summing up to less than 1.0" $ do
      runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
  describe "PerplexityHashed challenge" $ do
    it "simple example" $ do
      runGEvalTest "perplexity-hashed-simple" `shouldReturnAlmost` 11.006423790840
  describe "LikelihoodHashed challenge" $ do
    it "example with unnormalized values" $ do
      runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715
--- a/test/perplexity-hashed-simple/perplexity-hashed-simple-solution/test-A/out.tsv
+++ b/test/perplexity-hashed-simple/perplexity-hashed-simple-solution/test-A/out.tsv
@ -0,0 +1,3 @@
 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -0.916290731874155 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 
-6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -2.30258509299405 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 
-6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848 -6.23048144757848
 źdźbło:-0.6931471805599453 foo:-1.6094379124341003 az:-1.2039728043259361
 złoty:-0.916290731874155 :-0.5108256237659907
--- a/test/perplexity-hashed-simple/perplexity-hashed-simple/config.txt
+++ b/test/perplexity-hashed-simple/perplexity-hashed-simple/config.txt
@ -0,0 +1 @@
 --metric PerplexityHashed8
--- a/test/perplexity-hashed-simple/perplexity-hashed-simple/test-A/expected.tsv
+++ b/test/perplexity-hashed-simple/perplexity-hashed-simple/test-A/expected.tsv
@ -0,0 +1,3 @@
 ma
 źdźbło
 dolar