add sample toy challenge for LogLossHashed

2017-04-03 10:41:44 +02:00 · 2017-04-03 10:41:44 +02:00 · 6144ae6bdf
commit 6144ae6bdf
parent 59f19cbe18
2 changed files with 33 additions and 1 deletions
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@ -100,6 +100,21 @@ Cluster proverbs for languages.
 This is a sample challenge for flat clustering (unsupervised learning challenge).
 |] ++ (commonReadmeMDContents testName)

+readmeMDContents (LogLossHashed _) testName = [i|
+GEval sample challenge — language model evaluation
+==================================================
+
+Give a probability distribution for words.
+
+This is a sample challenge for evaluating language models.
+The metric is average log-loss calculated for 10-bit hashes.
+
+Train file is just a text file (one utterance per line).
+In an input file, left and right contexts (TAB-separated) are given.
+In an expected file, the word to be guessed is given.
+|] ++ (commonReadmeMDContents testName)
+
+
 readmeMDContents _ testName = [i|
 GEval sample challenge
 ======================
@ -168,6 +183,11 @@ en	The pen is mightier than the sword.
 pl	Baba z wozu, koniom lżej.
 |]

+trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
+Basia ma psa
+Nie kupujemy kota w worku
+Czesia ma kota
+|]
 trainContents _ = [hereLit|0.06	0.39	0	0.206
 1.00	1.00	1	0.017
 317.8	5.20	67	0.048
@ -189,6 +209,9 @@ devInContents (FMeasure _) = [hereLit|b	b	W	29520	779	-28	-32	a	0	0	0	0	0	0	0	0
 b	b	W	55200	1259	35	9	a	1	0	1	0	0	0	0	0	4000	4000
 |]

+devInContents (LogLossHashed _) = [hereLit|Nie kupuj	w worku
+Ona	psa
+|]
 devInContents _ = [hereLit|0.72	0	0.007
 9.54	62	0.054
 |]
@ -207,6 +230,9 @@ devExpectedContents NMI = [hereLit|en
 pl
 en
 |]
+devExpectedContents (LogLossHashed _) = [hereLit|kota
+ma
+|]
 devExpectedContents _ = [hereLit|0.82
 95.2
 |]
@ -227,6 +253,9 @@ W marcu, jak w garncu.
 A cada necio agrada su porrada.
 Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
 |]
+testInContents (LogLossHashed _) = [hereLit|Ala	ma
+Ona ma kota	worku
+|]
 testInContents _ = [hereLit|1.52	2	0.093
 30.06	14	0.009
 |]
@ -247,6 +276,9 @@ pl
 es
 pl
 |]
+testExpectedContents (LogLossHashed _) = [hereLit|ma
+w
+|]
 testExpectedContents _ = [hereLit|0.11
 17.2
 |]
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@ -74,7 +74,7 @@ metricReader = option auto
                 <> value defaultMetric
                 <> showDefault
                 <> metavar "METRIC"
-                 <> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU, NMI or ClippEU" )
+                 <> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU, NMI, ClippEU or LogLossHashed" )

 runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
 runGEval args = do