diff --git a/geval.cabal b/geval.cabal
index 7d07ecb..8bf0c46 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -17,6 +17,7 @@ library
   hs-source-dirs:      src
   exposed-modules:     GEval.Core
                      , GEval.Metric
+                     , GEval.MetricsMeta
                      , GEval.EvaluationScheme
                      , GEval.CreateChallenge
                      , GEval.OptionsParser
@@ -137,6 +138,7 @@ test-suite geval-test
                      , silently
                      , vector
                      , statistics
+                     , filepath
   ghc-options:         -threaded -rtsopts -with-rtsopts=-N
   default-language:    Haskell2010
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 0feeef4..96edc11 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -187,7 +187,7 @@ data GEvalSpecialCommand = Init
                            | LineByLine | WorstFeatures
                            | Diff FilePath | MostWorseningFeatures FilePath
                            | PrintVersion | JustTokenize | Submit
-                           | Validate
+                           | Validate | ListMetrics
 
 data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
 
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 34bbd55..61d5a28 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -1,7 +1,8 @@
 {-# LANGUAGE QuasiQuotes #-}
 
 module GEval.CreateChallenge
-       (createChallenge)
+       (createChallenge,
+        testExpectedContents)
        where
 
 import GEval.Metric
diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs
index a3677fe..29840c7 100644
--- a/src/GEval/EvaluationScheme.hs
+++ b/src/GEval/EvaluationScheme.hs
@@ -1,5 +1,5 @@
 module GEval.EvaluationScheme
-  (EvaluationScheme(..), evaluationSchemeMetric, applyPreprocessingOperations, evaluationSchemeName)
+  (EvaluationScheme(..), evaluationSchemeMetric, applyPreprocessingOperations, evaluationSchemeName, PreprocessingOperation(..))
   where
 
 import GEval.Metric
diff --git a/src/GEval/Metric.hs b/src/GEval/Metric.hs
index d2e1e40..ecd02b6 100644
--- a/src/GEval/Metric.hs
+++ b/src/GEval/Metric.hs
@@ -5,7 +5,6 @@ module GEval.Metric
      MetricOrdering(..),
      defaultLogLossHashedSize,
      getMetricOrdering,
-     listOfAvailableMetrics,
      bestPossibleValue,
      perfectOutLineFromExpectedLine,
      fixedNumberOfColumnsInExpected,
@@ -32,47 +31,6 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
               | SoftFMeasure Double | ProbabilisticSoftFMeasure Double
               deriving (Eq)
 
--- | the list of available metrics, to be shown to the user or to be
--- | used for tests
-listOfAvailableMetrics :: [Metric]
-listOfAvailableMetrics = [RMSE,
-                          MSE,
-                          MAE,
-                          SMAPE,
-                          Pearson,
-                          Spearman,
-                          Accuracy,
-                          LogLoss,
-                          Likelihood,
-                          FMeasure 1.0,
-                          FMeasure 2.0,
-                          FMeasure 0.25,
-                          MacroFMeasure 1.0,
-                          MacroFMeasure 2.0,
-                          MacroFMeasure 0.25,
-                          MultiLabelFMeasure 1.0,
-                          MultiLabelFMeasure 2.0,
-                          MultiLabelFMeasure 0.25,
-                          MultiLabelLikelihood,
-                          MAP,
-                          BLEU,
-                          GLEU,
-                          WER,
-                          NMI,
-                          ClippEU,
-                          LogLossHashed defaultLogLossHashedSize,
-                          LikelihoodHashed defaultLogLossHashedSize,
-                          BIOF1,
-                          BIOF1Labels,
-                          TokenAccuracy,
-                          SoftFMeasure 1.0,
-                          SoftFMeasure 2.0,
-                          SoftFMeasure 0.25,
-                          ProbabilisticSoftFMeasure 1.0,
-                          ProbabilisticSoftFMeasure 2.0,
-                          ProbabilisticSoftFMeasure 0.25,
-                          CharMatch]
-
 instance Show Metric where
   show RMSE = "RMSE"
   show MSE = "MSE"
diff --git a/src/GEval/MetricsMeta.hs b/src/GEval/MetricsMeta.hs
new file mode 100644
index 0000000..0a2c398
--- /dev/null
+++ b/src/GEval/MetricsMeta.hs
@@ -0,0 +1,133 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE QuasiQuotes #-}
+
+module GEval.MetricsMeta
+  (listOfAvailableMetrics,
+   listOfAvailableEvaluationSchemes,
+   extraInfo,
+   isEvaluationSchemeDescribed,
+   getEvaluationSchemeDescription,
+   outContents,
+   expectedScore,
+   allMetricsDescription)
+  where
+
+import GEval.Common
+import GEval.Metric
+import GEval.EvaluationScheme
+import GEval.CreateChallenge (testExpectedContents)
+import GEval.PrecisionRecall (weightedHarmonicMean)
+
+import Text.Regex.PCRE.Heavy
+import Data.Either (fromRight)
+import Data.String.Here
+
+import Data.List (intercalate)
+import Text.Printf
+
+-- | The list of available metrics, to be shown to the user or to be
+-- used for tests.
+listOfAvailableMetrics :: [Metric]
+listOfAvailableMetrics = [RMSE,
+                          MSE,
+                          MAE,
+                          SMAPE,
+                          Pearson,
+                          Spearman,
+                          Accuracy,
+                          LogLoss,
+                          Likelihood,
+                          FMeasure 1.0,
+                          FMeasure 2.0,
+                          FMeasure 0.25,
+                          MacroFMeasure 1.0,
+                          MacroFMeasure 2.0,
+                          MacroFMeasure 0.25,
+                          MultiLabelFMeasure 1.0,
+                          MultiLabelFMeasure 2.0,
+                          MultiLabelFMeasure 0.25,
+                          MultiLabelLikelihood,
+                          MAP,
+                          BLEU,
+                          GLEU,
+                          WER,
+                          NMI,
+                          ClippEU,
+                          LogLossHashed defaultLogLossHashedSize,
+                          LikelihoodHashed defaultLogLossHashedSize,
+                          BIOF1,
+                          BIOF1Labels,
+                          TokenAccuracy,
+                          SoftFMeasure 1.0,
+                          SoftFMeasure 2.0,
+                          SoftFMeasure 0.25,
+                          ProbabilisticSoftFMeasure 1.0,
+                          ProbabilisticSoftFMeasure 2.0,
+                          ProbabilisticSoftFMeasure 0.25,
+                          CharMatch]
+
+extraInfo :: EvaluationScheme -> Maybe String
+extraInfo (EvaluationScheme GLEU []) = Just "\"Google GLEU\" not the grammar correction metric"
+extraInfo (EvaluationScheme BLEU [LowerCasing,
+                                  RegexpMatch _]) = Just "BLEU on lowercased strings, only Latin characters and digits considered"
+extraInfo _ = Nothing
+
+-- As we have only just started describing metrics (or, to be precise,
+-- evaluation schemes), we need to keep track of which metrics are
+-- already described and which are not.
+-- When all the metrics are described, this function should be removed.
+isEvaluationSchemeDescribed :: EvaluationScheme -> Bool
+isEvaluationSchemeDescribed (EvaluationScheme metric []) = isMetricDescribed metric
+isEvaluationSchemeDescribed _ = False
+
+isMetricDescribed :: Metric -> Bool
+isMetricDescribed (SoftFMeasure _) = True
+isMetricDescribed _ = False
+
+getEvaluationSchemeDescription :: EvaluationScheme -> String
+getEvaluationSchemeDescription (EvaluationScheme metric []) = getMetricDescription metric
+
+getMetricDescription :: Metric -> String
+getMetricDescription (SoftFMeasure _) =
+  [i|"Soft" F-measure on intervals, i.e. partial "hits" are considered. For instance,
+if a label `foo` is expected for the span 2-9 and this label is returned but with
+the span 8-12, it is counted as 1/4 for recall and 2/5 for precision.
+|]
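+
+-- A worked check of the interval example above: the overlap of 2-9 and 8-12
+-- covers 2 of the 8 expected positions (recall 2/8 = 1/4) and 2 of the 5
+-- returned positions (precision 2/5), so, assuming beta = 1, the soft F-score
+-- is the harmonic mean 2 * (1/4) * (2/5) / (1/4 + 2/5), i.e. about 0.31.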
+
+outContents :: Metric -> String
+outContents (SoftFMeasure _) = [hereLit|inwords:1-4
+inwords:1-3 indigits:5
+|]
+
+expectedScore :: EvaluationScheme -> MetricValue
+expectedScore (EvaluationScheme (SoftFMeasure beta) []) = weightedHarmonicMean beta precision recall
+  where precision = 0.25
+        recall = 0.75
+
+listOfAvailableEvaluationSchemes :: [EvaluationScheme]
+listOfAvailableEvaluationSchemes = map (\m -> EvaluationScheme m []) listOfAvailableMetrics
+                                   ++ [
+                                       EvaluationScheme BLEU [LowerCasing,
+                                                              RegexpMatch (fromRight undefined $ compileM "\\s+|[a-z0-9]+" [])]
+                                      ]
+
+allMetricsDescription :: String
+allMetricsDescription =
+  intercalate "\n\n\n" $ map formatEvaluationSchemeDescription listOfAvailableEvaluationSchemes
+
+formatEvaluationSchemeDescription :: EvaluationScheme -> String
+formatEvaluationSchemeDescription scheme@(EvaluationScheme metric _) = show scheme ++ "\n" ++ description
+  where description = if isEvaluationSchemeDescribed scheme
+                      then (getEvaluationSchemeDescription scheme)
+                           ++ "\nExample\n"
+                           ++ (pasteLines "Expected output" "Sample output")
+                           ++ concat (map (\(exp, out) -> pasteLines exp out) $ zip (lines $ testExpectedContents metric)
+                                                                                    (lines $ outContents metric))
+                           ++ "\nMetric value: " ++ (printf "%.4f" $ expectedScore scheme)
+                      else noDescription
+        noDescription = [hereLit|THE METRIC HAS NO DESCRIPTION YET, PLEASE ADD AN ISSUE TO https://gitlab.com/filipg/geval/issues
+IF YOU WANT TO HAVE IT DESCRIBED|]
+
+pasteLines :: String -> String -> String
+pasteLines a b = printf "%-35s %s\n" a b
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index e0cfb0f..83c17a5 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -27,6 +27,7 @@ import Data.Monoid ((<>))
 
 import GEval.Core
 import GEval.EvaluationScheme
+import GEval.MetricsMeta (extraInfo, listOfAvailableEvaluationSchemes, allMetricsDescription)
 import GEval.Common
 import GEval.CreateChallenge
 import GEval.LineByLine
@@ -35,6 +36,8 @@ import GEval.BlackBoxDebugging
 import GEval.Selector
 import GEval.Validation
 
+import Data.List (intercalate)
+
 import Data.Conduit.SmartSource
 
 fullOptionsParser = info (helper <*> optionsParser)
@@ -87,6 +90,10 @@ optionsParser = GEvalOptions
      (flag' Validate
       ( long "validate"
         <> help "Validate challenge, it searches for potential errors in the given challenge path, like missing columns, files or format data."))
+     <|>
+     (flag' ListMetrics
+      ( long "list-metrics"
+        <> help "List all metrics with their descriptions"))
     )
   <*> ((flag' FirstTheWorst
@@ -233,12 +240,21 @@ sel :: Maybe Metric -> Metric -> Metric
 sel Nothing m = m
 sel (Just m) _ = m
+
+
 
 metricReader :: Parser [EvaluationScheme]
 metricReader = many $ option auto               -- actually `some` should be used instead of `many`, the problem is that
                ( long "metric"                  -- --metric might be in the config.txt file...
                  <> short 'm'
                  <> metavar "METRIC"
-                 <> help "Metric to be used - RMSE, MSE, MAE, SMAPE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MultiLabel-Likelihood, MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, TokenAccuracy, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25), probabilistic soft F-measure (specify as Probabilistic-Soft-F1, Probabilistic-Soft-F2, Probabilistic-Soft-F0.25) or CharMatch" )
+                 <> help ("Metric to be used, e.g.:" ++ intercalate ", " (map
+                           (\s -> (show s) ++ (case extraInfo s of
+                                                 Just eI -> " (" ++ eI ++ ")"
+                                                 Nothing -> ""))
+                           listOfAvailableEvaluationSchemes)))
+
+
+-- RMSE, MSE, MAE, SMAPE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MultiLabel-Likelihood, MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, TokenAccuracy, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25), probabilistic soft F-measure (specify as Probabilistic-Soft-F1, Probabilistic-Soft-F2, Probabilistic-Soft-F0.25) or CharMatch" )
 
 altMetricReader :: Parser (Maybe EvaluationScheme)
 altMetricReader = optional $ option auto
@@ -341,6 +357,9 @@ runGEval''' (Just Submit) _ _ spec _ _ = do
 runGEval''' (Just Validate) _ _ spec _ _ = do
   validateChallenge spec
   return Nothing
+runGEval''' (Just ListMetrics) _ _ _ _ _ = do
+  listMetrics
+  return Nothing
 
 getGraphFilename :: Int -> FilePath -> FilePath
 getGraphFilename 0 fp = fp
@@ -401,3 +420,6 @@ Run:
 to validate a directory CHALLENGE representing a Gonito challenge.
 |]
   exitFailure
+
+listMetrics :: IO ()
+listMetrics = putStrLn allMetricsDescription
diff --git a/test/Spec.hs b/test/Spec.hs
index bbffbaf..25d95e3 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -4,6 +4,7 @@ import Test.Hspec
 
 import GEval.Metric
+import GEval.MetricsMeta (listOfAvailableEvaluationSchemes, isEvaluationSchemeDescribed, expectedScore, outContents)
 import GEval.Core
 import GEval.Common
 import GEval.EvaluationScheme
@@ -33,6 +34,8 @@ import Data.Map.Strict
 
 import Data.Conduit.List (consume)
 
+import System.FilePath
+
 import System.Directory
 import System.Process
 import System.Exit
@@ -508,15 +511,27 @@ main = hspec $ do
       shapify "a" `shouldBe` (WordShape "a")
       shapify "B5" `shouldBe` (WordShape "A9")
   describe "create challenges and validate them" $ do
-    (flip mapM_) listOfAvailableMetrics $ \metric -> do
-      it (show metric) $ do
+    (flip mapM_) listOfAvailableEvaluationSchemes $ \scheme -> do
+      it (show scheme) $ do
        withSystemTempDirectory "geval-validation-test" $ \tempDir -> do
          let spec = defaultGEvalSpecification {
                gesExpectedDirectory = Just tempDir,
-               gesMetrics = [EvaluationScheme metric []],
+               gesMetrics = [scheme],
                gesPrecision = Just 4 }
          createChallenge True tempDir spec
          validationChallenge tempDir spec
+  describe "test sample outputs" $ do
+    (flip mapM_ ) (Prelude.filter isEvaluationSchemeDescribed listOfAvailableEvaluationSchemes) $ \scheme@(EvaluationScheme metric _) -> do
+      it (show scheme) $ do
+       withSystemTempDirectory "geval-sample-output-test" $ \tempDir -> do
+         let spec = defaultGEvalSpecification {
+               gesExpectedDirectory = Just tempDir,
+               gesMetrics = [scheme] }
+         createChallenge True tempDir spec
+         let outFile = tempDir </> "test-A" </> "out.tsv"
+         writeFile outFile (outContents metric)
+         obtainedScore <- (runGEval ["--expected-directory", tempDir, "--out-directory", tempDir]) >>= extractVal
+         obtainedScore `shouldBe` (expectedScore scheme)
   describe "submit" $ do
     it "current branch" $ do
       runGitTest "branch-test" (\_ -> getCurrentBranch) `shouldReturn` "develop"
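
A minimal usage sketch of the new module, assuming a throwaway scratch program outside the patch; the imported names come from GEval.MetricsMeta as introduced above, and the listing it prints is the same one exposed by the new `geval --list-metrics` flag:

-- Hypothetical scratch module, not part of geval itself.
import GEval.MetricsMeta (allMetricsDescription,
                          listOfAvailableEvaluationSchemes,
                          isEvaluationSchemeDescribed)

main :: IO ()
main = do
  -- Print every evaluation scheme, followed by its description or by the
  -- placeholder text for schemes that have not been described yet.
  putStrLn allMetricsDescription
  -- Count the schemes that already have a description
  -- (with this patch, only the Soft-F variants).
  print $ length $ filter isEvaluationSchemeDescribed listOfAvailableEvaluationSchemes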