diff --git a/src/GEval/MetricsMeta.hs b/src/GEval/MetricsMeta.hs
index c4b7b73..d5a887e 100644
--- a/src/GEval/MetricsMeta.hs
+++ b/src/GEval/MetricsMeta.hs
@@ -95,6 +95,7 @@ isEvaluationSchemeDescribed (EvaluationScheme metric []) = isMetricDescribed met
 isEvaluationSchemeDescribed _ = False

 isMetricDescribed :: Metric -> Bool
+isMetricDescribed (MultiLabelFMeasure 1.0 ExactMatch) = True
 isMetricDescribed (SoftFMeasure _) = True
 isMetricDescribed (Soft2DFMeasure _) = True
 isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
@@ -108,6 +109,17 @@ getEvaluationSchemeDescription :: EvaluationScheme -> String
 getEvaluationSchemeDescription (EvaluationScheme metric []) = getMetricDescription metric

 getMetricDescription :: Metric -> String
+getMetricDescription (MultiLabelFMeasure 1.0 ExactMatch) =
+  [i|F-measure (or F-score), i.e. the harmonic mean of precision and
+recall. It can be calculated for any set of labels. Precision is the fraction
+of correct labels among the ones in the output, whereas recall is the
+fraction of instances in the gold standard that were correctly
+retrieved. Counts for precision and recall are calculated for the
+whole test set; in other words, it is a micro-average (it is NOT the
+case that F-scores are calculated for each document or label class
+separately and then averaged). For a macro-average per document, see
+Mean/Multilabel-F1.
+|]
 getMetricDescription (SoftFMeasure _) =
   [i|"Soft" F-measure on intervals, i.e. partial "hits" are considered. For instance,
 if a label `foo` is expected for the span 2-9 and this label is returned but with
@@ -160,6 +172,10 @@ Accuracy is calculated separately for each item and then averaged.
 |]

 outContents :: Metric -> String
+outContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 first-name/3
+surname/2
+first-name/3
+|]
 outContents (SoftFMeasure _) = [hereLit|inwords:1-4
 inwords:1-3 indigits:5
 |]
@@ -183,6 +199,7 @@ N:1-4 V:6-7 A:9-13
 |]

 expectedScore :: EvaluationScheme -> MetricValue
+expectedScore (EvaluationScheme (MultiLabelFMeasure 1.0 ExactMatch) []) = 0.6666
 expectedScore (EvaluationScheme (SoftFMeasure beta) []) =
   let precision = 0.25
       recall = 0.75
@@ -240,6 +257,9 @@ formatEvaluationSchemeDescription scheme@(EvaluationScheme metric _) = show sche
 IF YOU WANT TO HAVE IT DESCRIBED|]

 formatDescription :: Metric -> String
+formatDescription (MultiLabelFMeasure _ ExactMatch) = [hereLit|Any labels separated by spaces can be used. They are
+not interpreted in any way when the metric is calculated.
+|]
 formatDescription (SoftFMeasure _) = [hereLit|Each line is a sequence of entities separated by spaces,
 each entity is of the form LABEL:SPAN, where LABEL is any label and SPAN is defined using single integers, intervals
 or such units separated with commas.
@@ -265,6 +285,11 @@ formatDescription CER = [hereLit|Any text, whitespace and punctuation marks are taken into account.
 |]

 scoreExplanation :: EvaluationScheme -> Maybe String
+scoreExplanation (EvaluationScheme (MultiLabelFMeasure _ ExactMatch) [])
+  = Just [hereLit|Out of the total 5 labels in the output, 3 are correct (person/1,3, first-name/1 and
+first-name/3), hence precision is 3/5=0.6, whereas out of the 4 labels in the gold standard,
+again 3 were retrieved, so recall is 3/4=0.75. The harmonic mean of precision and recall
+is 2/(1/0.75 + 1/0.6) = 2/(4/3 + 5/3) = 2/3 = 0.6666|]
 scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
   = Just [hereLit|We have a partial (0.75) success for the entity `inwords:1-4`, hence Recall = 0.75/1 = 0.75,
 Precision = (0 + 0.75 + 0) / 3 = 0.25, so F-score = 0.375|]
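To make the micro-averaging described above concrete, here is a minimal, self-contained Haskell sketch, not GEval's actual implementation: the names countHits and microFMeasure, and the per-document gold-standard lists, are made up for illustration (the expected file for this metric is not part of this diff). Hits and label counts are summed over the whole test set first, and only then is a single precision, recall and F-score computed, reproducing the 0.6666 of the worked example.

-- Sketch of micro-averaged MultiLabel-F1 with ExactMatch (illustration only).
import Data.List (delete)

-- Exact-match hits for one document; each gold label can be matched at most once.
countHits :: [String] -> [String] -> Int
countHits _ [] = 0
countHits gold (o:os)
  | o `elem` gold = 1 + countHits (delete o gold) os
  | otherwise     = countHits gold os

-- Micro-average: totals are accumulated over all documents before a single
-- precision, recall and F-score (beta = 1) are computed.
microFMeasure :: [[String]] -> [[String]] -> Double
microFMeasure goldDocs outDocs = 2 * precision * recall / (precision + recall)
  where
    hits      = fromIntegral $ sum $ zipWith countHits goldDocs outDocs
    precision = hits / fromIntegral (length (concat outDocs))
    recall    = hits / fromIntegral (length (concat goldDocs))

-- The output documents are the three lines of outContents above; the gold
-- standard below is an assumed layout with 4 labels, 3 of which are matched,
-- so precision = 3/5, recall = 3/4 and F1 = 2/3 ~ 0.6666.
main :: IO ()
main = print $ microFMeasure
  [ ["person/1,3", "first-name/1", "first-name/3"]  -- gold, document 1 (assumed)
  , ["surname/4"]                                    -- gold, document 2 (assumed)
  , [] ]                                             -- gold, document 3 (assumed)
  [ ["person/1,3", "first-name/1", "first-name/3"]   -- output, document 1
  , ["surname/2"]                                     -- output, document 2
  , ["first-name/3"] ]                                -- output, document 3

Note that matching is done per document: the output label surname/2 in document 2 does not count as a hit even though a surname label occurs in the (assumed) gold standard, because exact matching is applied within each line of the test set.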