diff --git a/geval.cabal b/geval.cabal
index f29e0ec..362b770 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.1.2.0
+version:             1.2.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 6eddbda..370a4ca 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -96,7 +96,7 @@ defaultLogLossHashedSize = 10
 -- | evaluation metric
 data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI
               | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
-              | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE
+              | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
               deriving (Eq)
 
 instance Show Metric where
@@ -126,8 +126,7 @@ instance Show Metric where
   show BIOF1 = "BIO-F1"
   show BIOF1Labels = "BIO-F1-Labels"
   show MAE = "MAE"
-
-
+  show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ show beta  -- beta is rendered with 'show', e.g. "MultiLabel-F2.0"
 
 instance Read Metric where
   readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@@ -139,6 +138,9 @@ instance Read Metric where
   readsPrec p ('F':theRest) = case readsPrec p theRest of
     [(beta, theRest)] -> [(FMeasure beta, theRest)]
     _ -> []
+  readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':betaText) = case readsPrec p betaText of
+    [(beta, unparsed)] -> [(MultiLabelFMeasure beta, unparsed)]
+    _ -> []  -- no single unambiguous beta after the "MultiLabel-F" prefix: reject
   readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
     [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
     _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
@@ -173,6 +175,7 @@ getMetricOrdering Likelihood = TheHigherTheBetter
 getMetricOrdering BIOF1 = TheHigherTheBetter
 getMetricOrdering BIOF1Labels = TheHigherTheBetter
 getMetricOrdering MAE = TheLowerTheBetter
+getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter  -- whatever the beta, a larger value ranks better
 
 isInputNeeded :: Metric -> Bool
 isInputNeeded CharMatch = True
@@ -521,7 +524,6 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun
         getCount (True, False)  = (0, 1, 0)
         getCount (False, True)  = (0, 0, 1)
         getCount (False, False) = (0, 0, 0)
-        countAgg = CC.foldl countFolder (0, 0, 0)
 
 gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
   where
@@ -566,6 +568,13 @@ gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWit
            entities <- parseBioSequenceIntoEntities s
            return $ Prelude.map eraseNormalisation entities
 
+gevalCore' (MultiLabelFMeasure beta) _ =
+    gevalCoreWithoutInput labelsOf labelsOf (getCounts (==)) countAgg (fMeasureOnCounts beta)
+    where
+      -- Expected and actual lines alike are read as whitespace-separated labels;
+      -- this step cannot fail, hence the unconditional 'Right'.
+      labelsOf = Right . Prelude.map unpack . Data.Text.words
+
 countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
 countAgg = CC.foldl countFolder (0, 0, 0)
 
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index b00258e..bf60c5b 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -229,6 +229,22 @@ The output should be given in the BIO format with the normalized forms given aft
 The metric is F1 counted on entities (not labels).
 |] ++ (commonReadmeMDContents testName)
 
+readmeMDContents (MultiLabelFMeasure _) testName = [i|
+Tag names and their components
+==============================
+
+Tag names and their components (first name/surname) in a text.
+
+Tags:
+* person
+* surname
+* first-name
+
+For each tag a sequence of token IDs separated with commas should be given (after a colon).
+
+The metric is F-measure on labels (F1 when beta is 1).
+|] ++ (commonReadmeMDContents testName)
+
 readmeMDContents _ testName = [i|
 GEval sample challenge
 ======================
@@ -324,6 +340,10 @@ trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surnam
 O O O O O	There is no name here
 B-firstname/JOHN I-surname/VON I-surname/NEUMANN	John von Nueman
 |]
+trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith	person:3,4,5 first-name:4 surname:5
+Steven bloody Brown	person:1,3 first-name:1 surname:3
+James and James	first-name:1 first-name:3
+|]
 trainContents _ = [hereLit|0.06	0.39	0	0.206
 1.00	1.00	1	0.017
 317.8	5.20	67	0.048
@@ -365,6 +385,10 @@ devInContents BIOF1Labels = devInContents BIOF1
 devInContents BIOF1 = [hereLit|Adam and Eve
 Mr Jan Kowalski
 |]
+devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
+I see him
+Barbara
+|]
 devInContents _ = [hereLit|0.72	0	0.007
 9.54	62	0.054
 |]
@@ -404,6 +428,10 @@ devExpectedContents BIOF1Labels = devExpectedContents BIOF1
 devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
 O B-firstname/JAN B-surname/KOWALSKI
 |]
+devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2
+
+first-name:1
+|]
 devExpectedContents _ = [hereLit|0.82
 95.2
 |]
@@ -445,6 +473,10 @@ testInContents BIOF1Labels = testInContents BIOF1
 testInContents BIOF1 = [hereLit|Alan Tring
 No name here
 |]
+testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
+Nobody is there
+I saw Marketa
+|]
 testInContents _ = [hereLit|1.52	2	0.093
 30.06	14	0.009
 |]
@@ -486,6 +518,10 @@ testExpectedContents BIOF1Labels = testExpectedContents BIOF1
 testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
 O O O
 |]
+testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3
+
+first-name:3
+|]
 testExpectedContents _ = [hereLit|0.11
 17.2
 |]
diff --git a/src/GEval/PrecisionRecall.hs b/src/GEval/PrecisionRecall.hs
index 62b2fed..bcaad4b 100644
--- a/src/GEval/PrecisionRecall.hs
+++ b/src/GEval/PrecisionRecall.hs
@@ -3,7 +3,8 @@
 module GEval.PrecisionRecall(calculateMAPForOneResult,
                              fMeasure, f1Measure, f2Measure, precision, recall,
                              fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
-                             precisionAndRecall, precisionAndRecallFromCounts, maxMatch, maxMatchOnOrdered)
+                             precisionAndRecall, precisionAndRecallFromCounts,
+                             maxMatch, maxMatchOnOrdered, getCounts)
        where
 
 import GEval.Common
@@ -30,8 +31,8 @@ f1Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double
 f1Measure = fMeasure 1.0
 
 -- | Calculates both generalized) F-measure
-fMeasure :: Double
-         -> (a -> b -> Bool)  -- ^ beta parameter
+fMeasure :: Double          -- ^ beta parameter
+         -> (a -> b -> Bool)  -- ^ function to check whether there is a match
          -> [a]             -- ^ the ground truth
          -> [b]             -- ^ what we got
          -> Double          -- ^ f-Measure
@@ -55,6 +56,11 @@ fMeasureOnCounts beta (tp, nbExpected, nbGot) =
 countFolder :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
 countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
 
+-- | Per-item counts: (matches found by 'maxMatch', number expected, number got).
+getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int)
+getCounts isMatch (wanted, actual) =
+  (maxMatch isMatch wanted actual, length wanted, length actual)
+
 -- | Calculates both precision and recall.
 --
 -- (See https://en.wikipedia.org/wiki/Precision_and_recall)
diff --git a/test/Spec.hs b/test/Spec.hs
index 318d73e..6a905b6 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -212,6 +212,11 @@ main = hspec $ do
   describe "Likelihood" $ do
     it "simple" $ do
       runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
+  describe "MultiLabel-F" $ do
+    it "simple" $  -- fixture totals: 5 matched, 6 expected, 9 returned labels
+      runGEvalTest "multilabel-f1-simple" `shouldReturnAlmost` 0.66666666666
+    it "simple F2" $  -- fixture totals: 3 matched, 7 expected, 6 returned labels
+      runGEvalTest "multilabel-f2-simple" `shouldReturnAlmost` 0.441176470588235
   describe "evaluating single lines" $ do
     it "RMSE" $ do
       gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")
diff --git a/test/multilabel-f1-simple/multilabel-f1-simple-solution/test-A/out.tsv b/test/multilabel-f1-simple/multilabel-f1-simple-solution/test-A/out.tsv
new file mode 100644
index 0000000..6a8bd3a
--- /dev/null
+++ b/test/multilabel-f1-simple/multilabel-f1-simple-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+foo bar baz
+uuu
+foo bar baz
+qqq aaa
diff --git a/test/multilabel-f1-simple/multilabel-f1-simple/config.txt b/test/multilabel-f1-simple/multilabel-f1-simple/config.txt
new file mode 100644
index 0000000..b79da4c
--- /dev/null
+++ b/test/multilabel-f1-simple/multilabel-f1-simple/config.txt
@@ -0,0 +1 @@
+--metric MultiLabel-F1
diff --git a/test/multilabel-f1-simple/multilabel-f1-simple/test-A/expected.tsv b/test/multilabel-f1-simple/multilabel-f1-simple/test-A/expected.tsv
new file mode 100644
index 0000000..64612c3
--- /dev/null
+++ b/test/multilabel-f1-simple/multilabel-f1-simple/test-A/expected.tsv
@@ -0,0 +1,4 @@
+foo bar baz
+
+foo
+qqq qqq
diff --git a/test/multilabel-f2-simple/multilabel-f2-simple-solution/test-A/out.tsv b/test/multilabel-f2-simple/multilabel-f2-simple-solution/test-A/out.tsv
new file mode 100644
index 0000000..faa9fc7
--- /dev/null
+++ b/test/multilabel-f2-simple/multilabel-f2-simple-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+bar:2
+  other:6
+xyz:12
+ t:6   t:8   t:9
diff --git a/test/multilabel-f2-simple/multilabel-f2-simple/config.txt b/test/multilabel-f2-simple/multilabel-f2-simple/config.txt
new file mode 100644
index 0000000..49da0d0
--- /dev/null
+++ b/test/multilabel-f2-simple/multilabel-f2-simple/config.txt
@@ -0,0 +1 @@
+--metric MultiLabel-F2
diff --git a/test/multilabel-f2-simple/multilabel-f2-simple/test-A/expected.tsv b/test/multilabel-f2-simple/multilabel-f2-simple/test-A/expected.tsv
new file mode 100644
index 0000000..80885dd
--- /dev/null
+++ b/test/multilabel-f2-simple/multilabel-f2-simple/test-A/expected.tsv
@@ -0,0 +1,4 @@
+foo:1 bar:2  baz:3
+other:5
+
+t:6 t:7 t:8