diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index cbe0184..526129f 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -48,6 +48,8 @@ module GEval.Core
       somethingWrongWithFilesMessage
     ) where
 
+import Debug.Trace
+
 import GEval.Metric
 import GEval.EvaluationScheme
 
@@ -649,15 +651,16 @@ gevalCore' (ProbabilisticSoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnot
 
 gevalCore' (Soft2DFMeasure beta) _ = gevalCoreWithoutInput parseLabeledClippings
                                                            parseLabeledClippings
-                                                           get2DCounts
-                                                           countAgg
-                                                           (fMeasureOnCounts beta)
+                                                           count2DFScore -- item step: an F-score for each line
+                                                           averageC -- aggregation: the per-line F-scores are averaged
+                                                           id -- final step: the average is reported as is
                                                            noGraph
                       where
                         parseLabeledClippings = controlledParse lineLabeledClippingsParser
-                        get2DCounts (expected, got) = (coveredBy expected got,
-                                                       totalArea expected,
-                                                       totalArea got)
+                        count2DFScore (expected, got) = fMeasureOnCounts beta (tpArea, expArea, gotArea)
+                                                        where tpArea = coveredBy expected got -- presumably the overlapping area; confirm in coveredBy
+                                                              expArea = totalArea expected
+                                                              gotArea = totalArea got
 
 gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep noGraph
   where
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index e209fb9..2b72c6e 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -743,8 +743,8 @@ testExpectedContents MultiLabelLikelihood = testExpectedContents MultiLabelLogLo
 testExpectedContents MultiLabelLogLoss = [hereLit|SADNESS
 HATE
 |]
-testExpectedContents (Soft2DFMeasure _) = [hereLit|3/0,0,100,100
-1/10,10,1000,1000
+testExpectedContents (Soft2DFMeasure _) = [hereLit|foo:3/0,0,100,100
+bar:1/50,50,1000,1000
 |]
 testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10
 1/10,10,1000,1000/10
diff --git a/src/GEval/MetricsMeta.hs b/src/GEval/MetricsMeta.hs
index 1c007df..9720c81 100644
--- a/src/GEval/MetricsMeta.hs
+++ b/src/GEval/MetricsMeta.hs
@@ -87,6 +87,7 @@ isEvaluationSchemeDescribed _ = False
 
 isMetricDescribed :: Metric -> Bool
 isMetricDescribed (SoftFMeasure _) = True
+isMetricDescribed (Soft2DFMeasure _) = True
 isMetricDescribed _ = False
 
 getEvaluationSchemeDescription :: EvaluationScheme -> String
@@ -96,18 +97,33 @@ getMetricDescription :: Metric -> String
 getMetricDescription (SoftFMeasure _) =
   [i|"Soft" F-measure on intervals, i.e. partial "hits" are considered. For instance,
 if a label `foo` is expected for the span 2-9 and this label is returned but with
-the span 8-12, it is counted as 1/4 for recall and 2/5 for precision.
+the span 8-12, it is counted as 2/8=0.25 instead of 0 or 1 when precision/recall counts
+are gathered.
+|]
+getMetricDescription (Soft2DFMeasure _) = -- example below: overlap 50*50=2500, expected area 20000, returned area 5000
+  [i|"Soft" F-measure on rectangles, i.e. precision and recall are calculated for areas. For instance,
+if a label `foo` is expected for the rectangle (0, 0)-(100, 200) and this label is returned but with
+the rectangle (50, 100)-(150, 150), it is treated as recall=1/8 and precision=1/2. For each item (line)
+the F-score is evaluated separately and the scores are finally averaged.
 |]
 
 outContents :: Metric -> String
 outContents (SoftFMeasure _) = [hereLit|inwords:1-4
 inwords:1-3 indigits:5
 |]
+outContents (Soft2DFMeasure _) = [hereLit|foo:3/250,130,340,217
+bar:1/0,0,100,200 foo:1/40,50,1000,1000 bar:1/400,600,1000,1000
+|]
 
 expectedScore :: EvaluationScheme -> MetricValue
-expectedScore (EvaluationScheme (SoftFMeasure beta) []) = weightedHarmonicMean beta precision recall
-  where precision = 0.25
+expectedScore (EvaluationScheme (SoftFMeasure beta) [])
+  = let precision = 0.25
         recall = 0.75
+      in weightedHarmonicMean beta precision recall
+expectedScore (EvaluationScheme (Soft2DFMeasure beta) [])
+  = let precision = 0.21117747440273 -- 247500 / (20000 + 912000 + 240000), cf. scoreExplanation
+        recall = 0.27423822714681 -- 247500 / 902500, cf. scoreExplanation
+      in (weightedHarmonicMean beta precision recall) / 2.0 -- mean over two items; the first item scores 0
 
 listOfAvailableEvaluationSchemes :: [EvaluationScheme]
 listOfAvailableEvaluationSchemes = map (\m -> EvaluationScheme m []) listOfAvailableMetrics
@@ -143,11 +159,20 @@ formatDescription (SoftFMeasure _) = [hereLit|Each line is a sequence of entitie
 the form LABEL:SPAN, where LABEL is any label and SPAN is defined using single integers, intervals or such
 units separated with commas.
 |]
+formatDescription (Soft2DFMeasure _) = [hereLit|Each line is a sequence of entities separated by spaces, each entity is of
+the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page number (starting from 1) and
+(X0, Y0) and (X1, Y1) are clipping corners.
+|]
 
 scoreExplanation :: EvaluationScheme -> Maybe String
 scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
   = Just [hereLit|We have a partial (0.75) success for the entity `inwords:1-4`, hence Recall = 0.75/1 = 0.75,
 Precision = (0 + 0.75 + 0) / 3 = 0.25, so F-score = 0.375|]
+scoreExplanation (EvaluationScheme (Soft2DFMeasure _) [])
+  = Just [hereLit|The F-score for the first item is 0 (the entity was found in the completely wrong place).
+As far as the second item is concerned, the total area covered by the output is 50*150+600*400=247500.
+Hence, recall is 247500/902500=0.274 and precision is 247500/(20000+912000+240000)=0.211. Therefore, the F-score
+for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
 
 pasteLines :: String -> String -> String
 pasteLines a b = printf "%-35s %s\n" a b
diff --git a/test/Spec.hs b/test/Spec.hs
index f635f88..57222e9 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -276,7 +276,7 @@ main = hspec $ do
       runGEvalTest "probabilistic-soft-f1-calibrated" `shouldReturnAlmost` 0.88888888888
   describe "Soft2D-F1" $ do
     it "simple test" $ do
-      runGEvalTest "soft2d-f1-simple" `shouldReturnAlmost` 0.30152621462832535
+      runGEvalTest "soft2d-f1-simple" `shouldReturnAlmost` 0.218457349437945
   describe "test edit-distance library" $ do
     it "for handling UTF8" $ do
       levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1
@@ -551,7 +551,7 @@ main = hspec $ do
           let outFile = tempDir </> "test-A" </> "out.tsv"
           writeFile outFile (outContents metric)
           obtainedScore <- (runGEval ["--expected-directory", tempDir, "--out-directory", tempDir]) >>= extractVal
-          obtainedScore `shouldBe` (expectedScore scheme)
+          obtainedScore `shouldBeAlmost` (expectedScore scheme)
   describe "submit" $ do
     it "current branch" $ do
       runGitTest "branch-test" (\_ -> getCurrentBranch) `shouldReturn` "develop"