From c3a6d94d1c3182f890bd4537b1b99417d17c3005 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Tue, 2 Aug 2016 08:37:29 +0200 Subject: [PATCH] start work on ClippEU --- geval.cabal | 4 ++ src/GEval/BLEU.hs | 4 -- src/GEval/ClippEU.hs | 81 ++++++++++++++++++++++++++++++++++++ src/GEval/PrecisionRecall.hs | 2 - test/Spec.hs | 23 ++++++++++ 5 files changed, 108 insertions(+), 6 deletions(-) create mode 100644 src/GEval/ClippEU.hs diff --git a/geval.cabal b/geval.cabal index 5176f1a..b12a371 100644 --- a/geval.cabal +++ b/geval.cabal @@ -19,6 +19,7 @@ library GEval.CreateChallenge , GEval.OptionsParser , GEval.BLEU + , GEval.ClippEU , GEval.PrecisionRecall build-depends: base >= 4.7 && < 5 , cond @@ -35,6 +36,7 @@ library , text , unix , fgl + , attoparsec default-language: Haskell2010 executable geval @@ -56,6 +58,8 @@ test-suite geval-test , hspec , HUnit , optparse-applicative + , text + , attoparsec ghc-options: -threaded -rtsopts -with-rtsopts=-N default-language: Haskell2010 diff --git a/src/GEval/BLEU.hs b/src/GEval/BLEU.hs index 0399676..6179d90 100644 --- a/src/GEval/BLEU.hs +++ b/src/GEval/BLEU.hs @@ -5,10 +5,6 @@ module GEval.BLEU import qualified Data.MultiSet as MS import Data.List (minimumBy, zip, zip3, zip4) -import Debug.Trace - - - bleuStep :: Ord a => [[a]] -> [a] -> (Int, Int, Int, Int, Int, Int, Int, Int, Int) bleuStep refs trans = (prec1, prec2, prec3, prec4, closestLen, len1, len2, len3, len4) where prec1 = precisionCountForNgrams id diff --git a/src/GEval/ClippEU.hs b/src/GEval/ClippEU.hs new file mode 100644 index 0000000..d1fc683 --- /dev/null +++ b/src/GEval/ClippEU.hs @@ -0,0 +1,81 @@ + +module GEval.ClippEU + where + +import Data.Attoparsec.Text +import Data.Text +import Control.Applicative +import Control.Exception + +newtype PageNumber = PageNumber Int + deriving (Eq, Show) + +data Point = Point Int Int + deriving (Show, Eq) + +data Rectangle = Rectangle Point Point + deriving (Show, Eq) + +data Clipping = Clipping PageNumber Rectangle + deriving (Show, Eq) + +data ClippingSpec = ClippingSpec PageNumber Rectangle Rectangle + deriving (Show, Eq) + +matchClippingToSpec :: ClippingSpec -> Clipping -> Bool +matchClippingToSpec (ClippingSpec pageNo (Rectangle (Point x0' y0') (Point x1' y1')) + (Rectangle (Point x0'' y0'') (Point x1'' y1''))) + (Clipping pageNo' (Rectangle (Point x0 y0) (Point x1 y1))) = + pageNo == pageNo' && + isInside x0 x0' x0'' && isInside y0 y0' y0'' && isInside x1 x1' x1'' && isInside y1 y1' y1'' + where isInside c c' c'' = c >= c' && c <= c'' || c <= c' && c >= c'' + + +lineClippingsParser :: Parser [Clipping] +lineClippingsParser = sepByWhitespaces clippingParser + +clippingParser :: Parser Clipping +clippingParser = do + pageNo <- PageNumber <$> decimal + char '/' + rectangle <- rectangleParser + return $ Clipping pageNo rectangle + +lineClippingSpecsParser :: Parser [ClippingSpec] +lineClippingSpecsParser = sepByWhitespaces clippingSpecParser + +sepByWhitespaces :: Parser a -> Parser [a] +sepByWhitespaces parser = possibleWhitespace *> parser `sepBy` whitespace <* possibleWhitespace <* endOfInput + +clippingSpecParser :: Parser ClippingSpec +clippingSpecParser = do + pageNo <- PageNumber <$> decimal + char '/' + rectangle <- rectangleParser + char '/' + margin <- decimal + return $ ClippingSpec pageNo rectangle (extendedRectangle margin rectangle) + +possibleWhitespace = many' (satisfy isHorizontalSpace) + +whitespace = many1 (satisfy isHorizontalSpace) + +extendedRectangle :: Int -> Rectangle -> Rectangle +extendedRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) = + Rectangle (Point (x0 `nonNegativeDiff` margin) (y0 `nonNegativeDiff` margin)) + (Point (x1 + margin) (y1 + margin)) + +nonNegativeDiff x y + | x < y = 0 + | otherwise = x - y + +rectangleParser :: Parser Rectangle +rectangleParser = do + x0 <- decimal + char ',' + y0 <- decimal + char ',' + x1 <- decimal + char ',' + y1 <- decimal + return $ Rectangle (Point x0 y0) (Point x1 y1) diff --git a/src/GEval/PrecisionRecall.hs b/src/GEval/PrecisionRecall.hs index 2302220..c1c4bef 100644 --- a/src/GEval/PrecisionRecall.hs +++ b/src/GEval/PrecisionRecall.hs @@ -8,8 +8,6 @@ module GEval.PrecisionRecall(fMeasure, f1Measure, f2Measure, precision, recall, import Data.Graph.Inductive import Data.Graph.Inductive.Query.MaxFlow -import Debug.Trace - f2Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double f2Measure = fMeasure 2.0 diff --git a/test/Spec.hs b/test/Spec.hs index 82c6959..5388efc 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -1,10 +1,15 @@ +{-# LANGUAGE OverloadedStrings #-} + import Test.Hspec import GEval.Core import GEval.OptionsParser import GEval.BLEU +import GEval.ClippEU import GEval.PrecisionRecall +import Data.Attoparsec.Text import Options.Applicative +import Data.Text import qualified Test.HUnit as HU main :: IO () @@ -65,6 +70,24 @@ main = hspec $ do precision alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 0.5 recall alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 1.0 f1Measure alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3 , 4, 5] `shouldBeAlmost` 0.66666666666666 + describe "ClippEU" $ do + it "parsing rectangles" $ do + let (Right r) = parseOnly (lineClippingsParser <* endOfInput) "2/0,0,2,3 10/20,30,40,50 18/0,1,500,3 " + r `shouldBe` [Clipping (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)), + Clipping (PageNumber 10) (Rectangle (Point 20 30) (Point 40 50)), + Clipping (PageNumber 18) (Rectangle (Point 0 1) (Point 500 3))] + it "no rectangles" $ do + let (Right r) = parseOnly (lineClippingsParser <* endOfInput) "" + r `shouldBe` [] + it "just spaces" $ do + let (Right r) = parseOnly lineClippingsParser " " + r `shouldBe` [] + it "parsing specs" $ do + let (Right r) = parseOnly lineClippingSpecsParser " 2/0,0,2,3/5 10/20,30,40,50/10" + r `shouldBe` [ClippingSpec (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)) + (Rectangle (Point 0 0) (Point 7 8)), + ClippingSpec (PageNumber 10) (Rectangle (Point 20 30) (Point 40 50)) + (Rectangle (Point 10 20) (Point 50 60))] neverMatch :: Char -> Int -> Bool