start work on ClippEU

This commit is contained in:
Filip Gralinski 2016-08-02 08:37:29 +02:00 committed by Filip Gralinski
parent 0835bc3a4e
commit c3a6d94d1c
5 changed files with 108 additions and 6 deletions

View File

@ -19,6 +19,7 @@ library
GEval.CreateChallenge GEval.CreateChallenge
, GEval.OptionsParser , GEval.OptionsParser
, GEval.BLEU , GEval.BLEU
, GEval.ClippEU
, GEval.PrecisionRecall , GEval.PrecisionRecall
build-depends: base >= 4.7 && < 5 build-depends: base >= 4.7 && < 5
, cond , cond
@ -35,6 +36,7 @@ library
, text , text
, unix , unix
, fgl , fgl
, attoparsec
default-language: Haskell2010 default-language: Haskell2010
executable geval executable geval
@ -56,6 +58,8 @@ test-suite geval-test
, hspec , hspec
, HUnit , HUnit
, optparse-applicative , optparse-applicative
, text
, attoparsec
ghc-options: -threaded -rtsopts -with-rtsopts=-N ghc-options: -threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010 default-language: Haskell2010

View File

@ -5,10 +5,6 @@ module GEval.BLEU
import qualified Data.MultiSet as MS import qualified Data.MultiSet as MS
import Data.List (minimumBy, zip, zip3, zip4) import Data.List (minimumBy, zip, zip3, zip4)
import Debug.Trace
bleuStep :: Ord a => [[a]] -> [a] -> (Int, Int, Int, Int, Int, Int, Int, Int, Int) bleuStep :: Ord a => [[a]] -> [a] -> (Int, Int, Int, Int, Int, Int, Int, Int, Int)
bleuStep refs trans = (prec1, prec2, prec3, prec4, closestLen, len1, len2, len3, len4) bleuStep refs trans = (prec1, prec2, prec3, prec4, closestLen, len1, len2, len3, len4)
where prec1 = precisionCountForNgrams id where prec1 = precisionCountForNgrams id

81
src/GEval/ClippEU.hs Normal file
View File

@ -0,0 +1,81 @@
module GEval.ClippEU
where
import Data.Attoparsec.Text
import Data.Text
import Control.Applicative
import Control.Exception
newtype PageNumber = PageNumber Int
deriving (Eq, Show)
data Point = Point Int Int
deriving (Show, Eq)
data Rectangle = Rectangle Point Point
deriving (Show, Eq)
data Clipping = Clipping PageNumber Rectangle
deriving (Show, Eq)
data ClippingSpec = ClippingSpec PageNumber Rectangle Rectangle
deriving (Show, Eq)
matchClippingToSpec :: ClippingSpec -> Clipping -> Bool
matchClippingToSpec (ClippingSpec pageNo (Rectangle (Point x0' y0') (Point x1' y1'))
(Rectangle (Point x0'' y0'') (Point x1'' y1'')))
(Clipping pageNo' (Rectangle (Point x0 y0) (Point x1 y1))) =
pageNo == pageNo' &&
isInside x0 x0' x0'' && isInside y0 y0' y0'' && isInside x1 x1' x1'' && isInside y1 y1' y1''
where isInside c c' c'' = c >= c' && c <= c'' || c <= c' && c >= c''
lineClippingsParser :: Parser [Clipping]
lineClippingsParser = sepByWhitespaces clippingParser
clippingParser :: Parser Clipping
clippingParser = do
pageNo <- PageNumber <$> decimal
char '/'
rectangle <- rectangleParser
return $ Clipping pageNo rectangle
lineClippingSpecsParser :: Parser [ClippingSpec]
lineClippingSpecsParser = sepByWhitespaces clippingSpecParser
sepByWhitespaces :: Parser a -> Parser [a]
sepByWhitespaces parser = possibleWhitespace *> parser `sepBy` whitespace <* possibleWhitespace <* endOfInput
clippingSpecParser :: Parser ClippingSpec
clippingSpecParser = do
pageNo <- PageNumber <$> decimal
char '/'
rectangle <- rectangleParser
char '/'
margin <- decimal
return $ ClippingSpec pageNo rectangle (extendedRectangle margin rectangle)
possibleWhitespace = many' (satisfy isHorizontalSpace)
whitespace = many1 (satisfy isHorizontalSpace)
extendedRectangle :: Int -> Rectangle -> Rectangle
extendedRectangle margin (Rectangle (Point x0 y0) (Point x1 y1)) =
Rectangle (Point (x0 `nonNegativeDiff` margin) (y0 `nonNegativeDiff` margin))
(Point (x1 + margin) (y1 + margin))
nonNegativeDiff x y
| x < y = 0
| otherwise = x - y
rectangleParser :: Parser Rectangle
rectangleParser = do
x0 <- decimal
char ','
y0 <- decimal
char ','
x1 <- decimal
char ','
y1 <- decimal
return $ Rectangle (Point x0 y0) (Point x1 y1)

View File

@ -8,8 +8,6 @@ module GEval.PrecisionRecall(fMeasure, f1Measure, f2Measure, precision, recall,
import Data.Graph.Inductive import Data.Graph.Inductive
import Data.Graph.Inductive.Query.MaxFlow import Data.Graph.Inductive.Query.MaxFlow
import Debug.Trace
f2Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double f2Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double
f2Measure = fMeasure 2.0 f2Measure = fMeasure 2.0

View File

@ -1,10 +1,15 @@
{-# LANGUAGE OverloadedStrings #-}
import Test.Hspec import Test.Hspec
import GEval.Core import GEval.Core
import GEval.OptionsParser import GEval.OptionsParser
import GEval.BLEU import GEval.BLEU
import GEval.ClippEU
import GEval.PrecisionRecall import GEval.PrecisionRecall
import Data.Attoparsec.Text
import Options.Applicative import Options.Applicative
import Data.Text
import qualified Test.HUnit as HU import qualified Test.HUnit as HU
main :: IO () main :: IO ()
@ -65,6 +70,24 @@ main = hspec $ do
precision alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 0.5 precision alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 0.5
recall alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 1.0 recall alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 1.0
f1Measure alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3 , 4, 5] `shouldBeAlmost` 0.66666666666666 f1Measure alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3 , 4, 5] `shouldBeAlmost` 0.66666666666666
describe "ClippEU" $ do
it "parsing rectangles" $ do
let (Right r) = parseOnly (lineClippingsParser <* endOfInput) "2/0,0,2,3 10/20,30,40,50 18/0,1,500,3 "
r `shouldBe` [Clipping (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3)),
Clipping (PageNumber 10) (Rectangle (Point 20 30) (Point 40 50)),
Clipping (PageNumber 18) (Rectangle (Point 0 1) (Point 500 3))]
it "no rectangles" $ do
let (Right r) = parseOnly (lineClippingsParser <* endOfInput) ""
r `shouldBe` []
it "just spaces" $ do
let (Right r) = parseOnly lineClippingsParser " "
r `shouldBe` []
it "parsing specs" $ do
let (Right r) = parseOnly lineClippingSpecsParser " 2/0,0,2,3/5 10/20,30,40,50/10"
r `shouldBe` [ClippingSpec (PageNumber 2) (Rectangle (Point 0 0) (Point 2 3))
(Rectangle (Point 0 0) (Point 7 8)),
ClippingSpec (PageNumber 10) (Rectangle (Point 20 30) (Point 40 50))
(Rectangle (Point 10 20) (Point 50 60))]
neverMatch :: Char -> Int -> Bool neverMatch :: Char -> Int -> Bool