From de901d4c64997b6c472eb558c43cf6f102a143cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Grali=C5=84ski?= Date: Fri, 11 Jan 2019 10:16:39 +0100 Subject: [PATCH] Add min-cartesian-frequency (as optional value) --- src/GEval/BlackBoxDebugging.hs | 3 ++- src/GEval/LineByLine.hs | 3 ++- src/GEval/OptionsParser.hs | 9 ++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/GEval/BlackBoxDebugging.hs b/src/GEval/BlackBoxDebugging.hs index a49a76d..fa1620d 100644 --- a/src/GEval/BlackBoxDebugging.hs +++ b/src/GEval/BlackBoxDebugging.hs @@ -6,5 +6,6 @@ data BlackBoxDebuggingOptions = BlackBoxDebuggingOptions { bbdoMinFrequency :: Integer, bbdoWordShapes :: Bool, bbdoBigrams :: Bool, - bbdoCartesian :: Bool + bbdoCartesian :: Bool, + bbdoMinCartesianFrequency :: Maybe Integer } diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index 79e5bba..b71b8ff 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -32,6 +32,7 @@ import qualified Data.Conduit.Text as CT import Data.Text import Data.Text.Encoding import Data.Conduit.Rank +import Data.Maybe (fromMaybe) import Data.List (sortBy, sort, concat) @@ -129,7 +130,7 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) = featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m () featureExtractor spec bbdo = CC.map extract - .| finalFeatures (bbdoCartesian bbdo) (bbdoMinFrequency bbdo) + .| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo)) .| CC.map unwrapFeatures .| CC.concat where extract (rank, line@(LineRecord _ _ _ _ score)) = diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 9c53e54..781863e 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -165,13 +165,16 @@ specParser = GEvalSpecification ) ) +defaultMinFrequency :: Integer +defaultMinFrequency = 1 + blackBoxDebuggingOptionsParser :: Parser 
BlackBoxDebuggingOptions blackBoxDebuggingOptionsParser = BlackBoxDebuggingOptions <$> option auto ( long "min-frequency" <> metavar "N" <> help "Minimum frequency for the worst features" - <> value 1 + <> value defaultMinFrequency <> showDefault) <*> switch ( long "word-shapes" @@ -182,6 +185,10 @@ blackBoxDebuggingOptionsParser = BlackBoxDebuggingOptions <*> switch ( long "cartesian" <> help "Consider Cartesian combination of all features (computationally expensive!)") + <*> optional (option auto + ( long "min-cartesian-frequency" + <> metavar "N" + <> help "When combining features into Cartesian features, consider only features whose frequency exceeds the threshold given")) singletonMaybe :: Maybe a -> Maybe [a] singletonMaybe (Just x) = Just [x]