Add min-cartesian-frequency (as optional value)

This commit is contained in:
Filip Graliński 2019-01-11 10:16:39 +01:00
parent dbe1613052
commit de901d4c64
3 changed files with 12 additions and 3 deletions

View File

@ -6,5 +6,6 @@ data BlackBoxDebuggingOptions = BlackBoxDebuggingOptions {
bbdoMinFrequency :: Integer, bbdoMinFrequency :: Integer,
bbdoWordShapes :: Bool, bbdoWordShapes :: Bool,
bbdoBigrams :: Bool, bbdoBigrams :: Bool,
bbdoCartesian :: Bool bbdoCartesian :: Bool,
bbdoMinCartesianFrequency :: Maybe Integer
} }

View File

@ -32,6 +32,7 @@ import qualified Data.Conduit.Text as CT
import Data.Text import Data.Text
import Data.Text.Encoding import Data.Text.Encoding
import Data.Conduit.Rank import Data.Conduit.Rank
import Data.Maybe (fromMaybe)
import Data.List (sortBy, sort, concat) import Data.List (sortBy, sort, concat)
@ -129,7 +130,7 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) =
featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m () featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
featureExtractor spec bbdo = CC.map extract featureExtractor spec bbdo = CC.map extract
.| finalFeatures (bbdoCartesian bbdo) (bbdoMinFrequency bbdo) .| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
.| CC.map unwrapFeatures .| CC.map unwrapFeatures
.| CC.concat .| CC.concat
where extract (rank, line@(LineRecord _ _ _ _ score)) = where extract (rank, line@(LineRecord _ _ _ _ score)) =

View File

@ -165,13 +165,16 @@ specParser = GEvalSpecification
) )
) )
-- | Default for the @--min-frequency@ command-line option, used when the
-- option is not given explicitly.
defaultMinFrequency :: Integer
defaultMinFrequency = 1
blackBoxDebuggingOptionsParser :: Parser BlackBoxDebuggingOptions blackBoxDebuggingOptionsParser :: Parser BlackBoxDebuggingOptions
blackBoxDebuggingOptionsParser = BlackBoxDebuggingOptions blackBoxDebuggingOptionsParser = BlackBoxDebuggingOptions
<$> option auto <$> option auto
( long "min-frequency" ( long "min-frequency"
<> metavar "N" <> metavar "N"
<> help "Minimum frequency for the worst features" <> help "Minimum frequency for the worst features"
<> value 1 <> value defaultMinFrequency
<> showDefault) <> showDefault)
<*> switch <*> switch
( long "word-shapes" ( long "word-shapes"
@ -182,6 +185,10 @@ blackBoxDebuggingOptionsParser = BlackBoxDebuggingOptions
<*> switch <*> switch
( long "cartesian" ( long "cartesian"
<> help "Consider Cartesian combination of all features (computationally expensive!)") <> help "Consider Cartesian combination of all features (computationally expensive!)")
<*> optional (option auto
( long "min-cartesian-frequency"
<> metavar "N"
<> help "When combining features into Cartesian features, consider only features whose frequency exceeds the threshold given"))
singletonMaybe :: Maybe a -> Maybe [a] singletonMaybe :: Maybe a -> Maybe [a]
singletonMaybe (Just x) = Just [x] singletonMaybe (Just x) = Just [x]