Add min-cartesian-frequency option (as optional value)

This commit is contained in:
Filip Graliński 2019-01-11 10:16:39 +01:00
parent dbe1613052
commit de901d4c64
3 changed files with 12 additions and 3 deletions

View File

@ -6,5 +6,6 @@ data BlackBoxDebuggingOptions = BlackBoxDebuggingOptions {
bbdoMinFrequency :: Integer,
bbdoWordShapes :: Bool,
bbdoBigrams :: Bool,
bbdoCartesian :: Bool
bbdoCartesian :: Bool,
bbdoMinCartesianFrequency :: Maybe Integer
}

View File

@ -32,6 +32,7 @@ import qualified Data.Conduit.Text as CT
import Data.Text
import Data.Text.Encoding
import Data.Conduit.Rank
import Data.Maybe (fromMaybe)
import Data.List (sortBy, sort, concat)
@ -129,7 +130,7 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) =
featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
featureExtractor spec bbdo = CC.map extract
.| finalFeatures (bbdoCartesian bbdo) (bbdoMinFrequency bbdo)
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
.| CC.map unwrapFeatures
.| CC.concat
where extract (rank, line@(LineRecord _ _ _ _ score)) =

View File

@ -165,13 +165,16 @@ specParser = GEvalSpecification
)
)
-- | Default value of the @--min-frequency@ command-line option, used when
-- the option is not given explicitly.
defaultMinFrequency :: Integer
defaultMinFrequency = 1
blackBoxDebuggingOptionsParser :: Parser BlackBoxDebuggingOptions
blackBoxDebuggingOptionsParser = BlackBoxDebuggingOptions
<$> option auto
( long "min-frequency"
<> metavar "N"
<> help "Minimum frequency for the worst features"
<> value 1
<> value defaultMinFrequency
<> showDefault)
<*> switch
( long "word-shapes"
@ -182,6 +185,10 @@ blackBoxDebuggingOptionsParser = BlackBoxDebuggingOptions
<*> switch
( long "cartesian"
<> help "Consider Cartesian combination of all features (computationally expensive!)")
<*> optional (option auto
( long "min-cartesian-frequency"
<> metavar "N"
<> help "When combining features into Cartesian features, consider only features whose frequency exceeds the threshold given"))
singletonMaybe :: Maybe a -> Maybe [a]
singletonMaybe (Just x) = Just [x]