Add cartesian features to black-box debugging

But it's very slow now, needs to be sped up
This commit is contained in:
Filip Graliński 2019-01-10 14:01:29 +01:00
parent 99e3a10791
commit 23aad86e72
4 changed files with 37 additions and 14 deletions

View File

@ -5,5 +5,6 @@ module GEval.BlackBoxDebugging
data BlackBoxDebuggingOptions = BlackBoxDebuggingOptions { data BlackBoxDebuggingOptions = BlackBoxDebuggingOptions {
bbdoMinFrequency :: Integer, bbdoMinFrequency :: Integer,
bbdoWordShapes :: Bool, bbdoWordShapes :: Bool,
bbdoBigrams :: Bool bbdoBigrams :: Bool,
bbdoCartesian :: Bool
} }

View File

@ -4,6 +4,7 @@
module GEval.FeatureExtractor module GEval.FeatureExtractor
(extractFeatures, (extractFeatures,
extractFeaturesFromTabbed, extractFeaturesFromTabbed,
cartesianFeatures,
Feature(..)) Feature(..))
where where
@ -15,9 +16,19 @@ import Text.WordShape
import GEval.BlackBoxDebugging import GEval.BlackBoxDebugging
import GEval.Common import GEval.Common
data Feature = SimpleFeature FeatureNamespace SimpleFeature data Feature = UnaryFeature PeggedFeature | CartesianFeature PeggedFeature PeggedFeature
deriving (Eq, Ord) deriving (Eq, Ord)
instance Show Feature where
show (UnaryFeature feature) = show feature
show (CartesianFeature featureA featureB) = (show featureA) ++ "~~" ++ (show featureB)
data PeggedFeature = PeggedFeature FeatureNamespace SimpleFeature
deriving (Eq, Ord)
instance Show PeggedFeature where
show (PeggedFeature namespace feature) = (show namespace) ++ ":" ++ (show feature)
data SimpleFeature = SimpleAtomicFeature AtomicFeature | BigramFeature AtomicFeature AtomicFeature data SimpleFeature = SimpleAtomicFeature AtomicFeature | BigramFeature AtomicFeature AtomicFeature
deriving (Eq, Ord) deriving (Eq, Ord)
@ -25,9 +36,6 @@ instance Show SimpleFeature where
show (SimpleAtomicFeature feature) = show feature show (SimpleAtomicFeature feature) = show feature
show (BigramFeature featureA featureB) = (show featureA) ++ "++" ++ (show featureB) show (BigramFeature featureA featureB) = (show featureA) ++ "++" ++ (show featureB)
instance Show Feature where
show (SimpleFeature namespace feature) = (show namespace) ++ ":" ++ (show feature)
data AtomicFeature = TextFeature Text | ShapeFeature WordShape data AtomicFeature = TextFeature Text | ShapeFeature WordShape
deriving (Eq, Ord) deriving (Eq, Ord)
@ -62,13 +70,16 @@ extractSimpleFeatures mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelu
where atomss = extractAtomicFeatures mTokenizer bbdo t where atomss = extractAtomicFeatures mTokenizer bbdo t
bigramFeatures atoms = Prelude.map (\(a, b) -> BigramFeature a b) $ bigrams atoms bigramFeatures atoms = Prelude.map (\(a, b) -> BigramFeature a b) $ bigrams atoms
extractFeatures :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [Feature] extractFeatures :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFeature]
extractFeatures mTokenizer bbdo namespace record = extractFeatures mTokenizer bbdo namespace record =
Prelude.map (\af -> SimpleFeature (FeatureNamespace namespace) af) Prelude.map (\af -> PeggedFeature (FeatureNamespace namespace) af)
$ extractSimpleFeatures mTokenizer bbdo record $ extractSimpleFeatures mTokenizer bbdo record
extractFeaturesFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [Feature] extractFeaturesFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFeature]
extractFeaturesFromTabbed mTokenizer bbdo namespace record = extractFeaturesFromTabbed mTokenizer bbdo namespace record =
Data.List.concat Data.List.concat
$ Prelude.map (\(n, t) -> Prelude.map (\af -> SimpleFeature (FeatureTabbedNamespace namespace n) af) $ extractSimpleFeatures mTokenizer bbdo t) $ Prelude.map (\(n, t) -> Prelude.map (\af -> PeggedFeature (FeatureTabbedNamespace namespace n) af) $ extractSimpleFeatures mTokenizer bbdo t)
$ Prelude.zip [1..] (splitOn "\t" record) $ Prelude.zip [1..] (splitOn "\t" record)
cartesianFeatures :: [PeggedFeature] -> [Feature]
cartesianFeatures features = nub $ [CartesianFeature a b | a <- features, b <- features, a < b]

View File

@ -126,14 +126,22 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) =
featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m () featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
featureExtractor spec bbdo = CC.map extract .| CC.concat featureExtractor spec bbdo = CC.map extract .| CC.concat
where extract (rank, LineRecord inLine expLine outLine _ score) = where extract (rank, line@(LineRecord _ _ _ _ score)) =
Prelude.map (\f -> RankedFeature f rank score) Prelude.map (\f -> RankedFeature f rank score)
$ Data.List.concat [ $ getFeatures mTokenizer bbdo line
extractFeatures mTokenizer bbdo "exp" expLine,
extractFeaturesFromTabbed mTokenizer bbdo "in" inLine,
extractFeatures mTokenizer bbdo "out" outLine]
mTokenizer = gesTokenizer spec mTokenizer = gesTokenizer spec
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> LineRecord -> [Feature]
getFeatures mTokenizer bbdo (LineRecord inLine expLine outLine _ _) = Prelude.map UnaryFeature unaryFeatures ++
if bbdoCartesian bbdo
then cartesianFeatures unaryFeatures
else []
where unaryFeatures =
Data.List.concat [
extractFeatures mTokenizer bbdo "exp" expLine,
extractFeaturesFromTabbed mTokenizer bbdo "in" inLine,
extractFeatures mTokenizer bbdo "out" outLine]
uScoresCounter :: Monad m => Integer -> ConduitT RankedFeature FeatureWithPValue (StateT Integer m) () uScoresCounter :: Monad m => Integer -> ConduitT RankedFeature FeatureWithPValue (StateT Integer m) ()
uScoresCounter minFreq = CC.map (\(RankedFeature feature r score) -> (feature, (r, score, 1))) uScoresCounter minFreq = CC.map (\(RankedFeature feature r score) -> (feature, (r, score, 1)))
.| gobbleAndDo countUScores .| gobbleAndDo countUScores

View File

@ -178,6 +178,9 @@ blackBoxDebuggingOptionsParser = BlackBoxDebuggingOptions
<*> switch <*> switch
( long "bigrams" ( long "bigrams"
<> help "Consider feature bigrams") <> help "Consider feature bigrams")
<*> switch
( long "cartesian"
<> help "Consider Cartesian combination of all features (computationally expensive!)")
singletonMaybe :: Maybe a -> Maybe [a] singletonMaybe :: Maybe a -> Maybe [a]
singletonMaybe (Just x) = Just [x] singletonMaybe (Just x) = Just [x]