Add cartesian features to black-box debugging
But it's very slow now, needs to be sped up
This commit is contained in:
parent
99e3a10791
commit
23aad86e72
@ -5,5 +5,6 @@ module GEval.BlackBoxDebugging
|
|||||||
data BlackBoxDebuggingOptions = BlackBoxDebuggingOptions {
|
data BlackBoxDebuggingOptions = BlackBoxDebuggingOptions {
|
||||||
bbdoMinFrequency :: Integer,
|
bbdoMinFrequency :: Integer,
|
||||||
bbdoWordShapes :: Bool,
|
bbdoWordShapes :: Bool,
|
||||||
bbdoBigrams :: Bool
|
bbdoBigrams :: Bool,
|
||||||
|
bbdoCartesian :: Bool
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
module GEval.FeatureExtractor
|
module GEval.FeatureExtractor
|
||||||
(extractFeatures,
|
(extractFeatures,
|
||||||
extractFeaturesFromTabbed,
|
extractFeaturesFromTabbed,
|
||||||
|
cartesianFeatures,
|
||||||
Feature(..))
|
Feature(..))
|
||||||
where
|
where
|
||||||
|
|
||||||
@ -15,9 +16,19 @@ import Text.WordShape
|
|||||||
import GEval.BlackBoxDebugging
|
import GEval.BlackBoxDebugging
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
|
|
||||||
data Feature = SimpleFeature FeatureNamespace SimpleFeature
|
data Feature = UnaryFeature PeggedFeature | CartesianFeature PeggedFeature PeggedFeature
|
||||||
deriving (Eq, Ord)
|
deriving (Eq, Ord)
|
||||||
|
|
||||||
|
instance Show Feature where
|
||||||
|
show (UnaryFeature feature) = show feature
|
||||||
|
show (CartesianFeature featureA featureB) = (show featureA) ++ "~~" ++ (show featureB)
|
||||||
|
|
||||||
|
data PeggedFeature = PeggedFeature FeatureNamespace SimpleFeature
|
||||||
|
deriving (Eq, Ord)
|
||||||
|
|
||||||
|
instance Show PeggedFeature where
|
||||||
|
show (PeggedFeature namespace feature) = (show namespace) ++ ":" ++ (show feature)
|
||||||
|
|
||||||
data SimpleFeature = SimpleAtomicFeature AtomicFeature | BigramFeature AtomicFeature AtomicFeature
|
data SimpleFeature = SimpleAtomicFeature AtomicFeature | BigramFeature AtomicFeature AtomicFeature
|
||||||
deriving (Eq, Ord)
|
deriving (Eq, Ord)
|
||||||
|
|
||||||
@ -25,9 +36,6 @@ instance Show SimpleFeature where
|
|||||||
show (SimpleAtomicFeature feature) = show feature
|
show (SimpleAtomicFeature feature) = show feature
|
||||||
show (BigramFeature featureA featureB) = (show featureA) ++ "++" ++ (show featureB)
|
show (BigramFeature featureA featureB) = (show featureA) ++ "++" ++ (show featureB)
|
||||||
|
|
||||||
instance Show Feature where
|
|
||||||
show (SimpleFeature namespace feature) = (show namespace) ++ ":" ++ (show feature)
|
|
||||||
|
|
||||||
data AtomicFeature = TextFeature Text | ShapeFeature WordShape
|
data AtomicFeature = TextFeature Text | ShapeFeature WordShape
|
||||||
deriving (Eq, Ord)
|
deriving (Eq, Ord)
|
||||||
|
|
||||||
@ -62,13 +70,16 @@ extractSimpleFeatures mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelu
|
|||||||
where atomss = extractAtomicFeatures mTokenizer bbdo t
|
where atomss = extractAtomicFeatures mTokenizer bbdo t
|
||||||
bigramFeatures atoms = Prelude.map (\(a, b) -> BigramFeature a b) $ bigrams atoms
|
bigramFeatures atoms = Prelude.map (\(a, b) -> BigramFeature a b) $ bigrams atoms
|
||||||
|
|
||||||
extractFeatures :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [Feature]
|
extractFeatures :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFeature]
|
||||||
extractFeatures mTokenizer bbdo namespace record =
|
extractFeatures mTokenizer bbdo namespace record =
|
||||||
Prelude.map (\af -> SimpleFeature (FeatureNamespace namespace) af)
|
Prelude.map (\af -> PeggedFeature (FeatureNamespace namespace) af)
|
||||||
$ extractSimpleFeatures mTokenizer bbdo record
|
$ extractSimpleFeatures mTokenizer bbdo record
|
||||||
|
|
||||||
extractFeaturesFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [Feature]
|
extractFeaturesFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFeature]
|
||||||
extractFeaturesFromTabbed mTokenizer bbdo namespace record =
|
extractFeaturesFromTabbed mTokenizer bbdo namespace record =
|
||||||
Data.List.concat
|
Data.List.concat
|
||||||
$ Prelude.map (\(n, t) -> Prelude.map (\af -> SimpleFeature (FeatureTabbedNamespace namespace n) af) $ extractSimpleFeatures mTokenizer bbdo t)
|
$ Prelude.map (\(n, t) -> Prelude.map (\af -> PeggedFeature (FeatureTabbedNamespace namespace n) af) $ extractSimpleFeatures mTokenizer bbdo t)
|
||||||
$ Prelude.zip [1..] (splitOn "\t" record)
|
$ Prelude.zip [1..] (splitOn "\t" record)
|
||||||
|
|
||||||
|
cartesianFeatures :: [PeggedFeature] -> [Feature]
|
||||||
|
cartesianFeatures features = nub $ [CartesianFeature a b | a <- features, b <- features, a < b]
|
||||||
|
@ -126,13 +126,21 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) =
|
|||||||
|
|
||||||
featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
|
featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
|
||||||
featureExtractor spec bbdo = CC.map extract .| CC.concat
|
featureExtractor spec bbdo = CC.map extract .| CC.concat
|
||||||
where extract (rank, LineRecord inLine expLine outLine _ score) =
|
where extract (rank, line@(LineRecord _ _ _ _ score)) =
|
||||||
Prelude.map (\f -> RankedFeature f rank score)
|
Prelude.map (\f -> RankedFeature f rank score)
|
||||||
$ Data.List.concat [
|
$ getFeatures mTokenizer bbdo line
|
||||||
|
mTokenizer = gesTokenizer spec
|
||||||
|
|
||||||
|
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> LineRecord -> [Feature]
|
||||||
|
getFeatures mTokenizer bbdo (LineRecord inLine expLine outLine _ _) = Prelude.map UnaryFeature unaryFeatures ++
|
||||||
|
if bbdoCartesian bbdo
|
||||||
|
then cartesianFeatures unaryFeatures
|
||||||
|
else []
|
||||||
|
where unaryFeatures =
|
||||||
|
Data.List.concat [
|
||||||
extractFeatures mTokenizer bbdo "exp" expLine,
|
extractFeatures mTokenizer bbdo "exp" expLine,
|
||||||
extractFeaturesFromTabbed mTokenizer bbdo "in" inLine,
|
extractFeaturesFromTabbed mTokenizer bbdo "in" inLine,
|
||||||
extractFeatures mTokenizer bbdo "out" outLine]
|
extractFeatures mTokenizer bbdo "out" outLine]
|
||||||
mTokenizer = gesTokenizer spec
|
|
||||||
|
|
||||||
uScoresCounter :: Monad m => Integer -> ConduitT RankedFeature FeatureWithPValue (StateT Integer m) ()
|
uScoresCounter :: Monad m => Integer -> ConduitT RankedFeature FeatureWithPValue (StateT Integer m) ()
|
||||||
uScoresCounter minFreq = CC.map (\(RankedFeature feature r score) -> (feature, (r, score, 1)))
|
uScoresCounter minFreq = CC.map (\(RankedFeature feature r score) -> (feature, (r, score, 1)))
|
||||||
|
@ -178,6 +178,9 @@ blackBoxDebuggingOptionsParser = BlackBoxDebuggingOptions
|
|||||||
<*> switch
|
<*> switch
|
||||||
( long "bigrams"
|
( long "bigrams"
|
||||||
<> help "Consider feature bigrams")
|
<> help "Consider feature bigrams")
|
||||||
|
<*> switch
|
||||||
|
( long "cartesian"
|
||||||
|
<> help "Consider Cartesian combination of all features (computationally expensive!)")
|
||||||
|
|
||||||
singletonMaybe :: Maybe a -> Maybe [a]
|
singletonMaybe :: Maybe a -> Maybe [a]
|
||||||
singletonMaybe (Just x) = Just [x]
|
singletonMaybe (Just x) = Just [x]
|
||||||
|
Loading…
Reference in New Issue
Block a user