Add bigram features in black-box debugging
This commit is contained in:
parent
13f9629cbc
commit
99e3a10791
@ -4,5 +4,6 @@ module GEval.BlackBoxDebugging
|
||||
|
||||
data BlackBoxDebuggingOptions = BlackBoxDebuggingOptions {
|
||||
bbdoMinFrequency :: Integer,
|
||||
bbdoWordShapes :: Bool
|
||||
bbdoWordShapes :: Bool,
|
||||
bbdoBigrams :: Bool
|
||||
}
|
||||
|
@ -2,8 +2,8 @@
|
||||
|
||||
|
||||
module GEval.FeatureExtractor
|
||||
(extractUnigramFeatures,
|
||||
extractUnigramFeaturesFromTabbed,
|
||||
(extractFeatures,
|
||||
extractFeaturesFromTabbed,
|
||||
Feature(..))
|
||||
where
|
||||
|
||||
@ -13,10 +13,18 @@ import Data.Monoid ((<>))
|
||||
import Text.Tokenizer
|
||||
import Text.WordShape
|
||||
import GEval.BlackBoxDebugging
|
||||
import GEval.Common
|
||||
|
||||
data Feature = SimpleFeature FeatureNamespace AtomicFeature
|
||||
data Feature = SimpleFeature FeatureNamespace SimpleFeature
|
||||
deriving (Eq, Ord)
|
||||
|
||||
data SimpleFeature = SimpleAtomicFeature AtomicFeature | BigramFeature AtomicFeature AtomicFeature
|
||||
deriving (Eq, Ord)
|
||||
|
||||
instance Show SimpleFeature where
|
||||
show (SimpleAtomicFeature feature) = show feature
|
||||
show (BigramFeature featureA featureB) = (show featureA) ++ "++" ++ (show featureB)
|
||||
|
||||
instance Show Feature where
|
||||
show (SimpleFeature namespace feature) = (show namespace) ++ ":" ++ (show feature)
|
||||
|
||||
@ -46,15 +54,21 @@ extractAtomicFeatures mTokenizer bbdo t = [Data.List.map TextFeature tokens] ++
|
||||
else [])
|
||||
where tokens = nub $ (tokenizeForFeatures mTokenizer) t
|
||||
|
||||
extractSimpleFeatures :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> [SimpleFeature]
|
||||
extractSimpleFeatures mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelude.map SimpleAtomicFeature) atomss) ++
|
||||
if bbdoBigrams bbdo
|
||||
then Prelude.map bigramFeatures atomss
|
||||
else []
|
||||
where atomss = extractAtomicFeatures mTokenizer bbdo t
|
||||
bigramFeatures atoms = Prelude.map (\(a, b) -> BigramFeature a b) $ bigrams atoms
|
||||
|
||||
extractUnigramFeatures :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [Feature]
|
||||
extractUnigramFeatures mTokenizer bbdo namespace record =
|
||||
extractFeatures :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [Feature]
|
||||
extractFeatures mTokenizer bbdo namespace record =
|
||||
Prelude.map (\af -> SimpleFeature (FeatureNamespace namespace) af)
|
||||
$ Data.List.concat
|
||||
$ extractAtomicFeatures mTokenizer bbdo record
|
||||
$ extractSimpleFeatures mTokenizer bbdo record
|
||||
|
||||
extractUnigramFeaturesFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [Feature]
|
||||
extractUnigramFeaturesFromTabbed mTokenizer bbdo namespace record =
|
||||
extractFeaturesFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [Feature]
|
||||
extractFeaturesFromTabbed mTokenizer bbdo namespace record =
|
||||
Data.List.concat
|
||||
$ Prelude.map (\(n, t) -> Prelude.map (\af -> SimpleFeature (FeatureTabbedNamespace namespace n) af) $ Data.List.concat $ extractAtomicFeatures mTokenizer bbdo t)
|
||||
$ Prelude.map (\(n, t) -> Prelude.map (\af -> SimpleFeature (FeatureTabbedNamespace namespace n) af) $ extractSimpleFeatures mTokenizer bbdo t)
|
||||
$ Prelude.zip [1..] (splitOn "\t" record)
|
||||
|
@ -129,9 +129,9 @@ featureExtractor spec bbdo = CC.map extract .| CC.concat
|
||||
where extract (rank, LineRecord inLine expLine outLine _ score) =
|
||||
Prelude.map (\f -> RankedFeature f rank score)
|
||||
$ Data.List.concat [
|
||||
extractUnigramFeatures mTokenizer bbdo "exp" expLine,
|
||||
extractUnigramFeaturesFromTabbed mTokenizer bbdo "in" inLine,
|
||||
extractUnigramFeatures mTokenizer bbdo "out" outLine]
|
||||
extractFeatures mTokenizer bbdo "exp" expLine,
|
||||
extractFeaturesFromTabbed mTokenizer bbdo "in" inLine,
|
||||
extractFeatures mTokenizer bbdo "out" outLine]
|
||||
mTokenizer = gesTokenizer spec
|
||||
|
||||
uScoresCounter :: Monad m => Integer -> ConduitT RankedFeature FeatureWithPValue (StateT Integer m) ()
|
||||
|
@ -175,6 +175,9 @@ blackBoxDebuggingOptionsParser = BlackBoxDebuggingOptions
|
||||
<*> switch
|
||||
( long "word-shapes"
|
||||
<> help "Consider word shapes")
|
||||
<*> switch
|
||||
( long "bigrams"
|
||||
<> help "Consider feature bigrams")
|
||||
|
||||
singletonMaybe :: Maybe a -> Maybe [a]
|
||||
singletonMaybe (Just x) = Just [x]
|
||||
|
Loading…
Reference in New Issue
Block a user