Refactor feature extraction

This commit is contained in:
Filip Graliński 2019-05-23 10:03:26 +02:00
parent 1c0395f2ed
commit 780b7016c5

View File

@ -148,15 +148,21 @@ extractSimpleFactors mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelud
else [])
bigramFactors atoms = Prelude.map (\(a, b) -> BigramFactor a b) $ bigrams atoms
numericalFactor t = [NumericalFactor (readMaybe $ unpack t) (Data.Text.length t)]
-- | Extract the simple factors of a single field and peg each of them
-- to the given namespace.
--
-- The field text is passed to 'extractSimpleFactors' together with the
-- optional tokenizer and the debugging options; every resulting factor
-- is wrapped in a 'PeggedFactor' carrying @namespace@. The order of
-- factors returned by 'extractSimpleFactors' is preserved.
extractFactorsFromField :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> FeatureNamespace -> Text -> [PeggedFactor]
extractFactorsFromField mTokenizer bbdo namespace record =
  [ PeggedFactor namespace simpleFactor
  | simpleFactor <- extractSimpleFactors mTokenizer bbdo record
  ]
-- | Extract pegged factors from a whole record under a plain, textual
-- namespace.
--
-- The namespace name is wrapped in the 'FeatureNamespace' constructor
-- and the work is delegated to 'extractFactorsFromField', so that the
-- pegging logic lives in exactly one place.
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor]
extractFactors mTokenizer bbdo namespace record =
  extractFactorsFromField mTokenizer bbdo (FeatureNamespace namespace) record
-- | Extract pegged factors from a record consisting of tab-separated
-- fields.
--
-- The record is split on the tab character and the fields are numbered
-- starting from 1. Each field is processed independently with
-- 'extractFactorsFromField' under the namespace
-- @FeatureTabbedNamespace namespace n@, where @n@ is the field number.
-- The per-field results are concatenated in field order.
extractFactorsFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor]
extractFactorsFromTabbed mTokenizer bbdo namespace record =
  Data.List.concat
    [ extractFactorsFromField mTokenizer bbdo (FeatureTabbedNamespace namespace n) t
    | (n, t) <- Prelude.zip [1..] (splitOn "\t" record)
    ]
addCartesianFactors :: BlackBoxDebuggingOptions -> [LineWithPeggedFactors] -> [LineWithFactors]