Introduce existential features

This commit is contained in:
Filip Gralinski 2019-01-26 17:18:41 +01:00
parent dbf5c961af
commit ea5de5c719
2 changed files with 25 additions and 18 deletions

View File

@ -9,6 +9,7 @@ module GEval.FeatureExtractor
PeggedFactor(..), PeggedFactor(..),
Feature(..), Feature(..),
SimpleFactor(..), SimpleFactor(..),
ExistentialFactor(..),
AtomicFactor(..), AtomicFactor(..),
FeatureNamespace(..)) FeatureNamespace(..))
where where
@ -41,15 +42,21 @@ data PeggedFactor = PeggedFactor FeatureNamespace SimpleFactor
instance Show PeggedFactor where instance Show PeggedFactor where
show (PeggedFactor namespace factor) = (show namespace) ++ ":" ++ (show factor) show (PeggedFactor namespace factor) = (show namespace) ++ ":" ++ (show factor)
data SimpleFactor = SimpleAtomicFactor AtomicFactor | BigramFactor AtomicFactor AtomicFactor | NumericalFactor (Maybe Double) Int data SimpleFactor = SimpleExistentialFactor ExistentialFactor | NumericalFactor (Maybe Double) Int
deriving (Eq, Ord) deriving (Eq, Ord)
instance Show SimpleFactor where instance Show SimpleFactor where
show (SimpleAtomicFactor factor) = show factor show (SimpleExistentialFactor factor) = show factor
show (BigramFactor factorA factorB) = (show factorA) ++ "++" ++ (show factorB)
show (NumericalFactor (Just v) _) = ("=" ++ (show v)) show (NumericalFactor (Just v) _) = ("=" ++ (show v))
show (NumericalFactor (Nothing) l) = ("=#" ++ (show l)) show (NumericalFactor (Nothing) l) = ("=#" ++ (show l))
data ExistentialFactor = SimpleAtomicFactor AtomicFactor | BigramFactor AtomicFactor AtomicFactor
deriving (Eq, Ord)
instance Show ExistentialFactor where
show (SimpleAtomicFactor factor) = show factor
show (BigramFactor factorA factorB) = (show factorA) ++ "++" ++ (show factorB)
data AtomicFactor = TextFactor Text | ShapeFactor WordShape data AtomicFactor = TextFactor Text | ShapeFactor WordShape
deriving (Eq, Ord) deriving (Eq, Ord)
@ -77,15 +84,15 @@ extractAtomicFactors mTokenizer bbdo t = [Data.List.map TextFactor tokens] ++
where tokens = nub $ (tokenizeForFactors mTokenizer) t where tokens = nub $ (tokenizeForFactors mTokenizer) t
extractSimpleFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> [SimpleFactor] extractSimpleFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> [SimpleFactor]
extractSimpleFactors mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelude.map SimpleAtomicFactor) atomss) ++ extractSimpleFactors mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelude.map SimpleExistentialFactor) existentials) ++
(if bbdoBigrams bbdo
then Prelude.map bigramFactors atomss
else [])
++
(if bbdoConsiderNumericalFeatures bbdo (if bbdoConsiderNumericalFeatures bbdo
then [numericalFactor t] then [numericalFactor t]
else []) else [])
where atomss = extractAtomicFactors mTokenizer bbdo t where atomss = extractAtomicFactors mTokenizer bbdo t
existentials = (Prelude.map (Prelude.map SimpleAtomicFactor) atomss) ++
(if bbdoBigrams bbdo
then Prelude.map bigramFactors atomss
else [])
bigramFactors atoms = Prelude.map (\(a, b) -> BigramFactor a b) $ bigrams atoms bigramFactors atoms = Prelude.map (\(a, b) -> BigramFactor a b) $ bigrams atoms
numericalFactor t = [NumericalFactor (readMaybe $ unpack t) (Data.Text.length t)] numericalFactor t = [NumericalFactor (readMaybe $ unpack t) (Data.Text.length t)]
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor] extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor]

View File

@ -522,23 +522,23 @@ main = hspec $ do
bbdoConsiderNumericalFeatures = True } bbdoConsiderNumericalFeatures = True }
(sort $ extractFactorsFromTabbed Nothing bbdo "in" "I like this\t34.3\ttests") `shouldBe` [ (sort $ extractFactorsFromTabbed Nothing bbdo "in" "I like this\t34.3\ttests") `shouldBe` [
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" 1)
(SimpleAtomicFactor (TextFactor "I")), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" 1)
(SimpleAtomicFactor (TextFactor "like")), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "like"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" 1)
(SimpleAtomicFactor (TextFactor "this")), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "this"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" 1)
(BigramFactor (TextFactor "I") (TextFactor "like")), (SimpleExistentialFactor (BigramFactor (TextFactor "I") (TextFactor "like"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" 1)
(BigramFactor (TextFactor "like") (TextFactor "this")), (SimpleExistentialFactor (BigramFactor (TextFactor "like") (TextFactor "this"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" 1)
(NumericalFactor Nothing 11), (NumericalFactor Nothing 11),
PeggedFactor (FeatureTabbedNamespace "in" 2) PeggedFactor (FeatureTabbedNamespace "in" 2)
(SimpleAtomicFactor (TextFactor "34.3")), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "34.3"))),
PeggedFactor (FeatureTabbedNamespace "in" 2) PeggedFactor (FeatureTabbedNamespace "in" 2)
(NumericalFactor (Just 34.3) 4), (NumericalFactor (Just 34.3) 4),
PeggedFactor (FeatureTabbedNamespace "in" 3) PeggedFactor (FeatureTabbedNamespace "in" 3)
(SimpleAtomicFactor (TextFactor "tests")), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "tests"))),
PeggedFactor (FeatureTabbedNamespace "in" 3) PeggedFactor (FeatureTabbedNamespace "in" 3)
(NumericalFactor Nothing 5) ] (NumericalFactor Nothing 5) ]