Introduce existential features
This commit is contained in:
parent
dbf5c961af
commit
ea5de5c719
@ -9,6 +9,7 @@ module GEval.FeatureExtractor
|
|||||||
PeggedFactor(..),
|
PeggedFactor(..),
|
||||||
Feature(..),
|
Feature(..),
|
||||||
SimpleFactor(..),
|
SimpleFactor(..),
|
||||||
|
ExistentialFactor(..),
|
||||||
AtomicFactor(..),
|
AtomicFactor(..),
|
||||||
FeatureNamespace(..))
|
FeatureNamespace(..))
|
||||||
where
|
where
|
||||||
@ -41,15 +42,21 @@ data PeggedFactor = PeggedFactor FeatureNamespace SimpleFactor
|
|||||||
instance Show PeggedFactor where
|
instance Show PeggedFactor where
|
||||||
show (PeggedFactor namespace factor) = (show namespace) ++ ":" ++ (show factor)
|
show (PeggedFactor namespace factor) = (show namespace) ++ ":" ++ (show factor)
|
||||||
|
|
||||||
data SimpleFactor = SimpleAtomicFactor AtomicFactor | BigramFactor AtomicFactor AtomicFactor | NumericalFactor (Maybe Double) Int
|
data SimpleFactor = SimpleExistentialFactor ExistentialFactor | NumericalFactor (Maybe Double) Int
|
||||||
deriving (Eq, Ord)
|
deriving (Eq, Ord)
|
||||||
|
|
||||||
instance Show SimpleFactor where
|
instance Show SimpleFactor where
|
||||||
show (SimpleAtomicFactor factor) = show factor
|
show (SimpleExistentialFactor factor) = show factor
|
||||||
show (BigramFactor factorA factorB) = (show factorA) ++ "++" ++ (show factorB)
|
|
||||||
show (NumericalFactor (Just v) _) = ("=" ++ (show v))
|
show (NumericalFactor (Just v) _) = ("=" ++ (show v))
|
||||||
show (NumericalFactor (Nothing) l) = ("=#" ++ (show l))
|
show (NumericalFactor (Nothing) l) = ("=#" ++ (show l))
|
||||||
|
|
||||||
|
data ExistentialFactor = SimpleAtomicFactor AtomicFactor | BigramFactor AtomicFactor AtomicFactor
|
||||||
|
deriving (Eq, Ord)
|
||||||
|
|
||||||
|
instance Show ExistentialFactor where
|
||||||
|
show (SimpleAtomicFactor factor) = show factor
|
||||||
|
show (BigramFactor factorA factorB) = (show factorA) ++ "++" ++ (show factorB)
|
||||||
|
|
||||||
data AtomicFactor = TextFactor Text | ShapeFactor WordShape
|
data AtomicFactor = TextFactor Text | ShapeFactor WordShape
|
||||||
deriving (Eq, Ord)
|
deriving (Eq, Ord)
|
||||||
|
|
||||||
@ -77,15 +84,15 @@ extractAtomicFactors mTokenizer bbdo t = [Data.List.map TextFactor tokens] ++
|
|||||||
where tokens = nub $ (tokenizeForFactors mTokenizer) t
|
where tokens = nub $ (tokenizeForFactors mTokenizer) t
|
||||||
|
|
||||||
extractSimpleFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> [SimpleFactor]
|
extractSimpleFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> [SimpleFactor]
|
||||||
extractSimpleFactors mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelude.map SimpleAtomicFactor) atomss) ++
|
extractSimpleFactors mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelude.map SimpleExistentialFactor) existentials) ++
|
||||||
(if bbdoBigrams bbdo
|
(if bbdoConsiderNumericalFeatures bbdo
|
||||||
then Prelude.map bigramFactors atomss
|
then [numericalFactor t]
|
||||||
else [])
|
else [])
|
||||||
++
|
|
||||||
(if bbdoConsiderNumericalFeatures bbdo
|
|
||||||
then [numericalFactor t]
|
|
||||||
else [])
|
|
||||||
where atomss = extractAtomicFactors mTokenizer bbdo t
|
where atomss = extractAtomicFactors mTokenizer bbdo t
|
||||||
|
existentials = (Prelude.map (Prelude.map SimpleAtomicFactor) atomss) ++
|
||||||
|
(if bbdoBigrams bbdo
|
||||||
|
then Prelude.map bigramFactors atomss
|
||||||
|
else [])
|
||||||
bigramFactors atoms = Prelude.map (\(a, b) -> BigramFactor a b) $ bigrams atoms
|
bigramFactors atoms = Prelude.map (\(a, b) -> BigramFactor a b) $ bigrams atoms
|
||||||
numericalFactor t = [NumericalFactor (readMaybe $ unpack t) (Data.Text.length t)]
|
numericalFactor t = [NumericalFactor (readMaybe $ unpack t) (Data.Text.length t)]
|
||||||
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor]
|
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor]
|
||||||
|
14
test/Spec.hs
14
test/Spec.hs
@ -522,23 +522,23 @@ main = hspec $ do
|
|||||||
bbdoConsiderNumericalFeatures = True }
|
bbdoConsiderNumericalFeatures = True }
|
||||||
(sort $ extractFactorsFromTabbed Nothing bbdo "in" "I like this\t34.3\ttests") `shouldBe` [
|
(sort $ extractFactorsFromTabbed Nothing bbdo "in" "I like this\t34.3\ttests") `shouldBe` [
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||||
(SimpleAtomicFactor (TextFactor "I")),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||||
(SimpleAtomicFactor (TextFactor "like")),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "like"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||||
(SimpleAtomicFactor (TextFactor "this")),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "this"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||||
(BigramFactor (TextFactor "I") (TextFactor "like")),
|
(SimpleExistentialFactor (BigramFactor (TextFactor "I") (TextFactor "like"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||||
(BigramFactor (TextFactor "like") (TextFactor "this")),
|
(SimpleExistentialFactor (BigramFactor (TextFactor "like") (TextFactor "this"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||||
(NumericalFactor Nothing 11),
|
(NumericalFactor Nothing 11),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 2)
|
PeggedFactor (FeatureTabbedNamespace "in" 2)
|
||||||
(SimpleAtomicFactor (TextFactor "34.3")),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "34.3"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 2)
|
PeggedFactor (FeatureTabbedNamespace "in" 2)
|
||||||
(NumericalFactor (Just 34.3) 4),
|
(NumericalFactor (Just 34.3) 4),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 3)
|
PeggedFactor (FeatureTabbedNamespace "in" 3)
|
||||||
(SimpleAtomicFactor (TextFactor "tests")),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "tests"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 3)
|
PeggedFactor (FeatureTabbedNamespace "in" 3)
|
||||||
(NumericalFactor Nothing 5) ]
|
(NumericalFactor Nothing 5) ]
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user