Introduce existential features
This commit is contained in:
parent
dbf5c961af
commit
ea5de5c719
@ -9,6 +9,7 @@ module GEval.FeatureExtractor
|
||||
PeggedFactor(..),
|
||||
Feature(..),
|
||||
SimpleFactor(..),
|
||||
ExistentialFactor(..),
|
||||
AtomicFactor(..),
|
||||
FeatureNamespace(..))
|
||||
where
|
||||
@ -41,15 +42,21 @@ data PeggedFactor = PeggedFactor FeatureNamespace SimpleFactor
|
||||
-- | Textual form of a pegged factor: the shown namespace, a colon, then the
-- shown inner factor.
instance Show PeggedFactor where
|
||||
show (PeggedFactor namespace factor) = (show namespace) ++ ":" ++ (show factor)
|
||||
|
||||
-- | A factor extracted from a single piece of text.
--
-- NOTE(review): two @data SimpleFactor@ declarations appear back to back in
-- this view. Presumably the first is the pre-change line of the diff and the
-- second is its replacement; only one can exist in the real module -- confirm.
--
-- In the second declaration a factor is either a wrapped 'ExistentialFactor'
-- or a @NumericalFactor@. Where @NumericalFactor@ is built in this file, its
-- @Maybe Double@ is the result of trying to read the text as a number and its
-- @Int@ is the length of the text.
data SimpleFactor = SimpleAtomicFactor AtomicFactor | BigramFactor AtomicFactor AtomicFactor | NumericalFactor (Maybe Double) Int
|
||||
data SimpleFactor = SimpleExistentialFactor ExistentialFactor | NumericalFactor (Maybe Double) Int
|
||||
deriving (Eq, Ord)
|
||||
|
||||
-- | Textual form of a 'SimpleFactor'.
--
-- NOTE(review): the first two equations match @SimpleAtomicFactor@ and
-- @BigramFactor@ directly; presumably they are the pre-change lines of the
-- diff, superseded by the @SimpleExistentialFactor@ equation -- confirm.
instance Show SimpleFactor where
|
||||
show (SimpleAtomicFactor factor) = show factor
|
||||
show (BigramFactor factorA factorB) = (show factorA) ++ "++" ++ (show factorB)
|
||||
-- An existential factor is shown exactly as the wrapped factor is shown.
show (SimpleExistentialFactor factor) = show factor
|
||||
-- A numerical factor with a value is shown as "=" followed by that value.
show (NumericalFactor (Just v) _) = ("=" ++ (show v))
|
||||
-- Without a value, the Int field is shown instead, prefixed with "=#".
show (NumericalFactor (Nothing) l) = ("=#" ++ (show l))
|
||||
|
||||
-- | A factor built from atomic factors: either a single 'AtomicFactor'
-- (@SimpleAtomicFactor@) or a pair of them (@BigramFactor@).
data ExistentialFactor = SimpleAtomicFactor AtomicFactor | BigramFactor AtomicFactor AtomicFactor
|
||||
deriving (Eq, Ord)
|
||||
|
||||
-- | Textual form of an 'ExistentialFactor'.
instance Show ExistentialFactor where
|
||||
-- A single atomic factor is shown as the atomic factor itself.
show (SimpleAtomicFactor factor) = show factor
|
||||
-- A bigram is shown as its two atomic factors joined by "++".
show (BigramFactor factorA factorB) = (show factorA) ++ "++" ++ (show factorB)
|
||||
|
||||
-- | The smallest kind of factor: either a piece of 'Text' (@TextFactor@) or a
-- 'WordShape' (@ShapeFactor@).
data AtomicFactor = TextFactor Text | ShapeFactor WordShape
|
||||
deriving (Eq, Ord)
|
||||
|
||||
@ -77,15 +84,15 @@ extractAtomicFactors mTokenizer bbdo t = [Data.List.map TextFactor tokens] ++
|
||||
where tokens = nub $ (tokenizeForFactors mTokenizer) t
|
||||
|
||||
-- | Extract all simple factors from one piece of text.
--
-- The result is the flattened concatenation of the existential factors
-- (atomic factors, plus bigram factors when @bbdoBigrams@ is set) and, when
-- @bbdoConsiderNumericalFeatures@ is set, one numerical factor for the text.
--
-- NOTE(review): two defining equations for @extractSimpleFactors@ appear in
-- this view. Presumably the first (mapping @SimpleAtomicFactor@ directly and
-- ending at the lone @++@) is the pre-change version and the second (mapping
-- @SimpleExistentialFactor@ over @existentials@) is its replacement -- confirm.
extractSimpleFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> [SimpleFactor]
|
||||
extractSimpleFactors mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelude.map SimpleAtomicFactor) atomss) ++
|
||||
(if bbdoBigrams bbdo
|
||||
then Prelude.map bigramFactors atomss
|
||||
else [])
|
||||
++
|
||||
-- Wrap every existential factor as a SimpleFactor, group by group.
extractSimpleFactors mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelude.map SimpleExistentialFactor) existentials) ++
|
||||
-- The numerical factor is added as one extra group, only when enabled.
(if bbdoConsiderNumericalFeatures bbdo
|
||||
then [numericalFactor t]
|
||||
else [])
|
||||
-- Groups of atomic factors for the text, as returned by extractAtomicFactors.
where atomss = extractAtomicFactors mTokenizer bbdo t
|
||||
-- Each atomic factor on its own, followed by the bigram groups when enabled.
existentials = (Prelude.map (Prelude.map SimpleAtomicFactor) atomss) ++
|
||||
(if bbdoBigrams bbdo
|
||||
then Prelude.map bigramFactors atomss
|
||||
else [])
|
||||
-- Turns each pair returned by `bigrams` into a BigramFactor
-- (presumably pairs of adjacent atoms; `bigrams` is defined elsewhere -- confirm).
bigramFactors atoms = Prelude.map (\(a, b) -> BigramFactor a b) $ bigrams atoms
|
||||
-- One-element group: the text read as a number if possible (Nothing otherwise),
-- together with the length of the text.
numericalFactor t = [NumericalFactor (readMaybe $ unpack t) (Data.Text.length t)]
|
||||
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor]
|
||||
|
14
test/Spec.hs
14
test/Spec.hs
@ -522,23 +522,23 @@ main = hspec $ do
|
||||
bbdoConsiderNumericalFeatures = True }
|
||||
(sort $ extractFactorsFromTabbed Nothing bbdo "in" "I like this\t34.3\ttests") `shouldBe` [
|
||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||
(SimpleAtomicFactor (TextFactor "I")),
|
||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))),
|
||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||
(SimpleAtomicFactor (TextFactor "like")),
|
||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "like"))),
|
||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||
(SimpleAtomicFactor (TextFactor "this")),
|
||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "this"))),
|
||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||
(BigramFactor (TextFactor "I") (TextFactor "like")),
|
||||
(SimpleExistentialFactor (BigramFactor (TextFactor "I") (TextFactor "like"))),
|
||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||
(BigramFactor (TextFactor "like") (TextFactor "this")),
|
||||
(SimpleExistentialFactor (BigramFactor (TextFactor "like") (TextFactor "this"))),
|
||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||
(NumericalFactor Nothing 11),
|
||||
PeggedFactor (FeatureTabbedNamespace "in" 2)
|
||||
(SimpleAtomicFactor (TextFactor "34.3")),
|
||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "34.3"))),
|
||||
PeggedFactor (FeatureTabbedNamespace "in" 2)
|
||||
(NumericalFactor (Just 34.3) 4),
|
||||
PeggedFactor (FeatureTabbedNamespace "in" 3)
|
||||
(SimpleAtomicFactor (TextFactor "tests")),
|
||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "tests"))),
|
||||
PeggedFactor (FeatureTabbedNamespace "in" 3)
|
||||
(NumericalFactor Nothing 5) ]
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user