Refactor CartesianFeature type

This commit is contained in:
Filip Gralinski 2019-01-26 18:00:36 +01:00
parent ea5de5c719
commit 1c3908b273
2 changed files with 21 additions and 6 deletions

View File

@ -7,16 +7,19 @@ module GEval.FeatureExtractor
LineWithFeatures(..),
LineWithPeggedFactors(..),
PeggedFactor(..),
PeggedExistentialFactor(..),
Feature(..),
SimpleFactor(..),
ExistentialFactor(..),
AtomicFactor(..),
FeatureNamespace(..))
FeatureNamespace(..),
filterExistentialFactors)
where
import Data.Text
import Data.List
import Data.Monoid ((<>))
import Data.Maybe (catMaybes)
import Text.Tokenizer
import Text.WordShape
import GEval.BlackBoxDebugging
@ -26,7 +29,7 @@ import Text.Read (readMaybe)
data LineWithFeatures = LineWithFeatures Double MetricValue [Feature]
deriving (Eq, Ord)
data Feature = UnaryFeature PeggedFactor | CartesianFeature PeggedFactor PeggedFactor
data Feature = UnaryFeature PeggedFactor | CartesianFeature PeggedExistentialFactor PeggedExistentialFactor
deriving (Eq, Ord)
instance Show Feature where
@ -42,6 +45,12 @@ data PeggedFactor = PeggedFactor FeatureNamespace SimpleFactor
instance Show PeggedFactor where
show (PeggedFactor namespace factor) = (show namespace) ++ ":" ++ (show factor)
data PeggedExistentialFactor = PeggedExistentialFactor FeatureNamespace ExistentialFactor
deriving (Eq, Ord)
instance Show PeggedExistentialFactor where
show (PeggedExistentialFactor namespace factor) = (show namespace) ++ ":" ++ (show factor)
data SimpleFactor = SimpleExistentialFactor ExistentialFactor | NumericalFactor (Maybe Double) Int
deriving (Eq, Ord)
@ -112,5 +121,10 @@ addCartesianFactors bbdo linesWithPeggedFactors = addCartesianFactors' (bbdoCart
= Prelude.map (\(LineWithPeggedFactors rank score fs) ->
LineWithFeatures rank score (Prelude.map UnaryFeature fs)) linesWithPeggedFactors
cartesianFeatures :: [PeggedFactor] -> [Feature]
cartesianFeatures :: [PeggedExistentialFactor] -> [Feature]
cartesianFeatures factors = nub $ [CartesianFeature a b | a <- factors, b <- factors, a < b]
filterExistentialFactors :: [PeggedFactor] -> [PeggedExistentialFactor]
filterExistentialFactors factors = catMaybes $ Prelude.map toExistential factors
where toExistential (PeggedFactor namespace (SimpleExistentialFactor factor)) = Just $ PeggedExistentialFactor namespace factor
toExistential _ = Nothing

View File

@ -174,17 +174,18 @@ finalFeatures True minFreq = do
$ M.toList
$ M.fromListWith (+)
$ Data.List.concat
$ Prelude.map (\(LineWithPeggedFactors _ _ fs) -> Prelude.map (\f -> (f, 1)) fs)
$ Prelude.map (\(LineWithPeggedFactors _ _ fs) -> Prelude.map (\f -> (f, 1)) $ filterExistentialFactors fs)
$ Prelude.map snd ls
(CC.yieldMany $ ls) .| CC.map (addCartesian unaryFeaturesFrequentEnough)
where addCartesian wanted (l, LineWithPeggedFactors rank score fs) = (l, LineWithFeatures rank score
$ ((Prelude.map UnaryFeature fs) ++
(cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs)))
(cartesianFeatures $ Prelude.filter ((flip S.member) wanted) $ filterExistentialFactors fs)))
filtreCartesian False = CC.map id
filtreCartesian True = CC.concatMapAccum step S.empty
where step f@(FeatureWithPValue (UnaryFeature p) _ _ _) mp = (S.insert p mp, [f])
where step f@(FeatureWithPValue (UnaryFeature (PeggedFactor namespace (SimpleExistentialFactor p))) _ _ _) mp = (S.insert (PeggedExistentialFactor namespace p) mp, [f])
step f@(FeatureWithPValue (UnaryFeature (PeggedFactor namespace (NumericalFactor _ _))) _ _ _) mp = (mp, [f])
step f@(FeatureWithPValue (CartesianFeature pA pB) _ _ _) mp = (mp, if pA `S.member` mp || pB `S.member` mp
then []
else [f])