diff --git a/src/GEval/FeatureExtractor.hs b/src/GEval/FeatureExtractor.hs index a37412a..3164acc 100644 --- a/src/GEval/FeatureExtractor.hs +++ b/src/GEval/FeatureExtractor.hs @@ -7,16 +7,19 @@ module GEval.FeatureExtractor LineWithFeatures(..), LineWithPeggedFactors(..), PeggedFactor(..), + PeggedExistentialFactor(..), Feature(..), SimpleFactor(..), ExistentialFactor(..), AtomicFactor(..), - FeatureNamespace(..)) + FeatureNamespace(..), + filterExistentialFactors) where import Data.Text import Data.List import Data.Monoid ((<>)) +import Data.Maybe (catMaybes) import Text.Tokenizer import Text.WordShape import GEval.BlackBoxDebugging @@ -26,7 +29,7 @@ import Text.Read (readMaybe) data LineWithFeatures = LineWithFeatures Double MetricValue [Feature] deriving (Eq, Ord) -data Feature = UnaryFeature PeggedFactor | CartesianFeature PeggedFactor PeggedFactor +data Feature = UnaryFeature PeggedFactor | CartesianFeature PeggedExistentialFactor PeggedExistentialFactor deriving (Eq, Ord) instance Show Feature where @@ -42,6 +45,12 @@ data PeggedFactor = PeggedFactor FeatureNamespace SimpleFactor instance Show PeggedFactor where show (PeggedFactor namespace factor) = (show namespace) ++ ":" ++ (show factor) +data PeggedExistentialFactor = PeggedExistentialFactor FeatureNamespace ExistentialFactor + deriving (Eq, Ord) + +instance Show PeggedExistentialFactor where + show (PeggedExistentialFactor namespace factor) = (show namespace) ++ ":" ++ (show factor) + data SimpleFactor = SimpleExistentialFactor ExistentialFactor | NumericalFactor (Maybe Double) Int deriving (Eq, Ord) @@ -112,5 +121,10 @@ addCartesianFactors bbdo linesWithPeggedFactors = addCartesianFactors' (bbdoCart = Prelude.map (\(LineWithPeggedFactors rank score fs) -> LineWithFeatures rank score (Prelude.map UnaryFeature fs)) linesWithPeggedFactors -cartesianFeatures :: [PeggedFactor] -> [Feature] +cartesianFeatures :: [PeggedExistentialFactor] -> [Feature] cartesianFeatures factors = nub $ [CartesianFeature a b | a <- factors, b <- factors, a < b] + +filterExistentialFactors :: [PeggedFactor] -> [PeggedExistentialFactor] +filterExistentialFactors factors = catMaybes $ Prelude.map toExistential factors + where toExistential (PeggedFactor namespace (SimpleExistentialFactor factor)) = Just $ PeggedExistentialFactor namespace factor + toExistential _ = Nothing diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index fdeac0c..753741c 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -174,17 +174,18 @@ finalFeatures True minFreq = do $ M.toList $ M.fromListWith (+) $ Data.List.concat - $ Prelude.map (\(LineWithPeggedFactors _ _ fs) -> Prelude.map (\f -> (f, 1)) fs) + $ Prelude.map (\(LineWithPeggedFactors _ _ fs) -> Prelude.map (\f -> (f, 1)) $ filterExistentialFactors fs) $ Prelude.map snd ls (CC.yieldMany $ ls) .| CC.map (addCartesian unaryFeaturesFrequentEnough) where addCartesian wanted (l, LineWithPeggedFactors rank score fs) = (l, LineWithFeatures rank score $ ((Prelude.map UnaryFeature fs) ++ - (cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs))) + (cartesianFeatures $ Prelude.filter ((flip S.member) wanted) $ filterExistentialFactors fs))) filtreCartesian False = CC.map id filtreCartesian True = CC.concatMapAccum step S.empty - where step f@(FeatureWithPValue (UnaryFeature p) _ _ _) mp = (S.insert p mp, [f]) + where step f@(FeatureWithPValue (UnaryFeature (PeggedFactor namespace (SimpleExistentialFactor p))) _ _ _) mp = (S.insert (PeggedExistentialFactor namespace p) mp, [f]) + step f@(FeatureWithPValue (UnaryFeature (PeggedFactor namespace (NumericalFactor _ _))) _ _ _) mp = (mp, [f]) step f@(FeatureWithPValue (CartesianFeature pA pB) _ _ _) mp = (mp, if pA `S.member` mp || pB `S.member` mp then [] else [f])