From 1aee476434db6084766c3b7ad534bd9a874901fd Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Mon, 14 Jan 2019 09:29:14 +0100 Subject: [PATCH] Add --filtre option --- src/GEval/Core.hs | 1 + src/GEval/LineByLine.hs | 72 +++++++++++++++++++++++++++----------- src/GEval/OptionsParser.hs | 28 +++++++++------ 3 files changed, 70 insertions(+), 31 deletions(-) diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 0f97c31..75c4bf2 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -278,6 +278,7 @@ data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest data GEvalOptions = GEvalOptions { geoSpecialCommand :: Maybe GEvalSpecialCommand, geoResultOrdering :: ResultOrdering, + geoFilter :: Maybe String, geoSpec :: GEvalSpecification, geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions } diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index d86eb69..fdeac0c 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -63,10 +63,10 @@ import qualified Data.Set as S data LineRecord = LineRecord Text Text Text Word32 MetricValue deriving (Eq, Show) -runLineByLine :: ResultOrdering -> GEvalSpecification -> IO () -runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum +runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () +runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum where consum :: ConduitT LineRecord Void (ResourceT IO) () - consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) + consum = (runFeatureFilter featureFilter spec bbdo .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [ formatScore score, escapeTabs inp, @@ -75,6 +75,16 @@ runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum formatScore :: MetricValue -> Text formatScore = Data.Text.pack . printf "%f" +runFeatureFilter :: (Monad m, FeatureSource s) => Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT s s m () +runFeatureFilter Nothing _ _ = doNothing +runFeatureFilter (Just feature) spec bbdo = CC.map (\l -> (fakeRank, l)) + .| featureExtractor mTokenizer bbdo + .| CC.filter (checkFeature feature) + .| CC.map fst + where mTokenizer = gesTokenizer spec + fakeRank = 0.0 + checkFeature feature (_, LineWithFeatures _ _ fs) = feature `elem` (Prelude.map show fs) + runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (worstFeaturesPipeline False spec bbdo) where ordering' = forceSomeOrdering ordering @@ -108,7 +118,7 @@ forceSomeOrdering _ = FirstTheWorst extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) () extractFeaturesAndPValues spec bbdo = totalCounter - .| featureExtractor spec bbdo + .| rankedFeatureExtractor spec bbdo .| uScoresCounter (bbdoMinFrequency bbdo) @@ -128,17 +138,34 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) = (pack $ printf "%0.8f" avg), (pack $ printf "%0.20f" p)] -featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m () -featureExtractor spec bbdo = CC.map extract - .| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo)) - .| CC.map unwrapFeatures - .| CC.concat - where extract (rank, line@(LineRecord _ _ _ _ score)) = - LineWithPeggedFactors rank score $ getFeatures mTokenizer bbdo line - mTokenizer = gesTokenizer spec +rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m () +rankedFeatureExtractor spec bbdo = featureExtractor mTokenizer bbdo + .| CC.map snd + .| CC.map unwrapFeatures + .| CC.concat + where mTokenizer = gesTokenizer spec unwrapFeatures (LineWithFeatures rank score fs) = Prelude.map (\f -> RankedFeature f rank score) fs -finalFeatures False _ = CC.map peggedToUnaryLine +class FeatureSource a where + getScore :: a -> MetricValue + mainLineRecord :: a -> LineRecord + +instance FeatureSource LineRecord where + getScore (LineRecord _ _ _ _ score) = score + mainLineRecord l = l + +instance FeatureSource (LineRecord, LineRecord) where + getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA + mainLineRecord (_, l) = l + +featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> ConduitT (Double, s) (s, LineWithFeatures) m () +featureExtractor mTokenizer bbdo = CC.map extract + .| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo)) + where extract (rank, line) = + (line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo (mainLineRecord line)) + +finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFeatures) m () +finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p)) finalFeatures True minFreq = do ls <- CC.sinkList let unaryFeaturesFrequentEnough = S.fromList @@ -147,12 +174,13 @@ finalFeatures True minFreq = do $ M.toList $ M.fromListWith (+) $ Data.List.concat - $ Prelude.map (\(LineWithPeggedFactors _ _ fs) -> Prelude.map (\f -> (f, 1)) fs) ls + $ Prelude.map (\(LineWithPeggedFactors _ _ fs) -> Prelude.map (\f -> (f, 1)) fs) + $ Prelude.map snd ls (CC.yieldMany $ ls) .| CC.map (addCartesian unaryFeaturesFrequentEnough) - where addCartesian wanted (LineWithPeggedFactors rank score fs) = LineWithFeatures rank score - $ ((Prelude.map UnaryFeature fs) ++ - (cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs)) + where addCartesian wanted (l, LineWithPeggedFactors rank score fs) = (l, LineWithFeatures rank score + $ ((Prelude.map UnaryFeature fs) ++ + (cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs))) filtreCartesian False = CC.map id filtreCartesian True = CC.concatMapAccum step S.empty @@ -254,10 +282,14 @@ gobbleAndDo fun = do l <- CC.sinkList CC.yieldMany $ fun l -runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO () -runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum +runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () +runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering otherOut spec consum where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () - consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) + consum = CL.filter shouldBeShown + .| runFeatureFilter featureFilter spec bbdo + .| CL.map (encodeUtf8 . formatOutput) + .| CC.unlinesAscii + .| CC.stdout shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) = outA /= outB && scoreA /= scoreB formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [ diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 781863e..cd1d4cc 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -89,6 +89,10 @@ optionsParser = GEvalOptions <> short 'r' <> help "When in line-by-line or diff mode, sort the results from the best to the worst")) <|> pure KeepTheOriginalOrder) + <*> optional (strOption + ( long "filter" + <> metavar "FEATURE" + <> help "When in line-by-line or diff mode, show only items with a given feature")) <*> specParser <*> blackBoxDebuggingOptionsParser @@ -258,39 +262,41 @@ attemptToReadOptsFromConfigFile args opts = do runGEval'' :: GEvalOptions -> IO (Maybe [(SourceSpec, [MetricValue])]) runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoResultOrdering opts) + (geoFilter opts) (geoSpec opts) (geoBlackBoxDebugginsOptions opts) runGEval''' :: Maybe GEvalSpecialCommand -> ResultOrdering + -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO (Maybe [(SourceSpec, [MetricValue])]) -runGEval''' Nothing _ spec _ = do +runGEval''' Nothing _ _ spec _ = do vals <- geval spec return $ Just vals -runGEval''' (Just Init) _ spec _ = do +runGEval''' (Just Init) _ _ spec _ = do initChallenge spec return Nothing -runGEval''' (Just PrintVersion) _ _ _ = do +runGEval''' (Just PrintVersion) _ _ _ _ = do putStrLn ("geval " ++ showVersion version) return Nothing -runGEval''' (Just LineByLine) ordering spec _ = do - runLineByLine ordering spec +runGEval''' (Just LineByLine) ordering featureFilter spec bbdo = do + runLineByLine ordering featureFilter spec bbdo return Nothing -runGEval''' (Just WorstFeatures) ordering spec bbdo = do +runGEval''' (Just WorstFeatures) ordering _ spec bbdo = do runWorstFeatures ordering spec bbdo return Nothing -runGEval''' (Just (Diff otherOut)) ordering spec _ = do - runDiff ordering otherOut spec +runGEval''' (Just (Diff otherOut)) ordering featureFilter spec bbdo = do + runDiff ordering featureFilter otherOut spec bbdo return Nothing -runGEval''' (Just (MostWorseningFeatures otherOut)) ordering spec bbdo = do +runGEval''' (Just (MostWorseningFeatures otherOut)) ordering _ spec bbdo = do runMostWorseningFeatures ordering otherOut spec bbdo return Nothing -runGEval''' (Just JustTokenize) _ spec _ = do +runGEval''' (Just JustTokenize) _ _ spec _ = do justTokenize (gesTokenizer spec) return Nothing -runGEval''' (Just Submit) _ spec _ = do +runGEval''' (Just Submit) _ _ spec _ = do submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec) return Nothing