Add --filtre option

This commit is contained in:
Filip Gralinski 2019-01-14 09:29:14 +01:00
parent 003a8106bb
commit 1aee476434
3 changed files with 70 additions and 31 deletions

View File

@ -278,6 +278,7 @@ data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
data GEvalOptions = GEvalOptions data GEvalOptions = GEvalOptions
{ geoSpecialCommand :: Maybe GEvalSpecialCommand, { geoSpecialCommand :: Maybe GEvalSpecialCommand,
geoResultOrdering :: ResultOrdering, geoResultOrdering :: ResultOrdering,
geoFilter :: Maybe String,
geoSpec :: GEvalSpecification, geoSpec :: GEvalSpecification,
geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions } geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions }

View File

@ -63,10 +63,10 @@ import qualified Data.Set as S
data LineRecord = LineRecord Text Text Text Word32 MetricValue data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show) deriving (Eq, Show)
runLineByLine :: ResultOrdering -> GEvalSpecification -> IO () runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum
where consum :: ConduitT LineRecord Void (ResourceT IO) () where consum :: ConduitT LineRecord Void (ResourceT IO) ()
consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) consum = (runFeatureFilter featureFilter spec bbdo .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [ formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score, formatScore score,
escapeTabs inp, escapeTabs inp,
@ -75,6 +75,16 @@ runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
formatScore :: MetricValue -> Text formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f" formatScore = Data.Text.pack . printf "%f"
runFeatureFilter :: (Monad m, FeatureSource s) => Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT s s m ()
runFeatureFilter Nothing _ _ = doNothing
runFeatureFilter (Just feature) spec bbdo = CC.map (\l -> (fakeRank, l))
.| featureExtractor mTokenizer bbdo
.| CC.filter (checkFeature feature)
.| CC.map fst
where mTokenizer = gesTokenizer spec
fakeRank = 0.0
checkFeature feature (_, LineWithFeatures _ _ fs) = feature `elem` (Prelude.map show fs)
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (worstFeaturesPipeline False spec bbdo) runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (worstFeaturesPipeline False spec bbdo)
where ordering' = forceSomeOrdering ordering where ordering' = forceSomeOrdering ordering
@ -108,7 +118,7 @@ forceSomeOrdering _ = FirstTheWorst
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) () extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
extractFeaturesAndPValues spec bbdo = extractFeaturesAndPValues spec bbdo =
totalCounter totalCounter
.| featureExtractor spec bbdo .| rankedFeatureExtractor spec bbdo
.| uScoresCounter (bbdoMinFrequency bbdo) .| uScoresCounter (bbdoMinFrequency bbdo)
@ -128,17 +138,34 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) =
(pack $ printf "%0.8f" avg), (pack $ printf "%0.8f" avg),
(pack $ printf "%0.20f" p)] (pack $ printf "%0.20f" p)]
featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m () rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
featureExtractor spec bbdo = CC.map extract rankedFeatureExtractor spec bbdo = featureExtractor mTokenizer bbdo
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo)) .| CC.map snd
.| CC.map unwrapFeatures .| CC.map unwrapFeatures
.| CC.concat .| CC.concat
where extract (rank, line@(LineRecord _ _ _ _ score)) = where mTokenizer = gesTokenizer spec
LineWithPeggedFactors rank score $ getFeatures mTokenizer bbdo line
mTokenizer = gesTokenizer spec
unwrapFeatures (LineWithFeatures rank score fs) = Prelude.map (\f -> RankedFeature f rank score) fs unwrapFeatures (LineWithFeatures rank score fs) = Prelude.map (\f -> RankedFeature f rank score) fs
finalFeatures False _ = CC.map peggedToUnaryLine class FeatureSource a where
getScore :: a -> MetricValue
mainLineRecord :: a -> LineRecord
instance FeatureSource LineRecord where
getScore (LineRecord _ _ _ _ score) = score
mainLineRecord l = l
instance FeatureSource (LineRecord, LineRecord) where
getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA
mainLineRecord (_, l) = l
featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> ConduitT (Double, s) (s, LineWithFeatures) m ()
featureExtractor mTokenizer bbdo = CC.map extract
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
where extract (rank, line) =
(line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo (mainLineRecord line))
finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFeatures) m ()
finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p))
finalFeatures True minFreq = do finalFeatures True minFreq = do
ls <- CC.sinkList ls <- CC.sinkList
let unaryFeaturesFrequentEnough = S.fromList let unaryFeaturesFrequentEnough = S.fromList
@ -147,12 +174,13 @@ finalFeatures True minFreq = do
$ M.toList $ M.toList
$ M.fromListWith (+) $ M.fromListWith (+)
$ Data.List.concat $ Data.List.concat
$ Prelude.map (\(LineWithPeggedFactors _ _ fs) -> Prelude.map (\f -> (f, 1)) fs) ls $ Prelude.map (\(LineWithPeggedFactors _ _ fs) -> Prelude.map (\f -> (f, 1)) fs)
$ Prelude.map snd ls
(CC.yieldMany $ ls) .| CC.map (addCartesian unaryFeaturesFrequentEnough) (CC.yieldMany $ ls) .| CC.map (addCartesian unaryFeaturesFrequentEnough)
where addCartesian wanted (LineWithPeggedFactors rank score fs) = LineWithFeatures rank score where addCartesian wanted (l, LineWithPeggedFactors rank score fs) = (l, LineWithFeatures rank score
$ ((Prelude.map UnaryFeature fs) ++ $ ((Prelude.map UnaryFeature fs) ++
(cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs)) (cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs)))
filtreCartesian False = CC.map id filtreCartesian False = CC.map id
filtreCartesian True = CC.concatMapAccum step S.empty filtreCartesian True = CC.concatMapAccum step S.empty
@ -254,10 +282,14 @@ gobbleAndDo fun = do
l <- CC.sinkList l <- CC.sinkList
CC.yieldMany $ fun l CC.yieldMany $ fun l
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO () runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering otherOut spec consum
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) consum = CL.filter shouldBeShown
.| runFeatureFilter featureFilter spec bbdo
.| CL.map (encodeUtf8 . formatOutput)
.| CC.unlinesAscii
.| CC.stdout
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) = shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
outA /= outB && scoreA /= scoreB outA /= outB && scoreA /= scoreB
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [ formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [

View File

@ -89,6 +89,10 @@ optionsParser = GEvalOptions
<> short 'r' <> short 'r'
<> help "When in line-by-line or diff mode, sort the results from the best to the worst")) <> help "When in line-by-line or diff mode, sort the results from the best to the worst"))
<|> pure KeepTheOriginalOrder) <|> pure KeepTheOriginalOrder)
<*> optional (strOption
( long "filter"
<> metavar "FEATURE"
<> help "When in line-by-line or diff mode, show only items with a given feature"))
<*> specParser <*> specParser
<*> blackBoxDebuggingOptionsParser <*> blackBoxDebuggingOptionsParser
@ -258,39 +262,41 @@ attemptToReadOptsFromConfigFile args opts = do
runGEval'' :: GEvalOptions -> IO (Maybe [(SourceSpec, [MetricValue])]) runGEval'' :: GEvalOptions -> IO (Maybe [(SourceSpec, [MetricValue])])
runGEval'' opts = runGEval''' (geoSpecialCommand opts) runGEval'' opts = runGEval''' (geoSpecialCommand opts)
(geoResultOrdering opts) (geoResultOrdering opts)
(geoFilter opts)
(geoSpec opts) (geoSpec opts)
(geoBlackBoxDebugginsOptions opts) (geoBlackBoxDebugginsOptions opts)
runGEval''' :: Maybe GEvalSpecialCommand runGEval''' :: Maybe GEvalSpecialCommand
-> ResultOrdering -> ResultOrdering
-> Maybe String
-> GEvalSpecification -> GEvalSpecification
-> BlackBoxDebuggingOptions -> BlackBoxDebuggingOptions
-> IO (Maybe [(SourceSpec, [MetricValue])]) -> IO (Maybe [(SourceSpec, [MetricValue])])
runGEval''' Nothing _ spec _ = do runGEval''' Nothing _ _ spec _ = do
vals <- geval spec vals <- geval spec
return $ Just vals return $ Just vals
runGEval''' (Just Init) _ spec _ = do runGEval''' (Just Init) _ _ spec _ = do
initChallenge spec initChallenge spec
return Nothing return Nothing
runGEval''' (Just PrintVersion) _ _ _ = do runGEval''' (Just PrintVersion) _ _ _ _ = do
putStrLn ("geval " ++ showVersion version) putStrLn ("geval " ++ showVersion version)
return Nothing return Nothing
runGEval''' (Just LineByLine) ordering spec _ = do runGEval''' (Just LineByLine) ordering featureFilter spec bbdo = do
runLineByLine ordering spec runLineByLine ordering featureFilter spec bbdo
return Nothing return Nothing
runGEval''' (Just WorstFeatures) ordering spec bbdo = do runGEval''' (Just WorstFeatures) ordering _ spec bbdo = do
runWorstFeatures ordering spec bbdo runWorstFeatures ordering spec bbdo
return Nothing return Nothing
runGEval''' (Just (Diff otherOut)) ordering spec _ = do runGEval''' (Just (Diff otherOut)) ordering featureFilter spec bbdo = do
runDiff ordering otherOut spec runDiff ordering featureFilter otherOut spec bbdo
return Nothing return Nothing
runGEval''' (Just (MostWorseningFeatures otherOut)) ordering spec bbdo = do runGEval''' (Just (MostWorseningFeatures otherOut)) ordering _ spec bbdo = do
runMostWorseningFeatures ordering otherOut spec bbdo runMostWorseningFeatures ordering otherOut spec bbdo
return Nothing return Nothing
runGEval''' (Just JustTokenize) _ spec _ = do runGEval''' (Just JustTokenize) _ _ spec _ = do
justTokenize (gesTokenizer spec) justTokenize (gesTokenizer spec)
return Nothing return Nothing
runGEval''' (Just Submit) _ spec _ = do runGEval''' (Just Submit) _ _ spec _ = do
submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec) submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec)
return Nothing return Nothing