Add --filtre option

This commit is contained in:
Filip Gralinski 2019-01-14 09:29:14 +01:00
parent 003a8106bb
commit 1aee476434
3 changed files with 70 additions and 31 deletions

View File

@ -278,6 +278,7 @@ data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
data GEvalOptions = GEvalOptions
{ geoSpecialCommand :: Maybe GEvalSpecialCommand,
geoResultOrdering :: ResultOrdering,
geoFilter :: Maybe String,
geoSpec :: GEvalSpecification,
geoBlackBoxDebugginsOptions :: BlackBoxDebuggingOptions }

View File

@ -63,10 +63,10 @@ import qualified Data.Set as S
data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show)
runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
consum = (runFeatureFilter featureFilter spec bbdo .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score,
escapeTabs inp,
@ -75,6 +75,16 @@ runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f"
runFeatureFilter :: (Monad m, FeatureSource s) => Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT s s m ()
runFeatureFilter Nothing _ _ = doNothing
runFeatureFilter (Just feature) spec bbdo = CC.map (\l -> (fakeRank, l))
.| featureExtractor mTokenizer bbdo
.| CC.filter (checkFeature feature)
.| CC.map fst
where mTokenizer = gesTokenizer spec
fakeRank = 0.0
checkFeature feature (_, LineWithFeatures _ _ fs) = feature `elem` (Prelude.map show fs)
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (worstFeaturesPipeline False spec bbdo)
where ordering' = forceSomeOrdering ordering
@ -108,7 +118,7 @@ forceSomeOrdering _ = FirstTheWorst
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
extractFeaturesAndPValues spec bbdo =
totalCounter
.| featureExtractor spec bbdo
.| rankedFeatureExtractor spec bbdo
.| uScoresCounter (bbdoMinFrequency bbdo)
@ -128,17 +138,34 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) =
(pack $ printf "%0.8f" avg),
(pack $ printf "%0.20f" p)]
featureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
featureExtractor spec bbdo = CC.map extract
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFeature m ()
rankedFeatureExtractor spec bbdo = featureExtractor mTokenizer bbdo
.| CC.map snd
.| CC.map unwrapFeatures
.| CC.concat
where extract (rank, line@(LineRecord _ _ _ _ score)) =
LineWithPeggedFactors rank score $ getFeatures mTokenizer bbdo line
mTokenizer = gesTokenizer spec
where mTokenizer = gesTokenizer spec
unwrapFeatures (LineWithFeatures rank score fs) = Prelude.map (\f -> RankedFeature f rank score) fs
finalFeatures False _ = CC.map peggedToUnaryLine
class FeatureSource a where
getScore :: a -> MetricValue
mainLineRecord :: a -> LineRecord
instance FeatureSource LineRecord where
getScore (LineRecord _ _ _ _ score) = score
mainLineRecord l = l
instance FeatureSource (LineRecord, LineRecord) where
getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA
mainLineRecord (_, l) = l
featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> ConduitT (Double, s) (s, LineWithFeatures) m ()
featureExtractor mTokenizer bbdo = CC.map extract
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
where extract (rank, line) =
(line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo (mainLineRecord line))
finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFeatures) m ()
finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p))
finalFeatures True minFreq = do
ls <- CC.sinkList
let unaryFeaturesFrequentEnough = S.fromList
@ -147,12 +174,13 @@ finalFeatures True minFreq = do
$ M.toList
$ M.fromListWith (+)
$ Data.List.concat
$ Prelude.map (\(LineWithPeggedFactors _ _ fs) -> Prelude.map (\f -> (f, 1)) fs) ls
$ Prelude.map (\(LineWithPeggedFactors _ _ fs) -> Prelude.map (\f -> (f, 1)) fs)
$ Prelude.map snd ls
(CC.yieldMany $ ls) .| CC.map (addCartesian unaryFeaturesFrequentEnough)
where addCartesian wanted (LineWithPeggedFactors rank score fs) = LineWithFeatures rank score
where addCartesian wanted (l, LineWithPeggedFactors rank score fs) = (l, LineWithFeatures rank score
$ ((Prelude.map UnaryFeature fs) ++
(cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs))
(cartesianFeatures $ Prelude.filter ((flip S.member) wanted) fs)))
filtreCartesian False = CC.map id
filtreCartesian True = CC.concatMapAccum step S.empty
@ -254,10 +282,14 @@ gobbleAndDo fun = do
l <- CC.sinkList
CC.yieldMany $ fun l
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering otherOut spec consum
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
consum = CL.filter shouldBeShown
.| runFeatureFilter featureFilter spec bbdo
.| CL.map (encodeUtf8 . formatOutput)
.| CC.unlinesAscii
.| CC.stdout
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
outA /= outB && scoreA /= scoreB
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [

View File

@ -89,6 +89,10 @@ optionsParser = GEvalOptions
<> short 'r'
<> help "When in line-by-line or diff mode, sort the results from the best to the worst"))
<|> pure KeepTheOriginalOrder)
<*> optional (strOption
( long "filter"
<> metavar "FEATURE"
<> help "When in line-by-line or diff mode, show only items with a given feature"))
<*> specParser
<*> blackBoxDebuggingOptionsParser
@ -258,39 +262,41 @@ attemptToReadOptsFromConfigFile args opts = do
runGEval'' :: GEvalOptions -> IO (Maybe [(SourceSpec, [MetricValue])])
runGEval'' opts = runGEval''' (geoSpecialCommand opts)
(geoResultOrdering opts)
(geoFilter opts)
(geoSpec opts)
(geoBlackBoxDebugginsOptions opts)
runGEval''' :: Maybe GEvalSpecialCommand
-> ResultOrdering
-> Maybe String
-> GEvalSpecification
-> BlackBoxDebuggingOptions
-> IO (Maybe [(SourceSpec, [MetricValue])])
runGEval''' Nothing _ spec _ = do
runGEval''' Nothing _ _ spec _ = do
vals <- geval spec
return $ Just vals
runGEval''' (Just Init) _ spec _ = do
runGEval''' (Just Init) _ _ spec _ = do
initChallenge spec
return Nothing
runGEval''' (Just PrintVersion) _ _ _ = do
runGEval''' (Just PrintVersion) _ _ _ _ = do
putStrLn ("geval " ++ showVersion version)
return Nothing
runGEval''' (Just LineByLine) ordering spec _ = do
runLineByLine ordering spec
runGEval''' (Just LineByLine) ordering featureFilter spec bbdo = do
runLineByLine ordering featureFilter spec bbdo
return Nothing
runGEval''' (Just WorstFeatures) ordering spec bbdo = do
runGEval''' (Just WorstFeatures) ordering _ spec bbdo = do
runWorstFeatures ordering spec bbdo
return Nothing
runGEval''' (Just (Diff otherOut)) ordering spec _ = do
runDiff ordering otherOut spec
runGEval''' (Just (Diff otherOut)) ordering featureFilter spec bbdo = do
runDiff ordering featureFilter otherOut spec bbdo
return Nothing
runGEval''' (Just (MostWorseningFeatures otherOut)) ordering spec bbdo = do
runGEval''' (Just (MostWorseningFeatures otherOut)) ordering _ spec bbdo = do
runMostWorseningFeatures ordering otherOut spec bbdo
return Nothing
runGEval''' (Just JustTokenize) _ spec _ = do
runGEval''' (Just JustTokenize) _ _ spec _ = do
justTokenize (gesTokenizer spec)
return Nothing
runGEval''' (Just Submit) _ spec _ = do
runGEval''' (Just Submit) _ _ spec _ = do
submit (gesGonitoHost spec) (gesToken spec) (gesGonitoGitAnnexRemote spec)
return Nothing