Trying to get references
This commit is contained in:
parent
780b7016c5
commit
b4ad774623
@ -90,6 +90,7 @@ library
|
|||||||
, integration
|
, integration
|
||||||
, Chart
|
, Chart
|
||||||
, Chart-cairo
|
, Chart-cairo
|
||||||
|
, errors
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable geval
|
executable geval
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
module GEval.Annotation
|
module GEval.Annotation
|
||||||
(parseAnnotations, Annotation(..),
|
(parseAnnotations, Annotation(..),
|
||||||
parseObtainedAnnotations, ObtainedAnnotation(..),
|
parseObtainedAnnotations, ObtainedAnnotation(..),
|
||||||
matchScore, getProbabilisticSoftCounts)
|
matchScore, getProbabilisticSoftCounts, intSetParser)
|
||||||
where
|
where
|
||||||
|
|
||||||
import qualified Data.IntSet as IS
|
import qualified Data.IntSet as IS
|
||||||
|
@ -291,7 +291,8 @@ data GEvalSpecification = GEvalSpecification
|
|||||||
gesTokenizer :: Maybe Tokenizer,
|
gesTokenizer :: Maybe Tokenizer,
|
||||||
gesGonitoHost :: Maybe String,
|
gesGonitoHost :: Maybe String,
|
||||||
gesToken :: Maybe String,
|
gesToken :: Maybe String,
|
||||||
gesGonitoGitAnnexRemote :: Maybe String}
|
gesGonitoGitAnnexRemote :: Maybe String,
|
||||||
|
gesReferences :: Maybe String }
|
||||||
|
|
||||||
|
|
||||||
gesMainMetric :: GEvalSpecification -> Metric
|
gesMainMetric :: GEvalSpecification -> Metric
|
||||||
@ -378,7 +379,8 @@ defaultGEvalSpecification = GEvalSpecification {
|
|||||||
gesTokenizer = Nothing,
|
gesTokenizer = Nothing,
|
||||||
gesGonitoHost = Nothing,
|
gesGonitoHost = Nothing,
|
||||||
gesToken = Nothing,
|
gesToken = Nothing,
|
||||||
gesGonitoGitAnnexRemote = Nothing }
|
gesGonitoGitAnnexRemote = Nothing,
|
||||||
|
gesReferences = Nothing}
|
||||||
|
|
||||||
isEmptyFile :: FilePath -> IO (Bool)
|
isEmptyFile :: FilePath -> IO (Bool)
|
||||||
isEmptyFile path = do
|
isEmptyFile path = do
|
||||||
|
@ -17,19 +17,32 @@ module GEval.FeatureExtractor
|
|||||||
ExistentialFactor(..),
|
ExistentialFactor(..),
|
||||||
AtomicFactor(..),
|
AtomicFactor(..),
|
||||||
FeatureNamespace(..),
|
FeatureNamespace(..),
|
||||||
|
References(..),
|
||||||
|
ReferencesData(..),
|
||||||
filterExistentialFactors)
|
filterExistentialFactors)
|
||||||
where
|
where
|
||||||
|
|
||||||
import Data.Text
|
import Data.Text
|
||||||
import Data.List
|
import Data.List
|
||||||
import Data.Monoid ((<>))
|
import Data.Monoid ((<>))
|
||||||
import Data.Maybe (catMaybes)
|
import Data.Maybe (catMaybes, fromMaybe)
|
||||||
import Text.Tokenizer
|
import Text.Tokenizer
|
||||||
import Text.WordShape
|
import Text.WordShape
|
||||||
import GEval.BlackBoxDebugging
|
import GEval.BlackBoxDebugging
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
import Text.Read (readMaybe)
|
import Text.Read (readMaybe)
|
||||||
|
|
||||||
|
import Control.Error.Util (hush)
|
||||||
|
|
||||||
|
import Data.Attoparsec.Text
|
||||||
|
import Data.Attoparsec.Combinator
|
||||||
|
import Control.Applicative
|
||||||
|
|
||||||
|
import qualified Data.HashMap.Strict as H
|
||||||
|
|
||||||
|
import GEval.Annotation
|
||||||
|
import qualified Data.IntSet as IS
|
||||||
|
|
||||||
data Feature = UnaryFeature PeggedExistentialFactor
|
data Feature = UnaryFeature PeggedExistentialFactor
|
||||||
| CartesianFeature PeggedExistentialFactor PeggedExistentialFactor
|
| CartesianFeature PeggedExistentialFactor PeggedExistentialFactor
|
||||||
| NumericalFeature FeatureNamespace NumericalType NumericalDirection
|
| NumericalFeature FeatureNamespace NumericalType NumericalDirection
|
||||||
@ -124,6 +137,38 @@ instance Show FeatureNamespace where
|
|||||||
show (FeatureNamespace namespace) = unpack namespace
|
show (FeatureNamespace namespace) = unpack namespace
|
||||||
show (FeatureTabbedNamespace namespace column) = ((unpack namespace) ++ "<" ++ (show column) ++ ">")
|
show (FeatureTabbedNamespace namespace column) = ((unpack namespace) ++ "<" ++ (show column) ++ ">")
|
||||||
|
|
||||||
|
data References = References (H.HashMap Integer Text)
|
||||||
|
|
||||||
|
data ReferencesData = ReferencesData {
|
||||||
|
referencesDataReferences :: References,
|
||||||
|
referencesDataCurrentId :: Maybe Integer }
|
||||||
|
|
||||||
|
data ReferencePointer = ReferencePointer Integer IS.IntSet
|
||||||
|
|
||||||
|
getReferenced :: References -> ReferencePointer -> Maybe Text
|
||||||
|
getReferenced (References references) (ReferencePointer refId indexSet) = case H.lookup refId references of
|
||||||
|
Just t -> Just (getFrag t indexSet)
|
||||||
|
Nothing -> Nothing
|
||||||
|
|
||||||
|
getDirectOrReferenced :: Maybe References -> Text -> Text
|
||||||
|
getDirectOrReferenced Nothing record = record
|
||||||
|
getDirectOrReferenced (Just references) record = case parseReferencePointer record of
|
||||||
|
Just pointer -> fromMaybe record (getReferenced references pointer)
|
||||||
|
Nothing -> record
|
||||||
|
|
||||||
|
getFrag :: Text -> IS.IntSet -> Text
|
||||||
|
getFrag t indexSet = pack $ Data.List.map (\ix -> index t ix) $ IS.toAscList indexSet
|
||||||
|
|
||||||
|
parseReferencePointer :: Text -> Maybe ReferencePointer
|
||||||
|
parseReferencePointer t = hush $ parseOnly (referencePointerParser <* endOfInput) t
|
||||||
|
|
||||||
|
referencePointerParser :: Parser ReferencePointer
|
||||||
|
referencePointerParser = do
|
||||||
|
refId <- decimal
|
||||||
|
string " "
|
||||||
|
indexSet <- intSetParser
|
||||||
|
return $ ReferencePointer refId indexSet
|
||||||
|
|
||||||
tokenizeForFactors :: (Maybe Tokenizer) -> Text -> [Text]
|
tokenizeForFactors :: (Maybe Tokenizer) -> Text -> [Text]
|
||||||
tokenizeForFactors Nothing t = Data.List.filter (not . Data.Text.null) $ split splitPred t
|
tokenizeForFactors Nothing t = Data.List.filter (not . Data.Text.null) $ split splitPred t
|
||||||
where splitPred c = c == ' ' || c == '\t' || c == ':'
|
where splitPred c = c == ' ' || c == '\t' || c == ':'
|
||||||
@ -150,19 +195,19 @@ extractSimpleFactors mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelud
|
|||||||
numericalFactor t = [NumericalFactor (readMaybe $ unpack t) (Data.Text.length t)]
|
numericalFactor t = [NumericalFactor (readMaybe $ unpack t) (Data.Text.length t)]
|
||||||
|
|
||||||
|
|
||||||
extractFactorsFromField :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> FeatureNamespace -> Text -> [PeggedFactor]
|
extractFactorsFromField :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> FeatureNamespace -> Text -> [PeggedFactor]
|
||||||
extractFactorsFromField mTokenizer bbdo namespace record =
|
extractFactorsFromField mTokenizer bbdo mReferenceData namespace record =
|
||||||
Prelude.map (\af -> PeggedFactor namespace af)
|
Prelude.map (\af -> PeggedFactor namespace af)
|
||||||
$ extractSimpleFactors mTokenizer bbdo record
|
$ extractSimpleFactors mTokenizer bbdo (getDirectOrReferenced (referencesDataReferences <$> mReferenceData) record)
|
||||||
|
|
||||||
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor]
|
|
||||||
extractFactors mTokenizer bbdo namespace record =
|
|
||||||
extractFactorsFromField mTokenizer bbdo (FeatureNamespace namespace) record
|
|
||||||
|
|
||||||
extractFactorsFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor]
|
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> [PeggedFactor]
|
||||||
extractFactorsFromTabbed mTokenizer bbdo namespace record =
|
extractFactors mTokenizer bbdo mReferencesData namespace record = extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureNamespace namespace) record
|
||||||
|
|
||||||
|
extractFactorsFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> [PeggedFactor]
|
||||||
|
extractFactorsFromTabbed mTokenizer bbdo mReferencesData namespace record =
|
||||||
Data.List.concat
|
Data.List.concat
|
||||||
$ Prelude.map (\(n, t) -> extractFactorsFromField mTokenizer bbdo (FeatureTabbedNamespace namespace n) t)
|
$ Prelude.map (\(n, t) -> extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureTabbedNamespace namespace n) t)
|
||||||
$ Prelude.zip [1..] (splitOn "\t" record)
|
$ Prelude.zip [1..] (splitOn "\t" record)
|
||||||
|
|
||||||
addCartesianFactors :: BlackBoxDebuggingOptions -> [LineWithPeggedFactors] -> [LineWithFactors]
|
addCartesianFactors :: BlackBoxDebuggingOptions -> [LineWithPeggedFactors] -> [LineWithFactors]
|
||||||
|
@ -61,16 +61,33 @@ import Statistics.Distribution (cumulative)
|
|||||||
import Statistics.Distribution.Normal (normalDistr)
|
import Statistics.Distribution.Normal (normalDistr)
|
||||||
import Data.Statistics.Kendall (kendallZ)
|
import Data.Statistics.Kendall (kendallZ)
|
||||||
|
|
||||||
|
import Data.Conduit.Binary (sourceFile)
|
||||||
|
|
||||||
|
import qualified Data.HashMap.Strict as H
|
||||||
import qualified Data.Map.Strict as M
|
import qualified Data.Map.Strict as M
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
|
|
||||||
data LineRecord = LineRecord Text Text Text Word32 MetricValue
|
data LineRecord = LineRecord Text Text Text Word32 MetricValue
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show)
|
||||||
|
|
||||||
|
readReferences :: FilePath -> IO References
|
||||||
|
readReferences referencesFilePath = do
|
||||||
|
h <- runResourceT $ runConduit $ (sourceFile referencesFilePath
|
||||||
|
.| CC.decodeUtf8Lenient
|
||||||
|
.| CT.lines
|
||||||
|
.| CC.map parseReferenceEntry
|
||||||
|
.| CC.foldl (\h (refId, t) -> H.insert refId t h) H.empty)
|
||||||
|
return $ References h
|
||||||
|
|
||||||
|
parseReferenceEntry :: Text -> (Integer, Text)
|
||||||
|
parseReferenceEntry line = (read $ unpack refId, t)
|
||||||
|
where [refId, t] = splitOn "\t" line
|
||||||
|
|
||||||
|
|
||||||
runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum
|
runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum
|
||||||
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
|
where consum :: Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
|
||||||
consum = (runFeatureFilter featureFilter spec bbdo .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
|
consum = (\mReferences -> (runFeatureFilter featureFilter spec bbdo mReferences .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout))
|
||||||
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
|
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
|
||||||
formatScore score,
|
formatScore score,
|
||||||
escapeTabs inp,
|
escapeTabs inp,
|
||||||
@ -79,10 +96,10 @@ runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized orderi
|
|||||||
formatScore :: MetricValue -> Text
|
formatScore :: MetricValue -> Text
|
||||||
formatScore = Data.Text.pack . printf "%f"
|
formatScore = Data.Text.pack . printf "%f"
|
||||||
|
|
||||||
runFeatureFilter :: (Monad m, FeatureSource s) => Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT s s m ()
|
runFeatureFilter :: (Monad m, FeatureSource s) => Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT s s m ()
|
||||||
runFeatureFilter Nothing _ _ = doNothing
|
runFeatureFilter Nothing _ _ _ = doNothing
|
||||||
runFeatureFilter (Just feature) spec bbdo = CC.map (\l -> (fakeRank, l))
|
runFeatureFilter (Just feature) spec bbdo mReferences = CC.map (\l -> (fakeRank, l))
|
||||||
.| featureExtractor mTokenizer bbdo
|
.| featureExtractor mTokenizer bbdo mReferences
|
||||||
.| CC.filter (checkFeature feature)
|
.| CC.filter (checkFeature feature)
|
||||||
.| CC.map fst
|
.| CC.map fst
|
||||||
where mTokenizer = gesTokenizer spec
|
where mTokenizer = gesTokenizer spec
|
||||||
@ -90,14 +107,14 @@ runFeatureFilter (Just feature) spec bbdo = CC.map (\l -> (fakeRank, l))
|
|||||||
checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs)
|
checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs)
|
||||||
|
|
||||||
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (worstFeaturesPipeline False spec bbdo)
|
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences)
|
||||||
where ordering' = forceSomeOrdering ordering
|
where ordering' = forceSomeOrdering ordering
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
worstFeaturesPipeline :: Bool -> GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT LineRecord Void (ResourceT IO) ()
|
worstFeaturesPipeline :: Bool -> GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
|
||||||
worstFeaturesPipeline reversed spec bbdo = rank (lessByMetric reversed $ gesMainMetric spec)
|
worstFeaturesPipeline reversed spec bbdo mReferences = rank (lessByMetric reversed $ gesMainMetric spec)
|
||||||
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo)
|
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo mReferences)
|
||||||
.| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting
|
.| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting
|
||||||
.| gobbleAndDo (sortBy featureOrder)
|
.| gobbleAndDo (sortBy featureOrder)
|
||||||
.| filtreCartesian (bbdoCartesian bbdo)
|
.| filtreCartesian (bbdoCartesian bbdo)
|
||||||
@ -120,10 +137,10 @@ forceSomeOrdering :: ResultOrdering -> ResultOrdering
|
|||||||
forceSomeOrdering FirstTheBest = FirstTheBest
|
forceSomeOrdering FirstTheBest = FirstTheBest
|
||||||
forceSomeOrdering _ = FirstTheWorst
|
forceSomeOrdering _ = FirstTheWorst
|
||||||
|
|
||||||
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
|
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
|
||||||
extractFeaturesAndPValues spec bbdo =
|
extractFeaturesAndPValues spec bbdo mReferences =
|
||||||
totalCounter
|
totalCounter
|
||||||
.| rankedFeatureExtractor spec bbdo
|
.| rankedFeatureExtractor spec bbdo mReferences
|
||||||
.| uScoresCounter (bbdoMinFrequency bbdo)
|
.| uScoresCounter (bbdoMinFrequency bbdo)
|
||||||
|
|
||||||
|
|
||||||
@ -143,8 +160,8 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) =
|
|||||||
(pack $ printf "%0.8f" avg),
|
(pack $ printf "%0.8f" avg),
|
||||||
(pack $ printf "%0.20f" p)]
|
(pack $ printf "%0.20f" p)]
|
||||||
|
|
||||||
rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFactor m ()
|
rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, LineRecord) RankedFactor m ()
|
||||||
rankedFeatureExtractor spec bbdo = featureExtractor mTokenizer bbdo
|
rankedFeatureExtractor spec bbdo mReferences = featureExtractor mTokenizer bbdo mReferences
|
||||||
.| CC.map snd
|
.| CC.map snd
|
||||||
.| CC.map unwrapFeatures
|
.| CC.map unwrapFeatures
|
||||||
.| CC.concat
|
.| CC.concat
|
||||||
@ -163,11 +180,11 @@ instance FeatureSource (LineRecord, LineRecord) where
|
|||||||
getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA
|
getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA
|
||||||
mainLineRecord (_, l) = l
|
mainLineRecord (_, l) = l
|
||||||
|
|
||||||
featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> ConduitT (Double, s) (s, LineWithFactors) m ()
|
featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, s) (s, LineWithFactors) m ()
|
||||||
featureExtractor mTokenizer bbdo = CC.map extract
|
featureExtractor mTokenizer bbdo mReferences = CC.map extract
|
||||||
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
|
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
|
||||||
where extract (rank, line) =
|
where extract (rank, line) =
|
||||||
(line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo (mainLineRecord line))
|
(line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo mReferences (mainLineRecord line))
|
||||||
|
|
||||||
finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFactors) m ()
|
finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFactors) m ()
|
||||||
finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p))
|
finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p))
|
||||||
@ -199,12 +216,17 @@ filtreCartesian True = CC.concatMapAccum step S.empty
|
|||||||
peggedToUnaryLine :: LineWithPeggedFactors -> LineWithFactors
|
peggedToUnaryLine :: LineWithPeggedFactors -> LineWithFactors
|
||||||
peggedToUnaryLine (LineWithPeggedFactors rank score fs) = LineWithFactors rank score (Prelude.map UnaryFactor fs)
|
peggedToUnaryLine (LineWithPeggedFactors rank score fs) = LineWithFactors rank score (Prelude.map UnaryFactor fs)
|
||||||
|
|
||||||
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> LineRecord -> [PeggedFactor]
|
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> LineRecord -> [PeggedFactor]
|
||||||
getFeatures mTokenizer bbdo (LineRecord inLine expLine outLine _ _) =
|
getFeatures mTokenizer bbdo mReferences (LineRecord inLine expLine outLine _ _) =
|
||||||
Data.List.concat [
|
Data.List.concat [
|
||||||
extractFactors mTokenizer bbdo "exp" expLine,
|
extractFactors mTokenizer bbdo mReferencesData "exp" expLine,
|
||||||
extractFactorsFromTabbed mTokenizer bbdo "in" inLine,
|
extractFactorsFromTabbed mTokenizer bbdo mReferencesData "in" inLine,
|
||||||
extractFactors mTokenizer bbdo "out" outLine]
|
extractFactors mTokenizer bbdo mReferencesData "out" outLine]
|
||||||
|
where mReferencesData = case mReferences of
|
||||||
|
Just references -> Just $ ReferencesData {
|
||||||
|
referencesDataReferences = references,
|
||||||
|
referencesDataCurrentId = Nothing }
|
||||||
|
Nothing -> Nothing
|
||||||
|
|
||||||
data FeatureAggregate = ExistentialFactorAggregate Double MetricValue Integer
|
data FeatureAggregate = ExistentialFactorAggregate Double MetricValue Integer
|
||||||
| NumericalValueAggregate [Double] [MetricValue] [Int] [MetricValue]
|
| NumericalValueAggregate [Double] [MetricValue] [Int] [MetricValue]
|
||||||
@ -327,10 +349,15 @@ lessByMetric reversed metric = lessByMetric' reversed (getMetricOrdering metric)
|
|||||||
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
|
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
|
||||||
scoreA < scoreB)
|
scoreA < scoreB)
|
||||||
|
|
||||||
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
|
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> (Maybe References -> ConduitT LineRecord Void (ResourceT IO) a) -> IO a
|
||||||
runLineByLineGeneralized ordering spec consum = do
|
runLineByLineGeneralized ordering spec consum = do
|
||||||
|
mReferences <- case gesReferences spec of
|
||||||
|
Just referencesFp -> do
|
||||||
|
references <- readReferences referencesFp
|
||||||
|
return $ Just references
|
||||||
|
Nothing -> return Nothing
|
||||||
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec
|
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec
|
||||||
gevalLineByLineCore metric mSelector preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
|
gevalLineByLineCore metric mSelector preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum mReferences)
|
||||||
where metric = gesMainMetric spec
|
where metric = gesMainMetric spec
|
||||||
mSelector = gesSelector spec
|
mSelector = gesSelector spec
|
||||||
preprocess = gesPreprocess spec
|
preprocess = gesPreprocess spec
|
||||||
@ -340,6 +367,7 @@ runLineByLineGeneralized ordering spec consum = do
|
|||||||
sortOrder FirstTheBest TheLowerTheBetter = compareScores
|
sortOrder FirstTheBest TheLowerTheBetter = compareScores
|
||||||
sortOrder _ _ = flip compareScores
|
sortOrder _ _ = flip compareScores
|
||||||
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
|
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
|
||||||
|
mReferences = Nothing
|
||||||
|
|
||||||
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
|
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
|
||||||
gobbleAndDo fun = do
|
gobbleAndDo fun = do
|
||||||
@ -348,9 +376,9 @@ gobbleAndDo fun = do
|
|||||||
|
|
||||||
runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering otherOut spec consum
|
runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering otherOut spec consum
|
||||||
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
where consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
||||||
consum = CL.filter shouldBeShown
|
consum = \mReferences -> CL.filter shouldBeShown
|
||||||
.| runFeatureFilter featureFilter spec bbdo
|
.| runFeatureFilter featureFilter spec bbdo mReferences
|
||||||
.| CL.map (encodeUtf8 . formatOutput)
|
.| CL.map (encodeUtf8 . formatOutput)
|
||||||
.| CC.unlinesAscii
|
.| CC.unlinesAscii
|
||||||
.| CC.stdout
|
.| CC.stdout
|
||||||
@ -372,15 +400,16 @@ runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized orderi
|
|||||||
KeepTheOriginalOrder -> False
|
KeepTheOriginalOrder -> False
|
||||||
FirstTheWorst -> False
|
FirstTheWorst -> False
|
||||||
FirstTheBest -> True
|
FirstTheBest -> True
|
||||||
consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
||||||
consum = CC.map prepareFakeLineRecord
|
consum = \mReferences -> CC.map prepareFakeLineRecord
|
||||||
.| (worstFeaturesPipeline reversed spec bbdo)
|
.| (worstFeaturesPipeline reversed spec bbdo mReferences)
|
||||||
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
|
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
|
||||||
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
|
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
|
||||||
LineRecord inp exp out c (score - scorePrev)
|
LineRecord inp exp out c (score - scorePrev)
|
||||||
|
mReferences = Nothing
|
||||||
|
|
||||||
|
|
||||||
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
|
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> (Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a) -> IO a
|
||||||
runDiffGeneralized ordering otherOut spec consum = do
|
runDiffGeneralized ordering otherOut spec consum = do
|
||||||
(inputSource, expectedSource, outSource) <- checkAndGetFilesSingleOut True spec
|
(inputSource, expectedSource, outSource) <- checkAndGetFilesSingleOut True spec
|
||||||
ooss <- getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv" otherOut
|
ooss <- getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv" otherOut
|
||||||
@ -394,7 +423,7 @@ runDiffGeneralized ordering otherOut spec consum = do
|
|||||||
runResourceT $ runConduit $
|
runResourceT $ runConduit $
|
||||||
((getZipSource $ (,)
|
((getZipSource $ (,)
|
||||||
<$> ZipSource sourceA
|
<$> ZipSource sourceA
|
||||||
<*> ZipSource sourceB) .| sorter ordering .| consum)
|
<*> ZipSource sourceB) .| sorter ordering .| consum mReferences)
|
||||||
where metric = gesMainMetric spec
|
where metric = gesMainMetric spec
|
||||||
preprocess = gesPreprocess spec
|
preprocess = gesPreprocess spec
|
||||||
mSelector = gesSelector spec
|
mSelector = gesSelector spec
|
||||||
@ -406,6 +435,7 @@ runDiffGeneralized ordering otherOut spec consum = do
|
|||||||
compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
|
compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
|
||||||
((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
|
((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
|
||||||
= (n1 - o1) `compare` (n2 - o2)
|
= (n1 - o1) `compare` (n2 - o2)
|
||||||
|
mReferences = Nothing
|
||||||
|
|
||||||
|
|
||||||
escapeTabs :: Text -> Text
|
escapeTabs :: Text -> Text
|
||||||
|
@ -177,6 +177,12 @@ specParser = GEvalSpecification
|
|||||||
<> help "Submit ONLY: Specification of a git-annex remote."
|
<> help "Submit ONLY: Specification of a git-annex remote."
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
<*> ( optional . strOption $
|
||||||
|
( long "references"
|
||||||
|
<> metavar "FILE"
|
||||||
|
<> help "External text file referenced"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
defaultMinFrequency :: Integer
|
defaultMinFrequency :: Integer
|
||||||
defaultMinFrequency = 1
|
defaultMinFrequency = 1
|
||||||
|
@ -401,15 +401,16 @@ main = hspec $ do
|
|||||||
gesTokenizer = Nothing,
|
gesTokenizer = Nothing,
|
||||||
gesGonitoHost = Nothing,
|
gesGonitoHost = Nothing,
|
||||||
gesToken = Nothing,
|
gesToken = Nothing,
|
||||||
gesGonitoGitAnnexRemote = Nothing}
|
gesGonitoGitAnnexRemote = Nothing,
|
||||||
|
gesReferences = Nothing }
|
||||||
it "simple test" $ do
|
it "simple test" $ do
|
||||||
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume
|
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume)
|
||||||
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
|
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
|
||||||
"bar",
|
"bar",
|
||||||
"baz",
|
"baz",
|
||||||
"baq"]
|
"baq"]
|
||||||
it "test sorting" $ do
|
it "test sorting" $ do
|
||||||
results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume
|
results <- runLineByLineGeneralized FirstTheWorst sampleChallenge (const Data.Conduit.List.consume)
|
||||||
Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
|
Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
|
||||||
describe "handle --alt-metric option" $ do
|
describe "handle --alt-metric option" $ do
|
||||||
it "accuracy instead of likelihood" $ do
|
it "accuracy instead of likelihood" $ do
|
||||||
@ -546,7 +547,7 @@ main = hspec $ do
|
|||||||
bbdoCartesian = False,
|
bbdoCartesian = False,
|
||||||
bbdoMinCartesianFrequency = Nothing,
|
bbdoMinCartesianFrequency = Nothing,
|
||||||
bbdoConsiderNumericalFeatures = True }
|
bbdoConsiderNumericalFeatures = True }
|
||||||
(sort $ extractFactorsFromTabbed Nothing bbdo "in" "I like this\t34.3\ttests") `shouldBe` [
|
(sort $ extractFactorsFromTabbed Nothing bbdo Nothing "in" "I like this\t34.3\ttests") `shouldBe` [
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
||||||
|
Loading…
Reference in New Issue
Block a user