Trying to get references

This commit is contained in:
Filip Graliński 2019-05-23 16:16:05 +02:00
parent 780b7016c5
commit b4ad774623
7 changed files with 145 additions and 60 deletions

View File

@ -90,6 +90,7 @@ library
, integration , integration
, Chart , Chart
, Chart-cairo , Chart-cairo
, errors
default-language: Haskell2010 default-language: Haskell2010
executable geval executable geval

View File

@ -3,7 +3,7 @@
module GEval.Annotation module GEval.Annotation
(parseAnnotations, Annotation(..), (parseAnnotations, Annotation(..),
parseObtainedAnnotations, ObtainedAnnotation(..), parseObtainedAnnotations, ObtainedAnnotation(..),
matchScore, getProbabilisticSoftCounts) matchScore, getProbabilisticSoftCounts, intSetParser)
where where
import qualified Data.IntSet as IS import qualified Data.IntSet as IS

View File

@ -291,7 +291,8 @@ data GEvalSpecification = GEvalSpecification
gesTokenizer :: Maybe Tokenizer, gesTokenizer :: Maybe Tokenizer,
gesGonitoHost :: Maybe String, gesGonitoHost :: Maybe String,
gesToken :: Maybe String, gesToken :: Maybe String,
gesGonitoGitAnnexRemote :: Maybe String} gesGonitoGitAnnexRemote :: Maybe String,
gesReferences :: Maybe String }
gesMainMetric :: GEvalSpecification -> Metric gesMainMetric :: GEvalSpecification -> Metric
@ -378,7 +379,8 @@ defaultGEvalSpecification = GEvalSpecification {
gesTokenizer = Nothing, gesTokenizer = Nothing,
gesGonitoHost = Nothing, gesGonitoHost = Nothing,
gesToken = Nothing, gesToken = Nothing,
gesGonitoGitAnnexRemote = Nothing } gesGonitoGitAnnexRemote = Nothing,
gesReferences = Nothing}
isEmptyFile :: FilePath -> IO (Bool) isEmptyFile :: FilePath -> IO (Bool)
isEmptyFile path = do isEmptyFile path = do

View File

@ -17,19 +17,32 @@ module GEval.FeatureExtractor
ExistentialFactor(..), ExistentialFactor(..),
AtomicFactor(..), AtomicFactor(..),
FeatureNamespace(..), FeatureNamespace(..),
References(..),
ReferencesData(..),
filterExistentialFactors) filterExistentialFactors)
where where
import Data.Text import Data.Text
import Data.List import Data.List
import Data.Monoid ((<>)) import Data.Monoid ((<>))
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes, fromMaybe)
import Text.Tokenizer import Text.Tokenizer
import Text.WordShape import Text.WordShape
import GEval.BlackBoxDebugging import GEval.BlackBoxDebugging
import GEval.Common import GEval.Common
import Text.Read (readMaybe) import Text.Read (readMaybe)
import Control.Error.Util (hush)
import Data.Attoparsec.Text
import Data.Attoparsec.Combinator
import Control.Applicative
import qualified Data.HashMap.Strict as H
import GEval.Annotation
import qualified Data.IntSet as IS
data Feature = UnaryFeature PeggedExistentialFactor data Feature = UnaryFeature PeggedExistentialFactor
| CartesianFeature PeggedExistentialFactor PeggedExistentialFactor | CartesianFeature PeggedExistentialFactor PeggedExistentialFactor
| NumericalFeature FeatureNamespace NumericalType NumericalDirection | NumericalFeature FeatureNamespace NumericalType NumericalDirection
@ -124,6 +137,38 @@ instance Show FeatureNamespace where
show (FeatureNamespace namespace) = unpack namespace show (FeatureNamespace namespace) = unpack namespace
show (FeatureTabbedNamespace namespace column) = ((unpack namespace) ++ "<" ++ (show column) ++ ">") show (FeatureTabbedNamespace namespace column) = ((unpack namespace) ++ "<" ++ (show column) ++ ">")
data References = References (H.HashMap Integer Text)
data ReferencesData = ReferencesData {
referencesDataReferences :: References,
referencesDataCurrentId :: Maybe Integer }
data ReferencePointer = ReferencePointer Integer IS.IntSet
getReferenced :: References -> ReferencePointer -> Maybe Text
getReferenced (References references) (ReferencePointer refId indexSet) = case H.lookup refId references of
Just t -> Just (getFrag t indexSet)
Nothing -> Nothing
getDirectOrReferenced :: Maybe References -> Text -> Text
getDirectOrReferenced Nothing record = record
getDirectOrReferenced (Just references) record = case parseReferencePointer record of
Just pointer -> fromMaybe record (getReferenced references pointer)
Nothing -> record
getFrag :: Text -> IS.IntSet -> Text
getFrag t indexSet = pack $ Data.List.map (\ix -> index t ix) $ IS.toAscList indexSet
parseReferencePointer :: Text -> Maybe ReferencePointer
parseReferencePointer t = hush $ parseOnly (referencePointerParser <* endOfInput) t
referencePointerParser :: Parser ReferencePointer
referencePointerParser = do
refId <- decimal
string " "
indexSet <- intSetParser
return $ ReferencePointer refId indexSet
tokenizeForFactors :: (Maybe Tokenizer) -> Text -> [Text] tokenizeForFactors :: (Maybe Tokenizer) -> Text -> [Text]
tokenizeForFactors Nothing t = Data.List.filter (not . Data.Text.null) $ split splitPred t tokenizeForFactors Nothing t = Data.List.filter (not . Data.Text.null) $ split splitPred t
where splitPred c = c == ' ' || c == '\t' || c == ':' where splitPred c = c == ' ' || c == '\t' || c == ':'
@ -150,19 +195,19 @@ extractSimpleFactors mTokenizer bbdo t = Data.List.concat $ (Prelude.map (Prelud
numericalFactor t = [NumericalFactor (readMaybe $ unpack t) (Data.Text.length t)] numericalFactor t = [NumericalFactor (readMaybe $ unpack t) (Data.Text.length t)]
extractFactorsFromField :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> FeatureNamespace -> Text -> [PeggedFactor] extractFactorsFromField :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> FeatureNamespace -> Text -> [PeggedFactor]
extractFactorsFromField mTokenizer bbdo namespace record = extractFactorsFromField mTokenizer bbdo mReferenceData namespace record =
Prelude.map (\af -> PeggedFactor namespace af) Prelude.map (\af -> PeggedFactor namespace af)
$ extractSimpleFactors mTokenizer bbdo record $ extractSimpleFactors mTokenizer bbdo (getDirectOrReferenced (referencesDataReferences <$> mReferenceData) record)
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor]
extractFactors mTokenizer bbdo namespace record =
extractFactorsFromField mTokenizer bbdo (FeatureNamespace namespace) record
extractFactorsFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [PeggedFactor] extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> [PeggedFactor]
extractFactorsFromTabbed mTokenizer bbdo namespace record = extractFactors mTokenizer bbdo mReferencesData namespace record = extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureNamespace namespace) record
extractFactorsFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> [PeggedFactor]
extractFactorsFromTabbed mTokenizer bbdo mReferencesData namespace record =
Data.List.concat Data.List.concat
$ Prelude.map (\(n, t) -> extractFactorsFromField mTokenizer bbdo (FeatureTabbedNamespace namespace n) t) $ Prelude.map (\(n, t) -> extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureTabbedNamespace namespace n) t)
$ Prelude.zip [1..] (splitOn "\t" record) $ Prelude.zip [1..] (splitOn "\t" record)
addCartesianFactors :: BlackBoxDebuggingOptions -> [LineWithPeggedFactors] -> [LineWithFactors] addCartesianFactors :: BlackBoxDebuggingOptions -> [LineWithPeggedFactors] -> [LineWithFactors]

View File

@ -61,16 +61,33 @@ import Statistics.Distribution (cumulative)
import Statistics.Distribution.Normal (normalDistr) import Statistics.Distribution.Normal (normalDistr)
import Data.Statistics.Kendall (kendallZ) import Data.Statistics.Kendall (kendallZ)
import Data.Conduit.Binary (sourceFile)
import qualified Data.HashMap.Strict as H
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import qualified Data.Set as S import qualified Data.Set as S
data LineRecord = LineRecord Text Text Text Word32 MetricValue data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show) deriving (Eq, Show)
readReferences :: FilePath -> IO References
readReferences referencesFilePath = do
h <- runResourceT $ runConduit $ (sourceFile referencesFilePath
.| CC.decodeUtf8Lenient
.| CT.lines
.| CC.map parseReferenceEntry
.| CC.foldl (\h (refId, t) -> H.insert refId t h) H.empty)
return $ References h
parseReferenceEntry :: Text -> (Integer, Text)
parseReferenceEntry line = (read $ unpack refId, t)
where [refId, t] = splitOn "\t" line
runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum
where consum :: ConduitT LineRecord Void (ResourceT IO) () where consum :: Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
consum = (runFeatureFilter featureFilter spec bbdo .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) consum = (\mReferences -> (runFeatureFilter featureFilter spec bbdo mReferences .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout))
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [ formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score, formatScore score,
escapeTabs inp, escapeTabs inp,
@ -79,10 +96,10 @@ runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized orderi
formatScore :: MetricValue -> Text formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f" formatScore = Data.Text.pack . printf "%f"
runFeatureFilter :: (Monad m, FeatureSource s) => Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT s s m () runFeatureFilter :: (Monad m, FeatureSource s) => Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT s s m ()
runFeatureFilter Nothing _ _ = doNothing runFeatureFilter Nothing _ _ _ = doNothing
runFeatureFilter (Just feature) spec bbdo = CC.map (\l -> (fakeRank, l)) runFeatureFilter (Just feature) spec bbdo mReferences = CC.map (\l -> (fakeRank, l))
.| featureExtractor mTokenizer bbdo .| featureExtractor mTokenizer bbdo mReferences
.| CC.filter (checkFeature feature) .| CC.filter (checkFeature feature)
.| CC.map fst .| CC.map fst
where mTokenizer = gesTokenizer spec where mTokenizer = gesTokenizer spec
@ -90,14 +107,14 @@ runFeatureFilter (Just feature) spec bbdo = CC.map (\l -> (fakeRank, l))
checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs) checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs)
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (worstFeaturesPipeline False spec bbdo) runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences)
where ordering' = forceSomeOrdering ordering where ordering' = forceSomeOrdering ordering
worstFeaturesPipeline :: Bool -> GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT LineRecord Void (ResourceT IO) () worstFeaturesPipeline :: Bool -> GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
worstFeaturesPipeline reversed spec bbdo = rank (lessByMetric reversed $ gesMainMetric spec) worstFeaturesPipeline reversed spec bbdo mReferences = rank (lessByMetric reversed $ gesMainMetric spec)
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo) .| evalStateC 0 (extractFeaturesAndPValues spec bbdo mReferences)
.| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting .| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting
.| gobbleAndDo (sortBy featureOrder) .| gobbleAndDo (sortBy featureOrder)
.| filtreCartesian (bbdoCartesian bbdo) .| filtreCartesian (bbdoCartesian bbdo)
@ -120,10 +137,10 @@ forceSomeOrdering :: ResultOrdering -> ResultOrdering
forceSomeOrdering FirstTheBest = FirstTheBest forceSomeOrdering FirstTheBest = FirstTheBest
forceSomeOrdering _ = FirstTheWorst forceSomeOrdering _ = FirstTheWorst
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) () extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
extractFeaturesAndPValues spec bbdo = extractFeaturesAndPValues spec bbdo mReferences =
totalCounter totalCounter
.| rankedFeatureExtractor spec bbdo .| rankedFeatureExtractor spec bbdo mReferences
.| uScoresCounter (bbdoMinFrequency bbdo) .| uScoresCounter (bbdoMinFrequency bbdo)
@ -143,8 +160,8 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) =
(pack $ printf "%0.8f" avg), (pack $ printf "%0.8f" avg),
(pack $ printf "%0.20f" p)] (pack $ printf "%0.20f" p)]
rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> ConduitT (Double, LineRecord) RankedFactor m () rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, LineRecord) RankedFactor m ()
rankedFeatureExtractor spec bbdo = featureExtractor mTokenizer bbdo rankedFeatureExtractor spec bbdo mReferences = featureExtractor mTokenizer bbdo mReferences
.| CC.map snd .| CC.map snd
.| CC.map unwrapFeatures .| CC.map unwrapFeatures
.| CC.concat .| CC.concat
@ -163,11 +180,11 @@ instance FeatureSource (LineRecord, LineRecord) where
getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA
mainLineRecord (_, l) = l mainLineRecord (_, l) = l
featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> ConduitT (Double, s) (s, LineWithFactors) m () featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, s) (s, LineWithFactors) m ()
featureExtractor mTokenizer bbdo = CC.map extract featureExtractor mTokenizer bbdo mReferences = CC.map extract
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo)) .| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
where extract (rank, line) = where extract (rank, line) =
(line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo (mainLineRecord line)) (line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo mReferences (mainLineRecord line))
finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFactors) m () finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFactors) m ()
finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p)) finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p))
@ -199,12 +216,17 @@ filtreCartesian True = CC.concatMapAccum step S.empty
peggedToUnaryLine :: LineWithPeggedFactors -> LineWithFactors peggedToUnaryLine :: LineWithPeggedFactors -> LineWithFactors
peggedToUnaryLine (LineWithPeggedFactors rank score fs) = LineWithFactors rank score (Prelude.map UnaryFactor fs) peggedToUnaryLine (LineWithPeggedFactors rank score fs) = LineWithFactors rank score (Prelude.map UnaryFactor fs)
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> LineRecord -> [PeggedFactor] getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> LineRecord -> [PeggedFactor]
getFeatures mTokenizer bbdo (LineRecord inLine expLine outLine _ _) = getFeatures mTokenizer bbdo mReferences (LineRecord inLine expLine outLine _ _) =
Data.List.concat [ Data.List.concat [
extractFactors mTokenizer bbdo "exp" expLine, extractFactors mTokenizer bbdo mReferencesData "exp" expLine,
extractFactorsFromTabbed mTokenizer bbdo "in" inLine, extractFactorsFromTabbed mTokenizer bbdo mReferencesData "in" inLine,
extractFactors mTokenizer bbdo "out" outLine] extractFactors mTokenizer bbdo mReferencesData "out" outLine]
where mReferencesData = case mReferences of
Just references -> Just $ ReferencesData {
referencesDataReferences = references,
referencesDataCurrentId = Nothing }
Nothing -> Nothing
data FeatureAggregate = ExistentialFactorAggregate Double MetricValue Integer data FeatureAggregate = ExistentialFactorAggregate Double MetricValue Integer
| NumericalValueAggregate [Double] [MetricValue] [Int] [MetricValue] | NumericalValueAggregate [Double] [MetricValue] [Int] [MetricValue]
@ -327,10 +349,15 @@ lessByMetric reversed metric = lessByMetric' reversed (getMetricOrdering metric)
(\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) -> (\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) ->
scoreA < scoreB) scoreA < scoreB)
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> (Maybe References -> ConduitT LineRecord Void (ResourceT IO) a) -> IO a
runLineByLineGeneralized ordering spec consum = do runLineByLineGeneralized ordering spec consum = do
mReferences <- case gesReferences spec of
Just referencesFp -> do
references <- readReferences referencesFp
return $ Just references
Nothing -> return Nothing
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec
gevalLineByLineCore metric mSelector preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum) gevalLineByLineCore metric mSelector preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum mReferences)
where metric = gesMainMetric spec where metric = gesMainMetric spec
mSelector = gesSelector spec mSelector = gesSelector spec
preprocess = gesPreprocess spec preprocess = gesPreprocess spec
@ -340,6 +367,7 @@ runLineByLineGeneralized ordering spec consum = do
sortOrder FirstTheBest TheLowerTheBetter = compareScores sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores sortOrder _ _ = flip compareScores
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2 compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
mReferences = Nothing
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m () gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
gobbleAndDo fun = do gobbleAndDo fun = do
@ -348,9 +376,9 @@ gobbleAndDo fun = do
runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering otherOut spec consum runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering otherOut spec consum
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () where consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = CL.filter shouldBeShown consum = \mReferences -> CL.filter shouldBeShown
.| runFeatureFilter featureFilter spec bbdo .| runFeatureFilter featureFilter spec bbdo mReferences
.| CL.map (encodeUtf8 . formatOutput) .| CL.map (encodeUtf8 . formatOutput)
.| CC.unlinesAscii .| CC.unlinesAscii
.| CC.stdout .| CC.stdout
@ -372,15 +400,16 @@ runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized orderi
KeepTheOriginalOrder -> False KeepTheOriginalOrder -> False
FirstTheWorst -> False FirstTheWorst -> False
FirstTheBest -> True FirstTheBest -> True
consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = CC.map prepareFakeLineRecord consum = \mReferences -> CC.map prepareFakeLineRecord
.| (worstFeaturesPipeline reversed spec bbdo) .| (worstFeaturesPipeline reversed spec bbdo mReferences)
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) = prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
LineRecord inp exp out c (score - scorePrev) LineRecord inp exp out c (score - scorePrev)
mReferences = Nothing
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> (Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a) -> IO a
runDiffGeneralized ordering otherOut spec consum = do runDiffGeneralized ordering otherOut spec consum = do
(inputSource, expectedSource, outSource) <- checkAndGetFilesSingleOut True spec (inputSource, expectedSource, outSource) <- checkAndGetFilesSingleOut True spec
ooss <- getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv" otherOut ooss <- getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv" otherOut
@ -394,7 +423,7 @@ runDiffGeneralized ordering otherOut spec consum = do
runResourceT $ runConduit $ runResourceT $ runConduit $
((getZipSource $ (,) ((getZipSource $ (,)
<$> ZipSource sourceA <$> ZipSource sourceA
<*> ZipSource sourceB) .| sorter ordering .| consum) <*> ZipSource sourceB) .| sorter ordering .| consum mReferences)
where metric = gesMainMetric spec where metric = gesMainMetric spec
preprocess = gesPreprocess spec preprocess = gesPreprocess spec
mSelector = gesSelector spec mSelector = gesSelector spec
@ -406,6 +435,7 @@ runDiffGeneralized ordering otherOut spec consum = do
compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1)) compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2)) ((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
= (n1 - o1) `compare` (n2 - o2) = (n1 - o1) `compare` (n2 - o2)
mReferences = Nothing
escapeTabs :: Text -> Text escapeTabs :: Text -> Text

View File

@ -177,6 +177,12 @@ specParser = GEvalSpecification
<> help "Submit ONLY: Specification of a git-annex remote." <> help "Submit ONLY: Specification of a git-annex remote."
) )
) )
<*> ( optional . strOption $
( long "references"
<> metavar "FILE"
<> help "External text file referenced"
)
)
defaultMinFrequency :: Integer defaultMinFrequency :: Integer
defaultMinFrequency = 1 defaultMinFrequency = 1

View File

@ -401,15 +401,16 @@ main = hspec $ do
gesTokenizer = Nothing, gesTokenizer = Nothing,
gesGonitoHost = Nothing, gesGonitoHost = Nothing,
gesToken = Nothing, gesToken = Nothing,
gesGonitoGitAnnexRemote = Nothing} gesGonitoGitAnnexRemote = Nothing,
gesReferences = Nothing }
it "simple test" $ do it "simple test" $ do
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume)
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo", Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
"bar", "bar",
"baz", "baz",
"baq"] "baq"]
it "test sorting" $ do it "test sorting" $ do
results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume results <- runLineByLineGeneralized FirstTheWorst sampleChallenge (const Data.Conduit.List.consume)
Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq" Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
describe "handle --alt-metric option" $ do describe "handle --alt-metric option" $ do
it "accuracy instead of likelihood" $ do it "accuracy instead of likelihood" $ do
@ -546,7 +547,7 @@ main = hspec $ do
bbdoCartesian = False, bbdoCartesian = False,
bbdoMinCartesianFrequency = Nothing, bbdoMinCartesianFrequency = Nothing,
bbdoConsiderNumericalFeatures = True } bbdoConsiderNumericalFeatures = True }
(sort $ extractFactorsFromTabbed Nothing bbdo "in" "I like this\t34.3\ttests") `shouldBe` [ (sort $ extractFactorsFromTabbed Nothing bbdo Nothing "in" "I like this\t34.3\ttests") `shouldBe` [
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" 1)
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" 1)