Show column names in extracted features
This commit is contained in:
parent
6d586c7238
commit
8d429b01cb
@ -1,7 +1,7 @@
|
|||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
module Data.Conduit.Header
|
module Data.Conduit.Header
|
||||||
(processHeader, TabularHeader, readHeaderFile)
|
(processHeader, TabularHeader(..), readHeaderFile)
|
||||||
where
|
where
|
||||||
|
|
||||||
import Data.Text
|
import Data.Text
|
||||||
|
@ -19,6 +19,7 @@ module GEval.FeatureExtractor
|
|||||||
FeatureNamespace(..),
|
FeatureNamespace(..),
|
||||||
References(..),
|
References(..),
|
||||||
ReferencesData(..),
|
ReferencesData(..),
|
||||||
|
FeatureIndex(..),
|
||||||
toTextualContent,
|
toTextualContent,
|
||||||
filterExistentialFactors)
|
filterExistentialFactors)
|
||||||
where
|
where
|
||||||
@ -39,6 +40,8 @@ import Data.Attoparsec.Text
|
|||||||
import Data.Attoparsec.Combinator
|
import Data.Attoparsec.Combinator
|
||||||
import Control.Applicative
|
import Control.Applicative
|
||||||
|
|
||||||
|
import Data.Conduit.Header
|
||||||
|
|
||||||
import qualified Data.HashMap.Strict as H
|
import qualified Data.HashMap.Strict as H
|
||||||
|
|
||||||
import GEval.Annotation
|
import GEval.Annotation
|
||||||
@ -131,12 +134,19 @@ instance Show AtomicFactor where
|
|||||||
show (TextFactor t) = unpack t
|
show (TextFactor t) = unpack t
|
||||||
show (ShapeFactor (WordShape t)) = 'S':'H':'A':'P':'E':':':(unpack t)
|
show (ShapeFactor (WordShape t)) = 'S':'H':'A':'P':'E':':':(unpack t)
|
||||||
|
|
||||||
data FeatureNamespace = FeatureNamespace Text | FeatureTabbedNamespace Text Int
|
data FeatureIndex = ColumnByNumber Int | ColumnByName Text
|
||||||
|
deriving (Eq, Ord)
|
||||||
|
|
||||||
|
instance Show FeatureIndex where
|
||||||
|
show (ColumnByNumber ix) = show ix
|
||||||
|
show (ColumnByName name) = unpack name
|
||||||
|
|
||||||
|
data FeatureNamespace = FeatureNamespace Text | FeatureTabbedNamespace Text FeatureIndex
|
||||||
deriving (Eq, Ord)
|
deriving (Eq, Ord)
|
||||||
|
|
||||||
instance Show FeatureNamespace where
|
instance Show FeatureNamespace where
|
||||||
show (FeatureNamespace namespace) = unpack namespace
|
show (FeatureNamespace namespace) = unpack namespace
|
||||||
show (FeatureTabbedNamespace namespace column) = ((unpack namespace) ++ "<" ++ (show column) ++ ">")
|
show (FeatureTabbedNamespace namespace columnIndex) = ((unpack namespace) ++ "<" ++ (show columnIndex) ++ ">")
|
||||||
|
|
||||||
data References = References (H.HashMap Integer Text)
|
data References = References (H.HashMap Integer Text)
|
||||||
|
|
||||||
@ -205,11 +215,15 @@ extractFactorsFromField mTokenizer bbdo mReferenceData namespace record =
|
|||||||
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> [PeggedFactor]
|
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> [PeggedFactor]
|
||||||
extractFactors mTokenizer bbdo mReferencesData namespace record = extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureNamespace namespace) record
|
extractFactors mTokenizer bbdo mReferencesData namespace record = extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureNamespace namespace) record
|
||||||
|
|
||||||
extractFactorsFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> [PeggedFactor]
|
extractFactorsFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> Maybe TabularHeader -> [PeggedFactor]
|
||||||
extractFactorsFromTabbed mTokenizer bbdo mReferencesData namespace record =
|
extractFactorsFromTabbed mTokenizer bbdo mReferencesData namespace record mHeader =
|
||||||
Data.List.concat
|
Data.List.concat
|
||||||
$ Prelude.map (\(n, t) -> extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureTabbedNamespace namespace n) t)
|
$ Prelude.map (\(n, t) -> extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureTabbedNamespace namespace n) t)
|
||||||
$ Prelude.zip [1..] (splitOn "\t" record)
|
$ Prelude.zip (generateColumnNames mHeader) (splitOn "\t" record)
|
||||||
|
|
||||||
|
generateColumnNames :: Maybe TabularHeader -> [FeatureIndex]
|
||||||
|
generateColumnNames Nothing = Data.List.map ColumnByNumber [1..]
|
||||||
|
generateColumnNames (Just (TabularHeader fields)) = Data.List.map ColumnByName fields
|
||||||
|
|
||||||
addCartesianFactors :: BlackBoxDebuggingOptions -> [LineWithPeggedFactors] -> [LineWithFactors]
|
addCartesianFactors :: BlackBoxDebuggingOptions -> [LineWithPeggedFactors] -> [LineWithFactors]
|
||||||
addCartesianFactors bbdo linesWithPeggedFactors = addCartesianFactors' (bbdoCartesian bbdo) linesWithPeggedFactors
|
addCartesianFactors bbdo linesWithPeggedFactors = addCartesianFactors' (bbdoCartesian bbdo) linesWithPeggedFactors
|
||||||
|
@ -98,9 +98,11 @@ parseReferenceEntry line = (read $ unpack refId, t)
|
|||||||
where [refId, t] = splitOn "\t" line
|
where [refId, t] = splitOn "\t" line
|
||||||
|
|
||||||
runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum
|
runLineByLine ordering featureFilter spec bbdo = do
|
||||||
where consum :: Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
|
mInHeader <- readHeaderFileWrapper $ getInHeader spec
|
||||||
consum = (\mReferences -> (runFeatureFilter featureFilter spec bbdo mReferences .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout))
|
runLineByLineGeneralized ordering spec (consum mInHeader)
|
||||||
|
where consum :: Maybe TabularHeader -> Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
|
||||||
|
consum = (\mInHeader -> \mReferences -> (runFeatureFilter featureFilter spec bbdo mReferences mInHeader .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout))
|
||||||
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
|
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
|
||||||
formatScore score,
|
formatScore score,
|
||||||
escapeTabs inp,
|
escapeTabs inp,
|
||||||
@ -155,15 +157,15 @@ markBadFeatures worstFeaturesMap mTokenizer bbdo field line =
|
|||||||
$ Prelude.map (featureToLineSpan worstFeaturesMap)
|
$ Prelude.map (featureToLineSpan worstFeaturesMap)
|
||||||
$ extractFactors mTokenizer bbdo Nothing field line
|
$ extractFactors mTokenizer bbdo Nothing field line
|
||||||
|
|
||||||
markBadFeaturesInTabbed :: (M.Map PeggedFactor Double) -> (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [LineSpan]
|
markBadFeaturesInTabbed :: (M.Map PeggedFactor Double) -> (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> Maybe TabularHeader -> [LineSpan]
|
||||||
markBadFeaturesInTabbed worstFeaturesMap mTokenizer bbdo field line =
|
markBadFeaturesInTabbed worstFeaturesMap mTokenizer bbdo field line mInHeader =
|
||||||
catMaybes
|
catMaybes
|
||||||
$ Prelude.map (featureToLineSpan worstFeaturesMap)
|
$ Prelude.map (featureToLineSpan worstFeaturesMap)
|
||||||
$ extractFactorsFromTabbed mTokenizer bbdo Nothing field line
|
$ extractFactorsFromTabbed mTokenizer bbdo Nothing field line mInHeader
|
||||||
|
|
||||||
|
|
||||||
doMarking worstFeaturesMap mTokenizer bbdo (LineRecord inpLine expLine outLine _ score) =
|
doMarking worstFeaturesMap mTokenizer mInHeader bbdo (LineRecord inpLine expLine outLine _ score) =
|
||||||
SpanLineRecord (markBadFeaturesInTabbed worstFeaturesMap mTokenizer bbdo "in" inpLine)
|
SpanLineRecord (markBadFeaturesInTabbed worstFeaturesMap mTokenizer bbdo "in" inpLine mInHeader)
|
||||||
(markBadFeatures worstFeaturesMap mTokenizer bbdo "exp" expLine)
|
(markBadFeatures worstFeaturesMap mTokenizer bbdo "exp" expLine)
|
||||||
(markBadFeatures worstFeaturesMap mTokenizer bbdo "out" outLine)
|
(markBadFeatures worstFeaturesMap mTokenizer bbdo "out" outLine)
|
||||||
score
|
score
|
||||||
@ -184,16 +186,17 @@ runLineByLineWithWorstFeaturesGeneralized :: ResultOrdering
|
|||||||
-> IO r
|
-> IO r
|
||||||
runLineByLineWithWorstFeaturesGeneralized ordering featureFilter spec bbdo consum = do
|
runLineByLineWithWorstFeaturesGeneralized ordering featureFilter spec bbdo consum = do
|
||||||
hPutStrLn stderr "Looking for worst features..."
|
hPutStrLn stderr "Looking for worst features..."
|
||||||
worstFeatures <- runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences (CL.take 100))
|
mInHeader <- readHeaderFileWrapper $ getInHeader spec
|
||||||
|
worstFeatures <- runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences mInHeader (CL.take 100))
|
||||||
let worstFeaturesMap = M.fromList
|
let worstFeaturesMap = M.fromList
|
||||||
$ catMaybes
|
$ catMaybes
|
||||||
$ Prelude.map featureToFactor
|
$ Prelude.map featureToFactor
|
||||||
$ Prelude.map (\(FeatureWithPValue feature pValue _ _) -> (feature, pValue)) worstFeatures
|
$ Prelude.map (\(FeatureWithPValue feature pValue _ _) -> (feature, pValue)) worstFeatures
|
||||||
|
mInHeader <- readHeaderFileWrapper $ getInHeader spec
|
||||||
runLineByLineGeneralized ordering spec (consum' worstFeaturesMap)
|
runLineByLineGeneralized ordering spec (consum' worstFeaturesMap mInHeader)
|
||||||
where consum' worstFeaturesMap = (\mReferences -> (runFeatureFilter featureFilter spec bbdo mReferences
|
where consum' worstFeaturesMap mInHeader = (\mReferences -> (runFeatureFilter featureFilter spec bbdo mReferences mInHeader
|
||||||
.| CL.map (doMarking worstFeaturesMap mTokenizer bbdo)
|
.| CL.map (doMarking worstFeaturesMap mTokenizer mInHeader bbdo)
|
||||||
.| consum))
|
.| consum))
|
||||||
ordering' = forceSomeOrdering ordering
|
ordering' = forceSomeOrdering ordering
|
||||||
mTokenizer = gesTokenizer spec
|
mTokenizer = gesTokenizer spec
|
||||||
|
|
||||||
@ -206,18 +209,21 @@ runFeatureFilter :: (Monad m, FeatureSource s) => Maybe String
|
|||||||
-> GEvalSpecification
|
-> GEvalSpecification
|
||||||
-> BlackBoxDebuggingOptions
|
-> BlackBoxDebuggingOptions
|
||||||
-> Maybe References
|
-> Maybe References
|
||||||
|
-> Maybe TabularHeader
|
||||||
-> ConduitT s s m ()
|
-> ConduitT s s m ()
|
||||||
runFeatureFilter Nothing _ _ _ = doNothing
|
runFeatureFilter Nothing _ _ _ _ = doNothing
|
||||||
runFeatureFilter (Just feature) spec bbdo mReferences = CC.map (\l -> (fakeRank, l))
|
runFeatureFilter (Just feature) spec bbdo mReferences mInHeader = CC.map (\l -> (fakeRank, l))
|
||||||
.| featureExtractor mTokenizer bbdo mReferences
|
.| featureExtractor mTokenizer bbdo mReferences mInHeader
|
||||||
.| CC.filter (checkFeature feature)
|
.| CC.filter (checkFeature feature)
|
||||||
.| CC.map fst
|
.| CC.map fst
|
||||||
where mTokenizer = gesTokenizer spec
|
where mTokenizer = gesTokenizer spec
|
||||||
fakeRank = 0.0
|
fakeRank = 0.0
|
||||||
checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs)
|
checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs)
|
||||||
|
|
||||||
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences consumFeatures)
|
runWorstFeatures ordering spec bbdo = do
|
||||||
|
mInHeader <- readHeaderFileWrapper $ getInHeader spec
|
||||||
|
runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences mInHeader consumFeatures)
|
||||||
where ordering' = forceSomeOrdering ordering
|
where ordering' = forceSomeOrdering ordering
|
||||||
|
|
||||||
consumFeatures = CL.map (encodeUtf8 . formatFeatureWithPValue)
|
consumFeatures = CL.map (encodeUtf8 . formatFeatureWithPValue)
|
||||||
@ -228,10 +234,11 @@ worstFeaturesPipeline :: Bool
|
|||||||
-> GEvalSpecification
|
-> GEvalSpecification
|
||||||
-> BlackBoxDebuggingOptions
|
-> BlackBoxDebuggingOptions
|
||||||
-> Maybe References
|
-> Maybe References
|
||||||
|
-> Maybe TabularHeader
|
||||||
-> ConduitT FeatureWithPValue Void (ResourceT IO) a
|
-> ConduitT FeatureWithPValue Void (ResourceT IO) a
|
||||||
-> ConduitT LineRecord Void (ResourceT IO) a
|
-> ConduitT LineRecord Void (ResourceT IO) a
|
||||||
worstFeaturesPipeline reversed spec bbdo mReferences consum = rank (lessByMetric reversed $ gesMainMetric spec)
|
worstFeaturesPipeline reversed spec bbdo mReferences mInHeader consum = rank (lessByMetric reversed $ gesMainMetric spec)
|
||||||
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo mReferences)
|
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo mReferences mInHeader)
|
||||||
.| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting
|
.| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting
|
||||||
.| gobbleAndDo (sortBy featureOrder)
|
.| gobbleAndDo (sortBy featureOrder)
|
||||||
.| filtreCartesian (bbdoCartesian bbdo)
|
.| filtreCartesian (bbdoCartesian bbdo)
|
||||||
@ -245,10 +252,10 @@ forceSomeOrdering :: ResultOrdering -> ResultOrdering
|
|||||||
forceSomeOrdering FirstTheBest = FirstTheBest
|
forceSomeOrdering FirstTheBest = FirstTheBest
|
||||||
forceSomeOrdering _ = FirstTheWorst
|
forceSomeOrdering _ = FirstTheWorst
|
||||||
|
|
||||||
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
|
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> Maybe TabularHeader -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
|
||||||
extractFeaturesAndPValues spec bbdo mReferences =
|
extractFeaturesAndPValues spec bbdo mReferences mInHeader =
|
||||||
totalCounter
|
totalCounter
|
||||||
.| rankedFeatureExtractor spec bbdo mReferences
|
.| rankedFeatureExtractor spec bbdo mReferences mInHeader
|
||||||
.| uScoresCounter (bbdoMinFrequency bbdo)
|
.| uScoresCounter (bbdoMinFrequency bbdo)
|
||||||
|
|
||||||
|
|
||||||
@ -268,11 +275,11 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) =
|
|||||||
(pack $ printf "%0.8f" avg),
|
(pack $ printf "%0.8f" avg),
|
||||||
(pack $ printf "%0.20f" p)]
|
(pack $ printf "%0.20f" p)]
|
||||||
|
|
||||||
rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, LineRecord) RankedFactor m ()
|
rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> Maybe TabularHeader -> ConduitT (Double, LineRecord) RankedFactor m ()
|
||||||
rankedFeatureExtractor spec bbdo mReferences = featureExtractor mTokenizer bbdo mReferences
|
rankedFeatureExtractor spec bbdo mReferences mInHeader = featureExtractor mTokenizer bbdo mReferences mInHeader
|
||||||
.| CC.map snd
|
.| CC.map snd
|
||||||
.| CC.map unwrapFeatures
|
.| CC.map unwrapFeatures
|
||||||
.| CC.concat
|
.| CC.concat
|
||||||
where mTokenizer = gesTokenizer spec
|
where mTokenizer = gesTokenizer spec
|
||||||
unwrapFeatures (LineWithFactors rank score fs) = Prelude.map (\f -> RankedFactor f rank score) fs
|
unwrapFeatures (LineWithFactors rank score fs) = Prelude.map (\f -> RankedFactor f rank score) fs
|
||||||
|
|
||||||
@ -288,11 +295,11 @@ instance FeatureSource (LineRecord, LineRecord) where
|
|||||||
getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA
|
getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA
|
||||||
mainLineRecord (_, l) = l
|
mainLineRecord (_, l) = l
|
||||||
|
|
||||||
featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, s) (s, LineWithFactors) m ()
|
featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> Maybe TabularHeader -> ConduitT (Double, s) (s, LineWithFactors) m ()
|
||||||
featureExtractor mTokenizer bbdo mReferences = CC.map extract
|
featureExtractor mTokenizer bbdo mReferences mInHeader = CC.map extract
|
||||||
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
|
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
|
||||||
where extract (rank, line) =
|
where extract (rank, line) =
|
||||||
(line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo mReferences (mainLineRecord line))
|
(line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo mReferences (mainLineRecord line) mInHeader)
|
||||||
|
|
||||||
finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFactors) m ()
|
finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFactors) m ()
|
||||||
finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p))
|
finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p))
|
||||||
@ -324,11 +331,11 @@ filtreCartesian True = CC.concatMapAccum step S.empty
|
|||||||
peggedToUnaryLine :: LineWithPeggedFactors -> LineWithFactors
|
peggedToUnaryLine :: LineWithPeggedFactors -> LineWithFactors
|
||||||
peggedToUnaryLine (LineWithPeggedFactors rank score fs) = LineWithFactors rank score (Prelude.map UnaryFactor fs)
|
peggedToUnaryLine (LineWithPeggedFactors rank score fs) = LineWithFactors rank score (Prelude.map UnaryFactor fs)
|
||||||
|
|
||||||
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> LineRecord -> [PeggedFactor]
|
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> LineRecord -> Maybe TabularHeader -> [PeggedFactor]
|
||||||
getFeatures mTokenizer bbdo mReferences (LineRecord inLine expLine outLine _ _) =
|
getFeatures mTokenizer bbdo mReferences (LineRecord inLine expLine outLine _ _) mInHeader =
|
||||||
Data.List.concat [
|
Data.List.concat [
|
||||||
extractFactors mTokenizer bbdo mReferencesData "exp" expLine,
|
extractFactors mTokenizer bbdo mReferencesData "exp" expLine,
|
||||||
extractFactorsFromTabbed mTokenizer bbdo mReferencesData "in" inLine,
|
extractFactorsFromTabbed mTokenizer bbdo mReferencesData "in" inLine mInHeader,
|
||||||
extractFactors mTokenizer bbdo mReferencesData "out" outLine]
|
extractFactors mTokenizer bbdo mReferencesData "out" outLine]
|
||||||
where mReferencesData = case mReferences of
|
where mReferencesData = case mReferences of
|
||||||
Just references -> Just $ ReferencesData {
|
Just references -> Just $ ReferencesData {
|
||||||
@ -487,13 +494,15 @@ gobbleAndDo fun = do
|
|||||||
CC.yieldMany $ fun l
|
CC.yieldMany $ fun l
|
||||||
|
|
||||||
runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering otherOut spec consum
|
runDiff ordering featureFilter otherOut spec bbdo = do
|
||||||
where consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
mInHeader <- readHeaderFileWrapper $ getInHeader spec
|
||||||
consum = \mReferences -> CL.filter shouldBeShown
|
runDiffGeneralized ordering otherOut spec (consum mInHeader)
|
||||||
.| runFeatureFilter featureFilter spec bbdo mReferences
|
where consum :: Maybe TabularHeader -> Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
||||||
.| CL.map (encodeUtf8 . formatOutput)
|
consum = \mInHeader -> \mReferences -> CL.filter shouldBeShown
|
||||||
.| CC.unlinesAscii
|
.| runFeatureFilter featureFilter spec bbdo mReferences mInHeader
|
||||||
.| CC.stdout
|
.| CL.map (encodeUtf8 . formatOutput)
|
||||||
|
.| CC.unlinesAscii
|
||||||
|
.| CC.stdout
|
||||||
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
|
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
|
||||||
outA /= outB && scoreA /= scoreB
|
outA /= outB && scoreA /= scoreB
|
||||||
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
|
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
|
||||||
@ -533,15 +542,17 @@ runMultiOutputGeneralized spec consum = do
|
|||||||
mSelector = gesSelector spec
|
mSelector = gesSelector spec
|
||||||
|
|
||||||
runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized ordering' otherOut spec consum
|
runMostWorseningFeatures ordering otherOut spec bbdo = do
|
||||||
|
mInHeader <- readHeaderFileWrapper $ getInHeader spec
|
||||||
|
runDiffGeneralized ordering' otherOut spec (consum mInHeader)
|
||||||
where ordering' = forceSomeOrdering ordering
|
where ordering' = forceSomeOrdering ordering
|
||||||
reversed = case ordering of
|
reversed = case ordering of
|
||||||
KeepTheOriginalOrder -> False
|
KeepTheOriginalOrder -> False
|
||||||
FirstTheWorst -> False
|
FirstTheWorst -> False
|
||||||
FirstTheBest -> True
|
FirstTheBest -> True
|
||||||
consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
consum :: Maybe TabularHeader -> Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
||||||
consum = \mReferences -> CC.map prepareFakeLineRecord
|
consum = \mInHeader -> \mReferences -> CC.map prepareFakeLineRecord
|
||||||
.| (worstFeaturesPipeline reversed spec bbdo mReferences consumFeatures)
|
.| (worstFeaturesPipeline reversed spec bbdo mReferences mInHeader consumFeatures)
|
||||||
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
|
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
|
||||||
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
|
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
|
||||||
LineRecord inp exp out c (score - scorePrev)
|
LineRecord inp exp out c (score - scorePrev)
|
||||||
|
22
test/Spec.hs
22
test/Spec.hs
@ -654,26 +654,26 @@ main = hspec $ do
|
|||||||
bbdoCartesian = False,
|
bbdoCartesian = False,
|
||||||
bbdoMinCartesianFrequency = Nothing,
|
bbdoMinCartesianFrequency = Nothing,
|
||||||
bbdoConsiderNumericalFeatures = True }
|
bbdoConsiderNumericalFeatures = True }
|
||||||
(sort $ extractFactorsFromTabbed Nothing bbdo Nothing "in" "I like this\t34.3\ttests") `shouldBe` [
|
(sort $ extractFactorsFromTabbed Nothing bbdo Nothing "in" "I like this\t34.3\ttests" Nothing) `shouldBe` [
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
|
||||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
|
||||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "like"))),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "like"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
|
||||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "this"))),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "this"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
|
||||||
(SimpleExistentialFactor (BigramFactor (TextFactor "I") (TextFactor "like"))),
|
(SimpleExistentialFactor (BigramFactor (TextFactor "I") (TextFactor "like"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
|
||||||
(SimpleExistentialFactor (BigramFactor (TextFactor "like") (TextFactor "this"))),
|
(SimpleExistentialFactor (BigramFactor (TextFactor "like") (TextFactor "this"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 1)
|
PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
|
||||||
(NumericalFactor Nothing 11),
|
(NumericalFactor Nothing 11),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 2)
|
PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 2))
|
||||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "34.3"))),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "34.3"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 2)
|
PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 2))
|
||||||
(NumericalFactor (Just 34.3) 4),
|
(NumericalFactor (Just 34.3) 4),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 3)
|
PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 3))
|
||||||
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "tests"))),
|
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "tests"))),
|
||||||
PeggedFactor (FeatureTabbedNamespace "in" 3)
|
PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 3))
|
||||||
(NumericalFactor Nothing 5) ]
|
(NumericalFactor Nothing 5) ]
|
||||||
describe "Kendall's tau" $ do
|
describe "Kendall's tau" $ do
|
||||||
it "tau" $ do
|
it "tau" $ do
|
||||||
|
Loading…
Reference in New Issue
Block a user