Show column names in extracted features

This commit is contained in:
Filip Gralinski 2020-02-22 12:13:45 +01:00
parent 6d586c7238
commit 8d429b01cb
4 changed files with 89 additions and 64 deletions

View File

@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Data.Conduit.Header module Data.Conduit.Header
(processHeader, TabularHeader, readHeaderFile) (processHeader, TabularHeader(..), readHeaderFile)
where where
import Data.Text import Data.Text

View File

@ -19,6 +19,7 @@ module GEval.FeatureExtractor
FeatureNamespace(..), FeatureNamespace(..),
References(..), References(..),
ReferencesData(..), ReferencesData(..),
FeatureIndex(..),
toTextualContent, toTextualContent,
filterExistentialFactors) filterExistentialFactors)
where where
@ -39,6 +40,8 @@ import Data.Attoparsec.Text
import Data.Attoparsec.Combinator import Data.Attoparsec.Combinator
import Control.Applicative import Control.Applicative
import Data.Conduit.Header
import qualified Data.HashMap.Strict as H import qualified Data.HashMap.Strict as H
import GEval.Annotation import GEval.Annotation
@ -131,12 +134,19 @@ instance Show AtomicFactor where
show (TextFactor t) = unpack t show (TextFactor t) = unpack t
show (ShapeFactor (WordShape t)) = 'S':'H':'A':'P':'E':':':(unpack t) show (ShapeFactor (WordShape t)) = 'S':'H':'A':'P':'E':':':(unpack t)
data FeatureNamespace = FeatureNamespace Text | FeatureTabbedNamespace Text Int data FeatureIndex = ColumnByNumber Int | ColumnByName Text
deriving (Eq, Ord)
instance Show FeatureIndex where
show (ColumnByNumber ix) = show ix
show (ColumnByName name) = unpack name
data FeatureNamespace = FeatureNamespace Text | FeatureTabbedNamespace Text FeatureIndex
deriving (Eq, Ord) deriving (Eq, Ord)
instance Show FeatureNamespace where instance Show FeatureNamespace where
show (FeatureNamespace namespace) = unpack namespace show (FeatureNamespace namespace) = unpack namespace
show (FeatureTabbedNamespace namespace column) = ((unpack namespace) ++ "<" ++ (show column) ++ ">") show (FeatureTabbedNamespace namespace columnIndex) = ((unpack namespace) ++ "<" ++ (show columnIndex) ++ ">")
data References = References (H.HashMap Integer Text) data References = References (H.HashMap Integer Text)
@ -205,11 +215,15 @@ extractFactorsFromField mTokenizer bbdo mReferenceData namespace record =
extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> [PeggedFactor] extractFactors :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> [PeggedFactor]
extractFactors mTokenizer bbdo mReferencesData namespace record = extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureNamespace namespace) record extractFactors mTokenizer bbdo mReferencesData namespace record = extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureNamespace namespace) record
extractFactorsFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> [PeggedFactor] extractFactorsFromTabbed :: (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Maybe ReferencesData -> Text -> Text -> Maybe TabularHeader -> [PeggedFactor]
extractFactorsFromTabbed mTokenizer bbdo mReferencesData namespace record = extractFactorsFromTabbed mTokenizer bbdo mReferencesData namespace record mHeader =
Data.List.concat Data.List.concat
$ Prelude.map (\(n, t) -> extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureTabbedNamespace namespace n) t) $ Prelude.map (\(n, t) -> extractFactorsFromField mTokenizer bbdo mReferencesData (FeatureTabbedNamespace namespace n) t)
$ Prelude.zip [1..] (splitOn "\t" record) $ Prelude.zip (generateColumnNames mHeader) (splitOn "\t" record)
generateColumnNames :: Maybe TabularHeader -> [FeatureIndex]
generateColumnNames Nothing = Data.List.map ColumnByNumber [1..]
generateColumnNames (Just (TabularHeader fields)) = Data.List.map ColumnByName fields
addCartesianFactors :: BlackBoxDebuggingOptions -> [LineWithPeggedFactors] -> [LineWithFactors] addCartesianFactors :: BlackBoxDebuggingOptions -> [LineWithPeggedFactors] -> [LineWithFactors]
addCartesianFactors bbdo linesWithPeggedFactors = addCartesianFactors' (bbdoCartesian bbdo) linesWithPeggedFactors addCartesianFactors bbdo linesWithPeggedFactors = addCartesianFactors' (bbdoCartesian bbdo) linesWithPeggedFactors

View File

@ -98,9 +98,11 @@ parseReferenceEntry line = (read $ unpack refId, t)
where [refId, t] = splitOn "\t" line where [refId, t] = splitOn "\t" line
runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum runLineByLine ordering featureFilter spec bbdo = do
where consum :: Maybe References -> ConduitT LineRecord Void (ResourceT IO) () mInHeader <- readHeaderFileWrapper $ getInHeader spec
consum = (\mReferences -> (runFeatureFilter featureFilter spec bbdo mReferences .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)) runLineByLineGeneralized ordering spec (consum mInHeader)
where consum :: Maybe TabularHeader -> Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
consum = (\mInHeader -> \mReferences -> (runFeatureFilter featureFilter spec bbdo mReferences mInHeader .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout))
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [ formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score, formatScore score,
escapeTabs inp, escapeTabs inp,
@ -155,15 +157,15 @@ markBadFeatures worstFeaturesMap mTokenizer bbdo field line =
$ Prelude.map (featureToLineSpan worstFeaturesMap) $ Prelude.map (featureToLineSpan worstFeaturesMap)
$ extractFactors mTokenizer bbdo Nothing field line $ extractFactors mTokenizer bbdo Nothing field line
markBadFeaturesInTabbed :: (M.Map PeggedFactor Double) -> (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> [LineSpan] markBadFeaturesInTabbed :: (M.Map PeggedFactor Double) -> (Maybe Tokenizer) -> BlackBoxDebuggingOptions -> Text -> Text -> Maybe TabularHeader -> [LineSpan]
markBadFeaturesInTabbed worstFeaturesMap mTokenizer bbdo field line = markBadFeaturesInTabbed worstFeaturesMap mTokenizer bbdo field line mInHeader =
catMaybes catMaybes
$ Prelude.map (featureToLineSpan worstFeaturesMap) $ Prelude.map (featureToLineSpan worstFeaturesMap)
$ extractFactorsFromTabbed mTokenizer bbdo Nothing field line $ extractFactorsFromTabbed mTokenizer bbdo Nothing field line mInHeader
doMarking worstFeaturesMap mTokenizer bbdo (LineRecord inpLine expLine outLine _ score) = doMarking worstFeaturesMap mTokenizer mInHeader bbdo (LineRecord inpLine expLine outLine _ score) =
SpanLineRecord (markBadFeaturesInTabbed worstFeaturesMap mTokenizer bbdo "in" inpLine) SpanLineRecord (markBadFeaturesInTabbed worstFeaturesMap mTokenizer bbdo "in" inpLine mInHeader)
(markBadFeatures worstFeaturesMap mTokenizer bbdo "exp" expLine) (markBadFeatures worstFeaturesMap mTokenizer bbdo "exp" expLine)
(markBadFeatures worstFeaturesMap mTokenizer bbdo "out" outLine) (markBadFeatures worstFeaturesMap mTokenizer bbdo "out" outLine)
score score
@ -184,16 +186,17 @@ runLineByLineWithWorstFeaturesGeneralized :: ResultOrdering
-> IO r -> IO r
runLineByLineWithWorstFeaturesGeneralized ordering featureFilter spec bbdo consum = do runLineByLineWithWorstFeaturesGeneralized ordering featureFilter spec bbdo consum = do
hPutStrLn stderr "Looking for worst features..." hPutStrLn stderr "Looking for worst features..."
worstFeatures <- runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences (CL.take 100)) mInHeader <- readHeaderFileWrapper $ getInHeader spec
worstFeatures <- runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences mInHeader (CL.take 100))
let worstFeaturesMap = M.fromList let worstFeaturesMap = M.fromList
$ catMaybes $ catMaybes
$ Prelude.map featureToFactor $ Prelude.map featureToFactor
$ Prelude.map (\(FeatureWithPValue feature pValue _ _) -> (feature, pValue)) worstFeatures $ Prelude.map (\(FeatureWithPValue feature pValue _ _) -> (feature, pValue)) worstFeatures
mInHeader <- readHeaderFileWrapper $ getInHeader spec
runLineByLineGeneralized ordering spec (consum' worstFeaturesMap) runLineByLineGeneralized ordering spec (consum' worstFeaturesMap mInHeader)
where consum' worstFeaturesMap = (\mReferences -> (runFeatureFilter featureFilter spec bbdo mReferences where consum' worstFeaturesMap mInHeader = (\mReferences -> (runFeatureFilter featureFilter spec bbdo mReferences mInHeader
.| CL.map (doMarking worstFeaturesMap mTokenizer bbdo) .| CL.map (doMarking worstFeaturesMap mTokenizer mInHeader bbdo)
.| consum)) .| consum))
ordering' = forceSomeOrdering ordering ordering' = forceSomeOrdering ordering
mTokenizer = gesTokenizer spec mTokenizer = gesTokenizer spec
@ -206,18 +209,21 @@ runFeatureFilter :: (Monad m, FeatureSource s) => Maybe String
-> GEvalSpecification -> GEvalSpecification
-> BlackBoxDebuggingOptions -> BlackBoxDebuggingOptions
-> Maybe References -> Maybe References
-> Maybe TabularHeader
-> ConduitT s s m () -> ConduitT s s m ()
runFeatureFilter Nothing _ _ _ = doNothing runFeatureFilter Nothing _ _ _ _ = doNothing
runFeatureFilter (Just feature) spec bbdo mReferences = CC.map (\l -> (fakeRank, l)) runFeatureFilter (Just feature) spec bbdo mReferences mInHeader = CC.map (\l -> (fakeRank, l))
.| featureExtractor mTokenizer bbdo mReferences .| featureExtractor mTokenizer bbdo mReferences mInHeader
.| CC.filter (checkFeature feature) .| CC.filter (checkFeature feature)
.| CC.map fst .| CC.map fst
where mTokenizer = gesTokenizer spec where mTokenizer = gesTokenizer spec
fakeRank = 0.0 fakeRank = 0.0
checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs) checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs)
runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences consumFeatures) runWorstFeatures ordering spec bbdo = do
mInHeader <- readHeaderFileWrapper $ getInHeader spec
runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences mInHeader consumFeatures)
where ordering' = forceSomeOrdering ordering where ordering' = forceSomeOrdering ordering
consumFeatures = CL.map (encodeUtf8 . formatFeatureWithPValue) consumFeatures = CL.map (encodeUtf8 . formatFeatureWithPValue)
@ -228,10 +234,11 @@ worstFeaturesPipeline :: Bool
-> GEvalSpecification -> GEvalSpecification
-> BlackBoxDebuggingOptions -> BlackBoxDebuggingOptions
-> Maybe References -> Maybe References
-> Maybe TabularHeader
-> ConduitT FeatureWithPValue Void (ResourceT IO) a -> ConduitT FeatureWithPValue Void (ResourceT IO) a
-> ConduitT LineRecord Void (ResourceT IO) a -> ConduitT LineRecord Void (ResourceT IO) a
worstFeaturesPipeline reversed spec bbdo mReferences consum = rank (lessByMetric reversed $ gesMainMetric spec) worstFeaturesPipeline reversed spec bbdo mReferences mInHeader consum = rank (lessByMetric reversed $ gesMainMetric spec)
.| evalStateC 0 (extractFeaturesAndPValues spec bbdo mReferences) .| evalStateC 0 (extractFeaturesAndPValues spec bbdo mReferences mInHeader)
.| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting .| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting
.| gobbleAndDo (sortBy featureOrder) .| gobbleAndDo (sortBy featureOrder)
.| filtreCartesian (bbdoCartesian bbdo) .| filtreCartesian (bbdoCartesian bbdo)
@ -245,10 +252,10 @@ forceSomeOrdering :: ResultOrdering -> ResultOrdering
forceSomeOrdering FirstTheBest = FirstTheBest forceSomeOrdering FirstTheBest = FirstTheBest
forceSomeOrdering _ = FirstTheWorst forceSomeOrdering _ = FirstTheWorst
extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) () extractFeaturesAndPValues :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> Maybe TabularHeader -> ConduitT (Double, LineRecord) FeatureWithPValue (StateT Integer m) ()
extractFeaturesAndPValues spec bbdo mReferences = extractFeaturesAndPValues spec bbdo mReferences mInHeader =
totalCounter totalCounter
.| rankedFeatureExtractor spec bbdo mReferences .| rankedFeatureExtractor spec bbdo mReferences mInHeader
.| uScoresCounter (bbdoMinFrequency bbdo) .| uScoresCounter (bbdoMinFrequency bbdo)
@ -268,11 +275,11 @@ formatFeatureWithPValue (FeatureWithPValue f p avg c) =
(pack $ printf "%0.8f" avg), (pack $ printf "%0.8f" avg),
(pack $ printf "%0.20f" p)] (pack $ printf "%0.20f" p)]
rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, LineRecord) RankedFactor m () rankedFeatureExtractor :: Monad m => GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> Maybe TabularHeader -> ConduitT (Double, LineRecord) RankedFactor m ()
rankedFeatureExtractor spec bbdo mReferences = featureExtractor mTokenizer bbdo mReferences rankedFeatureExtractor spec bbdo mReferences mInHeader = featureExtractor mTokenizer bbdo mReferences mInHeader
.| CC.map snd .| CC.map snd
.| CC.map unwrapFeatures .| CC.map unwrapFeatures
.| CC.concat .| CC.concat
where mTokenizer = gesTokenizer spec where mTokenizer = gesTokenizer spec
unwrapFeatures (LineWithFactors rank score fs) = Prelude.map (\f -> RankedFactor f rank score) fs unwrapFeatures (LineWithFactors rank score fs) = Prelude.map (\f -> RankedFactor f rank score) fs
@ -288,11 +295,11 @@ instance FeatureSource (LineRecord, LineRecord) where
getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA getScore (LineRecord _ _ _ _ scoreA, LineRecord _ _ _ _ scoreB) = scoreB - scoreA
mainLineRecord (_, l) = l mainLineRecord (_, l) = l
featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT (Double, s) (s, LineWithFactors) m () featureExtractor :: (Monad m, FeatureSource s) => Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> Maybe TabularHeader -> ConduitT (Double, s) (s, LineWithFactors) m ()
featureExtractor mTokenizer bbdo mReferences = CC.map extract featureExtractor mTokenizer bbdo mReferences mInHeader = CC.map extract
.| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo)) .| finalFeatures (bbdoCartesian bbdo) (fromMaybe (bbdoMinFrequency bbdo) (bbdoMinCartesianFrequency bbdo))
where extract (rank, line) = where extract (rank, line) =
(line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo mReferences (mainLineRecord line)) (line, LineWithPeggedFactors rank (getScore line) $ getFeatures mTokenizer bbdo mReferences (mainLineRecord line) mInHeader)
finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFactors) m () finalFeatures :: Monad m => Bool -> Integer -> ConduitT (a, LineWithPeggedFactors) (a, LineWithFactors) m ()
finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p)) finalFeatures False _ = CC.map (\(l, p) -> (l, peggedToUnaryLine p))
@ -324,11 +331,11 @@ filtreCartesian True = CC.concatMapAccum step S.empty
peggedToUnaryLine :: LineWithPeggedFactors -> LineWithFactors peggedToUnaryLine :: LineWithPeggedFactors -> LineWithFactors
peggedToUnaryLine (LineWithPeggedFactors rank score fs) = LineWithFactors rank score (Prelude.map UnaryFactor fs) peggedToUnaryLine (LineWithPeggedFactors rank score fs) = LineWithFactors rank score (Prelude.map UnaryFactor fs)
getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> LineRecord -> [PeggedFactor] getFeatures :: Maybe Tokenizer -> BlackBoxDebuggingOptions -> Maybe References -> LineRecord -> Maybe TabularHeader -> [PeggedFactor]
getFeatures mTokenizer bbdo mReferences (LineRecord inLine expLine outLine _ _) = getFeatures mTokenizer bbdo mReferences (LineRecord inLine expLine outLine _ _) mInHeader =
Data.List.concat [ Data.List.concat [
extractFactors mTokenizer bbdo mReferencesData "exp" expLine, extractFactors mTokenizer bbdo mReferencesData "exp" expLine,
extractFactorsFromTabbed mTokenizer bbdo mReferencesData "in" inLine, extractFactorsFromTabbed mTokenizer bbdo mReferencesData "in" inLine mInHeader,
extractFactors mTokenizer bbdo mReferencesData "out" outLine] extractFactors mTokenizer bbdo mReferencesData "out" outLine]
where mReferencesData = case mReferences of where mReferencesData = case mReferences of
Just references -> Just $ ReferencesData { Just references -> Just $ ReferencesData {
@ -487,13 +494,15 @@ gobbleAndDo fun = do
CC.yieldMany $ fun l CC.yieldMany $ fun l
runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering otherOut spec consum runDiff ordering featureFilter otherOut spec bbdo = do
where consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () mInHeader <- readHeaderFileWrapper $ getInHeader spec
consum = \mReferences -> CL.filter shouldBeShown runDiffGeneralized ordering otherOut spec (consum mInHeader)
.| runFeatureFilter featureFilter spec bbdo mReferences where consum :: Maybe TabularHeader -> Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
.| CL.map (encodeUtf8 . formatOutput) consum = \mInHeader -> \mReferences -> CL.filter shouldBeShown
.| CC.unlinesAscii .| runFeatureFilter featureFilter spec bbdo mReferences mInHeader
.| CC.stdout .| CL.map (encodeUtf8 . formatOutput)
.| CC.unlinesAscii
.| CC.stdout
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) = shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
outA /= outB && scoreA /= scoreB outA /= outB && scoreA /= scoreB
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [ formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
@ -533,15 +542,17 @@ runMultiOutputGeneralized spec consum = do
mSelector = gesSelector spec mSelector = gesSelector spec
runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO () runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
runMostWorseningFeatures ordering otherOut spec bbdo = runDiffGeneralized ordering' otherOut spec consum runMostWorseningFeatures ordering otherOut spec bbdo = do
mInHeader <- readHeaderFileWrapper $ getInHeader spec
runDiffGeneralized ordering' otherOut spec (consum mInHeader)
where ordering' = forceSomeOrdering ordering where ordering' = forceSomeOrdering ordering
reversed = case ordering of reversed = case ordering of
KeepTheOriginalOrder -> False KeepTheOriginalOrder -> False
FirstTheWorst -> False FirstTheWorst -> False
FirstTheBest -> True FirstTheBest -> True
consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () consum :: Maybe TabularHeader -> Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = \mReferences -> CC.map prepareFakeLineRecord consum = \mInHeader -> \mReferences -> CC.map prepareFakeLineRecord
.| (worstFeaturesPipeline reversed spec bbdo mReferences consumFeatures) .| (worstFeaturesPipeline reversed spec bbdo mReferences mInHeader consumFeatures)
prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) = prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
LineRecord inp exp out c (score - scorePrev) LineRecord inp exp out c (score - scorePrev)

View File

@ -654,26 +654,26 @@ main = hspec $ do
bbdoCartesian = False, bbdoCartesian = False,
bbdoMinCartesianFrequency = Nothing, bbdoMinCartesianFrequency = Nothing,
bbdoConsiderNumericalFeatures = True } bbdoConsiderNumericalFeatures = True }
(sort $ extractFactorsFromTabbed Nothing bbdo Nothing "in" "I like this\t34.3\ttests") `shouldBe` [ (sort $ extractFactorsFromTabbed Nothing bbdo Nothing "in" "I like this\t34.3\ttests" Nothing) `shouldBe` [
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "I"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "like"))), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "like"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "this"))), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "this"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
(SimpleExistentialFactor (BigramFactor (TextFactor "I") (TextFactor "like"))), (SimpleExistentialFactor (BigramFactor (TextFactor "I") (TextFactor "like"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
(SimpleExistentialFactor (BigramFactor (TextFactor "like") (TextFactor "this"))), (SimpleExistentialFactor (BigramFactor (TextFactor "like") (TextFactor "this"))),
PeggedFactor (FeatureTabbedNamespace "in" 1) PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 1))
(NumericalFactor Nothing 11), (NumericalFactor Nothing 11),
PeggedFactor (FeatureTabbedNamespace "in" 2) PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 2))
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "34.3"))), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "34.3"))),
PeggedFactor (FeatureTabbedNamespace "in" 2) PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 2))
(NumericalFactor (Just 34.3) 4), (NumericalFactor (Just 34.3) 4),
PeggedFactor (FeatureTabbedNamespace "in" 3) PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 3))
(SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "tests"))), (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "tests"))),
PeggedFactor (FeatureTabbedNamespace "in" 3) PeggedFactor (FeatureTabbedNamespace "in" (ColumnByNumber 3))
(NumericalFactor Nothing 5) ] (NumericalFactor Nothing 5) ]
describe "Kendall's tau" $ do describe "Kendall's tau" $ do
it "tau" $ do it "tau" $ do