Add p<X> operations for choosing the most confident items
This commit is contained in:
parent
ac6320bf70
commit
40bf850423
@ -59,6 +59,8 @@ library
|
|||||||
, GEval.Model
|
, GEval.Model
|
||||||
, GEval.ModelTraining
|
, GEval.ModelTraining
|
||||||
, GEval.MatchingSpecification
|
, GEval.MatchingSpecification
|
||||||
|
, GEval.Confidence
|
||||||
|
, Data.Conduit.Utils
|
||||||
, Paths_geval
|
, Paths_geval
|
||||||
build-depends: base >= 4.7 && < 5
|
build-depends: base >= 4.7 && < 5
|
||||||
, cond
|
, cond
|
||||||
|
@ -7,6 +7,7 @@ module Data.Conduit.AutoDecompress
|
|||||||
|
|
||||||
import Data.Conduit
|
import Data.Conduit
|
||||||
import Data.Conduit.Combinators
|
import Data.Conduit.Combinators
|
||||||
|
import Data.Conduit.Utils
|
||||||
import Data.ByteString
|
import Data.ByteString
|
||||||
import Data.Conduit.Zlib
|
import Data.Conduit.Zlib
|
||||||
import Data.Word8
|
import Data.Word8
|
||||||
@ -40,6 +41,3 @@ lookAtMagicNumbers (31, 139) = ungzip
|
|||||||
lookAtMagicNumbers (66, 90) = BZ.bunzip2
|
lookAtMagicNumbers (66, 90) = BZ.bunzip2
|
||||||
lookAtMagicNumbers (253, 55) = XZ.decompress Nothing
|
lookAtMagicNumbers (253, 55) = XZ.decompress Nothing
|
||||||
lookAtMagicNumbers _ = doNothing
|
lookAtMagicNumbers _ = doNothing
|
||||||
|
|
||||||
doNothing :: Monad m => ConduitT a a m ()
|
|
||||||
doNothing = Data.Conduit.Combinators.filter (const True)
|
|
||||||
|
17
src/Data/Conduit/Utils.hs
Normal file
17
src/Data/Conduit/Utils.hs
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
module Data.Conduit.Utils
|
||||||
|
(gobbleAndDo,
|
||||||
|
doNothing)
|
||||||
|
where
|
||||||
|
|
||||||
|
import Data.Conduit
|
||||||
|
import Data.Conduit.Combinators
|
||||||
|
|
||||||
|
|
||||||
|
doNothing :: Monad m => ConduitT a a m ()
|
||||||
|
doNothing = Data.Conduit.Combinators.filter (const True)
|
||||||
|
|
||||||
|
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
|
||||||
|
gobbleAndDo fun = do
|
||||||
|
l <- sinkList
|
||||||
|
yieldMany $ fun l
|
13
src/GEval/Confidence.hs
Normal file
13
src/GEval/Confidence.hs
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
module GEval.Confidence
|
||||||
|
(totalLineConfidence)
|
||||||
|
where
|
||||||
|
|
||||||
|
import Data.Text
|
||||||
|
import GEval.ProbList
|
||||||
|
import GEval.Probability
|
||||||
|
|
||||||
|
totalLineConfidence :: Text -> Double
|
||||||
|
totalLineConfidence t = case parseIntoProbList t of
|
||||||
|
ProbList [] -> 1.0
|
||||||
|
ProbList wordWithProbs -> (product $ Prelude.map (\(WordWithProb _ p) -> getP p) wordWithProbs) ** (1/l)
|
||||||
|
where l = fromIntegral $ Prelude.length wordWithProbs
|
@ -66,6 +66,7 @@ import GEval.Model (ModelType)
|
|||||||
|
|
||||||
import Data.Conduit
|
import Data.Conduit
|
||||||
import Data.Conduit.Combinators as CC
|
import Data.Conduit.Combinators as CC
|
||||||
|
import Data.Conduit.Utils
|
||||||
import qualified Data.Conduit.Text as CT
|
import qualified Data.Conduit.Text as CT
|
||||||
import Control.Monad.Trans.Resource
|
import Control.Monad.Trans.Resource
|
||||||
import qualified Data.Conduit.List as CL
|
import qualified Data.Conduit.List as CL
|
||||||
@ -133,6 +134,7 @@ isInputNeeded (EvaluationScheme _ ops) = hasFiltering ops
|
|||||||
hasFiltering :: [PreprocessingOperation] -> Bool
|
hasFiltering :: [PreprocessingOperation] -> Bool
|
||||||
hasFiltering [] = False
|
hasFiltering [] = False
|
||||||
hasFiltering ((FeatureFilter _):_) = True
|
hasFiltering ((FeatureFilter _):_) = True
|
||||||
|
hasFiltering ((TopConfidence _):_) = True
|
||||||
hasFiltering (_:ops) = hasFiltering ops
|
hasFiltering (_:ops) = hasFiltering ops
|
||||||
|
|
||||||
-- | Could output be preprocessable
|
-- | Could output be preprocessable
|
||||||
@ -306,7 +308,7 @@ extensionsHandled = ["tsv", "jsonl"]
|
|||||||
data LineSource m = LineSource (ConduitT () Text m ()) (Text -> ItemTarget) (Text -> Text) SourceSpec Word32
|
data LineSource m = LineSource (ConduitT () Text m ()) (Text -> ItemTarget) (Text -> Text) SourceSpec Word32
|
||||||
|
|
||||||
data LineSourcesSpecification m = LineSourcesSpecification {
|
data LineSourcesSpecification m = LineSourcesSpecification {
|
||||||
lineSourcesFilter :: Filter,
|
lineSourcesFilter :: GeneralizedFilter,
|
||||||
lineSourcesInputSource :: LineSource m,
|
lineSourcesInputSource :: LineSource m,
|
||||||
lineSourcesExpectedSource :: LineSource m,
|
lineSourcesExpectedSource :: LineSource m,
|
||||||
lineSourcesOutputSource :: LineSource m }
|
lineSourcesOutputSource :: LineSource m }
|
||||||
@ -359,7 +361,7 @@ addSchemeSpecifics :: EvaluationScheme -> DataSource -> DataSource
|
|||||||
addSchemeSpecifics scheme@(EvaluationScheme metric _) dataSource =
|
addSchemeSpecifics scheme@(EvaluationScheme metric _) dataSource =
|
||||||
dataSource {
|
dataSource {
|
||||||
dataSourceChallengeData = (dataSourceChallengeData dataSource) {
|
dataSourceChallengeData = (dataSourceChallengeData dataSource) {
|
||||||
challengeDataSourceFilter = getFilterForScheme (challengeDataSourceInHeader $ dataSourceChallengeData dataSource) scheme,
|
challengeDataSourceFilter = getGeneralizedFilterForScheme (challengeDataSourceInHeader $ dataSourceChallengeData dataSource) scheme,
|
||||||
challengeDataSourceOutPreprocess = outPreprocess,
|
challengeDataSourceOutPreprocess = outPreprocess,
|
||||||
challengeDataSourceInPreprocess = inPreprocess
|
challengeDataSourceInPreprocess = inPreprocess
|
||||||
}}
|
}}
|
||||||
@ -1039,12 +1041,14 @@ defineContinuation aggregator finalStep generateGraph = do
|
|||||||
return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v)
|
return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v)
|
||||||
|
|
||||||
fromSpecificationToWithoutInput :: LineSourcesSpecification (ResourceT m) -> WithoutInput m e o
|
fromSpecificationToWithoutInput :: LineSourcesSpecification (ResourceT m) -> WithoutInput m e o
|
||||||
fromSpecificationToWithoutInput lsSpec = case lineSourcesFilter lsSpec of
|
fromSpecificationToWithoutInput lsSpec =
|
||||||
NoFilter -> WithoutInput expectedSource outSource
|
if hasNoFilter theFilter
|
||||||
theFilter -> WithoutInputButFiltered theFilter inputSource expectedSource outSource
|
then WithoutInput expectedSource outSource
|
||||||
|
else WithoutInputButFiltered theFilter inputSource expectedSource outSource
|
||||||
where expectedSource = lineSourcesExpectedSource lsSpec
|
where expectedSource = lineSourcesExpectedSource lsSpec
|
||||||
outSource = lineSourcesOutputSource lsSpec
|
outSource = lineSourcesOutputSource lsSpec
|
||||||
inputSource = lineSourcesInputSource lsSpec
|
inputSource = lineSourcesInputSource lsSpec
|
||||||
|
theFilter = (lineSourcesFilter lsSpec)
|
||||||
|
|
||||||
fromSpecificationToWithInput :: LineSourcesSpecification (ResourceT m) -> WithInput m i e o
|
fromSpecificationToWithInput :: LineSourcesSpecification (ResourceT m) -> WithInput m i e o
|
||||||
fromSpecificationToWithInput lsSpec = WithInput theFilter inpSource expectedSource outSource
|
fromSpecificationToWithInput lsSpec = WithInput theFilter inpSource expectedSource outSource
|
||||||
@ -1070,7 +1074,7 @@ class EvaluationContext ctxt m where
|
|||||||
checkStepM :: ((Word32, ParsedRecord ctxt) -> (ResourceT m) c) -> (Word32, WrappedParsedRecord ctxt) -> (ResourceT m) (Maybe c)
|
checkStepM :: ((Word32, ParsedRecord ctxt) -> (ResourceT m) c) -> (Word32, WrappedParsedRecord ctxt) -> (ResourceT m) (Maybe c)
|
||||||
|
|
||||||
data WithoutInput m e o = WithoutInput (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
data WithoutInput m e o = WithoutInput (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
||||||
| WithoutInputButFiltered Filter (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
| WithoutInputButFiltered GeneralizedFilter (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
||||||
|
|
||||||
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
|
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
|
||||||
data ParserSpec (WithoutInput m e o) = ParserSpecWithoutInput (ItemTarget -> Either String e) (ItemTarget -> Either String o)
|
data ParserSpec (WithoutInput m e o) = ParserSpecWithoutInput (ItemTarget -> Either String e) (ItemTarget -> Either String o)
|
||||||
@ -1091,7 +1095,8 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (Withou
|
|||||||
<*> ZipSource (getZipSource $ (,)
|
<*> ZipSource (getZipSource $ (,)
|
||||||
<$> ZipSource (sourceItems expectedLineSource)
|
<$> ZipSource (sourceItems expectedLineSource)
|
||||||
<*> ZipSource (sourceItems outLineSource)))
|
<*> ZipSource (sourceItems outLineSource)))
|
||||||
.| CC.filter (applyFilter theFilter)
|
.| CC.filter (applyFilter $ generalizedFilterFilter theFilter)
|
||||||
|
.| topperConduit (generalizedFilterTopper theFilter)
|
||||||
.| CC.map (\(TargetRecord _ y z) -> WrappedParsedRecordWithoutInput (applyParser expParser y) (applyParser outParser z))
|
.| CC.map (\(TargetRecord _ y z) -> WrappedParsedRecordWithoutInput (applyParser expParser y) (applyParser outParser z))
|
||||||
|
|
||||||
checkStep _ step (lineNo, WrappedParsedRecordWithoutInput (Got expectedItem) (Got outItem)) = Just $ step (lineNo, ParsedRecordWithoutInput expectedItem outItem)
|
checkStep _ step (lineNo, WrappedParsedRecordWithoutInput (Got expectedItem) (Got outItem)) = Just $ step (lineNo, ParsedRecordWithoutInput expectedItem outItem)
|
||||||
@ -1110,7 +1115,7 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (Withou
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
data WithInput m i e o = WithInput Filter (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
data WithInput m i e o = WithInput GeneralizedFilter (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
||||||
|
|
||||||
getInputFilePath :: WithInput m i e o -> SourceSpec
|
getInputFilePath :: WithInput m i e o -> SourceSpec
|
||||||
getInputFilePath (WithInput _ (LineSource _ _ _ inputFilePath _) _ _) = inputFilePath
|
getInputFilePath (WithInput _ (LineSource _ _ _ inputFilePath _) _ _) = inputFilePath
|
||||||
@ -1128,7 +1133,8 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithIn
|
|||||||
<*> ZipSource (getZipSource $ (,)
|
<*> ZipSource (getZipSource $ (,)
|
||||||
<$> ZipSource (sourceItems expectedLineSource)
|
<$> ZipSource (sourceItems expectedLineSource)
|
||||||
<*> ZipSource (sourceItems outLineSource)))
|
<*> ZipSource (sourceItems outLineSource)))
|
||||||
.| CC.filter (applyFilter theFilter)
|
.| CC.filter (applyFilter $ generalizedFilterFilter theFilter)
|
||||||
|
.| topperConduit (generalizedFilterTopper theFilter)
|
||||||
.| CC.map (\(TargetRecord x y z) -> WrappedParsedRecordWithInput (applyParser inpParser x) (applyParser expParser y) (applyParser outParser z))
|
.| CC.map (\(TargetRecord x y z) -> WrappedParsedRecordWithInput (applyParser inpParser x) (applyParser expParser y) (applyParser outParser z))
|
||||||
checkStep _ step (lineNo, WrappedParsedRecordWithInput (Got inputItem) (Got expectedItem) (Got outItem)) = Just $ step (lineNo, ParsedRecordWithInput inputItem expectedItem outItem)
|
checkStep _ step (lineNo, WrappedParsedRecordWithInput (Got inputItem) (Got expectedItem) (Got outItem)) = Just $ step (lineNo, ParsedRecordWithInput inputItem expectedItem outItem)
|
||||||
checkStep _ _ (lineNo, WrappedParsedRecordWithInput _ _ (Wrong m)) = throw $ UnexpectedData lineNo m
|
checkStep _ _ (lineNo, WrappedParsedRecordWithInput _ _ (Wrong m)) = throw $ UnexpectedData lineNo m
|
||||||
@ -1157,7 +1163,7 @@ threeLineSource (WithInput theFilter inputLineSource expectedLineSource outLineS
|
|||||||
<*> (ZipSource $ getZipSource $ (,)
|
<*> (ZipSource $ getZipSource $ (,)
|
||||||
<$> ZipSource (linesAsItems expectedLineSource)
|
<$> ZipSource (linesAsItems expectedLineSource)
|
||||||
<*> ZipSource (linesAsItems outLineSource)))
|
<*> ZipSource (linesAsItems outLineSource)))
|
||||||
.| (CC.filter (applyFilterToSourceItems theFilter))
|
.| (CC.filter (applyFilterToSourceItems $ generalizedFilterFilter theFilter))
|
||||||
.| (CC.map (\(x, (y,z)) -> WrappedParsedRecordWithInput x y z))
|
.| (CC.map (\(x, (y,z)) -> WrappedParsedRecordWithInput x y z))
|
||||||
|
|
||||||
averageC :: MonadResource m => ConduitT Double Void m Double
|
averageC :: MonadResource m => ConduitT Double Void m Double
|
||||||
@ -1201,3 +1207,15 @@ applyFilterToSourceItems filter (Got x, (Got y, Got z)) = applyFilter filter tar
|
|||||||
(Got (RawItemTarget y))
|
(Got (RawItemTarget y))
|
||||||
(Got (RawItemTarget z))
|
(Got (RawItemTarget z))
|
||||||
applyFilterToSourceItems _ special = True
|
applyFilterToSourceItems _ special = True
|
||||||
|
|
||||||
|
topperConduit NoTopper = doNothing
|
||||||
|
topperConduit (Topper percentage scorer) = gobbleAndDo (findTop percentage scorer)
|
||||||
|
|
||||||
|
findTop :: Double -> (TargetRecord -> Double) -> [TargetRecord] -> [TargetRecord]
|
||||||
|
findTop percentage scorer records =
|
||||||
|
Prelude.map fst
|
||||||
|
$ Prelude.take n
|
||||||
|
$ sortBy (\a b -> snd b `compare` snd a)
|
||||||
|
$ Prelude.map (\r -> (r, scorer r)) records
|
||||||
|
where n = ceiling ((percentage * (fromIntegral $ (Prelude.length records) - 1)) / 100.0)
|
||||||
|
-- -1 due to the special entry Done
|
||||||
|
@ -5,8 +5,11 @@ module GEval.DataSource
|
|||||||
DataSource(..),
|
DataSource(..),
|
||||||
TargetRecord(..),
|
TargetRecord(..),
|
||||||
Filter(..),
|
Filter(..),
|
||||||
|
Topper(..),
|
||||||
|
GeneralizedFilter(..),
|
||||||
noFilter,
|
noFilter,
|
||||||
getFilterForScheme,
|
hasNoFilter,
|
||||||
|
getGeneralizedFilterForScheme,
|
||||||
applyFilter)
|
applyFilter)
|
||||||
where
|
where
|
||||||
|
|
||||||
@ -15,6 +18,7 @@ import GEval.Selector (ItemTarget(..), TargetRecord(..))
|
|||||||
import GEval.FeatureExtractor (getFeatures)
|
import GEval.FeatureExtractor (getFeatures)
|
||||||
import GEval.BlackBoxDebugging
|
import GEval.BlackBoxDebugging
|
||||||
import GEval.EvaluationScheme
|
import GEval.EvaluationScheme
|
||||||
|
import GEval.Confidence (totalLineConfidence)
|
||||||
|
|
||||||
import Data.Text
|
import Data.Text
|
||||||
|
|
||||||
@ -25,19 +29,45 @@ import GEval.Selector
|
|||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
|
|
||||||
data Filter = NoFilter | FilterByFeatures (Maybe TabularHeader) (S.Set String)
|
data Filter = NoFilter | FilterByFeatures (Maybe TabularHeader) (S.Set String)
|
||||||
|
data Topper = NoTopper | Topper Double (TargetRecord -> Double)
|
||||||
|
|
||||||
noFilter :: Filter
|
data GeneralizedFilter = GeneralizedFilter {
|
||||||
noFilter = NoFilter
|
generalizedFilterFilter :: Filter,
|
||||||
|
generalizedFilterTopper :: Topper }
|
||||||
|
|
||||||
|
noFilter :: GeneralizedFilter
|
||||||
|
noFilter = GeneralizedFilter {
|
||||||
|
generalizedFilterFilter = NoFilter,
|
||||||
|
generalizedFilterTopper = NoTopper }
|
||||||
|
|
||||||
|
hasNoFilter :: GeneralizedFilter -> Bool
|
||||||
|
hasNoFilter gfilter = case generalizedFilterFilter gfilter of
|
||||||
|
NoFilter -> case generalizedFilterTopper gfilter of
|
||||||
|
NoTopper -> True
|
||||||
|
_ -> False
|
||||||
|
_ -> False
|
||||||
|
|
||||||
applyFilter :: Filter -> TargetRecord -> Bool
|
applyFilter :: Filter -> TargetRecord -> Bool
|
||||||
applyFilter NoFilter _ = True
|
applyFilter NoFilter _ = True
|
||||||
applyFilter (FilterByFeatures mInHeader featureSpec) tR = applyFeatureFilter mInHeader featureSpec tR
|
applyFilter (FilterByFeatures mInHeader featureSpec) tR = applyFeatureFilter mInHeader featureSpec tR
|
||||||
|
|
||||||
|
getGeneralizedFilterForScheme :: Maybe TabularHeader -> EvaluationScheme -> GeneralizedFilter
|
||||||
|
getGeneralizedFilterForScheme mTabHeader scheme = GeneralizedFilter {
|
||||||
|
generalizedFilterFilter = getFilterForScheme mTabHeader scheme,
|
||||||
|
generalizedFilterTopper = getTopperForScheme scheme
|
||||||
|
}
|
||||||
|
|
||||||
getFilterForScheme :: Maybe TabularHeader -> EvaluationScheme -> Filter
|
getFilterForScheme :: Maybe TabularHeader -> EvaluationScheme -> Filter
|
||||||
getFilterForScheme mTabHeader (EvaluationScheme _ ops) = case findFilter ops of
|
getFilterForScheme mTabHeader (EvaluationScheme _ ops) = case findFilter ops of
|
||||||
[] -> NoFilter
|
[] -> NoFilter
|
||||||
fs -> FilterByFeatures mTabHeader (S.fromList $ Prelude.map (unpack . fixIndex) fs)
|
fs -> FilterByFeatures mTabHeader (S.fromList $ Prelude.map (unpack . fixIndex) fs)
|
||||||
|
|
||||||
|
getTopperForScheme :: EvaluationScheme -> Topper
|
||||||
|
getTopperForScheme (EvaluationScheme _ ops) = case findTopper ops of
|
||||||
|
[] -> NoTopper
|
||||||
|
[topper] -> topper
|
||||||
|
_ -> error "only one topper expected"
|
||||||
|
|
||||||
fixIndex = replace "[" "<" . replace "]" ">"
|
fixIndex = replace "[" "<" . replace "]" ">"
|
||||||
|
|
||||||
findFilter :: [PreprocessingOperation] -> [Text]
|
findFilter :: [PreprocessingOperation] -> [Text]
|
||||||
@ -45,6 +75,18 @@ findFilter [] = []
|
|||||||
findFilter ((FeatureFilter f):ops) = (f:(findFilter ops))
|
findFilter ((FeatureFilter f):ops) = (f:(findFilter ops))
|
||||||
findFilter (_:ops) = findFilter ops
|
findFilter (_:ops) = findFilter ops
|
||||||
|
|
||||||
|
findTopper :: [PreprocessingOperation] -> [Topper]
|
||||||
|
findTopper [] = []
|
||||||
|
findTopper ((TopConfidence percentage):ops) = [Topper percentage totalRecordConfidence]
|
||||||
|
findTopper (_:ops) = findTopper ops
|
||||||
|
|
||||||
|
totalRecordConfidence :: TargetRecord -> Double
|
||||||
|
totalRecordConfidence (TargetRecord _ _ (Got out)) = case out of
|
||||||
|
RawItemTarget t -> totalLineConfidence t
|
||||||
|
totalRecordConfidence (TargetRecord _ _ (Wrong _)) = 9999.0
|
||||||
|
totalRecordConfidence (TargetRecord _ _ Done) = -9999
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
applyFeatureFilter :: Maybe TabularHeader -> S.Set String -> TargetRecord -> Bool
|
applyFeatureFilter :: Maybe TabularHeader -> S.Set String -> TargetRecord -> Bool
|
||||||
applyFeatureFilter mInHeader featureSpec tR = featureSpec `S.isSubsetOf` (S.fromList $ Prelude.map show fs)
|
applyFeatureFilter mInHeader featureSpec tR = featureSpec `S.isSubsetOf` (S.fromList $ Prelude.map show fs)
|
||||||
@ -71,7 +113,7 @@ data ChallengeDataSource = ChallengeDataSource {
|
|||||||
challengeDataSourceSelector :: Maybe Selector,
|
challengeDataSourceSelector :: Maybe Selector,
|
||||||
challengeDataSourceOutPreprocess :: Text -> Text,
|
challengeDataSourceOutPreprocess :: Text -> Text,
|
||||||
challengeDataSourceInPreprocess :: Text -> Text,
|
challengeDataSourceInPreprocess :: Text -> Text,
|
||||||
challengeDataSourceFilter :: Filter,
|
challengeDataSourceFilter :: GeneralizedFilter,
|
||||||
challengeDataSourceInHeader :: Maybe TabularHeader,
|
challengeDataSourceInHeader :: Maybe TabularHeader,
|
||||||
challengeDataSourceOutHeader :: Maybe TabularHeader,
|
challengeDataSourceOutHeader :: Maybe TabularHeader,
|
||||||
-- whether the data will be shown preprocessed (not only
|
-- whether the data will be shown preprocessed (not only
|
||||||
|
@ -36,6 +36,7 @@ data PreprocessingOperation = RegexpMatch Regex
|
|||||||
| SetPriority Int
|
| SetPriority Int
|
||||||
| RegexpSubstition Regex Text
|
| RegexpSubstition Regex Text
|
||||||
| FeatureFilter Text
|
| FeatureFilter Text
|
||||||
|
| TopConfidence Double
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
leftParameterBracket :: Char
|
leftParameterBracket :: Char
|
||||||
@ -67,6 +68,7 @@ readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
|
|||||||
readOps ('P':theRest) = handleParametrizedOp (SetPriority . read) theRest
|
readOps ('P':theRest) = handleParametrizedOp (SetPriority . read) theRest
|
||||||
readOps ('s':theRest) = handleParametrizedBinaryOp (\a b -> RegexpSubstition (compile (BSU.fromString a) []) (pack b)) theRest
|
readOps ('s':theRest) = handleParametrizedBinaryOp (\a b -> RegexpSubstition (compile (BSU.fromString a) []) (pack b)) theRest
|
||||||
readOps ('f':theRest) = handleParametrizedOp (FeatureFilter . pack) theRest
|
readOps ('f':theRest) = handleParametrizedOp (FeatureFilter . pack) theRest
|
||||||
|
readOps ('p':theRest) = handleParametrizedOp (TopConfidence . read) theRest
|
||||||
-- this is not the right way to do this, but try catch at least unknown flags
|
-- this is not the right way to do this, but try catch at least unknown flags
|
||||||
readOps t@(c:_) = if isLetter c
|
readOps t@(c:_) = if isLetter c
|
||||||
then throw $ UnknownFlags t
|
then throw $ UnknownFlags t
|
||||||
@ -101,7 +103,7 @@ parseParameter [] = (Nothing, [])
|
|||||||
parseParameter t@(fChar:theRest) =
|
parseParameter t@(fChar:theRest) =
|
||||||
if fChar == leftParameterBracket
|
if fChar == leftParameterBracket
|
||||||
then case break (== rightParameterBracket) theRest of
|
then case break (== rightParameterBracket) theRest of
|
||||||
(s, []) -> throw $ UnknownFlags t
|
(_, []) -> throw $ UnknownFlags t
|
||||||
(param, (_:theRest')) -> (Just param, theRest')
|
(param, (_:theRest')) -> (Just param, theRest')
|
||||||
else throw $ UnknownFlags t
|
else throw $ UnknownFlags t
|
||||||
|
|
||||||
@ -145,6 +147,7 @@ instance Show PreprocessingOperation where
|
|||||||
show (SetPriority p) = parametrizedOperation "P" (show p)
|
show (SetPriority p) = parametrizedOperation "P" (show p)
|
||||||
show (RegexpSubstition (Regex _ regexp) s) = "s" ++ (formatParameter $ BSU.toString regexp) ++ (formatParameter $ unpack s)
|
show (RegexpSubstition (Regex _ regexp) s) = "s" ++ (formatParameter $ BSU.toString regexp) ++ (formatParameter $ unpack s)
|
||||||
show (FeatureFilter featureSpec) = parametrizedOperation "f" (unpack featureSpec)
|
show (FeatureFilter featureSpec) = parametrizedOperation "f" (unpack featureSpec)
|
||||||
|
show (TopConfidence percentage) = parametrizedOperation "p" (show percentage)
|
||||||
|
|
||||||
applySubstitution :: Regex -> Text -> Text -> Text
|
applySubstitution :: Regex -> Text -> Text -> Text
|
||||||
applySubstitution r substitution t =
|
applySubstitution r substitution t =
|
||||||
@ -181,3 +184,4 @@ applyPreprocessingOperation (SetName _) = id
|
|||||||
applyPreprocessingOperation (SetPriority _) = id
|
applyPreprocessingOperation (SetPriority _) = id
|
||||||
applyPreprocessingOperation (RegexpSubstition regex substition) = applySubstitution regex substition
|
applyPreprocessingOperation (RegexpSubstition regex substition) = applySubstitution regex substition
|
||||||
applyPreprocessingOperation (FeatureFilter _) = id
|
applyPreprocessingOperation (FeatureFilter _) = id
|
||||||
|
applyPreprocessingOperation (TopConfidence _) = id
|
||||||
|
@ -33,7 +33,7 @@ import Text.Tokenizer
|
|||||||
|
|
||||||
import System.IO
|
import System.IO
|
||||||
|
|
||||||
import Data.Conduit.AutoDecompress (doNothing)
|
import Data.Conduit.Utils
|
||||||
|
|
||||||
import Data.Conduit
|
import Data.Conduit
|
||||||
import qualified Data.Conduit.List as CL
|
import qualified Data.Conduit.List as CL
|
||||||
@ -480,11 +480,6 @@ runLineByLineGeneralized ordering spec consum = do
|
|||||||
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
|
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
|
||||||
mReferences = Nothing
|
mReferences = Nothing
|
||||||
|
|
||||||
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
|
|
||||||
gobbleAndDo fun = do
|
|
||||||
l <- CC.sinkList
|
|
||||||
CC.yieldMany $ fun l
|
|
||||||
|
|
||||||
runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
runDiff :: ResultOrdering -> Maybe String -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
|
||||||
runDiff ordering featureFilter otherOut spec bbdo = do
|
runDiff ordering featureFilter otherOut spec bbdo = do
|
||||||
mInHeader <- readHeaderFileWrapper $ getInHeader spec
|
mInHeader <- readHeaderFileWrapper $ getInHeader spec
|
||||||
|
@ -31,6 +31,7 @@ import GEval.FeatureExtractor
|
|||||||
import GEval.Selector
|
import GEval.Selector
|
||||||
import GEval.CreateChallenge
|
import GEval.CreateChallenge
|
||||||
import GEval.Validation
|
import GEval.Validation
|
||||||
|
import GEval.Confidence
|
||||||
import Data.Conduit.Bootstrap
|
import Data.Conduit.Bootstrap
|
||||||
|
|
||||||
import Data.Map.Strict
|
import Data.Map.Strict
|
||||||
@ -373,6 +374,8 @@ main = hspec $ do
|
|||||||
runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555
|
runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555
|
||||||
it "information extraction" $ do
|
it "information extraction" $ do
|
||||||
runGEvalTest "multilabel-f1-ie-probs" `shouldReturnAlmost` 0.1111111111
|
runGEvalTest "multilabel-f1-ie-probs" `shouldReturnAlmost` 0.1111111111
|
||||||
|
it "with top confident" $ do
|
||||||
|
runGEvalTest "top-confidence" `shouldReturnAlmost` 0.857142857142857
|
||||||
describe "Mean/MultiLabel-F" $ do
|
describe "Mean/MultiLabel-F" $ do
|
||||||
it "simple" $ do
|
it "simple" $ do
|
||||||
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
||||||
@ -801,6 +804,11 @@ main = hspec $ do
|
|||||||
loess (DVU.fromList [0.2, 0.6, 1.0])
|
loess (DVU.fromList [0.2, 0.6, 1.0])
|
||||||
(DVU.fromList [-0.6, 0.2, 1.0])
|
(DVU.fromList [-0.6, 0.2, 1.0])
|
||||||
0.4 `shouldBeAlmost` (-0.2)
|
0.4 `shouldBeAlmost` (-0.2)
|
||||||
|
describe "Confidence for a line" $ do
|
||||||
|
it "simple case" $ do
|
||||||
|
totalLineConfidence "foo:0.6" `shouldBeAlmost` 0.6
|
||||||
|
it "more than one" $ do
|
||||||
|
totalLineConfidence "foo:1.0 bar:0.6 bar:0.5" `shouldBeAlmost` 0.669432952768766
|
||||||
describe "Calibration" $ do
|
describe "Calibration" $ do
|
||||||
it "empty list" $ do
|
it "empty list" $ do
|
||||||
calibration [] [] `shouldBeAlmost` 1.0
|
calibration [] [] `shouldBeAlmost` 1.0
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
foo:0.96 qqq:0.001 bar:0.95
|
||||||
|
foo:0.7 baz:0.6
|
||||||
|
bar:0.7
|
||||||
|
tok:0.9999
|
||||||
|
baz:0.8
|
|
1
test/top-confidence/top-confidence/config.txt
Normal file
1
test/top-confidence/top-confidence/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric MultiLabel-F1:f<in[1]:yep>s<qqq:\S+><>p<50>
|
5
test/top-confidence/top-confidence/test-A/expected.tsv
Normal file
5
test/top-confidence/top-confidence/test-A/expected.tsv
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
foo bar baq
|
||||||
|
foo baz baq baq baq baq baq
|
||||||
|
bat
|
||||||
|
kwa kwa kwa
|
||||||
|
baz
|
|
5
test/top-confidence/top-confidence/test-A/in.tsv
Normal file
5
test/top-confidence/top-confidence/test-A/in.tsv
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
yep
|
||||||
|
yep
|
||||||
|
yep
|
||||||
|
no
|
||||||
|
yep
|
|
Loading…
Reference in New Issue
Block a user