Remove some warnings
This commit is contained in:
parent
612792799a
commit
ef2697a2be
@ -66,13 +66,10 @@ import GEval.Model (ModelType)
|
|||||||
|
|
||||||
import Data.Conduit
|
import Data.Conduit
|
||||||
import Data.Conduit.Combinators as CC
|
import Data.Conduit.Combinators as CC
|
||||||
import qualified Data.Conduit.Binary as CB
|
|
||||||
import qualified Data.Conduit.Text as CT
|
import qualified Data.Conduit.Text as CT
|
||||||
import Control.Monad.Trans.Resource
|
import Control.Monad.Trans.Resource
|
||||||
import qualified Data.Conduit.List as CL
|
import qualified Data.Conduit.List as CL
|
||||||
import Data.Text
|
import Data.Text
|
||||||
import Data.Text.Read as TR
|
|
||||||
import Control.Applicative
|
|
||||||
import Control.Exception
|
import Control.Exception
|
||||||
import Control.Conditional (unlessM, whenM)
|
import Control.Conditional (unlessM, whenM)
|
||||||
import qualified System.Directory as D
|
import qualified System.Directory as D
|
||||||
@ -80,35 +77,25 @@ import System.Posix
|
|||||||
import System.FilePath
|
import System.FilePath
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
import Data.Either (rights)
|
import Data.Either (rights)
|
||||||
import Data.Tuple
|
|
||||||
import qualified Data.List.Split as DLS
|
|
||||||
import Data.List (sortBy, isSuffixOf, minimum, maximum)
|
import Data.List (sortBy, isSuffixOf, minimum, maximum)
|
||||||
import Text.NaturalComp
|
import Text.NaturalComp
|
||||||
|
|
||||||
import Control.Monad.IO.Class
|
import Control.Monad.IO.Class
|
||||||
import Control.Monad ((<=<), filterM)
|
|
||||||
|
|
||||||
import Data.Attoparsec.Text (parseOnly)
|
|
||||||
|
|
||||||
import Data.Conduit.SmartSource
|
import Data.Conduit.SmartSource
|
||||||
import Data.Conduit.Header
|
import Data.Conduit.Header
|
||||||
|
|
||||||
import qualified Data.IntSet as IS
|
|
||||||
|
|
||||||
import GEval.BLEU
|
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
import GEval.Clippings
|
|
||||||
import GEval.PrecisionRecall
|
import GEval.PrecisionRecall
|
||||||
import GEval.ClusteringMetrics
|
import GEval.ClusteringMetrics
|
||||||
import GEval.LogLossHashed
|
import GEval.LogLossHashed
|
||||||
import GEval.CharMatch
|
import GEval.CharMatch
|
||||||
import GEval.BIO
|
|
||||||
import GEval.ProbList
|
|
||||||
import GEval.WER
|
import GEval.WER
|
||||||
import Data.Conduit.AutoDecompress
|
import Data.Conduit.AutoDecompress
|
||||||
import Text.Tokenizer
|
import Text.Tokenizer
|
||||||
import GEval.Selector
|
import GEval.Selector
|
||||||
import GEval.Annotation
|
|
||||||
import GEval.BlackBoxDebugging
|
import GEval.BlackBoxDebugging
|
||||||
import Data.Conduit.Bootstrap
|
import Data.Conduit.Bootstrap
|
||||||
import GEval.DataSource
|
import GEval.DataSource
|
||||||
@ -116,7 +103,6 @@ import GEval.MatchingSpecification
|
|||||||
|
|
||||||
import qualified Data.HashMap.Strict as M
|
import qualified Data.HashMap.Strict as M
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
import qualified Data.Vector.Generic as VG
|
|
||||||
|
|
||||||
import qualified Data.Vector.Unboxed as DVU
|
import qualified Data.Vector.Unboxed as DVU
|
||||||
|
|
||||||
@ -125,7 +111,6 @@ import Statistics.Correlation
|
|||||||
import Data.Statistics.Calibration (softCalibration)
|
import Data.Statistics.Calibration (softCalibration)
|
||||||
import Data.Statistics.Loess (clippedLoess)
|
import Data.Statistics.Loess (clippedLoess)
|
||||||
|
|
||||||
import Data.Proxy
|
|
||||||
|
|
||||||
import Data.Word
|
import Data.Word
|
||||||
|
|
||||||
@ -145,6 +130,7 @@ isInputNeeded :: EvaluationScheme -> Bool
|
|||||||
isInputNeeded (EvaluationScheme CharMatch _) = True
|
isInputNeeded (EvaluationScheme CharMatch _) = True
|
||||||
isInputNeeded (EvaluationScheme _ ops) = hasFiltering ops
|
isInputNeeded (EvaluationScheme _ ops) = hasFiltering ops
|
||||||
|
|
||||||
|
hasFiltering :: [PreprocessingOperation] -> Bool
|
||||||
hasFiltering [] = False
|
hasFiltering [] = False
|
||||||
hasFiltering ((FeatureFilter _):_) = True
|
hasFiltering ((FeatureFilter _):_) = True
|
||||||
hasFiltering (_:ops) = hasFiltering ops
|
hasFiltering (_:ops) = hasFiltering ops
|
||||||
@ -189,13 +175,19 @@ isPreprocessable MultiLabelLikelihood = False
|
|||||||
isPreprocessable (Mean metric) = isPreprocessable metric
|
isPreprocessable (Mean metric) = isPreprocessable metric
|
||||||
isPreprocessable Haversine = False
|
isPreprocessable Haversine = False
|
||||||
|
|
||||||
|
isInputModifiable :: Metric -> Bool
|
||||||
isInputModifiable CharMatch = True
|
isInputModifiable CharMatch = True
|
||||||
isInputModifiable _ = False
|
isInputModifiable _ = False
|
||||||
|
|
||||||
|
defaultOutDirectory :: FilePath
|
||||||
defaultOutDirectory = "."
|
defaultOutDirectory = "."
|
||||||
|
defaultTestName :: String
|
||||||
defaultTestName = "test-A"
|
defaultTestName = "test-A"
|
||||||
|
defaultOutFile :: FilePath
|
||||||
defaultOutFile = "out.tsv"
|
defaultOutFile = "out.tsv"
|
||||||
|
defaultExpectedFile :: String
|
||||||
defaultExpectedFile = "expected.tsv"
|
defaultExpectedFile = "expected.tsv"
|
||||||
|
defaultInputFile :: String
|
||||||
defaultInputFile = "in.tsv"
|
defaultInputFile = "in.tsv"
|
||||||
|
|
||||||
defaultMetric :: Metric
|
defaultMetric :: Metric
|
||||||
@ -250,6 +242,7 @@ getInHeader spec = getHeader spec gesInHeader
|
|||||||
getOutHeader :: GEvalSpecification -> Maybe FilePath
|
getOutHeader :: GEvalSpecification -> Maybe FilePath
|
||||||
getOutHeader spec = getHeader spec gesOutHeader
|
getOutHeader spec = getHeader spec gesOutHeader
|
||||||
|
|
||||||
|
getHeader :: GEvalSpecification -> (GEvalSpecification -> Maybe FilePath) -> Maybe FilePath
|
||||||
getHeader spec selector = case selector spec of
|
getHeader spec selector = case selector spec of
|
||||||
Just headerFile -> Just $ getExpectedDirectory spec </> headerFile
|
Just headerFile -> Just $ getExpectedDirectory spec </> headerFile
|
||||||
Nothing -> Nothing
|
Nothing -> Nothing
|
||||||
@ -277,6 +270,7 @@ data GEvalOptions = GEvalOptions
|
|||||||
geoMarkWorstFeatures :: Bool }
|
geoMarkWorstFeatures :: Bool }
|
||||||
|
|
||||||
|
|
||||||
|
defaultGEvalSpecification :: GEvalSpecification
|
||||||
defaultGEvalSpecification = GEvalSpecification {
|
defaultGEvalSpecification = GEvalSpecification {
|
||||||
gesOutDirectory = defaultOutDirectory,
|
gesOutDirectory = defaultOutDirectory,
|
||||||
gesExpectedDirectory = Nothing,
|
gesExpectedDirectory = Nothing,
|
||||||
@ -540,6 +534,7 @@ getDataFormatFromFilePath path =
|
|||||||
then dropExtension path
|
then dropExtension path
|
||||||
else path
|
else path
|
||||||
|
|
||||||
|
dataDecoder :: Monad m => DataFormat -> Maybe Selector -> ConduitT Text ItemTarget m ()
|
||||||
dataDecoder fmt mSelector = CC.map (select fmt mSelector)
|
dataDecoder fmt mSelector = CC.map (select fmt mSelector)
|
||||||
|
|
||||||
gevalCoreOnSingleLines :: Metric
|
gevalCoreOnSingleLines :: Metric
|
||||||
@ -777,6 +772,7 @@ gevalCoreOnSourcesStandardWay metric lsSpec =
|
|||||||
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
||||||
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
||||||
|
|
||||||
|
helperLogLossHashed :: (MonadUnliftIO m, MonadThrow m) => Word32 -> (Double -> Double) -> LineSourcesSpecification (ResourceT m) -> m MetricOutput
|
||||||
helperLogLossHashed nbOfBits finalStep lsSpec =
|
helperLogLossHashed nbOfBits finalStep lsSpec =
|
||||||
gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC (finalStep . negate) noGraph (fromSpecificationToWithoutInput lsSpec)
|
gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC (finalStep . negate) noGraph (fromSpecificationToWithoutInput lsSpec)
|
||||||
where -- Unfortunately, we're parsing the distribution twice. We need to
|
where -- Unfortunately, we're parsing the distribution twice. We need to
|
||||||
@ -787,6 +783,7 @@ helperLogLossHashed nbOfBits finalStep lsSpec =
|
|||||||
Right _ -> Right t
|
Right _ -> Right t
|
||||||
Left m -> Left m
|
Left m -> Left m
|
||||||
|
|
||||||
|
generalizedProbabilisticFMeasure :: (MonadUnliftIO m, MonadThrow m, ItemIntermediateRepresentationType t ~ ([Double], [Double], Double, Int)) => Double -> SAMetric t -> LineSourcesSpecification (ResourceT m) -> m MetricOutput
|
||||||
generalizedProbabilisticFMeasure beta metric = gevalCoreWithoutInput metric
|
generalizedProbabilisticFMeasure beta metric = gevalCoreWithoutInput metric
|
||||||
probabilisticSoftAgg
|
probabilisticSoftAgg
|
||||||
(fMeasureOnProbabilisticCounts beta)
|
(fMeasureOnProbabilisticCounts beta)
|
||||||
@ -1041,6 +1038,7 @@ defineContinuation aggregator finalStep generateGraph = do
|
|||||||
v <- aggregator
|
v <- aggregator
|
||||||
return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v)
|
return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v)
|
||||||
|
|
||||||
|
fromSpecificationToWithoutInput :: LineSourcesSpecification (ResourceT m) -> WithoutInput m e o
|
||||||
fromSpecificationToWithoutInput lsSpec = case lineSourcesFilter lsSpec of
|
fromSpecificationToWithoutInput lsSpec = case lineSourcesFilter lsSpec of
|
||||||
NoFilter -> WithoutInput expectedSource outSource
|
NoFilter -> WithoutInput expectedSource outSource
|
||||||
theFilter -> WithoutInputButFiltered theFilter inputSource expectedSource outSource
|
theFilter -> WithoutInputButFiltered theFilter inputSource expectedSource outSource
|
||||||
@ -1048,6 +1046,7 @@ fromSpecificationToWithoutInput lsSpec = case lineSourcesFilter lsSpec of
|
|||||||
outSource = lineSourcesOutputSource lsSpec
|
outSource = lineSourcesOutputSource lsSpec
|
||||||
inputSource = lineSourcesInputSource lsSpec
|
inputSource = lineSourcesInputSource lsSpec
|
||||||
|
|
||||||
|
fromSpecificationToWithInput :: LineSourcesSpecification (ResourceT m) -> WithInput m i e o
|
||||||
fromSpecificationToWithInput lsSpec = WithInput theFilter inpSource expectedSource outSource
|
fromSpecificationToWithInput lsSpec = WithInput theFilter inpSource expectedSource outSource
|
||||||
where inpSource = lineSourcesInputSource lsSpec
|
where inpSource = lineSourcesInputSource lsSpec
|
||||||
expectedSource = lineSourcesExpectedSource lsSpec
|
expectedSource = lineSourcesExpectedSource lsSpec
|
||||||
@ -1113,6 +1112,7 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (Withou
|
|||||||
|
|
||||||
data WithInput m i e o = WithInput Filter (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
data WithInput m i e o = WithInput Filter (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
||||||
|
|
||||||
|
getInputFilePath :: WithInput m i e o -> SourceSpec
|
||||||
getInputFilePath (WithInput _ (LineSource _ _ _ inputFilePath _) _ _) = inputFilePath
|
getInputFilePath (WithInput _ (LineSource _ _ _ inputFilePath _) _ _) = inputFilePath
|
||||||
|
|
||||||
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
|
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
|
||||||
|
@ -23,7 +23,7 @@ import GEval.Clippings (totalArea, coveredBy, clippEUMatchStep)
|
|||||||
import GEval.BIO (gatherCountsForBIO, gatherSeparatedCountsForBIO)
|
import GEval.BIO (gatherCountsForBIO, gatherSeparatedCountsForBIO)
|
||||||
|
|
||||||
import GEval.Probability
|
import GEval.Probability
|
||||||
import GEval.PrecisionRecall (weightedMaxMatch, fMeasureOnCounts, calculateMAPForOneResult, getProbabilisticCounts, getCounts)
|
import GEval.PrecisionRecall (weightedMaxMatch, calculateMAPForOneResult, getProbabilisticCounts)
|
||||||
|
|
||||||
import Control.Exception
|
import Control.Exception
|
||||||
|
|
||||||
@ -312,18 +312,25 @@ itemStep SAMultiLabelLikelihood = uncurry countLogLossOnProbList
|
|||||||
itemStep SAHaversine = haversine
|
itemStep SAHaversine = haversine
|
||||||
|
|
||||||
|
|
||||||
|
doubleParser :: Text -> Either String Double
|
||||||
doubleParser = getValue . TR.double
|
doubleParser = getValue . TR.double
|
||||||
|
|
||||||
|
intoWords :: Text -> Either a [Text]
|
||||||
intoWords = Right . Data.Text.words
|
intoWords = Right . Data.Text.words
|
||||||
|
|
||||||
|
intoStringWords :: Text -> Either a [String]
|
||||||
intoStringWords = Right . Prelude.words . unpack
|
intoStringWords = Right . Prelude.words . unpack
|
||||||
|
|
||||||
|
alternativeSentencesParser :: Text -> Either a [[String]]
|
||||||
alternativeSentencesParser = Right . map Prelude.words . DLS.splitOn "\t" . unpack
|
alternativeSentencesParser = Right . map Prelude.words . DLS.splitOn "\t" . unpack
|
||||||
|
|
||||||
|
onlyStrip :: Text -> Either a Text
|
||||||
onlyStrip = Right . strip
|
onlyStrip = Right . strip
|
||||||
|
|
||||||
|
justStrip :: Text -> Either a (Maybe Text)
|
||||||
justStrip = Right . Just . strip
|
justStrip = Right . Just . strip
|
||||||
|
|
||||||
|
predictedParser :: Text -> Maybe Text
|
||||||
predictedParser got =
|
predictedParser got =
|
||||||
-- first try to parse what we got as a probability distribution
|
-- first try to parse what we got as a probability distribution
|
||||||
-- (like the one used for Likelikehood/LogLossHashed metric)
|
-- (like the one used for Likelikehood/LogLossHashed metric)
|
||||||
@ -334,13 +341,16 @@ predictedParser got =
|
|||||||
where pairs = catMaybes $ map wordSpecToPair wordSpecs
|
where pairs = catMaybes $ map wordSpecToPair wordSpecs
|
||||||
Left _ -> Just got
|
Left _ -> Just got
|
||||||
|
|
||||||
|
splitByTabs :: Text -> Either a [[Char]]
|
||||||
splitByTabs = Right . DLS.splitOn "\t" . unpack
|
splitByTabs = Right . DLS.splitOn "\t" . unpack
|
||||||
|
|
||||||
|
zeroOneParser :: Text -> Either String Bool
|
||||||
zeroOneParser = expected <=< (getValue . TR.decimal)
|
zeroOneParser = expected <=< (getValue . TR.decimal)
|
||||||
where expected 1 = Right True
|
where expected 1 = Right True
|
||||||
expected 0 = Right False
|
expected 0 = Right False
|
||||||
expected _ = Left "expected 0 or 1"
|
expected _ = Left "expected 0 or 1"
|
||||||
|
|
||||||
|
probToZeroOneParser :: Text -> Either String Bool
|
||||||
probToZeroOneParser = detected <=< (getValue . TR.double)
|
probToZeroOneParser = detected <=< (getValue . TR.double)
|
||||||
where -- output value could be a probability (for compatibility with other measures)
|
where -- output value could be a probability (for compatibility with other measures)
|
||||||
detected prob
|
detected prob
|
||||||
@ -361,8 +371,10 @@ controlledParse parser t =
|
|||||||
(Right v) -> Right v
|
(Right v) -> Right v
|
||||||
(Left _) -> Left "cannot parse line"
|
(Left _) -> Left "cannot parse line"
|
||||||
|
|
||||||
|
smape :: (Double, Double) -> Double
|
||||||
smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
|
smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
|
||||||
|
|
||||||
|
hitOrMiss :: (Text, Text) -> Double
|
||||||
hitOrMiss (exp, got) =
|
hitOrMiss (exp, got) =
|
||||||
-- first try to parse what we got as a probability distribution
|
-- first try to parse what we got as a probability distribution
|
||||||
-- (like the one used for Likelikehood/LogLossHashed metric)
|
-- (like the one used for Likelikehood/LogLossHashed metric)
|
||||||
@ -393,6 +405,7 @@ getCount (True, False) = (0, 1, 0)
|
|||||||
getCount (False, True) = (0, 0, 1)
|
getCount (False, True) = (0, 0, 1)
|
||||||
getCount (False, False) = (0, 0, 0)
|
getCount (False, False) = (0, 0, 0)
|
||||||
|
|
||||||
|
getClassesInvolved :: Eq a => (Maybe a, Maybe a) -> (Maybe a, Maybe a, Maybe a)
|
||||||
getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
|
getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
|
||||||
getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
|
getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
|
||||||
getClassesInvolved (Just a, Just b) = if a == b
|
getClassesInvolved (Just a, Just b) = if a == b
|
||||||
@ -404,8 +417,10 @@ getWeightedCounts matchFun (expected, got) = (weightedMaxMatch matchFun expected
|
|||||||
Prelude.length expected,
|
Prelude.length expected,
|
||||||
Prelude.length got)
|
Prelude.length got)
|
||||||
|
|
||||||
|
getSoftCounts :: EntityWithProbability e => ([BareEntity e], [e]) -> (Double, Int, Int)
|
||||||
getSoftCounts args = getWeightedCounts matchScore args
|
getSoftCounts args = getWeightedCounts matchScore args
|
||||||
|
|
||||||
|
getSoft2DCounts :: ([LabeledClipping], [LabeledClipping]) -> (Integer, Integer, Integer)
|
||||||
getSoft2DCounts (expected, got) = (tpArea, expArea, gotArea)
|
getSoft2DCounts (expected, got) = (tpArea, expArea, gotArea)
|
||||||
where tpArea = coveredBy expected got
|
where tpArea = coveredBy expected got
|
||||||
expArea = totalArea expected
|
expArea = totalArea expected
|
||||||
|
Loading…
Reference in New Issue
Block a user