Remove some warnings

This commit is contained in:
Filip Gralinski 2021-08-20 19:39:11 +02:00
parent 612792799a
commit ef2697a2be
2 changed files with 31 additions and 16 deletions

View File

@ -66,13 +66,10 @@ import GEval.Model (ModelType)
import Data.Conduit import Data.Conduit
import Data.Conduit.Combinators as CC import Data.Conduit.Combinators as CC
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Text as CT import qualified Data.Conduit.Text as CT
import Control.Monad.Trans.Resource import Control.Monad.Trans.Resource
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import Data.Text import Data.Text
import Data.Text.Read as TR
import Control.Applicative
import Control.Exception import Control.Exception
import Control.Conditional (unlessM, whenM) import Control.Conditional (unlessM, whenM)
import qualified System.Directory as D import qualified System.Directory as D
@ -80,35 +77,25 @@ import System.Posix
import System.FilePath import System.FilePath
import Data.Maybe import Data.Maybe
import Data.Either (rights) import Data.Either (rights)
import Data.Tuple
import qualified Data.List.Split as DLS
import Data.List (sortBy, isSuffixOf, minimum, maximum) import Data.List (sortBy, isSuffixOf, minimum, maximum)
import Text.NaturalComp import Text.NaturalComp
import Control.Monad.IO.Class import Control.Monad.IO.Class
import Control.Monad ((<=<), filterM)
import Data.Attoparsec.Text (parseOnly)
import Data.Conduit.SmartSource import Data.Conduit.SmartSource
import Data.Conduit.Header import Data.Conduit.Header
import qualified Data.IntSet as IS
import GEval.BLEU
import GEval.Common import GEval.Common
import GEval.Clippings
import GEval.PrecisionRecall import GEval.PrecisionRecall
import GEval.ClusteringMetrics import GEval.ClusteringMetrics
import GEval.LogLossHashed import GEval.LogLossHashed
import GEval.CharMatch import GEval.CharMatch
import GEval.BIO
import GEval.ProbList
import GEval.WER import GEval.WER
import Data.Conduit.AutoDecompress import Data.Conduit.AutoDecompress
import Text.Tokenizer import Text.Tokenizer
import GEval.Selector import GEval.Selector
import GEval.Annotation
import GEval.BlackBoxDebugging import GEval.BlackBoxDebugging
import Data.Conduit.Bootstrap import Data.Conduit.Bootstrap
import GEval.DataSource import GEval.DataSource
@ -116,7 +103,6 @@ import GEval.MatchingSpecification
import qualified Data.HashMap.Strict as M import qualified Data.HashMap.Strict as M
import qualified Data.Vector as V import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as DVU import qualified Data.Vector.Unboxed as DVU
@ -125,7 +111,6 @@ import Statistics.Correlation
import Data.Statistics.Calibration (softCalibration) import Data.Statistics.Calibration (softCalibration)
import Data.Statistics.Loess (clippedLoess) import Data.Statistics.Loess (clippedLoess)
import Data.Proxy
import Data.Word import Data.Word
@ -145,6 +130,7 @@ isInputNeeded :: EvaluationScheme -> Bool
isInputNeeded (EvaluationScheme CharMatch _) = True isInputNeeded (EvaluationScheme CharMatch _) = True
isInputNeeded (EvaluationScheme _ ops) = hasFiltering ops isInputNeeded (EvaluationScheme _ ops) = hasFiltering ops
hasFiltering :: [PreprocessingOperation] -> Bool
hasFiltering [] = False hasFiltering [] = False
hasFiltering ((FeatureFilter _):_) = True hasFiltering ((FeatureFilter _):_) = True
hasFiltering (_:ops) = hasFiltering ops hasFiltering (_:ops) = hasFiltering ops
@ -189,13 +175,19 @@ isPreprocessable MultiLabelLikelihood = False
isPreprocessable (Mean metric) = isPreprocessable metric isPreprocessable (Mean metric) = isPreprocessable metric
isPreprocessable Haversine = False isPreprocessable Haversine = False
isInputModifiable :: Metric -> Bool
isInputModifiable CharMatch = True isInputModifiable CharMatch = True
isInputModifiable _ = False isInputModifiable _ = False
defaultOutDirectory :: FilePath
defaultOutDirectory = "." defaultOutDirectory = "."
defaultTestName :: String
defaultTestName = "test-A" defaultTestName = "test-A"
defaultOutFile :: FilePath
defaultOutFile = "out.tsv" defaultOutFile = "out.tsv"
defaultExpectedFile :: String
defaultExpectedFile = "expected.tsv" defaultExpectedFile = "expected.tsv"
defaultInputFile :: String
defaultInputFile = "in.tsv" defaultInputFile = "in.tsv"
defaultMetric :: Metric defaultMetric :: Metric
@ -250,6 +242,7 @@ getInHeader spec = getHeader spec gesInHeader
getOutHeader :: GEvalSpecification -> Maybe FilePath getOutHeader :: GEvalSpecification -> Maybe FilePath
getOutHeader spec = getHeader spec gesOutHeader getOutHeader spec = getHeader spec gesOutHeader
getHeader :: GEvalSpecification -> (GEvalSpecification -> Maybe FilePath) -> Maybe FilePath
getHeader spec selector = case selector spec of getHeader spec selector = case selector spec of
Just headerFile -> Just $ getExpectedDirectory spec </> headerFile Just headerFile -> Just $ getExpectedDirectory spec </> headerFile
Nothing -> Nothing Nothing -> Nothing
@ -277,6 +270,7 @@ data GEvalOptions = GEvalOptions
geoMarkWorstFeatures :: Bool } geoMarkWorstFeatures :: Bool }
defaultGEvalSpecification :: GEvalSpecification
defaultGEvalSpecification = GEvalSpecification { defaultGEvalSpecification = GEvalSpecification {
gesOutDirectory = defaultOutDirectory, gesOutDirectory = defaultOutDirectory,
gesExpectedDirectory = Nothing, gesExpectedDirectory = Nothing,
@ -540,6 +534,7 @@ getDataFormatFromFilePath path =
then dropExtension path then dropExtension path
else path else path
dataDecoder :: Monad m => DataFormat -> Maybe Selector -> ConduitT Text ItemTarget m ()
dataDecoder fmt mSelector = CC.map (select fmt mSelector) dataDecoder fmt mSelector = CC.map (select fmt mSelector)
gevalCoreOnSingleLines :: Metric gevalCoreOnSingleLines :: Metric
@ -777,6 +772,7 @@ gevalCoreOnSourcesStandardWay metric lsSpec =
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y) trans step (ParsedRecordWithoutInput x y) = step (x, y)
helperLogLossHashed :: (MonadUnliftIO m, MonadThrow m) => Word32 -> (Double -> Double) -> LineSourcesSpecification (ResourceT m) -> m MetricOutput
helperLogLossHashed nbOfBits finalStep lsSpec = helperLogLossHashed nbOfBits finalStep lsSpec =
gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC (finalStep . negate) noGraph (fromSpecificationToWithoutInput lsSpec) gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC (finalStep . negate) noGraph (fromSpecificationToWithoutInput lsSpec)
where -- Unfortunately, we're parsing the distribution twice. We need to where -- Unfortunately, we're parsing the distribution twice. We need to
@ -787,6 +783,7 @@ helperLogLossHashed nbOfBits finalStep lsSpec =
Right _ -> Right t Right _ -> Right t
Left m -> Left m Left m -> Left m
generalizedProbabilisticFMeasure :: (MonadUnliftIO m, MonadThrow m, ItemIntermediateRepresentationType t ~ ([Double], [Double], Double, Int)) => Double -> SAMetric t -> LineSourcesSpecification (ResourceT m) -> m MetricOutput
generalizedProbabilisticFMeasure beta metric = gevalCoreWithoutInput metric generalizedProbabilisticFMeasure beta metric = gevalCoreWithoutInput metric
probabilisticSoftAgg probabilisticSoftAgg
(fMeasureOnProbabilisticCounts beta) (fMeasureOnProbabilisticCounts beta)
@ -1041,6 +1038,7 @@ defineContinuation aggregator finalStep generateGraph = do
v <- aggregator v <- aggregator
return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v) return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v)
fromSpecificationToWithoutInput :: LineSourcesSpecification (ResourceT m) -> WithoutInput m e o
fromSpecificationToWithoutInput lsSpec = case lineSourcesFilter lsSpec of fromSpecificationToWithoutInput lsSpec = case lineSourcesFilter lsSpec of
NoFilter -> WithoutInput expectedSource outSource NoFilter -> WithoutInput expectedSource outSource
theFilter -> WithoutInputButFiltered theFilter inputSource expectedSource outSource theFilter -> WithoutInputButFiltered theFilter inputSource expectedSource outSource
@ -1048,6 +1046,7 @@ fromSpecificationToWithoutInput lsSpec = case lineSourcesFilter lsSpec of
outSource = lineSourcesOutputSource lsSpec outSource = lineSourcesOutputSource lsSpec
inputSource = lineSourcesInputSource lsSpec inputSource = lineSourcesInputSource lsSpec
fromSpecificationToWithInput :: LineSourcesSpecification (ResourceT m) -> WithInput m i e o
fromSpecificationToWithInput lsSpec = WithInput theFilter inpSource expectedSource outSource fromSpecificationToWithInput lsSpec = WithInput theFilter inpSource expectedSource outSource
where inpSource = lineSourcesInputSource lsSpec where inpSource = lineSourcesInputSource lsSpec
expectedSource = lineSourcesExpectedSource lsSpec expectedSource = lineSourcesExpectedSource lsSpec
@ -1113,6 +1112,7 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (Withou
data WithInput m i e o = WithInput Filter (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m)) data WithInput m i e o = WithInput Filter (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
getInputFilePath :: WithInput m i e o -> SourceSpec
getInputFilePath (WithInput _ (LineSource _ _ _ inputFilePath _) _ _) = inputFilePath getInputFilePath (WithInput _ (LineSource _ _ _ inputFilePath _) _ _) = inputFilePath
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where

View File

@ -23,7 +23,7 @@ import GEval.Clippings (totalArea, coveredBy, clippEUMatchStep)
import GEval.BIO (gatherCountsForBIO, gatherSeparatedCountsForBIO) import GEval.BIO (gatherCountsForBIO, gatherSeparatedCountsForBIO)
import GEval.Probability import GEval.Probability
import GEval.PrecisionRecall (weightedMaxMatch, fMeasureOnCounts, calculateMAPForOneResult, getProbabilisticCounts, getCounts) import GEval.PrecisionRecall (weightedMaxMatch, calculateMAPForOneResult, getProbabilisticCounts)
import Control.Exception import Control.Exception
@ -312,18 +312,25 @@ itemStep SAMultiLabelLikelihood = uncurry countLogLossOnProbList
itemStep SAHaversine = haversine itemStep SAHaversine = haversine
doubleParser :: Text -> Either String Double
doubleParser = getValue . TR.double doubleParser = getValue . TR.double
intoWords :: Text -> Either a [Text]
intoWords = Right . Data.Text.words intoWords = Right . Data.Text.words
intoStringWords :: Text -> Either a [String]
intoStringWords = Right . Prelude.words . unpack intoStringWords = Right . Prelude.words . unpack
alternativeSentencesParser :: Text -> Either a [[String]]
alternativeSentencesParser = Right . map Prelude.words . DLS.splitOn "\t" . unpack alternativeSentencesParser = Right . map Prelude.words . DLS.splitOn "\t" . unpack
onlyStrip :: Text -> Either a Text
onlyStrip = Right . strip onlyStrip = Right . strip
justStrip :: Text -> Either a (Maybe Text)
justStrip = Right . Just . strip justStrip = Right . Just . strip
predictedParser :: Text -> Maybe Text
predictedParser got = predictedParser got =
-- first try to parse what we got as a probability distribution -- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric) -- (like the one used for Likelikehood/LogLossHashed metric)
@ -334,13 +341,16 @@ predictedParser got =
where pairs = catMaybes $ map wordSpecToPair wordSpecs where pairs = catMaybes $ map wordSpecToPair wordSpecs
Left _ -> Just got Left _ -> Just got
splitByTabs :: Text -> Either a [[Char]]
splitByTabs = Right . DLS.splitOn "\t" . unpack splitByTabs = Right . DLS.splitOn "\t" . unpack
zeroOneParser :: Text -> Either String Bool
zeroOneParser = expected <=< (getValue . TR.decimal) zeroOneParser = expected <=< (getValue . TR.decimal)
where expected 1 = Right True where expected 1 = Right True
expected 0 = Right False expected 0 = Right False
expected _ = Left "expected 0 or 1" expected _ = Left "expected 0 or 1"
probToZeroOneParser :: Text -> Either String Bool
probToZeroOneParser = detected <=< (getValue . TR.double) probToZeroOneParser = detected <=< (getValue . TR.double)
where -- output value could be a probability (for compatibility with other measures) where -- output value could be a probability (for compatibility with other measures)
detected prob detected prob
@ -361,8 +371,10 @@ controlledParse parser t =
(Right v) -> Right v (Right v) -> Right v
(Left _) -> Left "cannot parse line" (Left _) -> Left "cannot parse line"
smape :: (Double, Double) -> Double
smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out)) smape (exp, out) = (abs (exp-out)) `safeDoubleDiv` ((abs exp) + (abs out))
hitOrMiss :: (Text, Text) -> Double
hitOrMiss (exp, got) = hitOrMiss (exp, got) =
-- first try to parse what we got as a probability distribution -- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric) -- (like the one used for Likelikehood/LogLossHashed metric)
@ -393,6 +405,7 @@ getCount (True, False) = (0, 1, 0)
getCount (False, True) = (0, 0, 1) getCount (False, True) = (0, 0, 1)
getCount (False, False) = (0, 0, 0) getCount (False, False) = (0, 0, 0)
getClassesInvolved :: Eq a => (Maybe a, Maybe a) -> (Maybe a, Maybe a, Maybe a)
getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing) getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
getClassesInvolved (Just a, Just b) = if a == b getClassesInvolved (Just a, Just b) = if a == b
@ -404,8 +417,10 @@ getWeightedCounts matchFun (expected, got) = (weightedMaxMatch matchFun expected
Prelude.length expected, Prelude.length expected,
Prelude.length got) Prelude.length got)
getSoftCounts :: EntityWithProbability e => ([BareEntity e], [e]) -> (Double, Int, Int)
getSoftCounts args = getWeightedCounts matchScore args getSoftCounts args = getWeightedCounts matchScore args
getSoft2DCounts :: ([LabeledClipping], [LabeledClipping]) -> (Integer, Integer, Integer)
getSoft2DCounts (expected, got) = (tpArea, expArea, gotArea) getSoft2DCounts (expected, got) = (tpArea, expArea, gotArea)
where tpArea = coveredBy expected got where tpArea = coveredBy expected got
expArea = totalArea expected expArea = totalArea expected