diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index ebe7b8d..0fb4b16 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -113,6 +113,7 @@ import GEval.Annotation import GEval.BlackBoxDebugging import Data.Conduit.Bootstrap import GEval.DataSource +import GEval.MatchingSpecification import qualified Data.HashMap.Strict as M import qualified Data.Vector as V @@ -522,7 +523,7 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces -- some metrics are handled by Bootstrap due to legacy issues, -- fix on the way handleBootstrap :: Metric -> Bool -handleBootstrap (Mean (MultiLabelFMeasure _)) = True +handleBootstrap (Mean (MultiLabelFMeasure _ _)) = True handleBootstrap (Mean _) = False handleBootstrap CharMatch = False handleBootstrap (LogLossHashed _) = False @@ -567,13 +568,16 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => -> m (MetricOutput) -- ^ metric values for the output against the expected output -- for the time being hardcoded -gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do +gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta matchingSpec)) lsSpec = do gevalRunPipeline parserSpec (trans step) finalPipeline context where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser)) context = fromSpecificationToWithoutInput lsSpec - step = itemStep SAMultiLabelFMeasure - expParser = expectedParser SAMultiLabelFMeasure - outParser = outputParser SAMultiLabelFMeasure + step = case toSing matchingSpec of + SomeSing s -> itemStep (SAMultiLabelFMeasure s) + expParser = case toSing matchingSpec of + SomeSing s -> expectedParser (SAMultiLabelFMeasure s) + outParser = case toSing matchingSpec of + SomeSing s -> outputParser (SAMultiLabelFMeasure s) finalPipeline = fixer ( CL.map (fMeasureOnCounts beta) .| (bootstrapC numberOfSamples @@ -630,10 +634,10 @@ gevalCoreOnSources (LogLossHashed nbOfBits) = helperLogLossHashed nbOfBits id gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood -gevalCoreOnSources (Mean (MultiLabelFMeasure beta)) +gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec)) = gevalCoreWithoutInputOnItemTargets (Right . intoWords) (Right . getWords) - ((fMeasureOnCounts beta) . (getCounts (==))) + ((fMeasureOnCounts beta) . (getWeightedCounts (getMatchingFunctionForString matchingSpec))) averageC id noGraph @@ -661,12 +665,13 @@ gevalCoreOnSources (Mean WER) gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being" -- only MultiLabel-F1 handled for JSONs for the time being... -gevalCoreOnSources (MultiLabelFMeasure beta) = gevalCoreWithoutInputOnItemTargets (Right . intoWords) - (Right . getWords) - (getCounts (==)) - countAgg - (fMeasureOnCounts beta) - noGraph +gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) = + gevalCoreWithoutInputOnItemTargets (Right . intoWords) + (Right . getWords) + (getWeightedCounts (getMatchingFunctionForString matchingSpec)) + countAgg + (fMeasureOnCounts beta) + noGraph where getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts @@ -848,12 +853,13 @@ gevalRunPipeline' parserSpec itemStep finalPipeline context = do -continueGEvalCalculations :: (MonadIO m) => +continueGEvalCalculations :: forall m t . (MonadIO m) => SAMetric t -> Metric -> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput -continueGEvalCalculations SAMultiLabelFMeasure (MultiLabelFMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph +continueGEvalCalculations (SAMultiLabelFMeasure matchingSpec) (MultiLabelFMeasure beta matchingSpec') + = defineContinuation countAgg (fMeasureOnCounts beta) noGraph continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 6649e80..7fea41e 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -10,6 +10,7 @@ import GEval.EvaluationScheme import GEval.Common (GEvalException(..), FormattingOptions(..)) import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName) import GEval.Submit (tokenFileName) +import GEval.MatchingSpecification (MatchingSpecification(ExactMatch)) import qualified System.Directory as D import Control.Conditional (whenM) @@ -334,8 +335,8 @@ character (inclusively). |] ++ (commonReadmeMDContents testName) -readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName -readmeMDContents (MultiLabelFMeasure beta) testName = [i| +readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta ExactMatch) testName +readmeMDContents (MultiLabelFMeasure beta _) testName = [i| Tag names and their component ============================= @@ -535,8 +536,8 @@ trainContents TokenAccuracy = [hereLit|* V N I like cats trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19 The student's smart N:1-6 N:8-10 V:12-13 A:15-18 Mary's dog is nice |] -trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta) -trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5 +trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta ExactMatch) +trainContents (MultiLabelFMeasure _ _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5 Steven bloody Brown person/1,3 first-name/1 surname/3 James and James first-name/1 firstname/3 |] @@ -608,8 +609,8 @@ Ala has a cat devInContents SegmentAccuracy = [hereLit|John is smart Mary's intelligent |] -devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta) -devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here +devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta ExactMatch) +devInContents (MultiLabelFMeasure _ _) = [hereLit|Jan Kowalski is here I see him Barbara |] @@ -676,8 +677,8 @@ N V * N devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13 N:1-4 V:6-7 A:9-19 |] -devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta) -devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2 +devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta ExactMatch) +devExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,2 first-name/1 surname/2 first-name/1 |] @@ -749,8 +750,8 @@ I know testInContents SegmentAccuracy = [hereLit|Mary's cat is old John is young |] -testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta) -testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith +testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta ExactMatch) +testInContents (MultiLabelFMeasure _ _) = [hereLit|John bloody Smith Nobody is there I saw Marketa |] @@ -818,8 +819,8 @@ testExpectedContents TokenAccuracy = [hereLit|* V N testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17 N:1-4 V:6-7 A:9-13 |] -testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta) -testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3 +testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta ExactMatch) +testExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 surname/3 first-name/3 |] @@ -877,8 +878,8 @@ inHeaderContents BIOF1Labels = inHeaderContents BIOF1 inHeaderContents BIOF1 = Just ["Text"] inHeaderContents TokenAccuracy = Just ["TokenizedText"] inHeaderContents SegmentAccuracy = Just ["Segment"] -inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta) -inHeaderContents (MultiLabelFMeasure _) = Just ["Text"] +inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta ExactMatch) +inHeaderContents (MultiLabelFMeasure _ _) = Just ["Text"] inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss inHeaderContents MultiLabelLogLoss = Just ["Utterance"] inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU @@ -905,8 +906,8 @@ outHeaderContents BIOF1Labels = outHeaderContents BIOF1 outHeaderContents BIOF1 = Just ["BIOOutput"] outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"] outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"] -outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta) -outHeaderContents (MultiLabelFMeasure _) = Just ["Entities"] +outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta ExactMatch) +outHeaderContents (MultiLabelFMeasure _ _) = Just ["Entities"] outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss outHeaderContents MultiLabelLogLoss = Just ["Emotion"] outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"]