continue work on fuzzy matching

This commit is contained in:
Filip Gralinski 2020-07-01 21:08:33 +02:00
parent 4e3ff20e2c
commit bbeb3ce397
3 changed files with 16 additions and 15 deletions

View File

@ -578,7 +578,7 @@ gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta matchingS
outParser = case toSing matchingSpec of
SomeSing s -> outputParser (SAMultiLabelFMeasure s)
finalPipeline = fixer (
CL.map (fMeasureOnCounts' beta)
CL.map (fMeasureOnCounts beta)
.| (bootstrapC numberOfSamples
$ continueGEvalCalculations SAMSE MSE))
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
@ -667,7 +667,7 @@ gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiL
gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) =
gevalCoreWithoutInputOnItemTargets (Right . intoWords)
(Right . getWords)
(getCounts (==))
(getWeightedCounts (getMatchingFunctionForString matchingSpec))
countAgg
(fMeasureOnCounts beta)
noGraph

View File

@ -26,7 +26,12 @@ singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. ide
deriving (Eq)
|]
-- | Build a scoring function over pairs of 'Text' values for the given
-- matching specification.
--
-- * 'ExactMatch' scores 1.0 when both texts are identical and 0.0 otherwise
--   (previously every pair scored 1.0, which disagreed with
--   'getMatchingFunctionForString').
-- * 'FuzzyMatch' scores every pair as 1.0.
--   NOTE(review): this looks like a placeholder for a real fuzzy measure - confirm.
-- * 'CutLabel' delegates to the wrapped specification; the texts themselves
--   are not altered here.
getMatchingFunction :: MatchingSpecification -> Text -> Text -> Double
getMatchingFunction ExactMatch = \a b -> if a == b then 1.0 else 0.0
getMatchingFunction FuzzyMatch = \_ _ -> 1.0
getMatchingFunction (CutLabel smatchSpec) = getMatchingFunction smatchSpec
-- | Score a pair of strings under the given matching specification.
--
-- * 'ExactMatch': 1.0 for identical strings, 0.0 otherwise.
-- * 'FuzzyMatch': 1.0 for every pair.
--   NOTE(review): this looks like a placeholder for a real fuzzy measure - confirm.
-- * 'CutLabel': defers to the wrapped specification, passing both strings
--   through unchanged.
getMatchingFunctionForString :: MatchingSpecification -> String -> String -> Double
getMatchingFunctionForString spec left right =
  case spec of
    ExactMatch -> if left == right then 1.0 else 0.0
    FuzzyMatch -> 1.0
    CutLabel inner -> getMatchingFunctionForString inner left right
-- | 'Text' front-end for 'getMatchingFunctionForString': both arguments are
-- unpacked to 'String' and scored under the same matching specification.
getMatchingFunctionForText :: MatchingSpecification -> Text -> Text -> Double
getMatchingFunctionForText matchSpec lhs rhs = scoreStrings (unpack lhs) (unpack rhs)
  where
    -- The String-level scorer, fixed to the requested specification.
    scoreStrings = getMatchingFunctionForString matchSpec

View File

@ -228,17 +228,16 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
ItemIntermediateRepresentationType AProbabilisticSoftFMeasure = ([Double], [Double], Double, Int)
ItemIntermediateRepresentationType APearson = (Double, Double)
ItemIntermediateRepresentationType ASpearman = (Double, Double)
ItemIntermediateRepresentationType (AMultiLabelFMeasure ms) = (MatchingCount ms, Int, Int)
-- FIXME
-- It would be better to distinguish ExactMatch here (for which we could return (Int, Int, Int))
-- from the other possibilities, but that led too far down the rabbit hole with types.
ItemIntermediateRepresentationType (AMultiLabelFMeasure _) = (Double, Int, Int)
ItemIntermediateRepresentationType ALogLossHashed = (Text, Text)
ItemIntermediateRepresentationType ALikelihoodHashed = (Text, Text)
ItemIntermediateRepresentationType ACharMatch = (Text, Text)
ItemIntermediateRepresentationType AWER = (Int, Int)
ItemIntermediateRepresentationType t = Double
-- | Type used to count matches for a given matching specification:
-- 'Int' for 'ExactMatch', 'Double' for every other specification.
type family MatchingCount (spec :: MatchingSpecification) where
  MatchingCount 'ExactMatch = Int
  MatchingCount _ = Double
itemStep :: SAMetric t -> (ParsedExpectedType t, ParsedOutputType t) -> ItemIntermediateRepresentationType t
itemStep SARMSE = itemSquaredError
itemStep SAMSE = itemSquaredError
@ -269,10 +268,7 @@ itemStep SATokenAccuracy = countHitsAndTotals
itemStep SASegmentAccuracy = uncurry segmentAccuracy
itemStep SAMAE = itemAbsoluteError
itemStep SASMAPE = smape
itemStep (SAMultiLabelFMeasure SExactMatch) = getCounts (==)
itemStep (SAMultiLabelFMeasure SFuzzyMatch) = getWeightedCounts (getMatchingFunction $ fromSing SFuzzyMatch)
itemStep (SAMultiLabelFMeasure smatchSpec@(SCutLabel _))
= getWeightedCounts (getMatchingFunction $ fromSing smatchSpec)
itemStep (SAMultiLabelFMeasure smatchSpec) = getWeightedCounts (getMatchingFunctionForText $ fromSing smatchSpec)
itemStep SAMultiLabelLogLoss = uncurry countLogLossOnProbList
itemStep SAMultiLabelLikelihood = uncurry countLogLossOnProbList