Add hardening
This commit is contained in:
parent
236712c52b
commit
6c295a3325
@ -31,9 +31,13 @@ singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. ide
|
||||
-- is matched and then proceed with some matching spec.
|
||||
| SmartMatch MatchingSpecification -- ^ do fuzzy matching only on values
|
||||
-- containing letters
|
||||
| Harden MatchingSpecification -- ^ harden a soft match
|
||||
deriving (Eq)
|
||||
|]
|
||||
|
||||
-- | Cut-off applied when a soft match is "hardened": the 'Harden' clause of
-- 'getMatchingFunctionForString' returns 1.0 for scores greater than or
-- equal to this value and 0.0 for anything lower.
hardeningThreshold :: Double
|
||||
hardeningThreshold = 0.8
|
||||
|
||||
getMatchingFunctionForString :: MatchingSpecification -> String -> String -> Double
|
||||
getMatchingFunctionForString ExactMatch got expected
|
||||
| got == expected = 1.0
|
||||
@ -51,6 +55,11 @@ getMatchingFunctionForString (SmartMatch smatchSpec) got expected = getMatchingF
|
||||
then smatchSpec
|
||||
else ExactMatch
|
||||
|
||||
-- Harden case: score @got@ against @expected@ with the wrapped matching
-- specification, then collapse that score to a binary result: 1.0 when it
-- reaches 'hardeningThreshold' (the comparison is inclusive), 0.0 otherwise.
getMatchingFunctionForString (Harden smatchSpec) got expected = if softMatch >= hardeningThreshold
|
||||
then 1.0
|
||||
else 0.0
|
||||
where softMatch = getMatchingFunctionForString smatchSpec got expected
|
||||
|
||||
-- | Whether suitable for fuzzy matching when in the "smart" match mode.
|
||||
-- At the moment we check whether it contains at least one letter
|
||||
-- (we require the exact match for, for instance, numbers written with digits.
|
||||
|
@ -84,6 +84,7 @@ instance Show Metric where
|
||||
show (MultiLabelFMeasure beta FuzzyMatch) = "Fuzzy/" ++ (show $ MultiLabelFMeasure beta ExactMatch)
|
||||
show (MultiLabelFMeasure beta (CutLabel matchSpec)) = "CutLabel/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||
show (MultiLabelFMeasure beta (SmartMatch matchSpec)) = "Smart/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||
show (MultiLabelFMeasure beta (Harden matchSpec)) = "Harden/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
||||
show MultiLabelLikelihood = "MultiLabel-Likelihood"
|
||||
show (Mean metric) = "Mean/" ++ (show metric)
|
||||
@ -108,6 +109,9 @@ instance Read Metric where
|
||||
readsPrec p ('S':'m':'a':'r':'t':'/':theRest) = case readsPrec p theRest of
|
||||
[(metric, theRest)] -> [(applyMatchingSpecification SmartMatch metric, theRest)]
|
||||
_ -> []
|
||||
readsPrec p ('H':'a':'r':'d':'e':'n':'/':theRest) = case readsPrec p theRest of
|
||||
[(metric, theRest)] -> [(applyMatchingSpecification Harden metric, theRest)]
|
||||
_ -> []
|
||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
||||
readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
|
||||
|
@ -351,6 +351,8 @@ main = hspec $ do
|
||||
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
|
||||
it "information extraction with smart fuzzy matching" $ do
|
||||
runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444
|
||||
it "information extraction with smart fuzzy matching hardened" $ do
|
||||
runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555
|
||||
describe "Mean/MultiLabel-F" $ do
|
||||
it "simple" $ do
|
||||
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
||||
|
@ -0,0 +1,3 @@
|
||||
important-person=JOHN_BROWN important-person=JOHN_SMITH company-name=Axaxaxaas_Mlo profit=12031
|
||||
company-name=Foo_Bar profit=1220
|
||||
company-name=Whatever important-person=PIERRE_MENARD
|
|
@ -0,0 +1 @@
|
||||
--metric Harden/CutLabel/Smart/Fuzzy/MultiLabel-F1:ls<_(inc|ltd)\.?(\s|$)><\2>
|
@ -0,0 +1,3 @@
|
||||
company-name=Axaxaxas_Mlö profit=12031 important-person=John_Smith important-person=James_Brown
|
||||
company-name=Orbis_Tertius profit=1020 important-person=Anna_Smith
|
||||
company-name=Whatever_Inc profit=5600 important-person=Pierre_Menard
|
|
Loading…
Reference in New Issue
Block a user