Add smart mode
This commit is contained in:
parent
00a2fc7d19
commit
236712c52b
@ -18,6 +18,9 @@ module GEval.MatchingSpecification
|
||||
import Data.Singletons.TH
|
||||
import Data.Text
|
||||
import Data.List.Extra (breakOn)
|
||||
import Data.Char (isLetter)
|
||||
import Data.List (find)
|
||||
import Data.Maybe (isJust)
|
||||
|
||||
import Text.EditDistance
|
||||
|
||||
@ -26,6 +29,8 @@ singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. ide
|
||||
| FuzzyMatch -- ^ fuzzy match by Levenshtein distance
|
||||
| CutLabel MatchingSpecification -- ^ require that the label (part before up to `=`)
|
||||
-- is matched and then proceed with some matching spec.
|
||||
| SmartMatch MatchingSpecification -- ^ do fuzzy matching only on values
|
||||
-- containing letters
|
||||
deriving (Eq)
|
||||
|]
|
||||
|
||||
@ -41,6 +46,16 @@ getMatchingFunctionForString (CutLabel smatchSpec) a b = getMatchingFunctionForS
|
||||
where a' = cutLabel a
|
||||
b' = cutLabel b
|
||||
|
||||
getMatchingFunctionForString (SmartMatch smatchSpec) got expected = getMatchingFunctionForString chosenMatch got expected
|
||||
where chosenMatch = if wantedBySmartMatch expected
|
||||
then smatchSpec
|
||||
else ExactMatch
|
||||
|
||||
-- | Whether suitable for fuzzy matching when in the "smart" match mode.
|
||||
-- At the moment we check whether it contains at least one letter
|
||||
-- (we require the exact match for, for instance, numbers written with digits).
|
||||
-- Returns True when the string contains at least one letter
-- (as classified by 'Data.Char.isLetter'), False otherwise,
-- including for the empty string.
--
-- NOTE(review): 'find' is kept qualified as in the original, presumably to
-- avoid a clash with the unqualified "Data.Text" import - confirm.
wantedBySmartMatch :: String -> Bool
wantedBySmartMatch candidate = isJust (Data.List.find isLetter candidate)
|
||||
|
||||
-- | Remove the label along with the separator (the equal sign)
|
||||
cutLabel :: String -> String
|
||||
cutLabel t = case Data.List.Extra.breakOn "=" t of
|
||||
|
@ -83,6 +83,7 @@ instance Show Metric where
|
||||
show (MultiLabelFMeasure beta ExactMatch) = "MultiLabel-F" ++ (show beta)
|
||||
show (MultiLabelFMeasure beta FuzzyMatch) = "Fuzzy/" ++ (show $ MultiLabelFMeasure beta ExactMatch)
|
||||
show (MultiLabelFMeasure beta (CutLabel matchSpec)) = "CutLabel/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||
show (MultiLabelFMeasure beta (SmartMatch matchSpec)) = "Smart/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
||||
show MultiLabelLikelihood = "MultiLabel-Likelihood"
|
||||
show (Mean metric) = "Mean/" ++ (show metric)
|
||||
@ -104,6 +105,9 @@ instance Read Metric where
|
||||
readsPrec p ('C':'u':'t':'L':'a':'b':'e':'l':'/':theRest) = case readsPrec p theRest of
|
||||
[(metric, theRest)] -> [(applyMatchingSpecification CutLabel metric, theRest)]
|
||||
_ -> []
|
||||
readsPrec p ('S':'m':'a':'r':'t':'/':theRest) = case readsPrec p theRest of
|
||||
[(metric, theRest)] -> [(applyMatchingSpecification SmartMatch metric, theRest)]
|
||||
_ -> []
|
||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
||||
readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
|
||||
|
@ -349,6 +349,8 @@ main = hspec $ do
|
||||
runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
|
||||
it "information extraction with fuzzy matching" $ do
|
||||
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
|
||||
it "information extraction with smart fuzzy matching" $ do
|
||||
runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444
|
||||
describe "Mean/MultiLabel-F" $ do
|
||||
it "simple" $ do
|
||||
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
||||
|
@ -0,0 +1,3 @@
|
||||
important-person=JOHN_BROWN important-person=JOHN_SMITH company-name=Axaxaxaxas_Mlo profit=12031
|
||||
company-name=Foo_Bar profit=1220
|
||||
company-name=Whatever important-person=PIERRE_MENARD
|
|
@ -0,0 +1 @@
|
||||
--metric CutLabel/Smart/Fuzzy/MultiLabel-F1:ls<_(inc|ltd)\.?(\s|$)><\2>
|
@ -0,0 +1,3 @@
|
||||
company-name=Axaxaxas_Mlö profit=12031 important-person=John_Smith important-person=James_Brown
|
||||
company-name=Orbis_Tertius profit=1020 important-person=Anna_Smith
|
||||
company-name=Whatever_Inc profit=5600 important-person=Pierre_Menard
|
|
Loading…
Reference in New Issue
Block a user