Add smart mode
This commit is contained in:
parent
00a2fc7d19
commit
236712c52b
@ -18,6 +18,9 @@ module GEval.MatchingSpecification
|
|||||||
import Data.Singletons.TH
|
import Data.Singletons.TH
|
||||||
import Data.Text
|
import Data.Text
|
||||||
import Data.List.Extra (breakOn)
|
import Data.List.Extra (breakOn)
|
||||||
|
import Data.Char (isLetter)
|
||||||
|
import Data.List (find)
|
||||||
|
import Data.Maybe (isJust)
|
||||||
|
|
||||||
import Text.EditDistance
|
import Text.EditDistance
|
||||||
|
|
||||||
@ -26,6 +29,8 @@ singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. ide
|
|||||||
| FuzzyMatch -- ^ fuzzy match by Levenshtein distance
|
| FuzzyMatch -- ^ fuzzy match by Levenshtein distance
|
||||||
| CutLabel MatchingSpecification -- ^ require that the label (part before up to `=`)
|
| CutLabel MatchingSpecification -- ^ require that the label (part before up to `=`)
|
||||||
-- is matched and then proceed with some matching spec.
|
-- is matched and then proceed with some matching spec.
|
||||||
|
| SmartMatch MatchingSpecification -- ^ do fuzzy matching only on values
|
||||||
|
-- containing letters
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|]
|
|]
|
||||||
|
|
||||||
@ -41,6 +46,16 @@ getMatchingFunctionForString (CutLabel smatchSpec) a b = getMatchingFunctionForS
|
|||||||
where a' = cutLabel a
|
where a' = cutLabel a
|
||||||
b' = cutLabel b
|
b' = cutLabel b
|
||||||
|
|
||||||
|
-- Smart mode: the wrapped specification is applied only when the expected
-- value qualifies according to 'wantedBySmartMatch'; otherwise an exact
-- match is required.
getMatchingFunctionForString (SmartMatch smatchSpec) got expected
  | wantedBySmartMatch expected = getMatchingFunctionForString smatchSpec got expected
  | otherwise = getMatchingFunctionForString ExactMatch got expected
|
||||||
|
|
||||||
|
-- | Whether a value is suitable for fuzzy matching in the "smart" match mode.
-- At the moment we only check whether it contains at least one letter
-- (an exact match is required for, for instance, numbers written with digits).
wantedBySmartMatch :: String -> Bool
wantedBySmartMatch = isJust . Data.List.find isLetter
|
||||||
|
|
||||||
-- | Remove the label along with the separator (the equal sign)
|
-- | Remove the label along with the separator (the equal sign)
|
||||||
cutLabel :: String -> String
|
cutLabel :: String -> String
|
||||||
cutLabel t = case Data.List.Extra.breakOn "=" t of
|
cutLabel t = case Data.List.Extra.breakOn "=" t of
|
||||||
|
@ -83,6 +83,7 @@ instance Show Metric where
|
|||||||
show (MultiLabelFMeasure beta ExactMatch) = "MultiLabel-F" ++ (show beta)
|
show (MultiLabelFMeasure beta ExactMatch) = "MultiLabel-F" ++ (show beta)
|
||||||
show (MultiLabelFMeasure beta FuzzyMatch) = "Fuzzy/" ++ (show $ MultiLabelFMeasure beta ExactMatch)
|
show (MultiLabelFMeasure beta FuzzyMatch) = "Fuzzy/" ++ (show $ MultiLabelFMeasure beta ExactMatch)
|
||||||
show (MultiLabelFMeasure beta (CutLabel matchSpec)) = "CutLabel/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
show (MultiLabelFMeasure beta (CutLabel matchSpec)) = "CutLabel/" ++ (show $ MultiLabelFMeasure beta matchSpec)
|
||||||
|
-- Smart mode is rendered as a "Smart/" prefix in front of the wrapped metric.
show (MultiLabelFMeasure beta (SmartMatch matchSpec)) = "Smart/" ++ show (MultiLabelFMeasure beta matchSpec)
|
||||||
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
show MultiLabelLogLoss = "MultiLabel-Logloss"
|
||||||
show MultiLabelLikelihood = "MultiLabel-Likelihood"
|
show MultiLabelLikelihood = "MultiLabel-Likelihood"
|
||||||
show (Mean metric) = "Mean/" ++ (show metric)
|
show (Mean metric) = "Mean/" ++ (show metric)
|
||||||
@ -104,6 +105,9 @@ instance Read Metric where
|
|||||||
readsPrec p ('C':'u':'t':'L':'a':'b':'e':'l':'/':theRest) = case readsPrec p theRest of
|
readsPrec p ('C':'u':'t':'L':'a':'b':'e':'l':'/':theRest) = case readsPrec p theRest of
|
||||||
[(metric, theRest)] -> [(applyMatchingSpecification CutLabel metric, theRest)]
|
[(metric, theRest)] -> [(applyMatchingSpecification CutLabel metric, theRest)]
|
||||||
_ -> []
|
_ -> []
|
||||||
|
-- "Smart/" prefix: parse the remainder as a metric and wrap its matching
-- specification in 'SmartMatch'; anything but a single parse is rejected.
readsPrec p ('S':'m':'a':'r':'t':'/':suffix) =
    case readsPrec p suffix of
      [(inner, leftover)] -> [(applyMatchingSpecification SmartMatch inner, leftover)]
      _ -> []
|
||||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||||
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
||||||
readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
|
readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
|
||||||
|
@ -349,6 +349,8 @@ main = hspec $ do
|
|||||||
runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
|
runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
|
||||||
it "information extraction with fuzzy matching" $ do
|
it "information extraction with fuzzy matching" $ do
|
||||||
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
|
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
|
||||||
|
it "information extraction with smart fuzzy matching" $ do
|
||||||
|
runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444
|
||||||
describe "Mean/MultiLabel-F" $ do
|
describe "Mean/MultiLabel-F" $ do
|
||||||
it "simple" $ do
|
it "simple" $ do
|
||||||
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
||||||
|
@ -0,0 +1,3 @@
|
|||||||
|
important-person=JOHN_BROWN important-person=JOHN_SMITH company-name=Axaxaxaxas_Mlo profit=12031
|
||||||
|
company-name=Foo_Bar profit=1220
|
||||||
|
company-name=Whatever important-person=PIERRE_MENARD
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric CutLabel/Smart/Fuzzy/MultiLabel-F1:ls<_(inc|ltd)\.?(\s|$)><\2>
|
@ -0,0 +1,3 @@
|
|||||||
|
company-name=Axaxaxas_Mlö profit=12031 important-person=John_Smith important-person=James_Brown
|
||||||
|
company-name=Orbis_Tertius profit=1020 important-person=Anna_Smith
|
||||||
|
company-name=Whatever_Inc profit=5600 important-person=Pierre_Menard
|
|
Loading…
Reference in New Issue
Block a user