First version of fuzzy matching working

This commit is contained in:
Filip Gralinski 2020-07-02 08:16:05 +02:00
parent bbeb3ce397
commit 00a2fc7d19
3 changed files with 20 additions and 5 deletions

View File

@ -115,6 +115,7 @@ library
, random
, rainbow
, yaml
, extra
default-language: Haskell2010
executable geval

View File

@ -17,6 +17,9 @@ module GEval.MatchingSpecification
import Data.Singletons.TH
import Data.Text
import Data.List.Extra (breakOn)
import Text.EditDistance
-- | The data type for storing a matching specification
singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. identity is required
@ -27,11 +30,22 @@ singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. ide
|]
-- | Score how well a produced string matches the expected one under the
-- given matching specification. Every clause below yields a value between
-- 0.0 (no match) and 1.0 (perfect match).
--
-- * 'ExactMatch' gives 1.0 when both strings are identical, 0.0 otherwise.
-- * 'FuzzyMatch' gives one minus the Levenshtein distance divided by the
--   length of the expected string, clamped at 0.0.
-- * 'CutLabel' strips the label from both strings with 'cutLabel' and then
--   applies the wrapped specification.
getMatchingFunctionForString :: MatchingSpecification -> String -> String -> Double
getMatchingFunctionForString ExactMatch got expected
  | got == expected = 1.0
  | otherwise = 0.0
getMatchingFunctionForString FuzzyMatch got expected
  -- Identical strings are a perfect match; checking this first also keeps
  -- two empty strings from dividing 0 by 0 in the general formula.
  | got == expected = 1.0
  -- Nothing was expected but something was produced: no credit, and no
  -- division by a zero length.
  | Prelude.null expected = 0.0
  | otherwise = max 0.0 (1.0 - charError)
  where
    -- edit distance relative to the expected length; it can exceed 1.0,
    -- hence the clamp above
    charError = fromIntegral editDist / fromIntegral (Prelude.length expected)
    editDist = levenshteinDistance defaultEditCosts got expected
getMatchingFunctionForString (CutLabel smatchSpec) got expected =
  getMatchingFunctionForString smatchSpec (cutLabel got) (cutLabel expected)
-- | Remove the label along with the separator (the equal sign).
--
-- Everything up to and including the first @=@ is dropped; a string
-- without @=@ carries no label and is returned unchanged.
cutLabel :: String -> String
cutLabel labelled = case Data.List.Extra.breakOn "=" labelled of
  (unlabelled, "") -> unlabelled -- no label
  -- the remainder starts with the separator; dropping one character skips
  -- it without relying on a partial function
  (_, valWithSeparator) -> Prelude.drop 1 valWithSeparator
-- | 'Text' counterpart of 'getMatchingFunctionForString': both arguments are
-- unpacked to 'String' and scored under the same matching specification.
getMatchingFunctionForText :: MatchingSpecification -> Text -> Text -> Double
getMatchingFunctionForText matchSpec got expected = scoreStrings (unpack got) (unpack expected)
  where
    scoreStrings = getMatchingFunctionForString matchSpec

View File

@ -348,7 +348,7 @@ main = hspec $ do
it "information extraction with flags" $ do
runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
it "information extraction with fuzzy matching" $ do
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.6928
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
describe "Mean/MultiLabel-F" $ do
it "simple" $ do
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5