First version of fuzzy matching working
This commit is contained in:
parent
bbeb3ce397
commit
00a2fc7d19
@ -115,6 +115,7 @@ library
|
||||
, random
|
||||
, rainbow
|
||||
, yaml
|
||||
, extra
|
||||
default-language: Haskell2010
|
||||
|
||||
executable geval
|
||||
|
@ -17,6 +17,9 @@ module GEval.MatchingSpecification
|
||||
|
||||
import Data.Singletons.TH
|
||||
import Data.Text
|
||||
import Data.List.Extra (breakOn)
|
||||
|
||||
import Text.EditDistance
|
||||
|
||||
-- | The data type for storing a matching specification
|
||||
singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. identity is required
|
||||
@ -27,11 +30,22 @@ singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. ide
|
||||
|]
|
||||
|
||||
getMatchingFunctionForString :: MatchingSpecification -> String -> String -> Double
|
||||
getMatchingFunctionForString ExactMatch a b
|
||||
| a == b = 1.0
|
||||
getMatchingFunctionForString ExactMatch got expected
|
||||
| got == expected = 1.0
|
||||
| otherwise = 0.0
|
||||
getMatchingFunctionForString FuzzyMatch a b = 1.0
|
||||
getMatchingFunctionForString (CutLabel smatchSpec) a b = getMatchingFunctionForString smatchSpec a b
|
||||
getMatchingFunctionForString FuzzyMatch got expected = max 0.0 (1.0 - charError)
|
||||
where charError = (fromIntegral editDist) / (fromIntegral $ Prelude.length expected)
|
||||
editDist = levenshteinDistance defaultEditCosts got expected
|
||||
|
||||
getMatchingFunctionForString (CutLabel smatchSpec) a b = getMatchingFunctionForString smatchSpec a' b'
|
||||
where a' = cutLabel a
|
||||
b' = cutLabel b
|
||||
|
||||
-- | Remove the label along with the separator (the equal sign)
|
||||
cutLabel :: String -> String
|
||||
cutLabel t = case Data.List.Extra.breakOn "=" t of
|
||||
(t, "") -> t -- no label
|
||||
(_, valWithSeparator) -> Prelude.tail valWithSeparator
|
||||
|
||||
getMatchingFunctionForText :: MatchingSpecification -> Text -> Text -> Double
|
||||
getMatchingFunctionForText matchSpec a b = getMatchingFunctionForString matchSpec (unpack a) (unpack b)
|
||||
|
@ -348,7 +348,7 @@ main = hspec $ do
|
||||
it "information extraction with flags" $ do
|
||||
runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
|
||||
it "information extraction with fuzzy matching" $ do
|
||||
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.6928
|
||||
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
|
||||
describe "Mean/MultiLabel-F" $ do
|
||||
it "simple" $ do
|
||||
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
||||
|
Loading…
Reference in New Issue
Block a user