First version of fuzzy matching working
This commit is contained in:
parent
bbeb3ce397
commit
00a2fc7d19
@ -115,6 +115,7 @@ library
|
|||||||
, random
|
, random
|
||||||
, rainbow
|
, rainbow
|
||||||
, yaml
|
, yaml
|
||||||
|
, extra
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable geval
|
executable geval
|
||||||
|
@ -17,6 +17,9 @@ module GEval.MatchingSpecification
|
|||||||
|
|
||||||
import Data.Singletons.TH
|
import Data.Singletons.TH
|
||||||
import Data.Text
|
import Data.Text
|
||||||
|
import Data.List.Extra (breakOn)
|
||||||
|
|
||||||
|
import Text.EditDistance
|
||||||
|
|
||||||
-- | The data type for storing a matching specification
|
-- | The data type for storing a matching specification
|
||||||
singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. identity is required
|
singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. identity is required
|
||||||
@ -27,11 +30,22 @@ singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. ide
|
|||||||
|]
|
|]
|
||||||
|
|
||||||
-- | Score how well @got@ matches @expected@ under a matching specification.
--
-- * 'ExactMatch' yields 1.0 when the two strings are identical and 0.0
--   otherwise.
-- * 'FuzzyMatch' yields one minus the character error rate, i.e. the
--   Levenshtein distance between the strings divided by the length of
--   @expected@, clamped from below at 0.0.
-- * 'CutLabel' strips the label from both strings with 'cutLabel' and
--   delegates to the wrapped specification.
getMatchingFunctionForString :: MatchingSpecification -> String -> String -> Double
getMatchingFunctionForString ExactMatch got expected
  | got == expected = 1.0
  | otherwise = 0.0
getMatchingFunctionForString FuzzyMatch got expected
  -- Identical strings are a perfect match. Checking this first also covers
  -- two empty strings, for which the error rate below would be 0/0 (NaN).
  | got == expected = 1.0
  -- The error rate is undefined for an empty expected value (division by
  -- zero); nothing in a non-empty @got@ can match it, so the score is 0.0.
  | Prelude.null expected = 0.0
  | otherwise = max 0.0 (1.0 - charError)
  where
    -- The distance may exceed the length of @expected@, hence the clamp above.
    charError = fromIntegral editDist / fromIntegral (Prelude.length expected)
    editDist = levenshteinDistance defaultEditCosts got expected
getMatchingFunctionForString (CutLabel smatchSpec) got expected =
  getMatchingFunctionForString smatchSpec (cutLabel got) (cutLabel expected)
|
||||||
|
|
||||||
|
-- | Remove the label along with the separator (the first equal sign).
--
-- A string without an equal sign carries no label and is returned
-- unchanged; otherwise everything after the first equal sign is returned.
cutLabel :: String -> String
cutLabel labelled = case Prelude.break (== '=') labelled of
  -- The remainder starts with the separator itself, so drop exactly it.
  (_, _separator : value) -> value
  -- No separator found, hence no label.
  (unlabelled, []) -> unlabelled
|
||||||
|
|
||||||
-- | 'Text' counterpart of 'getMatchingFunctionForString': both arguments are
-- unpacked to 'String' and scored with the given matching specification.
getMatchingFunctionForText :: MatchingSpecification -> Text -> Text -> Double
getMatchingFunctionForText matchSpec got expected = scoreStrings (unpack got) (unpack expected)
  where
    scoreStrings = getMatchingFunctionForString matchSpec
|
||||||
|
@ -348,7 +348,7 @@ main = hspec $ do
|
|||||||
it "information extraction with flags" $ do
|
it "information extraction with flags" $ do
|
||||||
runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
|
runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444
|
||||||
it "information extraction with fuzzy matching" $ do
|
it "information extraction with fuzzy matching" $ do
|
||||||
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.6928
|
runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777
|
||||||
describe "Mean/MultiLabel-F" $ do
|
describe "Mean/MultiLabel-F" $ do
|
||||||
it "simple" $ do
|
it "simple" $ do
|
||||||
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
|
||||||
|
Loading…
Reference in New Issue
Block a user