diff --git a/geval.cabal b/geval.cabal index 998d8b7..dc1af28 100644 --- a/geval.cabal +++ b/geval.cabal @@ -115,6 +115,7 @@ library , random , rainbow , yaml + , extra default-language: Haskell2010 executable geval diff --git a/src/GEval/MatchingSpecification.hs b/src/GEval/MatchingSpecification.hs index 81b0bc7..0ec7e8f 100644 --- a/src/GEval/MatchingSpecification.hs +++ b/src/GEval/MatchingSpecification.hs @@ -17,6 +17,9 @@ module GEval.MatchingSpecification import Data.Singletons.TH import Data.Text +import Data.List.Extra (breakOn) + +import Text.EditDistance -- | The data type for storing a matching specification singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. identity is required @@ -27,11 +30,22 @@ singletons [d|data MatchingSpecification = ExactMatch -- ^ exact match, i.e. ide |] getMatchingFunctionForString :: MatchingSpecification -> String -> String -> Double -getMatchingFunctionForString ExactMatch a b - | a == b = 1.0 +getMatchingFunctionForString ExactMatch got expected + | got == expected = 1.0 | otherwise = 0.0 -getMatchingFunctionForString FuzzyMatch a b = 1.0 -getMatchingFunctionForString (CutLabel smatchSpec) a b = getMatchingFunctionForString smatchSpec a b +getMatchingFunctionForString FuzzyMatch got expected = max 0.0 (1.0 - charError) + where charError = (fromIntegral editDist) / (fromIntegral $ Prelude.length expected) + editDist = levenshteinDistance defaultEditCosts got expected + +getMatchingFunctionForString (CutLabel smatchSpec) a b = getMatchingFunctionForString smatchSpec a' b' + where a' = cutLabel a + b' = cutLabel b + +-- | Remove the label along with the separator (the equal sign) +cutLabel :: String -> String +cutLabel t = case Data.List.Extra.breakOn "=" t of + (t, "") -> t -- no label + (_, valWithSeparator) -> Prelude.tail valWithSeparator getMatchingFunctionForText :: MatchingSpecification -> Text -> Text -> Double getMatchingFunctionForText matchSpec a b = getMatchingFunctionForString matchSpec (unpack a) (unpack b) diff --git a/test/Spec.hs b/test/Spec.hs index 8934376..3bdd0ec 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -348,7 +348,7 @@ main = hspec $ do it "information extraction with flags" $ do runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444 it "information extraction with fuzzy matching" $ do - runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.6928 + runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777 describe "Mean/MultiLabel-F" $ do it "simple" $ do runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5