Handle more than one possibility in TokenAccuracy

2018-10-24 08:02:34 +02:00 · 2018-10-24 08:02:34 +02:00 · 9322307813
commit 9322307813
parent b2a0cd28f0
4 changed files with 7 additions and 3 deletions
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.10.0.0
+version:             1.10.1.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -703,7 +703,7 @@ gevalCore' TokenAccuracy _ = gevalCoreWithoutInput intoTokens
         matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int)
         matchFun (h, t) (e, o)
           | e == (pack "*") = (h, t)
-           | o == e = (h + 1, t + 1)
+           | o `Prelude.elem` (splitOn (pack ";") e) = (h + 1, t + 1)
           | otherwise = (h, t + 1)
         hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0)

--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -263,6 +263,10 @@ Get part of speech tags for each token
 This is a sample challenge for TokenAccuracy. We just
 count the accuracy per token and skip entries marked as "*"
 in the expected file.
+
+More than one accepted value, separated with semicolons, can be
+given for a token in the expected file (but not in the output file).
+
 |] ++ (commonReadmeMDContents testName)

 readmeMDContents (MultiLabelFMeasure beta) testName = [i|
--- a/test/token-accuracy-simple/token-accuracy-simple/test-A/expected.tsv
+++ b/test/token-accuracy-simple/token-accuracy-simple/test-A/expected.tsv
@@ -1,3 +1,3 @@
-foo * * bar
+qqqq;foo * * bar
 baz
 foo bar baz