diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs index a3e8819..f213ab4 100644 --- a/src/GEval/BIO.hs +++ b/src/GEval/BIO.hs @@ -19,6 +19,13 @@ import GEval.Common data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text) deriving (Eq, Show) +formatBioLabel :: BIOLabel -> T.Text {- Renders a BIOLabel as text: Outside becomes "O"; Beginning and Inside become "B-" and "I-" followed by the label, plus "/" and the normalized form when one is present. -} +formatBioLabel Outside = "O" +formatBioLabel (Beginning label Nothing) = T.concat ["B-", label] +formatBioLabel (Beginning label (Just normalized)) = T.concat ["B-", label, "/", normalized] +formatBioLabel (Inside label Nothing) = T.concat ["I-", label] +formatBioLabel (Inside label (Just normalized)) = T.concat ["I-", label, "/", normalized] + data TaggedSpan = TaggedSpan Int Int deriving (Eq, Show) @@ -45,7 +52,7 @@ labelSplitToEntity labs@(h@(_,begIx):t) = if isBeginning h && all (\tp -> isInsi then Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized else - Left "something wrong with label sequence" + Left $ "inconsistent label sequence `" ++ (T.unpack $ T.intercalate " " $ map (formatBioLabel . 
fst) labs) ++ "`" {- The error text lists every label in labs, formatted with formatBioLabel, separated by single spaces and wrapped in backticks. -} where isBeginning (Beginning _ _, _) = True isBeginning _ = False isInside (Inside _ _, _) = True diff --git a/test/Spec.hs b/test/Spec.hs index 68f595c..d183bc0 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -258,6 +258,8 @@ main = hspec $ do runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5 it "check perfect score" $ do runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0 + it "check inconsistent input" $ do + runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`") {- The expected message echoes the B-NAME / I-FOO run from the second row of the bio-f1-error out.tsv fixture; NOTE(review): the 2 is presumably that row's line number - confirm against UnexpectedData. -} describe "automatic decompression" $ do it "more complex test" $ do runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923 diff --git a/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv b/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv new file mode 100644 index 0000000..5b95d34 --- /dev/null +++ b/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv @@ -0,0 +1,2 @@ +O B-CITY/WARSZAWA I-CITY/WARSZAWA +O B-NAME/JOHN I-FOO/SMITH I-FOO/X O diff --git a/test/bio-f1-error/bio-f1-error/config.txt b/test/bio-f1-error/bio-f1-error/config.txt new file mode 100644 index 0000000..70977e1 --- /dev/null +++ b/test/bio-f1-error/bio-f1-error/config.txt @@ -0,0 +1 @@ +--metric BIO-F1 diff --git a/test/bio-f1-error/bio-f1-error/test-A/expected.tsv b/test/bio-f1-error/bio-f1-error/test-A/expected.tsv new file mode 100644 index 0000000..f6465b5 --- /dev/null +++ b/test/bio-f1-error/bio-f1-error/test-A/expected.tsv @@ -0,0 +1,2 @@ +O B-CITY/WARSZAWA I-CITY/WARSZAWA +O B-NAME/JOHN I-NAME/SMITH O O