better diagnostic messages for BIO

This commit is contained in:
Filip Graliński 2018-05-25 14:44:19 +02:00
parent 3e201d11ef
commit 881a77e239
5 changed files with 15 additions and 1 deletions

View File

@ -19,6 +19,13 @@ import GEval.Common
data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text) data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text)
deriving (Eq, Show) deriving (Eq, Show)
-- | Render a 'BIOLabel' in its textual BIO notation.
--
-- 'Outside' is rendered as @O@. 'Beginning' and 'Inside' are rendered as
-- @B-\<label\>@ and @I-\<label\>@ respectively, with @/\<normalized\>@
-- appended when the optional second field is present.
formatBioLabel :: BIOLabel -> T.Text
formatBioLabel bioLabel = case bioLabel of
  Outside               -> "O"
  Beginning tag mNormal -> render "B-" tag mNormal
  Inside tag mNormal    -> render "I-" tag mNormal
  where
    -- Shared layout for both tagged variants: prefix, label, and the
    -- optional "/normalized" suffix.
    render prefix tag mNormal =
      T.concat (prefix : tag : maybe [] (\normal -> ["/", normal]) mNormal)
data TaggedSpan = TaggedSpan Int Int data TaggedSpan = TaggedSpan Int Int
deriving (Eq, Show) deriving (Eq, Show)
@ -45,7 +52,7 @@ labelSplitToEntity labs@(h@(_,begIx):t) = if isBeginning h && all (\tp -> isInsi
then then
Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized
else else
Left "something wrong with label sequence" Left $ "inconsistent label sequence `" ++ (T.unpack $ T.intercalate " " $ map (formatBioLabel . fst) labs) ++ "`"
where isBeginning (Beginning _ _, _) = True where isBeginning (Beginning _ _, _) = True
isBeginning _ = False isBeginning _ = False
isInside (Inside _ _, _) = True isInside (Inside _ _, _) = True

View File

@ -258,6 +258,8 @@ main = hspec $ do
runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5 runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
it "check perfect score" $ do it "check perfect score" $ do
runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0 runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
it "check inconsistent input" $ do
runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`")
describe "automatic decompression" $ do describe "automatic decompression" $ do
it "more complex test" $ do it "more complex test" $ do
runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923 runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923

View File

@ -0,0 +1,2 @@
O B-CITY/WARSZAWA I-CITY/WARSZAWA
O B-NAME/JOHN I-FOO/SMITH I-FOO/X O
1 O B-CITY/WARSZAWA I-CITY/WARSZAWA
2 O B-NAME/JOHN I-FOO/SMITH I-FOO/X O

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,2 @@
O B-CITY/WARSZAWA I-CITY/WARSZAWA
O B-NAME/JOHN I-NAME/SMITH O O
1 O B-CITY/WARSZAWA I-CITY/WARSZAWA
2 O B-NAME/JOHN I-NAME/SMITH O O