better diagnostic messages for BIO
This commit is contained in:
parent
3e201d11ef
commit
881a77e239
@ -19,6 +19,13 @@ import GEval.Common
|
|||||||
data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text)
|
data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text)
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show)
|
||||||
|
|
||||||
|
-- | Render a 'BIOLabel' in its textual BIO notation.
--
-- 'Outside' becomes @O@; 'Beginning' and 'Inside' labels become
-- @B-\<label\>@ and @I-\<label\>@ respectively, with @/\<normalized\>@
-- appended when a normalized form is present.
formatBioLabel :: BIOLabel -> T.Text
formatBioLabel bioLabel = case bioLabel of
  Outside                -> "O"
  Beginning tag mNormal  -> render "B-" tag mNormal
  Inside tag mNormal     -> render "I-" tag mNormal
  where
    -- Prefix, label name, then the optional "/normalized" suffix.
    render prefix tag mNormal =
      T.concat [prefix, tag, maybe T.empty (T.cons '/') mNormal]
|
||||||
|
|
||||||
data TaggedSpan = TaggedSpan Int Int
|
data TaggedSpan = TaggedSpan Int Int
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show)
|
||||||
|
|
||||||
@ -45,7 +52,7 @@ labelSplitToEntity labs@(h@(_,begIx):t) = if isBeginning h && all (\tp -> isInsi
|
|||||||
then
|
then
|
||||||
Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized
|
Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized
|
||||||
else
|
else
|
||||||
Left "something wrong with label sequence"
|
Left $ "inconsistent label sequence `" ++ (T.unpack $ T.intercalate " " $ map (formatBioLabel . fst) labs) ++ "`"
|
||||||
where isBeginning (Beginning _ _, _) = True
|
where isBeginning (Beginning _ _, _) = True
|
||||||
isBeginning _ = False
|
isBeginning _ = False
|
||||||
isInside (Inside _ _, _) = True
|
isInside (Inside _ _, _) = True
|
||||||
|
@ -258,6 +258,8 @@ main = hspec $ do
|
|||||||
runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
|
runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
|
||||||
it "check perfect score" $ do
|
it "check perfect score" $ do
|
||||||
runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
|
runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
|
||||||
|
it "check inconsistent input" $ do
|
||||||
|
runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`")
|
||||||
describe "automatic decompression" $ do
|
describe "automatic decompression" $ do
|
||||||
it "more complex test" $ do
|
it "more complex test" $ do
|
||||||
runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923
|
runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923
|
||||||
|
2
test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv
Normal file
2
test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
O B-CITY/WARSZAWA I-CITY/WARSZAWA
|
||||||
|
O B-NAME/JOHN I-FOO/SMITH I-FOO/X O
|
|
1
test/bio-f1-error/bio-f1-error/config.txt
Normal file
1
test/bio-f1-error/bio-f1-error/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric BIO-F1
|
2
test/bio-f1-error/bio-f1-error/test-A/expected.tsv
Normal file
2
test/bio-f1-error/bio-f1-error/test-A/expected.tsv
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
O B-CITY/WARSZAWA I-CITY/WARSZAWA
|
||||||
|
O B-NAME/JOHN I-NAME/SMITH O O
|
|
Loading…
Reference in New Issue
Block a user