add BIO-F1-Labels metric
This commit is contained in:
parent
65e8d2562e
commit
4768931221
@ -1,7 +1,9 @@
|
|||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
module GEval.BIO
|
module GEval.BIO
|
||||||
(BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities, TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO)
|
(BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities,
|
||||||
|
TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO,
|
||||||
|
eraseNormalisation)
|
||||||
where
|
where
|
||||||
|
|
||||||
import GEval.PrecisionRecall
|
import GEval.PrecisionRecall
|
||||||
@ -32,6 +34,9 @@ data TaggedSpan = TaggedSpan Int Int
|
|||||||
data TaggedEntity = TaggedEntity TaggedSpan T.Text (Maybe T.Text)
|
data TaggedEntity = TaggedEntity TaggedSpan T.Text (Maybe T.Text)
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show)
|
||||||
|
|
||||||
|
eraseNormalisation :: TaggedEntity -> TaggedEntity
|
||||||
|
eraseNormalisation (TaggedEntity span label normalized) = (TaggedEntity span label Nothing)
|
||||||
|
|
||||||
gatherCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> (Int, Int, Int)
|
gatherCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> (Int, Int, Int)
|
||||||
gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, length expected, length got)
|
gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, length expected, length got)
|
||||||
where
|
where
|
||||||
|
@ -84,7 +84,7 @@ defaultLogLossHashedSize :: Word32
|
|||||||
defaultLogLossHashedSize = 10
|
defaultLogLossHashedSize = 10
|
||||||
|
|
||||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
|
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
|
||||||
| MAP | LogLoss | Likelihood | BIOF1 | LikelihoodHashed Word32
|
| MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | LikelihoodHashed Word32
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
instance Show Metric where
|
instance Show Metric where
|
||||||
@ -112,6 +112,7 @@ instance Show Metric where
|
|||||||
show LogLoss = "LogLoss"
|
show LogLoss = "LogLoss"
|
||||||
show Likelihood = "Likelihood"
|
show Likelihood = "Likelihood"
|
||||||
show BIOF1 = "BIO-F1"
|
show BIOF1 = "BIO-F1"
|
||||||
|
show BIOF1Labels = "BIO-F1-Labels"
|
||||||
|
|
||||||
instance Read Metric where
|
instance Read Metric where
|
||||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||||
@ -133,6 +134,7 @@ instance Read Metric where
|
|||||||
readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
|
readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
|
||||||
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
|
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
|
||||||
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
|
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
|
||||||
|
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
|
||||||
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
||||||
|
|
||||||
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
||||||
@ -152,6 +154,7 @@ getMetricOrdering MAP = TheHigherTheBetter
|
|||||||
getMetricOrdering LogLoss = TheLowerTheBetter
|
getMetricOrdering LogLoss = TheLowerTheBetter
|
||||||
getMetricOrdering Likelihood = TheHigherTheBetter
|
getMetricOrdering Likelihood = TheHigherTheBetter
|
||||||
getMetricOrdering BIOF1 = TheHigherTheBetter
|
getMetricOrdering BIOF1 = TheHigherTheBetter
|
||||||
|
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
||||||
|
|
||||||
defaultOutDirectory = "."
|
defaultOutDirectory = "."
|
||||||
defaultTestName = "test-A"
|
defaultTestName = "test-A"
|
||||||
@ -437,6 +440,11 @@ gevalCore' CharMatch inputLineSource = helper inputLineSource
|
|||||||
|
|
||||||
gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
|
gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
|
||||||
|
|
||||||
|
gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
|
||||||
|
where parseBioSequenceIntoEntitiesWithoutNormalization s = do
|
||||||
|
entities <- parseBioSequenceIntoEntities s
|
||||||
|
return $ Prelude.map eraseNormalisation entities
|
||||||
|
|
||||||
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
|
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
|
||||||
countAgg = CC.foldl countFolder (0, 0, 0)
|
countAgg = CC.foldl countFolder (0, 0, 0)
|
||||||
|
|
||||||
|
@ -215,6 +215,7 @@ This a sample challenge for the likelihood metric.
|
|||||||
|
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName
|
||||||
readmeMDContents BIOF1 testName = [i|
|
readmeMDContents BIOF1 testName = [i|
|
||||||
Tag and normalize names
|
Tag and normalize names
|
||||||
=======================
|
=======================
|
||||||
@ -318,6 +319,7 @@ trainContents LogLoss = [hereLit|0.0 Hell, no!!!
|
|||||||
1.0 Lekker!!!
|
1.0 Lekker!!!
|
||||||
0.0 Boring, boring, boring
|
0.0 Boring, boring, boring
|
||||||
|]
|
|]
|
||||||
|
trainContents BIOF1Labels = trainContents BIOF1
|
||||||
trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
|
trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
|
||||||
O O O O O There is no name here
|
O O O O O There is no name here
|
||||||
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
||||||
@ -359,6 +361,7 @@ devInContents LogLoss = [hereLit|Great stuff!
|
|||||||
Boring stuff
|
Boring stuff
|
||||||
That's good
|
That's good
|
||||||
|]
|
|]
|
||||||
|
devInContents BIOF1Labels = devInContents BIOF1
|
||||||
devInContents BIOF1 = [hereLit|Adam and Eve
|
devInContents BIOF1 = [hereLit|Adam and Eve
|
||||||
Mr Jan Kowalski
|
Mr Jan Kowalski
|
||||||
|]
|
|]
|
||||||
@ -397,6 +400,7 @@ devExpectedContents LogLoss = [hereLit|1.0
|
|||||||
0.0
|
0.0
|
||||||
1.0
|
1.0
|
||||||
|]
|
|]
|
||||||
|
devExpectedContents BIOF1Labels = devExpectedContents BIOF1
|
||||||
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
|
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
|
||||||
O B-firstname/JAN B-surname/KOWALSKI
|
O B-firstname/JAN B-surname/KOWALSKI
|
||||||
|]
|
|]
|
||||||
@ -437,6 +441,7 @@ testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
|
|||||||
Super-duper!!
|
Super-duper!!
|
||||||
That is incredibly boring.
|
That is incredibly boring.
|
||||||
|]
|
|]
|
||||||
|
testInContents BIOF1Labels = testInContents BIOF1
|
||||||
testInContents BIOF1 = [hereLit|Alan Tring
|
testInContents BIOF1 = [hereLit|Alan Tring
|
||||||
No name here
|
No name here
|
||||||
|]
|
|]
|
||||||
@ -477,6 +482,7 @@ testExpectedContents LogLoss = [hereLit|1.0
|
|||||||
1.0
|
1.0
|
||||||
0.0
|
0.0
|
||||||
|]
|
|]
|
||||||
|
testExpectedContents BIOF1Labels = testExpectedContents BIOF1
|
||||||
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
|
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
|
||||||
O O O
|
O O O
|
||||||
|]
|
|]
|
||||||
|
@ -110,7 +110,7 @@ metricReader = option auto
|
|||||||
<> value defaultMetric
|
<> value defaultMetric
|
||||||
<> showDefault
|
<> showDefault
|
||||||
<> metavar "METRIC"
|
<> metavar "METRIC"
|
||||||
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1 or CharMatch" )
|
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
|
||||||
|
|
||||||
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
||||||
runGEval args = do
|
runGEval args = do
|
||||||
|
@ -257,6 +257,8 @@ main = hspec $ do
|
|||||||
TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] `shouldBe` (1, 1, 2)
|
TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] `shouldBe` (1, 1, 2)
|
||||||
it "check F1 on a more complicated example" $ do
|
it "check F1 on a more complicated example" $ do
|
||||||
runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625
|
runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625
|
||||||
|
it "check F1 on labels only" $ do
|
||||||
|
runGEvalTest "bio-f1-complex-labels" `shouldReturnAlmost` 0.6666666666
|
||||||
it "calculate F1" $ do
|
it "calculate F1" $ do
|
||||||
runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
|
runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
|
||||||
it "calculate F1 with underscores rather than minus signs" $ do
|
it "calculate F1 with underscores rather than minus signs" $ do
|
||||||
|
@ -0,0 +1,6 @@
|
|||||||
|
B-wrong
|
||||||
|
B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK_CITY
|
||||||
|
B-surname/BROWN B-surname/SMIT
|
||||||
|
B-month B-month O O B-foo/bar
|
||||||
|
O B-class I-class I-class
|
||||||
|
O O
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric BIO-F1-Labels
|
@ -0,0 +1,6 @@
|
|||||||
|
O
|
||||||
|
B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
|
||||||
|
O B-surname/SMITH
|
||||||
|
B-month/JULY O O O B-foo/bar
|
||||||
|
O B-class I-class I-class
|
||||||
|
O O
|
|
Loading…
Reference in New Issue
Block a user