diff --git a/geval.cabal b/geval.cabal index f29e0ec..362b770 100644 --- a/geval.cabal +++ b/geval.cabal @@ -1,5 +1,5 @@ name: geval -version: 1.1.2.0 +version: 1.2.0.0 synopsis: Machine learning evaluation tools description: Please see README.md homepage: http://github.com/name/project diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 6eddbda..370a4ca 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -96,7 +96,7 @@ defaultLogLossHashedSize = 10 -- | evaluation metric data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood - | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE + | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double deriving (Eq) instance Show Metric where @@ -126,8 +126,7 @@ instance Show Metric where show BIOF1 = "BIO-F1" show BIOF1Labels = "BIO-F1-Labels" show MAE = "MAE" - - + show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta) instance Read Metric where readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)] @@ -139,6 +138,9 @@ instance Read Metric where readsPrec p ('F':theRest) = case readsPrec p theRest of [(beta, theRest)] -> [(FMeasure beta, theRest)] _ -> [] + readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of + [(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)] + _ -> [] readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)] _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)] @@ -173,6 +175,7 @@ getMetricOrdering Likelihood = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter getMetricOrdering BIOF1Labels = TheHigherTheBetter getMetricOrdering MAE = TheLowerTheBetter +getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter isInputNeeded :: Metric -> Bool isInputNeeded CharMatch = True @@ -521,7 +524,6 @@ 
gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun getCount (True, False) = (0, 1, 0) getCount (False, True) = (0, 0, 1) getCount (False, False) = (0, 0, 0) - countAgg = CC.foldl countFolder (0, 0, 0) gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep where @@ -566,6 +568,13 @@ gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWit entities <- parseBioSequenceIntoEntities s return $ Prelude.map eraseNormalisation entities +gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords + intoWords + (getCounts (==)) + countAgg + (fMeasureOnCounts beta) + where intoWords = Right . (Prelude.map unpack) . Data.Text.words + countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int) countAgg = CC.foldl countFolder (0, 0, 0) diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index b00258e..bf60c5b 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -229,6 +229,22 @@ The output should be given in the BIO format with the normalized forms given aft The metric is F1 counted on entities (not labels). |] ++ (commonReadmeMDContents testName) +readmeMDContents (MultiLabelFMeasure beta) testName = [i| +Tag names and their components +============================== + +Tag names and their components (first name/surname) in a text. + +Tags: +* person +* surname +* first-name + +For each tag a sequence of token IDs separated with commas should be given (after a colon). + +The metric is F1 on labels. 
+|] ++ (commonReadmeMDContents testName) + readmeMDContents _ testName = [i| GEval sample challenge ====================== @@ -324,6 +340,10 @@ trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surnam O O O O O There is no name here B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman |] +trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5 +Steven bloody Brown person:1,3 first-name:1 surname:3 +James and James first-name:1 first-name:3 +|] trainContents _ = [hereLit|0.06 0.39 0 0.206 1.00 1.00 1 0.017 317.8 5.20 67 0.048 @@ -365,6 +385,10 @@ devInContents BIOF1Labels = devInContents BIOF1 devInContents BIOF1 = [hereLit|Adam and Eve Mr Jan Kowalski |] +devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here +I see him +Barbara +|] devInContents _ = [hereLit|0.72 0 0.007 9.54 62 0.054 |] @@ -404,6 +428,10 @@ devExpectedContents BIOF1Labels = devExpectedContents BIOF1 devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE O B-firstname/JAN B-surname/KOWALSKI |] +devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2 + +first-name:1 +|] devExpectedContents _ = [hereLit|0.82 95.2 |] @@ -445,6 +473,10 @@ testInContents BIOF1Labels = testInContents BIOF1 testInContents BIOF1 = [hereLit|Alan Tring No name here |] +testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith +Nobody is there +I saw Marketa +|] testInContents _ = [hereLit|1.52 2 0.093 30.06 14 0.009 |] @@ -486,6 +518,10 @@ testExpectedContents BIOF1Labels = testExpectedContents BIOF1 testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING O O O |] +testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3 + +first-name:3 +|] testExpectedContents _ = [hereLit|0.11 17.2 |] diff --git a/src/GEval/PrecisionRecall.hs b/src/GEval/PrecisionRecall.hs index 62b2fed..bcaad4b 100644 --- a/src/GEval/PrecisionRecall.hs 
+++ b/src/GEval/PrecisionRecall.hs @@ -3,7 +3,8 @@ module GEval.PrecisionRecall(calculateMAPForOneResult, fMeasure, f1Measure, f2Measure, precision, recall, fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder, - precisionAndRecall, precisionAndRecallFromCounts, maxMatch, maxMatchOnOrdered) + precisionAndRecall, precisionAndRecallFromCounts, + maxMatch, maxMatchOnOrdered, getCounts) where import GEval.Common @@ -30,8 +31,8 @@ f1Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double f1Measure = fMeasure 1.0 -- | Calculates both generalized) F-measure -fMeasure :: Double - -> (a -> b -> Bool) -- ^ beta parameter +fMeasure :: Double -- ^ beta parameter + -> (a -> b -> Bool) -- ^ function to check whether there is a match -> [a] -- ^ the ground truth -> [b] -- ^ what we got -> Double -- ^ f-Measure @@ -55,6 +56,11 @@ fMeasureOnCounts beta (tp, nbExpected, nbGot) = countFolder :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int) countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3) +getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int) +getCounts matchingFun (expected, got) = (maxMatch matchingFun expected got, + length expected, + length got) + -- | Calculates both precision and recall. 
-- -- (See https://en.wikipedia.org/wiki/Precision_and_recall) diff --git a/test/Spec.hs b/test/Spec.hs index 318d73e..6a905b6 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -212,6 +212,11 @@ main = hspec $ do describe "Likelihood" $ do it "simple" $ do runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866 + describe "MultiLabel-F" $ do + it "simple" $ do + runGEvalTest "multilabel-f1-simple" `shouldReturnAlmost` 0.66666666666 + it "simple F2" $ do + runGEvalTest "multilabel-f2-simple" `shouldReturnAlmost` 0.441176470588235 describe "evaluating single lines" $ do it "RMSE" $ do gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla") diff --git a/test/multilabel-f1-simple/multilabel-f1-simple-solution/test-A/out.tsv b/test/multilabel-f1-simple/multilabel-f1-simple-solution/test-A/out.tsv new file mode 100644 index 0000000..6a8bd3a --- /dev/null +++ b/test/multilabel-f1-simple/multilabel-f1-simple-solution/test-A/out.tsv @@ -0,0 +1,4 @@ +foo bar baz +uuu +foo bar baz +qqq aaa diff --git a/test/multilabel-f1-simple/multilabel-f1-simple/config.txt b/test/multilabel-f1-simple/multilabel-f1-simple/config.txt new file mode 100644 index 0000000..b79da4c --- /dev/null +++ b/test/multilabel-f1-simple/multilabel-f1-simple/config.txt @@ -0,0 +1 @@ +--metric MultiLabel-F1 diff --git a/test/multilabel-f1-simple/multilabel-f1-simple/test-A/expected.tsv b/test/multilabel-f1-simple/multilabel-f1-simple/test-A/expected.tsv new file mode 100644 index 0000000..64612c3 --- /dev/null +++ b/test/multilabel-f1-simple/multilabel-f1-simple/test-A/expected.tsv @@ -0,0 +1,4 @@ +foo bar baz + +foo +qqq qqq diff --git a/test/multilabel-f2-simple/multilabel-f2-simple-solution/test-A/out.tsv b/test/multilabel-f2-simple/multilabel-f2-simple-solution/test-A/out.tsv new file mode 100644 index 0000000..faa9fc7 --- /dev/null +++ b/test/multilabel-f2-simple/multilabel-f2-simple-solution/test-A/out.tsv @@ -0,0 +1,4 @@ +bar:2 + other:6 +xyz:12 + t:6 t:8 t:9 diff --git 
a/test/multilabel-f2-simple/multilabel-f2-simple/config.txt b/test/multilabel-f2-simple/multilabel-f2-simple/config.txt new file mode 100644 index 0000000..49da0d0 --- /dev/null +++ b/test/multilabel-f2-simple/multilabel-f2-simple/config.txt @@ -0,0 +1 @@ +--metric MultiLabel-F2 diff --git a/test/multilabel-f2-simple/multilabel-f2-simple/test-A/expected.tsv b/test/multilabel-f2-simple/multilabel-f2-simple/test-A/expected.tsv new file mode 100644 index 0000000..80885dd --- /dev/null +++ b/test/multilabel-f2-simple/multilabel-f2-simple/test-A/expected.tsv @@ -0,0 +1,4 @@ +foo:1 bar:2 baz:3 +other:5 + +t:6 t:7 t:8