From 06fd09334965a4ae2d6c05817265fa8f93535299 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Tue, 15 May 2018 08:07:47 +0200 Subject: [PATCH 01/24] probs can be given for LogLossHashed --- src/GEval/LogLossHashed.hs | 42 +++++++++++++++++-- test/Spec.hs | 4 ++ .../test-A/out.tsv | 3 ++ .../config.txt | 1 + .../test-A/expected.tsv | 3 ++ .../test-A/out.tsv | 5 +++ .../log-loss-hashed-probs/config.txt | 1 + .../log-loss-hashed-probs/test-A/expected.tsv | 5 +++ 8 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv create mode 100644 test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt create mode 100644 test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv create mode 100644 test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv create mode 100644 test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt create mode 100644 test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv diff --git a/src/GEval/LogLossHashed.hs b/src/GEval/LogLossHashed.hs index 3e092ab..bed3c7b 100644 --- a/src/GEval/LogLossHashed.hs +++ b/src/GEval/LogLossHashed.hs @@ -36,24 +36,33 @@ parseDistribution nbOfBits seed distroSpec = -- a direct list of 2^nbOfBits log probs else parseDistributionFromLogProbList nbOfBits distroSpec +isProbTotalIncorrect :: Double -> Bool +isProbTotalIncorrect probTotal = probTotal > 1.0 || probTotal < (1.0 - epsilon) + where epsilon = 0.00000001 + normalizeDistribution :: HashedDistribution -> HashedDistribution normalizeDistribution distro = -- we do softmax (if needed) - if probSum > 1.0 || probSum < (1.0 - epsilon) + if isProbTotalIncorrect probSum then normalized else distro where probSum = V.foldl' (\s l -> (s + exp l)) 0.0 distro normalized = V.map (\v -> log ((exp v) / probSum)) distro - epsilon = 0.00000001 type DistroMonad s = ReaderT (VM.MVector s Double) (ST s) data WordSpec = AnyWord | SpecificWord T.Text + deriving (Eq, Show) + +isAnyWord AnyWord = True +isAnyWord _ = False + data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution -parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<< ( - processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec) +parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<< + lookForProbs =<< + (processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec) getWordSpecWithLogProb :: T.Text -> Either String WordSpecWithLogProb getWordSpecWithLogProb t = @@ -77,6 +86,31 @@ parseDistributionFromWordList' nbOfBits seed specs = runST $ do frozen <- V.freeze emp return $ Right frozen +lookForProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb] +lookForProbs specs + | areProbs specs = Right $ toLogProbs $ normalizeProbs specs + | otherwise = Right $ specs + +areProbs :: [WordSpecWithLogProb] -> Bool +areProbs specs = all isProb specs && any isPositiveProb specs + where isProb (WordSpecWithLogProb _ v) = v >= 0.0 && v <= 1.0 + isPositiveProb (WordSpecWithLogProb _ p) = p > 0.0 && p <= 1.0 + +toLogProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb] +toLogProbs = map (\(WordSpecWithLogProb w p) -> (WordSpecWithLogProb w (log p))) + +normalizeProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb] +normalizeProbs specs = if isProbTotalIncorrect probTotal + then + if probTotal > 1.0 || any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs + then + map (\(WordSpecWithLogProb w p) -> WordSpecWithLogProb w (p / probTotal)) specs + else + ((WordSpecWithLogProb AnyWord (1-probTotal)):specs) + else + specs + where probTotal = sum $ map (\(WordSpecWithLogProb _ p) -> p) specs + addSpecs :: Word32 -> Word32 -> [WordSpecWithLogProb] -> DistroMonad s () addSpecs nbOfBits seed = mapM_ (updateDistro nbOfBits seed) diff --git a/test/Spec.hs b/test/Spec.hs index 9be9045..27b6653 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -95,6 +95,10 @@ main = hspec $ do runGEvalTest "log-loss-hashed-simple" `shouldReturnAlmost` 2.398479083333333 it "example with unnormalized values" $ do runGEvalTest "log-loss-hashed-not-normalized" `shouldReturnAlmost` 1.0468455186722887 + it "with probs instead of log probs" $ do + runGEvalTest "log-loss-hashed-probs" `shouldReturnAlmost` 4.11631293099392 + it "with probs instead of log probs (with normalization)" $ do + runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853 describe "reading options" $ do it "can get the metric" $ do extractMetric "bleu-complex" `shouldReturn` (Just BLEU) diff --git a/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv new file mode 100644 index 0000000..d1dbc6f --- /dev/null +++ b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv @@ -0,0 +1,3 @@ +1:0.5 2:0.6 3:1.0 4:1.0 5:0.9 +1:0.3 2:0.2 3:0.3 +1:0.2 :0.6 diff --git a/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt new file mode 100644 index 0000000..8121298 --- /dev/null +++ b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt @@ -0,0 +1 @@ +--metric LogLossHashed10 diff --git a/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv new file mode 100644 index 0000000..39a8593 --- /dev/null +++ b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv @@ -0,0 +1,3 @@ +1 +3 +1 diff --git a/test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv b/test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv new file mode 100644 index 0000000..2658337 --- /dev/null +++ b/test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv @@ -0,0 +1,5 @@ +A:0.6 B:0.4 +C:0.2 A:0.1 +A:0.4 C:0.4 +D:1.0 +C:0.4 B:0.5 :0.1 diff --git a/test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt b/test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt new file mode 100644 index 0000000..8121298 --- /dev/null +++ b/test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt @@ -0,0 +1 @@ +--metric LogLossHashed10 diff --git a/test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv b/test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv new file mode 100644 index 0000000..b8ccfd3 --- /dev/null +++ b/test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv @@ -0,0 +1,5 @@ +A +A +B +D +A From 9fc4beaba169eb9d443f17ea245861dbf1d06565 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Tue, 15 May 2018 08:14:52 +0200 Subject: [PATCH 02/24] improve sample challenge for LogLossHashed --- src/GEval/CreateChallenge.hs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index b0e53a5..3b31b3f 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -113,6 +113,35 @@ The metric is average log-loss calculated for 10-bit hashes. Train file is a just text file (one utterance per line). In an input file, left and right contexts (TAB-separated) are given. In an expected file, the word to be guessed is given. + +Format of the output files +-------------------------- + +For each input line, a probability distribution for words in a gap +must be given: + + word1:logprob1 word2:logprob2 ... wordN:logprobN :logprob0 + +where *logprobi* is the logarithm of the probability for *wordi* and +*logprob0* is the logarithm of the probability mass for all the other +words (it will be spread between all 1024 fingerprint values). If the +respective probabilities do not sum up to 1, they will be normalised with +softmax. + +Note: the separator here is space, not TAB! + +### Probs + +Probabilities could be given (instead of logprobs): + + * if **all** values look as probs and **at least value** is positive, we treat + the values as probs rather then logprobs (single value 0.0 is treated + as a logprob, i.e. probability 1.0!); + * if their sum is greater than 1.0, then we normalize simply by dividing by the sum; + * if the sum is smaller than 1.0 and there is no entry for all the other words, + we add such an entry for the missing probability mass; + * if the sum is smaller than 1.0 and there is an entry for all the other words, + we normalize by dividing by the sum. |] ++ (commonReadmeMDContents testName) readmeMDContents CharMatch testName = [i| From a1c357948e8dd7ae615ffab2bd10f25ad7328ea1 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Tue, 15 May 2018 08:16:51 +0200 Subject: [PATCH 03/24] bump up version --- geval.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geval.cabal b/geval.cabal index 42531ec..8cfc841 100644 --- a/geval.cabal +++ b/geval.cabal @@ -1,5 +1,5 @@ name: geval -version: 0.5.4.0 +version: 0.5.5.0 synopsis: Machine learning evaluation tools description: Please see README.md homepage: http://github.com/name/project From 82e794ae3c53024116c2a5fca3a18fa6378d25e6 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Tue, 15 May 2018 09:38:13 +0200 Subject: [PATCH 04/24] implement BIO-F1 --- geval.cabal | 1 + src/GEval/BIO.hs | 99 +++++++++++++++++++ src/GEval/Core.hs | 13 ++- src/GEval/CreateChallenge.hs | 28 ++++++ src/GEval/OptionsParser.hs | 2 +- src/GEval/PrecisionRecall.hs | 14 ++- test/Spec.hs | 59 ++++++++++- .../bio-f1-complex-solution/test-A/out.tsv | 6 ++ test/bio-f1-complex/bio-f1-complex/config.txt | 1 + .../bio-f1-complex/test-A/expected.tsv | 6 ++ .../bio-f1-perfect-solution/test-A/out.tsv | 4 + test/bio-f1-perfect/bio-f1-perfect/config.txt | 1 + .../bio-f1-perfect/test-A/expected.tsv | 4 + .../bio-f1-simple-solution/test-A/out.tsv | 3 + test/bio-f1-simple/bio-f1-simple/config.txt | 1 + .../bio-f1-simple/test-A/expected.tsv | 3 + 16 files changed, 239 insertions(+), 6 deletions(-) create mode 100644 src/GEval/BIO.hs create mode 100644 test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv create mode 100644 test/bio-f1-complex/bio-f1-complex/config.txt create mode 100644 test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv create mode 100644 test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv create mode 100644 test/bio-f1-perfect/bio-f1-perfect/config.txt create mode 100644 test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv create mode 100644 test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv create mode 100644 test/bio-f1-simple/bio-f1-simple/config.txt create mode 100644 test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv diff --git a/geval.cabal b/geval.cabal index 8cfc841..23e3672 100644 --- a/geval.cabal +++ b/geval.cabal @@ -26,6 +26,7 @@ library , GEval.LogLossHashed , GEval.CharMatch , GEval.LineByLine + , GEval.BIO build-depends: base >= 4.7 && < 5 , cond , conduit diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs new file mode 100644 index 0000000..a3e8819 --- /dev/null +++ b/src/GEval/BIO.hs @@ -0,0 +1,99 @@ +{-# LANGUAGE OverloadedStrings #-} + +module GEval.BIO + (BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities, TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO) + where + +import GEval.PrecisionRecall + +import qualified Data.Text as T + +import Data.Attoparsec.Text +import Data.Attoparsec.Combinator +import Control.Applicative +import Data.Char +import Data.Maybe (catMaybes) + +import GEval.Common + +data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text) + deriving (Eq, Show) + +data TaggedSpan = TaggedSpan Int Int + deriving (Eq, Show) + +data TaggedEntity = TaggedEntity TaggedSpan T.Text (Maybe T.Text) + deriving (Eq, Show) + +gatherCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> (Int, Int, Int) +gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, length expected, length got) + where + laterThan (TaggedEntity (TaggedSpan a _) _ _) (TaggedEntity (TaggedSpan b _) _ _) = a > b + +parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity] +parseBioSequenceIntoEntities t = labelsIntoEntities =<< (parseOnly (bioSequenceParser <* endOfInput) t) + +labelsIntoEntities :: [BIOLabel] -> Either String [TaggedEntity] +labelsIntoEntities labels = labelsIntoEntities' $ zip labels [1..] + +labelsIntoEntities' :: [(BIOLabel, Int)] -> Either String [TaggedEntity] +labelsIntoEntities' labelsWithPositions = mapM labelSplitToEntity labelsGathered + where labelsGathered = splitLabels labelsWithPositions + +labelSplitToEntity :: [(BIOLabel, Int)] -> Either String TaggedEntity +labelSplitToEntity labs@(h@(_,begIx):t) = if isBeginning h && all (\tp -> isInside tp && tt tp == btp) t + then + Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized + else + Left "something wrong with label sequence" + where isBeginning (Beginning _ _, _) = True + isBeginning _ = False + isInside (Inside _ _, _) = True + isInside _ = False + tt (Beginning t _, _) = t + tt (Inside t _, _) = t + btp = tt h + lastItemIx = case t of + [] -> begIx + _ -> let (_, ix) = last t + in ix + normalized (Beginning _ n, _) = n + normalized (Inside _ n, _) = n + mNormalized = if all (\tp -> normalized tp == Nothing) labs + then + Nothing + else + Just $ T.intercalate "_" $ catMaybes $ map normalized labs + +splitLabels :: [(BIOLabel, Int)] -> [[(BIOLabel, Int)]] +splitLabels [] = [] +splitLabels ((Outside, _):r) = splitLabels r +splitLabels (e@(_, ix):r) = + case splitLabels r of + l@(((Beginning _ _, _):_):_) -> ([e]:l) + (s@((Inside _ _, ix'):_):l) -> if ix' == ix + 1 + then + ((e:s):l) + else + ([e]:(s:l)) + [] -> [[e]] + +bioSequenceParser :: Parser [BIOLabel] +bioSequenceParser = sepByWhitespaces bioLabelParser + +bioLabelParser :: Parser BIOLabel +bioLabelParser = + (string "O" *> pure Outside) <|> + (do + labelType <- bioMarkerParser + string "-" + label <- takeWhile1 (\c -> not (isSpace c) && c /= '/') + normalized <- (do + string "/" + normalized <- takeWhile1 (not . isSpace) + return $ Just normalized) <|> pure Nothing + return $ labelType label normalized) + +bioMarkerParser :: Parser (T.Text -> Maybe T.Text -> BIOLabel) +bioMarkerParser = + (string "B" *> pure Beginning) <|> (string "I" *> pure Inside) diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 6b29f7e..2ba9b27 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -53,6 +53,7 @@ import qualified System.Directory as D import System.Posix import System.FilePath import Data.Maybe +import Data.Tuple import qualified Data.List.Split as DLS import Control.Monad.IO.Class @@ -67,6 +68,7 @@ import GEval.PrecisionRecall import GEval.ClusteringMetrics import GEval.LogLossHashed import GEval.CharMatch +import GEval.BIO import qualified Data.HashMap.Strict as M @@ -80,7 +82,7 @@ defaultLogLossHashedSize :: Word32 defaultLogLossHashedSize = 10 data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch - | MAP | LogLoss + | MAP | LogLoss | BIOF1 deriving (Eq) instance Show Metric where @@ -100,6 +102,7 @@ instance Show Metric where show CharMatch = "CharMatch" show MAP = "MAP" show LogLoss = "LogLoss" + show BIOF1 = "BIO-F1" instance Read Metric where readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)] @@ -117,6 +120,7 @@ instance Read Metric where readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)] readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)] readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] + readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter @@ -132,6 +136,7 @@ getMetricOrdering (LogLossHashed _) = TheLowerTheBetter getMetricOrdering CharMatch = TheHigherTheBetter getMetricOrdering MAP = TheHigherTheBetter getMetricOrdering LogLoss = TheLowerTheBetter +getMetricOrdering BIOF1 = TheHigherTheBetter defaultOutDirectory = "." defaultTestName = "test-A" @@ -381,7 +386,11 @@ gevalCore' CharMatch inputLineSource = helper inputLineSource helper inputLineSource expectedLineSource outputLineSource = do gevalCoreGeneralized (ParserSpecWithInput (Right . unpack) (Right . unpack) (Right . unpack)) step countAgg (fMeasureOnCounts charMatchBeta) (WithInput inputLineSource expectedLineSource outputLineSource) step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out - countAgg = CC.foldl countFolder (0, 0, 0) + +gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts + +countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int) +countAgg = CC.foldl countFolder (0, 0, 0) parseDistributionWrapper :: Word32 -> Word32 -> Text -> HashedDistribution parseDistributionWrapper nbOfBits seed distroSpec = case parseDistribution nbOfBits seed distroSpec of diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 3b31b3f..09a85e4 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -199,6 +199,18 @@ This a sample challenge for the log-loss metric. |] ++ (commonReadmeMDContents testName) +readmeMDContents BIOF1 testName = [i| +Tag and normalize names +======================= + +Tag names in the tokenized text and normalized them. + +The output should be given in the BIO format with the normalized forms given after slashes (see +`dev-0/expected.tsv` for an example). + +The metric is F1 counted on entities (not labels). +|] ++ (commonReadmeMDContents testName) + readmeMDContents _ testName = [i| GEval sample challenge ====================== @@ -288,6 +300,10 @@ trainContents LogLoss = [hereLit|0.0 Hell, no!!! 1.0 Lekker!!! 0.0 Boring, boring, boring |] +trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond +O O O O O There is no name here +B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman +|] trainContents _ = [hereLit|0.06 0.39 0 0.206 1.00 1.00 1 0.017 317.8 5.20 67 0.048 @@ -323,6 +339,9 @@ devInContents LogLoss = [hereLit|Great stuff! Boring stuff That's good |] +devInContents BIOF1 = [hereLit|Adam and Eve +Mr Jan Kowalski +|] devInContents _ = [hereLit|0.72 0 0.007 9.54 62 0.054 |] @@ -356,6 +375,9 @@ devExpectedContents LogLoss = [hereLit|1.0 0.0 1.0 |] +devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE +O B-firstname/JAN B-surname/KOWALSKI +|] devExpectedContents _ = [hereLit|0.82 95.2 |] @@ -391,6 +413,9 @@ testInContents LogLoss = [hereLit|That's great, ha, ha, I love it! Super-duper!! That is incredibly boring. |] +testInContents BIOF1 = [hereLit|Alan Tring +No name here +|] testInContents _ = [hereLit|1.52 2 0.093 30.06 14 0.009 |] @@ -426,6 +451,9 @@ testExpectedContents LogLoss = [hereLit|1.0 1.0 0.0 |] +testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING +O O O +|] testExpectedContents _ = [hereLit|0.11 17.2 |] diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index d5fe2f8..bad86fe 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -100,7 +100,7 @@ metricReader = option auto <> value defaultMetric <> showDefault <> metavar "METRIC" - <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" ) + <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, BIO-F1 or CharMatch" ) runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval args = do diff --git a/src/GEval/PrecisionRecall.hs b/src/GEval/PrecisionRecall.hs index 8781914..16879b2 100644 --- a/src/GEval/PrecisionRecall.hs +++ b/src/GEval/PrecisionRecall.hs @@ -3,7 +3,7 @@ module GEval.PrecisionRecall(calculateMAPForOneResult, fMeasure, f1Measure, f2Measure, precision, recall, fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder, - precisionAndRecall, precisionAndRecallFromCounts, maxMatch) + precisionAndRecall, precisionAndRecallFromCounts, maxMatch, maxMatchOnOrdered) where import GEval.Common @@ -65,6 +65,17 @@ precision matchFun expected got = fst $ precisionAndRecall matchFun expected got recall :: (a -> b -> Bool) -> [a] -> [b] -> Double recall matchFun expected got = snd $ precisionAndRecall matchFun expected got + +maxMatchOnOrdered :: Eq a => (a -> a -> Bool) -> [a] -> [a] -> Int +maxMatchOnOrdered laterThan expected got = + let (matched, _) = foldl' step (0, expected) got + in matched + where step (matched, l@(h:t)) g + | h == g = (matched+1, t) + | h `laterThan` g = (matched, l) + | otherwise = step (matched, t) g + step (matched, []) g = (matched, []) + -- counting maximum match with maximum bipartite matching -- (we build an auxiliary graph and do a max-flow on this) maxMatch :: (a -> b -> Bool) -> [a] -> [b] -> Int @@ -72,7 +83,6 @@ maxMatch matchFun expected got = mf where (b, e, g) = buildGraph matchFun expected got mf = maxFlow g (fst b) (fst e) - buildGraph :: (a -> b -> Bool) -> [a] -> [b] -> (LNode Int, LNode Int, Gr Int Int) buildGraph matchFun expected got = (b, e, g) where ((b, e), (_, g)) = buildGraph' matchFun expected got diff --git a/test/Spec.hs b/test/Spec.hs index 27b6653..8cd27bf 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -8,6 +8,7 @@ import GEval.BLEU import GEval.ClippEU import GEval.PrecisionRecall import GEval.ClusteringMetrics +import GEval.BIO import Data.Attoparsec.Text import Options.Applicative import Data.Text @@ -191,7 +192,63 @@ main = hspec $ do gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla") (LineInFile "stub2" 1 "3.4") (LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8 - + describe "BIO format" $ do + it "just parse" $ do + let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name" + r `shouldBe` [Outside, + Beginning "city" (Just "NEW_YORK"), + Inside "city" Nothing, + Beginning "city" (Just "KALISZ"), + Inside "city" Nothing, + Outside, + Beginning "name" Nothing] + it "simplest entity" $ do + let (Right ents) = parseBioSequenceIntoEntities "B-city" + ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing] + it "multi-word entity" $ do + let (Right ents) = parseBioSequenceIntoEntities "B-date I-date" + ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" Nothing] + it "multi-word entity with normalized text" $ do + let (Right ents) = parseBioSequenceIntoEntities "B-date/FOO I-date/BAR" + ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" (Just "FOO_BAR")] + it "simplest entity with something outside" $ do + let (Right ents) = parseBioSequenceIntoEntities "O B-city" + ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "city" Nothing] + it "another simple case" $ do + let (Right ents) = parseBioSequenceIntoEntities "B-city B-city" + ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing, + TaggedEntity (TaggedSpan 2 2) "city" Nothing] + it "just parse into entities" $ do + let (Right ents) = parseBioSequenceIntoEntities "O O B-city/LOS_ANGELES I-city B-city/KLUCZBORK O B-name O B-person/JOHN I-person/VON I-person/NEUMANN" + ents `shouldBe` [TaggedEntity (TaggedSpan 3 4) "city" (Just "LOS_ANGELES"), + TaggedEntity (TaggedSpan 5 5) "city" (Just "KLUCZBORK"), + TaggedEntity (TaggedSpan 7 7) "name" (Nothing), + TaggedEntity (TaggedSpan 9 11) "person" (Just "JOHN_VON_NEUMANN")] + it "another entity parse" $ do + let (Right ents) = parseBioSequenceIntoEntities "B-month/JULY B-month/JULY O O B-foo/bar" + ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "month" (Just "JULY"), + TaggedEntity (TaggedSpan 2 2) "month" (Just "JULY"), + TaggedEntity (TaggedSpan 5 5) "foo" (Just "bar")] + it "another entity parse" $ do + let (Right ents) = parseBioSequenceIntoEntities "B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK" + ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "city" (Just "LOS_ANGELES"), + TaggedEntity (TaggedSpan 4 5) "city" (Just "NEW_YORK")] + it "parse entity" $ do + let (Right ents) = parseBioSequenceIntoEntities "B-surname/BROWN B-surname/SMITH" + ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"), + TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] + it "parse entity" $ do + let (Right ents) = parseBioSequenceIntoEntities "O B-surname/SMITH" + ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] + it "check counting" $ do + gatherCountsForBIO [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"), + TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] `shouldBe` (1, 1, 2) + it "check F1 on a more complicated example" $ do + runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625 + it "calculate F1" $ do + runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5 + it "check perfect score" $ do + runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0 neverMatch :: Char -> Int -> Bool neverMatch _ _ = False diff --git a/test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv b/test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv new file mode 100644 index 0000000..f84fa1d --- /dev/null +++ b/test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv @@ -0,0 +1,6 @@ +B-wrong +B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK +B-surname/BROWN B-surname/SMITH +B-month/JULY B-month/JULY O O B-foo/bar +O B-class I-class I-class +O B-wrong diff --git a/test/bio-f1-complex/bio-f1-complex/config.txt b/test/bio-f1-complex/bio-f1-complex/config.txt new file mode 100644 index 0000000..70977e1 --- /dev/null +++ b/test/bio-f1-complex/bio-f1-complex/config.txt @@ -0,0 +1 @@ +--metric BIO-F1 diff --git a/test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv b/test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv new file mode 100644 index 0000000..c6e719d --- /dev/null +++ b/test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv @@ -0,0 +1,6 @@ +O +B-city/LOS I-city/ANGELES O B-city/NEW_YORK O +O B-surname/SMITH +B-month/JULY O O O B-foo/bar +O B-class I-class I-class +O O diff --git a/test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv b/test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv new file mode 100644 index 0000000..e0fa64b --- /dev/null +++ b/test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv @@ -0,0 +1,4 @@ +O O O +O B-city/NEW I-city/YORK I-city/CITY O B-month/July +B-surname/SMITH +B-city/LONDON B-city/PARIS diff --git a/test/bio-f1-perfect/bio-f1-perfect/config.txt b/test/bio-f1-perfect/bio-f1-perfect/config.txt new file mode 100644 index 0000000..70977e1 --- /dev/null +++ b/test/bio-f1-perfect/bio-f1-perfect/config.txt @@ -0,0 +1 @@ +--metric BIO-F1 diff --git a/test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv b/test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv new file mode 100644 index 0000000..e0fa64b --- /dev/null +++ b/test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv @@ -0,0 +1,4 @@ +O O O +O B-city/NEW I-city/YORK I-city/CITY O B-month/July +B-surname/SMITH +B-city/LONDON B-city/PARIS diff --git a/test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv b/test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv new file mode 100644 index 0000000..0666d27 --- /dev/null +++ b/test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv @@ -0,0 +1,3 @@ +O O B-city/POZNAŃ O O B-date/MARCH I-date/12 +B-city/BUK O O O +B-name/FOO O B-surname/KOWALSKI diff --git a/test/bio-f1-simple/bio-f1-simple/config.txt b/test/bio-f1-simple/bio-f1-simple/config.txt new file mode 100644 index 0000000..70977e1 --- /dev/null +++ b/test/bio-f1-simple/bio-f1-simple/config.txt @@ -0,0 +1 @@ +--metric BIO-F1 diff --git a/test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv b/test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv new file mode 100644 index 0000000..659986b --- /dev/null +++ b/test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv @@ -0,0 +1,3 @@ +O O B-city/POZNAŃ O O B-date/MARCH I-date/12 +O O O O +O B-city/KONIN O From b01f9439b73ba772c9b81ea8ff1f297857ad9c3e Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Wed, 16 May 2018 20:59:40 +0200 Subject: [PATCH 05/24] log probs --- src/GEval/CreateChallenge.hs | 8 ++++++-- src/GEval/LogLossHashed.hs | 12 ++++++++++++ test/Spec.hs | 3 +++ .../test-A/out.tsv | 3 +++ .../log-loss-hashed-normalization/config.txt | 1 + .../test-A/expected.tsv | 3 +++ 6 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv create mode 100644 test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt create mode 100644 test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 09a85e4..3e00993 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -125,8 +125,12 @@ must be given: where *logprobi* is the logarithm of the probability for *wordi* and *logprob0* is the logarithm of the probability mass for all the other words (it will be spread between all 1024 fingerprint values). If the -respective probabilities do not sum up to 1, they will be normalised with -softmax. +respective probabilities do not sum up to 1: + + * if the sum is larger than 0.0 and smaller than 1.0, and no logprob0 + is given, log of the remaining probablity mass will be assigned to logprob0, + * otherwise they will be normalised with. +softmax Note: the separator here is space, not TAB! diff --git a/src/GEval/LogLossHashed.hs b/src/GEval/LogLossHashed.hs index bed3c7b..a4388ee 100644 --- a/src/GEval/LogLossHashed.hs +++ b/src/GEval/LogLossHashed.hs @@ -61,6 +61,7 @@ data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<< + normalizeLogProbs =<< lookForProbs =<< (processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec) @@ -99,6 +100,17 @@ areProbs specs = all isProb specs && any isPositiveProb specs toLogProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb] toLogProbs = map (\(WordSpecWithLogProb w p) -> (WordSpecWithLogProb w (log p))) +normalizeLogProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb] +normalizeLogProbs specs = if isProbTotalIncorrect probTotal + && probTotal < 1.0 && probTotal > 0.0 + && not (any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs) + && all (\(WordSpecWithLogProb _ lp) -> lp <= 0) specs + then + Right ((WordSpecWithLogProb AnyWord (log (1-probTotal))):specs) + else + Right specs + where probTotal = sum $ map (\(WordSpecWithLogProb _ logp) -> exp logp) specs + normalizeProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb] normalizeProbs specs = if isProbTotalIncorrect probTotal then diff --git a/test/Spec.hs b/test/Spec.hs index 8cd27bf..831d6b5 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -100,6 +100,9 @@ main = hspec $ do runGEvalTest "log-loss-hashed-probs" `shouldReturnAlmost` 4.11631293099392 it "with probs instead of log probs (with normalization)" $ do runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853 + it "with log probs whose probs are summing up to less than 1.0" $ do + runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851 + describe "reading options" $ do it "can get the metric" $ do extractMetric "bleu-complex" `shouldReturn` (Just BLEU) diff --git a/test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv b/test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv new file mode 100644 index 0000000..22e39a7 --- /dev/null +++ b/test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv @@ -0,0 +1,3 @@ +B:-1.20397280432594 A:-0.916290731874155 +A:-2.3025850929940 C:-1.6094379124341 +A:-2.3025850929940 C:-1.6094379124341 :-0.356674943938732 diff --git a/test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt new file mode 100644 index 0000000..8121298 --- /dev/null +++ b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt @@ -0,0 +1 @@ +--metric LogLossHashed10 diff --git a/test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv new file mode 100644 index 0000000..3042490 --- /dev/null +++ b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv @@ -0,0 +1,3 @@ +A +B +B From 01b93dd2437ee3fa76395e0df94045359ebe3ebd Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Wed, 16 May 2018 21:00:45 +0200 Subject: [PATCH 06/24] improve help for geval --init --- src/GEval/OptionsParser.hs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index bad86fe..eaabd31 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -169,11 +169,9 @@ initChallenge spec = case gesExpectedDirectory spec of showInitInstructions = do putStrLn [here| Run: - geval --init --expected-directory CHALLENGE + geval --init --expected-directory CHALLENGE --metric METRIC-NAME --precision NUMBER-OF-DIGITS to create a directory CHALLENGE representing a Gonito challenge. -You can specify a metric with `--metric METRIC-NAME` option. - -Note that `--out-directory` option is not taken into account with `--init` option. +(Note that `--out-directory` option is not taken into account with `--init` option.) |] exitFailure From bab4f7d94c0ec078395dd42ed4583c1337cd3921 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Wed, 16 May 2018 21:01:16 +0200 Subject: [PATCH 07/24] bump up version number --- geval.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geval.cabal b/geval.cabal index 23e3672..a5611cb 100644 --- a/geval.cabal +++ b/geval.cabal @@ -1,5 +1,5 @@ name: geval -version: 0.5.5.0 +version: 0.5.6.0 synopsis: Machine learning evaluation tools description: Please see README.md homepage: http://github.com/name/project From 438f0139141674e5ec05c1b39b014f3c6937d5fa Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Thu, 17 May 2018 08:26:57 +0200 Subject: [PATCH 08/24] automatic decompression --- geval.cabal | 7 +++ src/Data/Conduit/AutoDecompress.hs | 44 ++++++++++++++++++ src/GEval/Core.hs | 32 +++++++++++-- test/Spec.hs | 4 ++ .../test-A/out.tsv.bz2 | Bin 0 -> 60 bytes .../charmatch-complex-compressed/config.txt | 1 + .../test-A/expected.tsv.gz | Bin 0 -> 59 bytes .../test-A/in.tsv.xz | Bin 0 -> 84 bytes 8 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 src/Data/Conduit/AutoDecompress.hs create mode 100644 test/charmatch-complex-compressed/charmatch-complex-compressed-solution/test-A/out.tsv.bz2 create mode 100644 test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt create mode 100644 test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/expected.tsv.gz create mode 100644 test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/in.tsv.xz diff --git a/geval.cabal b/geval.cabal index a5611cb..4a2046d 100644 --- a/geval.cabal +++ b/geval.cabal @@ -27,6 +27,7 @@ library , GEval.CharMatch , GEval.LineByLine , GEval.BIO + , Data.Conduit.AutoDecompress build-depends: base >= 4.7 && < 5 , cond , conduit @@ -49,6 +50,12 @@ library , vector , mtl , edit-distance + , bytestring + , word8 + , primitive + , transformers-base + , bzlib-conduit + , lzma-conduit default-language: Haskell2010 executable geval diff --git a/src/Data/Conduit/AutoDecompress.hs b/src/Data/Conduit/AutoDecompress.hs new file mode 100644 index 0000000..eb5a51f --- /dev/null +++ b/src/Data/Conduit/AutoDecompress.hs @@ -0,0 +1,44 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} + +module Data.Conduit.AutoDecompress + (autoDecompress) + where + +import Data.Conduit +import Data.Conduit.Combinators +import Data.ByteString +import Data.Conduit.Zlib +import Data.Word8 +import Control.Monad.Trans.Resource (MonadThrow, MonadResource) +import Control.Monad.Primitive (PrimMonad) +import Control.Monad.Base (MonadBase) +import qualified Data.Conduit.Lzma as XZ +import qualified Data.Conduit.BZlib as BZ + +autoDecompress :: (MonadResource m, MonadBase base m, MonadThrow m, PrimMonad base) => ConduitM ByteString ByteString m () +autoDecompress = do + f <- await + case f of + Just chunk -> if Data.ByteString.length chunk > 1 + then + do + let firstByte = Data.ByteString.head chunk + let secondByte = Data.ByteString.index chunk 1 + leftover chunk + lookAtMagicNumbers (firstByte, secondByte) + else + do + leftover chunk + doNothing + + Nothing -> return () + + +lookAtMagicNumbers :: (MonadResource m, MonadBase base m, MonadThrow m, PrimMonad base) => (Word8, Word8) -> Conduit ByteString m ByteString +lookAtMagicNumbers (31, 139) = ungzip +lookAtMagicNumbers (66, 90) = BZ.bunzip2 +lookAtMagicNumbers (253, 55) = XZ.decompress Nothing +lookAtMagicNumbers _ = doNothing + +doNothing :: Monad m => Conduit ByteString m ByteString +doNothing = Data.Conduit.Combinators.filter (const True) diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 2ba9b27..c5a7194 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -57,7 +57,7 @@ import Data.Tuple import qualified Data.List.Split as DLS import Control.Monad.IO.Class -import Control.Monad ((<=<)) +import Control.Monad ((<=<), filterM) import Data.Attoparsec.Text (parseOnly) @@ -69,6 +69,7 @@ import GEval.ClusteringMetrics import GEval.LogLossHashed import GEval.CharMatch import GEval.BIO +import Data.Conduit.AutoDecompress import qualified Data.HashMap.Strict as M @@ -239,11 +240,14 @@ checkAndGetFiles gevalSpec = do unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory + inputFilePath <- lookForCompressedFiles inputFilePath' + expectedFilePath <- lookForCompressedFiles expectedFilePath' + outFilePath <- lookForCompressedFiles outFilePath' checkInputFileIfNeeded metric inputFilePath return (inputFilePath, expectedFilePath, outFilePath) - where expectedFilePath = expectedTestDirectory (gesExpectedFile gevalSpec) - outFilePath = getOutFile gevalSpec (gesOutFile gevalSpec) - inputFilePath = expectedTestDirectory (gesInputFile gevalSpec) + where expectedFilePath' = expectedTestDirectory (gesExpectedFile gevalSpec) + outFilePath' = getOutFile gevalSpec (gesOutFile gevalSpec) + inputFilePath' = expectedTestDirectory (gesInputFile gevalSpec) expectedTestDirectory = expectedDirectory testName outTestDirectory = outDirectory testName expectedDirectory = getExpectedDirectory gevalSpec @@ -251,6 +255,24 @@ checkAndGetFiles gevalSpec = do testName = gesTestName gevalSpec metric = gesMetric gevalSpec +lookForCompressedFiles :: FilePath -> IO FilePath +lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"] + +lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath +lookForAlternativeFiles suffixes filePath + | takeExtension filePath `Prelude.elem` suffixes = return filePath + | otherwise = do + fileIsThere <- D.doesFileExist filePath + if fileIsThere + then + return filePath + else + do + found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes + return $ case found of + [fp] -> fp + _ -> filePath + getOutFile :: GEvalSpecification -> FilePath -> FilePath getOutFile gevalSpec out = outDirectory testName out where outDirectory = gesOutDirectory gevalSpec @@ -264,7 +286,7 @@ checkInputFileIfNeeded _ _ = return () fileAsLineSource :: FilePath -> LineSource (ResourceT IO) fileAsLineSource filePath = - LineSource (CB.sourceFile filePath $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1 + LineSource (CB.sourceFile filePath $= autoDecompress =$= CT.decodeUtf8Lenient =$= CT.lines) filePath 1 gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue) gevalCoreOnSingleLines metric inpLine expLine outLine = diff --git a/test/Spec.hs b/test/Spec.hs index 831d6b5..5784042 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -252,6 +252,10 @@ main = hspec $ do runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5 it "check perfect score" $ do runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0 + describe "automatic decompression" $ do + it "more complex test" $ do + runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923 + neverMatch :: Char -> Int -> Bool neverMatch _ _ = False diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed-solution/test-A/out.tsv.bz2 b/test/charmatch-complex-compressed/charmatch-complex-compressed-solution/test-A/out.tsv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..f74ec2e50d093e4d690edbcb2850a1a60a5a61c7 GIT binary patch literal 60 zcmV-C0K@-6T4*^jL0KkKSp`@L_5c6|#ee`100J%=&>$cHF#ym3Pf4TEsp$wo*ZVR{ S<3X?~V7rnj!i0b-z)!F!h83m& literal 0 HcmV?d00001 diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt b/test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt new file mode 100644 index 0000000..ceb23f3 --- /dev/null +++ b/test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt @@ -0,0 +1 @@ +--metric CharMatch diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/expected.tsv.gz b/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/expected.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..6457aec741699222c58087eafebbd5a161ded1eb GIT binary patch literal 59 zcmb2|=HSrto^8ysn0>r|u&!&$DOG>v{Q{^Lp~hSLc+^ P6DEd*-XG+p7#J7;aC;T? literal 0 HcmV?d00001 diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/in.tsv.xz b/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/in.tsv.xz new file mode 100644 index 0000000000000000000000000000000000000000..34c5c1eb48ee631df5e893b91e0e66e1902cbea2 GIT binary patch literal 84 zcmexsUKJ6=z`*kC+7>q^21Q0O1_p)_{ill=86>mwa|=pKQj54!QgidOixQJ76S;g7 mi?R~AfNEG&`0S#d9lFZEXe6Z|F~vZB+vi$Fpg03dWE22b;TX#R literal 0 HcmV?d00001 From 1caec3de357be94e4e88100254b5beac8e1c330b Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Thu, 17 May 2018 08:27:15 +0200 Subject: [PATCH 09/24] bump up version --- geval.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geval.cabal b/geval.cabal index 4a2046d..0533593 100644 --- a/geval.cabal +++ b/geval.cabal @@ -1,5 +1,5 @@ name: geval -version: 0.5.6.0 +version: 0.5.7.0 synopsis: Machine learning evaluation tools description: Please see README.md homepage: http://github.com/name/project From 192d531969bc886ae2c636a82d45695aba3908eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Grali=C5=84ski?= Date: Thu, 17 May 2018 15:21:03 +0200 Subject: [PATCH 10/24] add likelihood as evaluation metrics --- src/GEval/Core.hs | 25 ++++++++++++++++++- src/GEval/CreateChallenge.hs | 22 ++++++++++++++++ src/GEval/OptionsParser.hs | 2 +- test/Spec.hs | 6 +++++ .../test-A/out.tsv | 2 ++ .../config.txt | 1 + .../test-A/expected.tsv | 2 ++ .../likelihood-simple-solution/test-A/out.tsv | 4 +++ .../likelihood-simple/config.txt | 1 + .../likelihood-simple/test-A/expected.tsv | 4 +++ 10 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv create mode 100644 test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt create mode 100644 test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv create mode 100644 test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv create mode 100644 test/likelihood-simple/likelihood-simple/config.txt create mode 100644 test/likelihood-simple/likelihood-simple/test-A/expected.tsv diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index c5a7194..bed64b7 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -83,7 +83,7 @@ defaultLogLossHashedSize :: Word32 defaultLogLossHashedSize = 10 data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch - | MAP | LogLoss | BIOF1 + | MAP | LogLoss | Likelihood | BIOF1 | LikelihoodHashed Word32 deriving (Eq) instance Show Metric where @@ -100,9 +100,16 @@ instance Show Metric where "" else (show nbOfBits)) + show (LikelihoodHashed nbOfBits) = "LikelihoodHashed" ++ (if + nbOfBits == defaultLogLossHashedSize + then + "" + else + (show nbOfBits)) show CharMatch = "CharMatch" show MAP = "MAP" show LogLoss = "LogLoss" + show Likelihood = "Likelihood" show BIOF1 = "BIO-F1" instance Read Metric where @@ -118,7 +125,11 @@ instance Read Metric where readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)] _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)] + readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of + [(nbOfBits, theRest)] -> [(LikelihoodHashed nbOfBits, theRest)] + _ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)] readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)] + readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)] readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)] readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] @@ -134,9 +145,11 @@ getMetricOrdering ClippEU = TheHigherTheBetter getMetricOrdering (FMeasure _) = TheHigherTheBetter getMetricOrdering NMI = TheHigherTheBetter getMetricOrdering (LogLossHashed _) = TheLowerTheBetter +getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter getMetricOrdering CharMatch = TheHigherTheBetter getMetricOrdering MAP = TheHigherTheBetter getMetricOrdering LogLoss = TheLowerTheBetter +getMetricOrdering Likelihood = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter defaultOutDirectory = "." @@ -308,6 +321,8 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do (fileAsLineSource expectedFilePath) (fileAsLineSource outFilePath) +logLossToLikehood logLoss = exp (-logLoss) + gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) @@ -317,6 +332,14 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource return $ mse ** 0.5 +gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource = do + logLoss <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource + return $ logLossToLikehood logLoss + +gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource = do + logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource + return $ logLossToLikehood logLoss + gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do gevalCore' metric inputLineSource expectedLineSource outLineSource diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 3e00993..0d141d5 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -101,6 +101,8 @@ Cluster proverbs for languages. This is a sample challenge for flat clustering (unsupervised learning challenge). |] ++ (commonReadmeMDContents testName) +readmeMDContents (LikelihoodHashed b) testname = readmeMDContents (LogLossHashed b) testname + readmeMDContents (LogLossHashed _) testName = [i| GEval sample challenge — language model evaluation ================================================== @@ -203,6 +205,16 @@ This a sample challenge for the log-loss metric. |] ++ (commonReadmeMDContents testName) +readmeMDContents Likelihood testName = [i| +Give the probability of a positive sentiment +============================================ + +Give the probability that a sentence expresses a positive sentiment. + +This a sample challenge for the likelihood metric. + +|] ++ (commonReadmeMDContents testName) + readmeMDContents BIOF1 testName = [i| Tag and normalize names ======================= @@ -284,6 +296,7 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada. en The pen is mightier than the sword. pl Baba z wozu, koniom lżej. |] +trainContents (LikelihoodHashed b) = trainContents (LogLossHashed b) trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota Basia ma psa Nie kupujemy kota w worku @@ -299,6 +312,7 @@ honour GB honor titbit GB smakołyk tidbit US smakołyk |] +trainContents Likelihood = trainContents LogLoss trainContents LogLoss = [hereLit|0.0 Hell, no!!! 0.0 I hate this stuff 1.0 Lekker!!! @@ -328,6 +342,7 @@ When the going gets tough, the tough get going. devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0 b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000 |] +devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b) devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku Ona psa |] @@ -339,6 +354,7 @@ devInContents MAP = [hereLit|US noc GB wózek dziecięcy GB wizualizować |] +devInContents Likelihood = devInContents LogLoss devInContents LogLoss = [hereLit|Great stuff! Boring stuff That's good @@ -364,6 +380,7 @@ devExpectedContents NMI = [hereLit|en pl en |] +devExpectedContents (LikelihoodHashed b) = devExpectedContents (LogLossHashed b) devExpectedContents (LogLossHashed _) = [hereLit|kota ma |] @@ -375,6 +392,7 @@ devExpectedContents MAP = [hereLit|night nite pram visualise |] +devExpectedContents Likelihood = devExpectedContents LogLoss devExpectedContents LogLoss = [hereLit|1.0 0.0 1.0 @@ -402,6 +420,7 @@ W marcu, jak w garncu. A cada necio agrada su porrada. Kwiecień plecień, bo przeplata trochę zimy, trochę lata. |] +testInContents (LikelihoodHashed b) = testInContents (LogLossHashed b) testInContents (LogLossHashed _) = [hereLit|Ala ma Ona ma kota worku |] @@ -413,6 +432,7 @@ testInContents MAP = [hereLit|US wózek dziecięcy GB słoń US słoń |] +testInContents Likelihood = testInContents LogLoss testInContents LogLoss = [hereLit|That's great, ha, ha, I love it! Super-duper!! That is incredibly boring. @@ -440,6 +460,7 @@ pl es pl |] +testExpectedContents (LikelihoodHashed b) = testExpectedContents (LogLossHashed b) testExpectedContents (LogLossHashed _) = [hereLit|ma w |] @@ -451,6 +472,7 @@ testExpectedContents MAP = [hereLit|trolley elephant elephant |] +testExpectedContents Likelihood = testExpectedContents LogLoss testExpectedContents LogLoss = [hereLit|1.0 1.0 0.0 diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index eaabd31..d8ce4e7 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -100,7 +100,7 @@ metricReader = option auto <> value defaultMetric <> showDefault <> metavar "METRIC" - <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, BIO-F1 or CharMatch" ) + <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1 or CharMatch" ) runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval args = do diff --git a/test/Spec.hs b/test/Spec.hs index 5784042..68f595c 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -102,6 +102,9 @@ main = hspec $ do runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853 it "with log probs whose probs are summing up to less than 1.0" $ do runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851 + describe "LikelihoodHashed challenge" $ do + it "example with unnormalized values" $ do + runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715 describe "reading options" $ do it "can get the metric" $ do @@ -190,6 +193,9 @@ main = hspec $ do runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824 it "perfect" $ do runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0 + describe "Likelihood" $ do + it "simple" $ do + runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866 describe "evaluating single lines" $ do it "RMSE" $ do gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla") diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv new file mode 100644 index 0000000..02c35e7 --- /dev/null +++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv @@ -0,0 +1,2 @@ +tak:10 nie:8.9 +niebieski:0 żółty:1.5 czerwony:-0.5 diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt new file mode 100644 index 0000000..00b7f18 --- /dev/null +++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt @@ -0,0 +1 @@ +--metric LikelihoodHashed8 diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv new file mode 100644 index 0000000..70d9d14 --- /dev/null +++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv @@ -0,0 +1,2 @@ +tak +niebieski diff --git a/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv b/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv new file mode 100644 index 0000000..6978be2 --- /dev/null +++ b/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv @@ -0,0 +1,4 @@ +0.7 +0 +0.0 +0.6 diff --git a/test/likelihood-simple/likelihood-simple/config.txt b/test/likelihood-simple/likelihood-simple/config.txt new file mode 100644 index 0000000..34e7898 --- /dev/null +++ b/test/likelihood-simple/likelihood-simple/config.txt @@ -0,0 +1 @@ +--metric Likelihood diff --git a/test/likelihood-simple/likelihood-simple/test-A/expected.tsv b/test/likelihood-simple/likelihood-simple/test-A/expected.tsv new file mode 100644 index 0000000..968ac3e --- /dev/null +++ b/test/likelihood-simple/likelihood-simple/test-A/expected.tsv @@ -0,0 +1,4 @@ +1 +0 +0 +0 From 862ca9fd3ba9a9008f1c3ab9ad86a39bc6339417 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Thu, 17 May 2018 21:17:57 +0200 Subject: [PATCH 11/24] use newer stack --- stack.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stack.yaml b/stack.yaml index 1d9e84d..042d516 100644 --- a/stack.yaml +++ b/stack.yaml @@ -2,4 +2,4 @@ flags: {} packages: - '.' extra-deps: [cond-0.4.1.1,murmur3-1.0.3] -resolver: lts-9.14 +resolver: lts-9.21 From 3e201d11ef9b339797ba5f1a92ded05ef5f31a28 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 19 May 2018 13:49:53 +0200 Subject: [PATCH 12/24] update for Stack LTS 11.9 --- geval.cabal | 2 +- src/Data/Conduit/AutoDecompress.hs | 4 ++-- src/GEval/Core.hs | 16 ++++++++-------- stack.yaml | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/geval.cabal b/geval.cabal index 0533593..97c5847 100644 --- a/geval.cabal +++ b/geval.cabal @@ -30,7 +30,7 @@ library , Data.Conduit.AutoDecompress build-depends: base >= 4.7 && < 5 , cond - , conduit + , conduit >= 1.3.0 , conduit-combinators , conduit-extra , directory diff --git a/src/Data/Conduit/AutoDecompress.hs b/src/Data/Conduit/AutoDecompress.hs index eb5a51f..2e92402 100644 --- a/src/Data/Conduit/AutoDecompress.hs +++ b/src/Data/Conduit/AutoDecompress.hs @@ -15,7 +15,7 @@ import Control.Monad.Base (MonadBase) import qualified Data.Conduit.Lzma as XZ import qualified Data.Conduit.BZlib as BZ -autoDecompress :: (MonadResource m, MonadBase base m, MonadThrow m, PrimMonad base) => ConduitM ByteString ByteString m () +autoDecompress :: (MonadResource m, MonadThrow m, PrimMonad m) => ConduitM ByteString ByteString m () autoDecompress = do f <- await case f of @@ -34,7 +34,7 @@ autoDecompress = do Nothing -> return () -lookAtMagicNumbers :: (MonadResource m, MonadBase base m, MonadThrow m, PrimMonad base) => (Word8, Word8) -> Conduit ByteString m ByteString +lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> Conduit ByteString m ByteString lookAtMagicNumbers (31, 139) = ungzip lookAtMagicNumbers (66, 90) = BZ.bunzip2 lookAtMagicNumbers (253, 55) = XZ.decompress Nothing diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index bed64b7..e0a110c 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -323,7 +323,7 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do logLossToLikehood logLoss = exp (-logLoss) -gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric +gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) @@ -345,7 +345,7 @@ gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do data LineInFile = LineInFile FilePath Word32 Text -gevalCore' :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue) +gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue) gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id where outParser = getValue . TR.double @@ -447,25 +447,25 @@ data SourceItem a = Got a | Wrong String | Done skipLineNumber :: (x -> c) -> ((Word32, x) -> c) skipLineNumber fun = fun . snd -gevalCoreWithoutInput :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue) +gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue) gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream = gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream) where trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c trans step (ParsedRecordWithoutInput x y) = step (x, y) -gevalCore''' :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue) +gevalCore''' :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue) gevalCore''' parserSpec itemStep aggregator finalStep context = gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep context where trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y)) -gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue) +gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue) gevalCoreGeneralized parserSpec itemStep aggregator finalStep context = gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context -gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue) +gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue) gevalCoreGeneralized' parserSpec itemStep aggregator finalStep context = do v <- runResourceT $ (((getZipSource $ (,) @@ -486,7 +486,7 @@ class EvaluationContext ctxt m where data WithoutInput m e o = WithoutInput (LineSource (ResourceT m)) (LineSource (ResourceT m)) -instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where +instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where data ParserSpec (WithoutInput m e o) = ParserSpecWithoutInput (Text -> Either String e) (Text -> Either String o) data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o) data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o @@ -515,7 +515,7 @@ data WithInput m i e o = WithInput (LineSource (ResourceT m)) (LineSource (Resou getInputFilePath (WithInput (LineSource _ inputFilePath _) _ _) = inputFilePath -instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where +instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where data ParserSpec (WithInput m i e o) = ParserSpecWithInput (Text -> Either String i) (Text -> Either String e) (Text -> Either String o) data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o) data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o diff --git a/stack.yaml b/stack.yaml index 042d516..86aa566 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,5 +1,5 @@ flags: {} packages: - '.' -extra-deps: [cond-0.4.1.1,murmur3-1.0.3] -resolver: lts-9.21 +extra-deps: [murmur3-1.0.3] +resolver: lts-11.9 From 881a77e2399f0f4057ea22cc9c87cb77ce9bb4ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Grali=C5=84ski?= Date: Fri, 25 May 2018 14:44:19 +0200 Subject: [PATCH 13/24] better diagnostic messages for BIO --- src/GEval/BIO.hs | 9 ++++++++- test/Spec.hs | 2 ++ test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv | 2 ++ test/bio-f1-error/bio-f1-error/config.txt | 1 + test/bio-f1-error/bio-f1-error/test-A/expected.tsv | 2 ++ 5 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv create mode 100644 test/bio-f1-error/bio-f1-error/config.txt create mode 100644 test/bio-f1-error/bio-f1-error/test-A/expected.tsv diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs index a3e8819..f213ab4 100644 --- a/src/GEval/BIO.hs +++ b/src/GEval/BIO.hs @@ -19,6 +19,13 @@ import GEval.Common data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text) deriving (Eq, Show) +formatBioLabel :: BIOLabel -> T.Text +formatBioLabel Outside = "O" +formatBioLabel (Beginning label Nothing) = T.concat ["B-", label] +formatBioLabel (Beginning label (Just normalized)) = T.concat ["B-", label, "/", normalized] +formatBioLabel (Inside label Nothing) = T.concat ["I-", label] +formatBioLabel (Inside label (Just normalized)) = T.concat ["I-", label, "/", normalized] + data TaggedSpan = TaggedSpan Int Int deriving (Eq, Show) @@ -45,7 +52,7 @@ labelSplitToEntity labs@(h@(_,begIx):t) = if isBeginning h && all (\tp -> isInsi then Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized else - Left "something wrong with label sequence" + Left $ "inconsistent label sequence `" ++ (T.unpack $ T.intercalate " " $ map (formatBioLabel . fst) labs) ++ "`" where isBeginning (Beginning _ _, _) = True isBeginning _ = False isInside (Inside _ _, _) = True diff --git a/test/Spec.hs b/test/Spec.hs index 68f595c..d183bc0 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -258,6 +258,8 @@ main = hspec $ do runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5 it "check perfect score" $ do runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0 + it "check inconsistent input" $ do + runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`") describe "automatic decompression" $ do it "more complex test" $ do runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923 diff --git a/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv b/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv new file mode 100644 index 0000000..5b95d34 --- /dev/null +++ b/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv @@ -0,0 +1,2 @@ +O B-CITY/WARSZAWA I-CITY/WARSZAWA +O B-NAME/JOHN I-FOO/SMITH I-FOO/X O diff --git a/test/bio-f1-error/bio-f1-error/config.txt b/test/bio-f1-error/bio-f1-error/config.txt new file mode 100644 index 0000000..70977e1 --- /dev/null +++ b/test/bio-f1-error/bio-f1-error/config.txt @@ -0,0 +1 @@ +--metric BIO-F1 diff --git a/test/bio-f1-error/bio-f1-error/test-A/expected.tsv b/test/bio-f1-error/bio-f1-error/test-A/expected.tsv new file mode 100644 index 0000000..f6465b5 --- /dev/null +++ b/test/bio-f1-error/bio-f1-error/test-A/expected.tsv @@ -0,0 +1,2 @@ +O B-CITY/WARSZAWA I-CITY/WARSZAWA +O B-NAME/JOHN I-NAME/SMITH O O From c71c7a019d7feb711687ebbec5e87925b42c28ac Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 26 May 2018 13:09:06 +0200 Subject: [PATCH 14/24] remove warning in LineByLine.hs --- src/GEval/LineByLine.hs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index 9afdf83..d44e9d4 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -35,8 +35,8 @@ runLineByLine spec = do (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum where metric = gesMetric spec - consum :: Consumer LineRecord (ResourceT IO) () - consum = (CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout) + consum :: ConduitT LineRecord Void (ResourceT IO) () + consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [ formatScore score, escapeTabs inp, @@ -51,13 +51,13 @@ runDiff otherOut spec = do (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath - runResourceT $ + runResourceT $ runConduit $ ((getZipSource $ (,) <$> ZipSource sourceA - <*> ZipSource sourceB) $$ consum) + <*> ZipSource sourceB) .| consum) where metric = gesMetric spec - consum :: Consumer (LineRecord, LineRecord) (ResourceT IO) () - consum = (CL.filter shouldBeShown =$= CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout) + consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () + consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) = outA /= outB && scoreA /= scoreB formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [ @@ -72,16 +72,16 @@ runDiff otherOut spec = do escapeTabs :: Text -> Text escapeTabs = Data.Text.replace "\t" "" -gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> Sink LineRecord (ResourceT IO) () -> IO () +gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) () -> IO () gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum = - runResourceT $ - ((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) $$ consum) + runResourceT $ runConduit $ + ((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum) -gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> Source (ResourceT IO) LineRecord +gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT () LineRecord (ResourceT IO) () gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath = (getZipSource $ (,) <$> ZipSource (CL.sourceList [1..]) - <*> (ZipSource $ recordSource context parserSpec)) =$= CL.mapM (checkStepM evaluateLine) =$= CL.catMaybes + <*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id)) context = (WithInput inputLineSource expectedLineSource outputLineSource) inputLineSource = fileAsLineSource inputFilePath From cb655cd2ae0fb4cb7dd7cf9ebcf9a57e2d33ee2e Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 26 May 2018 14:40:26 +0200 Subject: [PATCH 15/24] refactor LineByLine --- src/GEval/LineByLine.hs | 42 ++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index d44e9d4..b2ee4b3 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -9,7 +9,9 @@ module GEval.LineByLine (runLineByLine, - runDiff + runLineByLineGeneralized, + runDiff, + runDiffGeneralized ) where import GEval.Core @@ -31,11 +33,8 @@ data LineRecord = LineRecord Text Text Text Word32 MetricValue deriving (Eq, Show) runLineByLine :: GEvalSpecification -> IO () -runLineByLine spec = do - (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec - gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum - where metric = gesMetric spec - consum :: ConduitT LineRecord Void (ResourceT IO) () +runLineByLine spec = runLineByLineGeneralized spec consum + where consum :: ConduitT LineRecord Void (ResourceT IO) () consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [ formatScore score, @@ -45,18 +44,15 @@ runLineByLine spec = do formatScore :: MetricValue -> Text formatScore = Data.Text.pack . printf "%f" -runDiff :: FilePath -> GEvalSpecification -> IO () -runDiff otherOut spec = do - let otherOutFilePath = getOutFile spec otherOut +runLineByLineGeneralized :: GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a +runLineByLineGeneralized spec consum = do (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec - let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath - let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath - runResourceT $ runConduit $ - ((getZipSource $ (,) - <$> ZipSource sourceA - <*> ZipSource sourceB) .| consum) + gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum where metric = gesMetric spec - consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () + +runDiff :: FilePath -> GEvalSpecification -> IO () +runDiff otherOut spec = runDiffGeneralized otherOut spec consum + where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) = outA /= outB && scoreA /= scoreB @@ -69,10 +65,22 @@ runDiff otherOut spec = do formatScoreDiff :: Double -> Text formatScoreDiff = Data.Text.pack . printf "%f" +runDiffGeneralized :: FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a +runDiffGeneralized otherOut spec consum = do + let otherOutFilePath = getOutFile spec otherOut + (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec + let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath + let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath + runResourceT $ runConduit $ + ((getZipSource $ (,) + <$> ZipSource sourceA + <*> ZipSource sourceB) .| consum) + where metric = gesMetric spec + escapeTabs :: Text -> Text escapeTabs = Data.Text.replace "\t" "" -gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) () -> IO () +gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) a -> IO a gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum = runResourceT $ runConduit $ ((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum) From f68223409e32a7313d69a8263923880fd2610ab9 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 26 May 2018 21:10:22 +0200 Subject: [PATCH 16/24] add test for the line-by-line mode --- geval.cabal | 1 + src/GEval/LineByLine.hs | 3 ++- test/Spec.hs | 21 ++++++++++++++++++- .../likelihood-simple/test-A/in.tsv | 4 ++++ 4 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 test/likelihood-simple/likelihood-simple/test-A/in.tsv diff --git a/geval.cabal b/geval.cabal index 97c5847..08ce371 100644 --- a/geval.cabal +++ b/geval.cabal @@ -80,6 +80,7 @@ test-suite geval-test , text , attoparsec , edit-distance + , conduit ghc-options: -threaded -rtsopts -with-rtsopts=-N default-language: Haskell2010 diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index b2ee4b3..fe6aa30 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -11,7 +11,8 @@ module GEval.LineByLine (runLineByLine, runLineByLineGeneralized, runDiff, - runDiffGeneralized + runDiffGeneralized, + LineRecord(..) ) where import GEval.Core diff --git a/test/Spec.hs b/test/Spec.hs index d183bc0..c1b24c8 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -9,11 +9,14 @@ import GEval.ClippEU import GEval.PrecisionRecall import GEval.ClusteringMetrics import GEval.BIO +import GEval.LineByLine import Data.Attoparsec.Text import Options.Applicative import Data.Text import Text.EditDistance +import Data.Conduit.List (consume) + import qualified Test.HUnit as HU informationRetrievalBookExample :: [(String, Int)] @@ -263,7 +266,23 @@ main = hspec $ do describe "automatic decompression" $ do it "more complex test" $ do runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923 - + describe "line by line mode" $ do + let sampleChallenge = + GEvalSpecification + { gesOutDirectory = "test/likelihood-simple/likelihood-simple-solution", + gesExpectedDirectory = Just "test/likelihood-simple/likelihood-simple", + gesTestName = "test-A", + gesOutFile = "out.tsv", + gesExpectedFile = "expected.tsv", + gesInputFile = "in.tsv", + gesMetric = Likelihood, + gesPrecision = Nothing } + it "simple test" $ do + results <- runLineByLineGeneralized sampleChallenge Data.Conduit.List.consume + Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo", + "bar", + "baz", + "baq"] neverMatch :: Char -> Int -> Bool neverMatch _ _ = False diff --git a/test/likelihood-simple/likelihood-simple/test-A/in.tsv b/test/likelihood-simple/likelihood-simple/test-A/in.tsv new file mode 100644 index 0000000..655a059 --- /dev/null +++ b/test/likelihood-simple/likelihood-simple/test-A/in.tsv @@ -0,0 +1,4 @@ +foo +bar +baz +baq From ab1056301eccfd0bf4459297544eda6a8426a43e Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Mon, 28 May 2018 09:45:08 +0200 Subject: [PATCH 17/24] add sorting for --line-by-line internally --- src/Data/Conduit/AutoDecompress.hs | 7 +++--- src/GEval/LineByLine.hs | 38 ++++++++++++++++++++++-------- src/GEval/OptionsParser.hs | 4 ++-- test/Spec.hs | 6 ++++- 4 files changed, 39 insertions(+), 16 deletions(-) diff --git a/src/Data/Conduit/AutoDecompress.hs b/src/Data/Conduit/AutoDecompress.hs index 2e92402..b959d6f 100644 --- a/src/Data/Conduit/AutoDecompress.hs +++ b/src/Data/Conduit/AutoDecompress.hs @@ -1,7 +1,8 @@ {-# LANGUAGE AllowAmbiguousTypes #-} module Data.Conduit.AutoDecompress - (autoDecompress) + (autoDecompress, + doNothing) where import Data.Conduit @@ -34,11 +35,11 @@ autoDecompress = do Nothing -> return () -lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> Conduit ByteString m ByteString +lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> ConduitT ByteString ByteString m () lookAtMagicNumbers (31, 139) = ungzip lookAtMagicNumbers (66, 90) = BZ.bunzip2 lookAtMagicNumbers (253, 55) = XZ.decompress Nothing lookAtMagicNumbers _ = doNothing -doNothing :: Monad m => Conduit ByteString m ByteString +doNothing :: Monad m => ConduitT a a m () doNothing = Data.Conduit.Combinators.filter (const True) diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index fe6aa30..91848db 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -12,17 +12,22 @@ module GEval.LineByLine runLineByLineGeneralized, runDiff, runDiffGeneralized, - LineRecord(..) + LineRecord(..), + ResultOrdering(..) ) where import GEval.Core +import Data.Conduit.AutoDecompress (doNothing) + import Data.Conduit import qualified Data.Conduit.List as CL import qualified Data.Conduit.Combinators as CC import Data.Text import Data.Text.Encoding +import Data.List (sortBy, sort) + import Control.Monad.IO.Class import Control.Monad.Trans.Resource @@ -33,8 +38,10 @@ import Text.Printf data LineRecord = LineRecord Text Text Text Word32 MetricValue deriving (Eq, Show) -runLineByLine :: GEvalSpecification -> IO () -runLineByLine spec = runLineByLineGeneralized spec consum +data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest + +runLineByLine :: ResultOrdering -> GEvalSpecification -> IO () +runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum where consum :: ConduitT LineRecord Void (ResourceT IO) () consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [ @@ -45,14 +52,25 @@ runLineByLine spec = runLineByLineGeneralized spec consum formatScore :: MetricValue -> Text formatScore = Data.Text.pack . printf "%f" -runLineByLineGeneralized :: GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a -runLineByLineGeneralized spec consum = do +runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a +runLineByLineGeneralized ordering spec consum = do (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec - gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum + gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum) where metric = gesMetric spec + sorter KeepTheOriginalOrder = doNothing + sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric)) + sortOrder FirstTheWorst TheHigherTheBetter = compareScores + sortOrder FirstTheBest TheLowerTheBetter = compareScores + sortOrder _ _ = flip compareScores + compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2 -runDiff :: FilePath -> GEvalSpecification -> IO () -runDiff otherOut spec = runDiffGeneralized otherOut spec consum +gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m () +gobbleAndDo fun = do + l <- CC.sinkList + CC.yieldMany $ fun l + +runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO () +runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) () consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout) shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) = @@ -66,8 +84,8 @@ runDiff otherOut spec = runDiffGeneralized otherOut spec consum formatScoreDiff :: Double -> Text formatScoreDiff = Data.Text.pack . printf "%f" -runDiffGeneralized :: FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a -runDiffGeneralized otherOut spec consum = do +runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a +runDiffGeneralized ordering otherOut spec consum = do let otherOutFilePath = getOutFile spec otherOut (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index d8ce4e7..76d6808 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -155,10 +155,10 @@ runGEval''' (Just Init) spec = do initChallenge spec return Nothing runGEval''' (Just LineByLine) spec = do - runLineByLine spec + runLineByLine KeepTheOriginalOrder spec return Nothing runGEval''' (Just (Diff otherOut)) spec = do - runDiff otherOut spec + runDiff KeepTheOriginalOrder otherOut spec return Nothing initChallenge :: GEvalSpecification -> IO () diff --git a/test/Spec.hs b/test/Spec.hs index c1b24c8..b8dac23 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -278,11 +278,15 @@ main = hspec $ do gesMetric = Likelihood, gesPrecision = Nothing } it "simple test" $ do - results <- runLineByLineGeneralized sampleChallenge Data.Conduit.List.consume + results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo", "bar", "baz", "baq"] + it "test sorting" $ do + results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume + Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq" + neverMatch :: Char -> Int -> Bool neverMatch _ _ = False From 3f7384f46748f6e900b422ac8513bbf239f64df9 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Mon, 28 May 2018 10:04:27 +0200 Subject: [PATCH 18/24] add --sort and --reverse-sort options --- geval.cabal | 2 +- src/GEval/Core.hs | 5 ++++- src/GEval/LineByLine.hs | 13 ++++++++++--- src/GEval/OptionsParser.hs | 26 ++++++++++++++++++-------- 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/geval.cabal b/geval.cabal index 08ce371..e51ad70 100644 --- a/geval.cabal +++ b/geval.cabal @@ -1,5 +1,5 @@ name: geval -version: 0.5.7.0 +version: 0.6.0.0 synopsis: Machine learning evaluation tools description: Please see README.md homepage: http://github.com/name/project diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index e0a110c..37cb237 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -16,6 +16,7 @@ module GEval.Core MetricValue, GEvalSpecialCommand(..), GEvalSpecification(..), + ResultOrdering(..), GEvalOptions(..), GEvalException(..), defaultGEvalSpecification, @@ -180,11 +181,13 @@ getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec data GEvalSpecialCommand = Init | LineByLine | Diff FilePath +data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest + data GEvalOptions = GEvalOptions { geoSpecialCommand :: Maybe GEvalSpecialCommand, + geoResultOrdering :: ResultOrdering, geoSpec :: GEvalSpecification } - data GEvalException = NoExpectedFile FilePath | NoOutFile FilePath | NoExpectedDirectory FilePath diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index 91848db..74f053b 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -38,8 +38,6 @@ import Text.Printf data LineRecord = LineRecord Text Text Text Word32 MetricValue deriving (Eq, Show) -data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest - runLineByLine :: ResultOrdering -> GEvalSpecification -> IO () runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum where consum :: ConduitT LineRecord Void (ResourceT IO) () @@ -93,8 +91,17 @@ runDiffGeneralized ordering otherOut spec consum = do runResourceT $ runConduit $ ((getZipSource $ (,) <$> ZipSource sourceA - <*> ZipSource sourceB) .| consum) + <*> ZipSource sourceB) .| sorter ordering .| consum) where metric = gesMetric spec + sorter KeepTheOriginalOrder = doNothing + sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric)) + sortOrder FirstTheWorst TheHigherTheBetter = compareScores + sortOrder FirstTheBest TheLowerTheBetter = compareScores + sortOrder _ _ = flip compareScores + compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1)) + ((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2)) + = (n1 - o1) `compare` (n2 - o2) + escapeTabs :: Text -> Text escapeTabs = Data.Text.replace "\t" "" diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 76d6808..dbdd6d6 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -41,6 +41,16 @@ optionsParser = GEvalOptions <> short 'd' <> metavar "OTHER-OUT" <> help "compare results"))) + <*> ((flag' FirstTheWorst + (long "sort" + <> short 's' + <> help "When in line-by-line or diff mode, sort the results from the worst to the best")) + <|> + (flag' FirstTheBest + (long "reverse-sort" + <> short 'r' + <> help "When in line-by-line or diff mode, sort the results from the best to the worst")) + <|> pure KeepTheOriginalOrder) <*> specParser precisionArgParser :: Parser Int @@ -145,20 +155,20 @@ attemptToReadOptsFromConfigFile args opts = do runGEval'' :: GEvalOptions -> IO (Maybe MetricValue) -runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoSpec opts) +runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoResultOrdering opts) (geoSpec opts) -runGEval''' :: Maybe GEvalSpecialCommand -> GEvalSpecification -> IO (Maybe MetricValue) -runGEval''' Nothing spec = do +runGEval''' :: Maybe GEvalSpecialCommand -> ResultOrdering -> GEvalSpecification -> IO (Maybe MetricValue) +runGEval''' Nothing _ spec = do val <- geval spec return $ Just val -runGEval''' (Just Init) spec = do +runGEval''' (Just Init) _ spec = do initChallenge spec return Nothing -runGEval''' (Just LineByLine) spec = do - runLineByLine KeepTheOriginalOrder spec +runGEval''' (Just LineByLine) ordering spec = do + runLineByLine ordering spec return Nothing -runGEval''' (Just (Diff otherOut)) spec = do - runDiff KeepTheOriginalOrder otherOut spec +runGEval''' (Just (Diff otherOut)) ordering spec = do + runDiff ordering otherOut spec return Nothing initChallenge :: GEvalSpecification -> IO () From 65e8d2562e31c4305f52cf699c778abe99a44ef2 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Tue, 29 May 2018 20:59:00 +0200 Subject: [PATCH 19/24] underscores can be used in the BIO format --- src/GEval/BIO.hs | 2 +- test/Spec.hs | 2 ++ .../bio-f1-simple-underscores-solution/test-A/out.tsv | 3 +++ .../bio-f1-simple-underscores/config.txt | 1 + .../bio-f1-simple-underscores/test-A/expected.tsv | 3 +++ 5 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv create mode 100644 test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt create mode 100644 test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs index f213ab4..7b7b601 100644 --- a/src/GEval/BIO.hs +++ b/src/GEval/BIO.hs @@ -93,7 +93,7 @@ bioLabelParser = (string "O" *> pure Outside) <|> (do labelType <- bioMarkerParser - string "-" + (string "-" <|> string "_") label <- takeWhile1 (\c -> not (isSpace c) && c /= '/') normalized <- (do string "/" diff --git a/test/Spec.hs b/test/Spec.hs index b8dac23..83572e6 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -259,6 +259,8 @@ main = hspec $ do runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625 it "calculate F1" $ do runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5 + it "calculate F1 with underscores rather than minus signs" $ do + runGEvalTest "bio-f1-simple-underscores" `shouldReturnAlmost` 0.5 it "check perfect score" $ do runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0 it "check inconsistent input" $ do diff --git a/test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv b/test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv new file mode 100644 index 0000000..4380412 --- /dev/null +++ b/test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv @@ -0,0 +1,3 @@ +O O B_city/POZNAŃ O O B_date/MARCH I_date/12 +B_city/BUK O O O +B_name/FOO O B_surname/KOWALSKI diff --git a/test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt new file mode 100644 index 0000000..70977e1 --- /dev/null +++ b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt @@ -0,0 +1 @@ +--metric BIO-F1 diff --git a/test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv new file mode 100644 index 0000000..31de6dc --- /dev/null +++ b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv @@ -0,0 +1,3 @@ +O O B_city/POZNAŃ O O B_date/MARCH I_date/12 +O O O O +O B_city/KONIN O From 4768931221f23ef83997104048a63f5282ef84e3 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Tue, 29 May 2018 22:04:19 +0200 Subject: [PATCH 20/24] add BIO-F1-Labels metric --- src/GEval/BIO.hs | 7 ++++++- src/GEval/Core.hs | 10 +++++++++- src/GEval/CreateChallenge.hs | 6 ++++++ src/GEval/OptionsParser.hs | 2 +- test/Spec.hs | 2 ++ .../bio-f1-complex-labels-solution/test-A/out.tsv | 6 ++++++ .../bio-f1-complex-labels/config.txt | 1 + .../bio-f1-complex-labels/test-A/expected.tsv | 6 ++++++ 8 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv create mode 100644 test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt create mode 100644 test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs index 7b7b601..1f4b75b 100644 --- a/src/GEval/BIO.hs +++ b/src/GEval/BIO.hs @@ -1,7 +1,9 @@ {-# LANGUAGE OverloadedStrings #-} module GEval.BIO - (BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities, TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO) + (BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities, + TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO, + eraseNormalisation) where import GEval.PrecisionRecall @@ -32,6 +34,9 @@ data TaggedSpan = TaggedSpan Int Int data TaggedEntity = TaggedEntity TaggedSpan T.Text (Maybe T.Text) deriving (Eq, Show) +eraseNormalisation :: TaggedEntity -> TaggedEntity +eraseNormalisation (TaggedEntity span label normalized) = (TaggedEntity span label Nothing) + gatherCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> (Int, Int, Int) gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, length expected, length got) where diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 37cb237..1353aab 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -84,7 +84,7 @@ defaultLogLossHashedSize :: Word32 defaultLogLossHashedSize = 10 data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch - | MAP | LogLoss | Likelihood | BIOF1 | LikelihoodHashed Word32 + | MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 deriving (Eq) instance Show Metric where @@ -112,6 +112,7 @@ instance Show Metric where show LogLoss = "LogLoss" show Likelihood = "Likelihood" show BIOF1 = "BIO-F1" + show BIOF1Labels = "BIO-F1-Labels" instance Read Metric where readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)] @@ -133,6 +134,7 @@ instance Read Metric where readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)] readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)] readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] + readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter @@ -152,6 +154,7 @@ getMetricOrdering MAP = TheHigherTheBetter getMetricOrdering LogLoss = TheLowerTheBetter getMetricOrdering Likelihood = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter +getMetricOrdering BIOF1Labels = TheHigherTheBetter defaultOutDirectory = "." defaultTestName = "test-A" @@ -437,6 +440,11 @@ gevalCore' CharMatch inputLineSource = helper inputLineSource gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts +gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts + where parseBioSequenceIntoEntitiesWithoutNormalization s = do + entities <- parseBioSequenceIntoEntities s + return $ Prelude.map eraseNormalisation entities + countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int) countAgg = CC.foldl countFolder (0, 0, 0) diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 0d141d5..8880afa 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -215,6 +215,7 @@ This a sample challenge for the likelihood metric. |] ++ (commonReadmeMDContents testName) +readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName readmeMDContents BIOF1 testName = [i| Tag and normalize names ======================= @@ -318,6 +319,7 @@ trainContents LogLoss = [hereLit|0.0 Hell, no!!! 1.0 Lekker!!! 0.0 Boring, boring, boring |] +trainContents BIOF1Labels = trainContents BIOF1 trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond O O O O O There is no name here B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman @@ -359,6 +361,7 @@ devInContents LogLoss = [hereLit|Great stuff! Boring stuff That's good |] +devInContents BIOF1Labels = devInContents BIOF1 devInContents BIOF1 = [hereLit|Adam and Eve Mr Jan Kowalski |] @@ -397,6 +400,7 @@ devExpectedContents LogLoss = [hereLit|1.0 0.0 1.0 |] +devExpectedContents BIOF1Labels = devExpectedContents BIOF1 devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE O B-firstname/JAN B-surname/KOWALSKI |] @@ -437,6 +441,7 @@ testInContents LogLoss = [hereLit|That's great, ha, ha, I love it! Super-duper!! That is incredibly boring. |] +testInContents BIOF1Labels = testInContents BIOF1 testInContents BIOF1 = [hereLit|Alan Tring No name here |] @@ -477,6 +482,7 @@ testExpectedContents LogLoss = [hereLit|1.0 1.0 0.0 |] +testExpectedContents BIOF1Labels = testExpectedContents BIOF1 testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING O O O |] diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index dbdd6d6..13ac6ec 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -110,7 +110,7 @@ metricReader = option auto <> value defaultMetric <> showDefault <> metavar "METRIC" - <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1 or CharMatch" ) + <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" ) runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval args = do diff --git a/test/Spec.hs b/test/Spec.hs index 83572e6..644e8e1 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -257,6 +257,8 @@ main = hspec $ do TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] `shouldBe` (1, 1, 2) it "check F1 on a more complicated example" $ do runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625 + it "check F1 on labels only" $ do + runGEvalTest "bio-f1-complex-labels" `shouldReturnAlmost` 0.6666666666 it "calculate F1" $ do runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5 it "calculate F1 with underscores rather than minus signs" $ do diff --git a/test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv b/test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv new file mode 100644 index 0000000..0e8b8a1 --- /dev/null +++ b/test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv @@ -0,0 +1,6 @@ +B-wrong +B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK_CITY +B-surname/BROWN B-surname/SMIT +B-month B-month O O B-foo/bar +O B-class I-class I-class +O O diff --git a/test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt b/test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt new file mode 100644 index 0000000..eee37bb --- /dev/null +++ b/test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt @@ -0,0 +1 @@ +--metric BIO-F1-Labels diff --git a/test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv b/test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv new file mode 100644 index 0000000..c6e719d --- /dev/null +++ b/test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv @@ -0,0 +1,6 @@ +O +B-city/LOS I-city/ANGELES O B-city/NEW_YORK O +O B-surname/SMITH +B-month/JULY O O O B-foo/bar +O B-class I-class I-class +O O From d370e375a0c5c06871a37662090c1e00bb440bd3 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 2 Jun 2018 11:29:54 +0200 Subject: [PATCH 21/24] add --alt-metric option --- src/GEval/OptionsParser.hs | 13 ++++++++++++- test/Spec.hs | 9 +++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 13ac6ec..e46b033 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -100,9 +100,13 @@ specParser = GEvalSpecification <> showDefault <> metavar "INPUT" <> help "The name of the file with the input (applicable only for some metrics)" ) - <*> metricReader + <*> ((flip fromMaybe) <$> altMetricReader <*> metricReader) <*> optional precisionArgParser +sel :: Maybe Metric -> Metric -> Metric +sel Nothing m = m +sel (Just m) _ = m + metricReader :: Parser Metric metricReader = option auto ( long "metric" @@ -112,6 +116,13 @@ metricReader = option auto <> metavar "METRIC" <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" ) +altMetricReader :: Parser (Maybe Metric) +altMetricReader = optional $ option auto + ( long "alt-metric" + <> short 'a' + <> metavar "METRIC" + <> help "Alternative metric (overrides --metric option)" ) + runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval args = do ret <- runGEvalGetOptions args diff --git a/test/Spec.hs b/test/Spec.hs index 644e8e1..0474ae2 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -290,6 +290,9 @@ main = hspec $ do it "test sorting" $ do results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq" + describe "handle --alt-metric option" $ do + it "accuracy instead of likelihood" $ do + runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "likelihood-simple" `shouldReturnAlmost` 0.75 neverMatch :: Char -> Int -> Bool @@ -309,11 +312,13 @@ testMatchFun _ _ = False extractVal :: (Either (ParserResult GEvalOptions) (Maybe MetricValue)) -> IO MetricValue extractVal (Right (Just val)) = return val -runGEvalTest testName = (runGEval [ +runGEvalTest = runGEvalTestExtraOptions [] + +runGEvalTestExtraOptions extraOptions testName = (runGEval ([ "--expected-directory", "test/" ++ testName ++ "/" ++ testName, "--out-directory", - "test/" ++ testName ++ "/" ++ testName ++ "-solution"]) >>= extractVal + "test/" ++ testName ++ "/" ++ testName ++ "-solution"] ++ extraOptions)) >>= extractVal extractMetric :: String -> IO (Maybe Metric) extractMetric testName = do From f9dfbc14661b6a3675031f088e1ee01ecf3f75a6 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 2 Jun 2018 12:24:14 +0200 Subject: [PATCH 22/24] accuracy can work on probablity distributions now --- src/GEval/Common.hs | 4 ++++ src/GEval/Core.hs | 15 ++++++++++++--- src/GEval/LogLossHashed.hs | 11 +++++++++-- test/Spec.hs | 3 ++- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/GEval/Common.hs b/src/GEval/Common.hs index ad89c79..38828ab 100644 --- a/src/GEval/Common.hs +++ b/src/GEval/Common.hs @@ -38,3 +38,7 @@ sepByWhitespaces parser = possibleWhitespace *> parser `sepBy` whitespace <* pos possibleWhitespace = many' (satisfy isHorizontalSpace) whitespace = many1 (satisfy isHorizontalSpace) + +indicator :: Bool -> Double +indicator True = 1.0 +indicator False = 0.0 diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 1353aab..180cc41 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -369,9 +369,18 @@ gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . D | otherwise = exp (1.0 - (r /. c)) gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id - where hitOrMiss (exp,got) = if (normalizeProbForAccuracy exp got) == exp then 1.0 else 0.0 - -- if the expected value is 0 or 1 treat values between 0.0 and 1.0 as probabilities - -- for the positive outcome + where hitOrMiss (exp, got) = + -- first try to parse what we got as a probability distribution + -- (like the one used for Likelikehood/LogLossHashed metric) + case parseWordSpecs got of + Right wordSpecs -> if Prelude.null pairs + then 0.0 + else indicator (exp == (snd $ Prelude.maximum pairs)) + where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs + Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp) + -- if the expected value is 0 or 1 treat values + -- between 0.0 and 1.0 as probabilities + -- for the positive outcome normalizeProbForAccuracy :: Text -> Text -> Text normalizeProbForAccuracy exp got | exp == (pack "1") = case tryReadingAsFloat got of diff --git a/src/GEval/LogLossHashed.hs b/src/GEval/LogLossHashed.hs index a4388ee..5b3dfcc 100644 --- a/src/GEval/LogLossHashed.hs +++ b/src/GEval/LogLossHashed.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} module GEval.LogLossHashed - (HashedDistribution, parseDistribution, calculateLogLoss) + (HashedDistribution, parseDistribution, calculateLogLoss, parseWordSpecs, wordSpecToPair) where import qualified Data.Vector as V @@ -59,11 +59,18 @@ isAnyWord _ = False data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double +wordSpecToPair :: WordSpecWithLogProb -> Maybe (Double, T.Text) +wordSpecToPair (WordSpecWithLogProb AnyWord _) = Nothing +wordSpecToPair (WordSpecWithLogProb (SpecificWord w) lp) = Just (lp, w) + parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<< normalizeLogProbs =<< lookForProbs =<< - (processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec) + (parseWordSpecs distroSpec) + +parseWordSpecs :: T.Text -> Either String [WordSpecWithLogProb] +parseWordSpecs distroSpec = processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec getWordSpecWithLogProb :: T.Text -> Either String WordSpecWithLogProb getWordSpecWithLogProb t = diff --git a/test/Spec.hs b/test/Spec.hs index 0474ae2..90039bb 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -293,7 +293,8 @@ main = hspec $ do describe "handle --alt-metric option" $ do it "accuracy instead of likelihood" $ do runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "likelihood-simple" `shouldReturnAlmost` 0.75 - + it "accuracy instead of log loss" $ do + runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4 neverMatch :: Char -> Int -> Bool neverMatch _ _ = False From 546f2a4e24eb6fc3e145bd9cb019ad8caeb06268 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 2 Jun 2018 12:24:49 +0200 Subject: [PATCH 23/24] bump up version number --- geval.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geval.cabal b/geval.cabal index e51ad70..395267a 100644 --- a/geval.cabal +++ b/geval.cabal @@ -1,5 +1,5 @@ name: geval -version: 0.6.0.0 +version: 0.7.0.0 synopsis: Machine learning evaluation tools description: Please see README.md homepage: http://github.com/name/project From 04ffaeeb9e07ab80a24bcb24a87dda825b09d616 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 2 Jun 2018 13:02:18 +0200 Subject: [PATCH 24/24] minor changes to README --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e89c28..411f81c 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ # GEval -GEval is a Haskell library (and a stand-alone tool) for evaluating the +GEval is a Haskell library and a stand-alone tool for evaluating the results of solutions to machine learning challenges as defined on the [Gonito](http://gonito.net) platform. Note that GEval is only about machine learning evaluation. No actual machine learning algorithms are available here. +The official repository is `git://gonito.net/geval`, browsable at +. + ## Installing You need [Haskell Stack](https://github.com/commercialhaskell/stack).