log probs

This commit is contained in:
Filip Gralinski 2018-05-16 20:59:40 +02:00
parent 82e794ae3c
commit b01f9439b7
6 changed files with 28 additions and 2 deletions

View File

@ -125,8 +125,12 @@ must be given:
where *logprobi* is the logarithm of the probability for *wordi* and
*logprob0* is the logarithm of the probability mass for all the other
words (it will be spread between all 1024 fingerprint values). If the
respective probabilities do not sum up to 1, they will be normalised with
softmax.
respective probabilities do not sum up to 1:
* if the sum is larger than 0.0 and smaller than 1.0, and no logprob0
is given, the log of the remaining probability mass will be assigned to logprob0,
* otherwise they will be normalised with
softmax.
Note: the separator here is space, not TAB!

View File

@ -61,6 +61,7 @@ data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double
-- | Build a 'HashedDistribution' from a textual distribution spec.
--
-- The spec is split on single spaces (not TABs); each piece is turned into
-- a word spec by 'getWordSpecWithLogProb' and the per-piece results are
-- combined by 'processEithers'. The combined list is then passed through
-- 'lookForProbs' and 'normalizeLogProbs' before being handed, together with
-- @nbOfBits@ and @seed@, to @parseDistributionFromWordList'@. A 'Left'
-- produced by any stage short-circuits the remaining stages.
parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution
parseDistributionFromWordList nbOfBits seed distroSpec =
  processEithers (map getWordSpecWithLogProb (T.splitOn " " distroSpec))
    >>= lookForProbs
    >>= normalizeLogProbs
    >>= parseDistributionFromWordList' nbOfBits seed
@ -99,6 +100,17 @@ areProbs specs = all isProb specs && any isPositiveProb specs
-- | Replace the number attached to every word spec with its natural
-- logarithm, leaving the word part untouched.
toLogProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
toLogProbs = map viaLog
  where
    viaLog (WordSpecWithLogProb word prob) = WordSpecWithLogProb word (log prob)
-- | Assign the left-over probability mass to an explicit 'AnyWord' entry.
--
-- An @AnyWord@ spec carrying @log (1 - total)@ is prepended when all of the
-- following hold, where @total@ is the sum of @exp@ of the given log probs:
--
--   * 'isProbTotalIncorrect' reports the total as incorrect,
--   * the total lies strictly between 0.0 and 1.0,
--   * no spec in the list is already an any-word spec (per 'isAnyWord'),
--   * every log prob is non-positive.
--
-- Otherwise the list is returned unchanged. This function never yields
-- 'Left'; the 'Either' result only lets it sit in a @=<<@ / @>>=@ chain.
normalizeLogProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
normalizeLogProbs specs
  | needsRestMass = Right (restSpec : specs)
  | otherwise = Right specs
  where
    -- Total probability mass of the listed entries (log probs mapped back via exp).
    probTotal = sum [exp logp | WordSpecWithLogProb _ logp <- specs]

    -- Entry receiving whatever mass the listed entries leave unassigned.
    restSpec = WordSpecWithLogProb AnyWord (log (1 - probTotal))

    hasAnyWord = or [isAnyWord w | WordSpecWithLogProb w _ <- specs]

    allNonPositive = and [lp <= 0 | WordSpecWithLogProb _ lp <- specs]

    needsRestMass =
      isProbTotalIncorrect probTotal
        && probTotal < 1.0
        && probTotal > 0.0
        && not hasAnyWord
        && allNonPositive
normalizeProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
normalizeProbs specs = if isProbTotalIncorrect probTotal
then

View File

@ -100,6 +100,9 @@ main = hspec $ do
runGEvalTest "log-loss-hashed-probs" `shouldReturnAlmost` 4.11631293099392
it "with probs instead of log probs (with normalization)" $ do
runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
it "with log probs whose probs are summing up to less than 1.0" $ do
runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
describe "reading options" $ do
it "can get the metric" $ do
extractMetric "bleu-complex" `shouldReturn` (Just BLEU)

View File

@ -0,0 +1,3 @@
B:-1.20397280432594 A:-0.916290731874155
A:-2.3025850929940 C:-1.6094379124341
A:-2.3025850929940 C:-1.6094379124341 :-0.356674943938732
1 B:-1.20397280432594 A:-0.916290731874155
2 A:-2.3025850929940 C:-1.6094379124341
3 A:-2.3025850929940 C:-1.6094379124341 :-0.356674943938732

View File

@ -0,0 +1 @@
--metric LogLossHashed10

View File

@ -0,0 +1,3 @@
A
B
B
1 A
2 B
3 B