Updated outs

This commit is contained in:
s426135 2020-03-30 17:20:34 +02:00
parent 505e948550
commit 85b1e9509c
4 changed files with 285 additions and 265 deletions

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,7 @@ import sys
import nltk import nltk
from nltk.corpus import stopwords from nltk.corpus import stopwords
def calc_post_class(post, paranormal_class_logprob, sceptic_class_logprob, bigrams_logprobs, words_logprobs): def calc_post_class(post, paranormal_class_logprob, sceptic_class_logprob, bigrams_logprobs, words_logprobs, total_sceptic_bigram, total_paranormal_bigram, total_sceptic_word, total_paranormal_word):
text, timestap = post.rstrip('\n').split('\t') text, timestap = post.rstrip('\n').split('\t')
tokens = clear_post(text) tokens = clear_post(text)
#tokens = text.lower().split(' ') #tokens = text.lower().split(' ')
@ -22,11 +22,20 @@ def calc_post_class(post, paranormal_class_logprob, sceptic_class_logprob, bigra
product += bigrams_logprobs[class_][bigram] product += bigrams_logprobs[class_][bigram]
except KeyError: except KeyError:
product += 0 product += 0
# if class_ == 'sceptic':
# product += math.log(1/total_sceptic_bigram)
# elif class_ == 'paranormal':
# product += math.log(1/total_paranormal_bigram)
for token in tokens: for token in tokens:
try: try:
product += words_logprobs[class_][token] product += words_logprobs[class_][token]/7
except KeyError: except KeyError:
product +=0 product +=0
#if class_ == 'sceptic':
# product += math.log(1/total_sceptic_word)
#elif class_ == 'paranormal':
# product += math.log(1/total_paranormal_word)
if class_ == 'sceptic': if class_ == 'sceptic':
product += sceptic_class_logprob product += sceptic_class_logprob
elif class_ == 'paranormal': elif class_ == 'paranormal':
@ -65,10 +74,17 @@ def main():
sceptic_class_logprob = pickle_list[1] sceptic_class_logprob = pickle_list[1]
bigrams_logprobs = pickle_list[2] bigrams_logprobs = pickle_list[2]
words_logprobs = pickle_list[3] words_logprobs = pickle_list[3]
total_sceptic_bigram = pickle_list[4]
total_paranormal_bigram = pickle_list[5]
total_sceptic_word = pickle_list[6]
total_paranormal_word = pickle_list[7]
print(math.log(1/total_sceptic_bigram))
print(math.log(1/total_paranormal_bigram))
print(math.log(1/total_sceptic_word))
print(math.log(1/total_paranormal_word))
with open(in_file) as in_f, open(out_file, 'w') as out_f: with open(in_file) as in_f, open(out_file, 'w') as out_f:
for line in in_f: for line in in_f:
hyp = calc_post_class(line, paranormal_class_logprob, sceptic_class_logprob, bigrams_logprobs, words_logprobs) hyp = calc_post_class(line, paranormal_class_logprob, sceptic_class_logprob, bigrams_logprobs, words_logprobs, total_sceptic_bigram, total_paranormal_bigram, total_sceptic_word, total_paranormal_word)
if hyp == 'sceptic': if hyp == 'sceptic':
out_f.write(' S\n') out_f.write(' S\n')
elif hyp == 'paranormal': elif hyp == 'paranormal':

File diff suppressed because it is too large Load Diff

View File

@ -147,7 +147,11 @@ def main():
bigrams_count, words_count = launch_bigrams_and_words(in_file, expected_file) bigrams_count, words_count = launch_bigrams_and_words(in_file, expected_file)
bigram_logprobs = calc_bigram_logprobs(bigrams_count) bigram_logprobs = calc_bigram_logprobs(bigrams_count)
word_logprobs = calc_word_logprobs(words_count) word_logprobs = calc_word_logprobs(words_count)
total_sceptic_bigram = sum(bigrams_count['sceptic'].values()) + len(bigrams_count['sceptic'].keys())
total_paranormal_bigram = sum(bigrams_count['paranormal'].values()) + len(bigrams_count['paranormal'].keys())
total_sceptic_word = sum(words_count['sceptic'].values()) + len(words_count['sceptic'].keys())
total_paranormal_word = sum(words_count['paranormal'].values())+ len(words_count['paranormal'].keys())
with open(model, 'wb') as f: with open(model, 'wb') as f:
pickle.dump([paranormal_class_logprob, sceptic_class_logprob, bigram_logprobs, word_logprobs],f) pickle.dump([paranormal_class_logprob, sceptic_class_logprob, bigram_logprobs, word_logprobs, total_sceptic_bigram, total_paranormal_bigram, total_sceptic_word, total_paranormal_word],f)
main() main()