change output

This commit is contained in:
dylodylo 2020-05-02 19:29:47 +02:00
parent 9aea4283bd
commit d13443a750
3 changed files with 10439 additions and 10437 deletions

File diff suppressed because it is too large Load Diff

View File

@ -45,7 +45,7 @@ def calc_class_logprob(expected_path): #zliczamy ogólne prawdopodobieństwo dla
paranormal_class_prob = paranoarmal_class_count / (paranoarmal_class_count + skeptic_class_count)
skeptic_class_prob = skeptic_class_count / (paranoarmal_class_count + skeptic_class_count)
return math.log(paranormal_class_prob), math.log(skeptic_class_prob)
return paranormal_class_prob, skeptic_class_prob
def calc_word_counts(in_path, expected_path):
with open(in_path) as in_file, open(expected_path) as exp_file:
@ -74,7 +74,7 @@ def calc_word_logprobs(word_counts):
word_prob = (value + 1)/ total_skeptic
else:
word_prob = (value + 1)/total_paranormal
word_logprobs[class_][token] = math.log(word_prob)
word_logprobs[class_][token] = word_prob
return word_logprobs
paranormal_class_logprob, skeptic_class_logprob = calc_class_logprob("train/expected.tsv")
@ -100,30 +100,30 @@ def predict_post_class(posts, sprob, pprob, word_logprobs):
out_classes = []
for post in posts:
total_s_prob = sprob
total_p_prob = pprob
total_s_prob = math.log(sprob)
total_p_prob = math.log(pprob)
post = tokenize(post)
tokens = post.lower().split(' ')
for token in tokens:
#dlasceptic
if (token in word_logprobs['skeptic'].keys()):
sceptic_prob = word_logprobs['skeptic'][token]
sceptic_prob = word_logprobs['skeptic'][token]+1/(len(word_logprobs['skeptic']) + len(word_logprobs['skeptic']) + len(word_logprobs['paranormal']))
else:
sceptic_prob = 0
sceptic_prob = 1/(len(word_logprobs['skeptic']) + len(word_logprobs['skeptic']) + len(word_logprobs['paranormal']))
#dlaparanormal
if (token in word_logprobs['paranormal'].keys()):
paranormal_prob = word_logprobs['paranormal'][token]
paranormal_prob = word_logprobs['paranormal'][token]+1/(len(word_logprobs['paranormal']) + len(word_logprobs['skeptic']) + len(word_logprobs['paranormal']))
else:
paranormal_prob = 0
total_s_prob += sceptic_prob
total_p_prob += paranormal_prob
paranormal_prob = 1/(len(word_logprobs['paranormal']) + len(word_logprobs['skeptic']) + len(word_logprobs['paranormal']))
total_s_prob += math.log(sceptic_prob)
total_p_prob += math.log(paranormal_prob)
#print(total_p_prob)
#print(total_s_prob)
if total_p_prob > total_s_prob:
out_classes.append('1')
out_classes.append(total_p_prob)
else:
out_classes.append('0')
out_classes.append(total_s_prob)
return out_classes
@ -133,7 +133,9 @@ def predict_posts(path):
classes = predict_post_class(posts, skeptic_class_logprob, paranormal_class_logprob, word_logprobs)
with open(path+"/out.tsv", 'wt') as tsvfile:
tsv_writer = csv.writer(tsvfile, delimiter='\t')
tsv_writer.writerows(map(lambda x: [x], classes))
# for i in classes:
# tsv_writer.writerow(i)
tsv_writer.writerows(map(lambda x: [-x], classes))
predict_posts("dev-0")
predict_posts("test-A")

File diff suppressed because it is too large Load Diff