change output
This commit is contained in:
parent
9aea4283bd
commit
d13443a750
10544
dev-0/out.tsv
10544
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
28
solution.py
28
solution.py
@ -45,7 +45,7 @@ def calc_class_logprob(expected_path): #zliczamy ogólne prawdopodobieństwo dla
|
||||
paranormal_class_prob = paranoarmal_class_count / (paranoarmal_class_count + skeptic_class_count)
|
||||
skeptic_class_prob = skeptic_class_count / (paranoarmal_class_count + skeptic_class_count)
|
||||
|
||||
return math.log(paranormal_class_prob), math.log(skeptic_class_prob)
|
||||
return paranormal_class_prob, skeptic_class_prob
|
||||
|
||||
def calc_word_counts(in_path, expected_path):
|
||||
with open(in_path) as in_file, open(expected_path) as exp_file:
|
||||
@ -74,7 +74,7 @@ def calc_word_logprobs(word_counts):
|
||||
word_prob = (value + 1)/ total_skeptic
|
||||
else:
|
||||
word_prob = (value + 1)/total_paranormal
|
||||
word_logprobs[class_][token] = math.log(word_prob)
|
||||
word_logprobs[class_][token] = word_prob
|
||||
return word_logprobs
|
||||
|
||||
paranormal_class_logprob, skeptic_class_logprob = calc_class_logprob("train/expected.tsv")
|
||||
@ -100,30 +100,30 @@ def predict_post_class(posts, sprob, pprob, word_logprobs):
|
||||
out_classes = []
|
||||
|
||||
for post in posts:
|
||||
total_s_prob = sprob
|
||||
total_p_prob = pprob
|
||||
total_s_prob = math.log(sprob)
|
||||
total_p_prob = math.log(pprob)
|
||||
post = tokenize(post)
|
||||
tokens = post.lower().split(' ')
|
||||
for token in tokens:
|
||||
#dlasceptic
|
||||
if (token in word_logprobs['skeptic'].keys()):
|
||||
sceptic_prob = word_logprobs['skeptic'][token]
|
||||
sceptic_prob = word_logprobs['skeptic'][token]+1/(len(word_logprobs['skeptic']) + len(word_logprobs['skeptic']) + len(word_logprobs['paranormal']))
|
||||
else:
|
||||
sceptic_prob = 0
|
||||
sceptic_prob = 1/(len(word_logprobs['skeptic']) + len(word_logprobs['skeptic']) + len(word_logprobs['paranormal']))
|
||||
#dlaparanormal
|
||||
if (token in word_logprobs['paranormal'].keys()):
|
||||
paranormal_prob = word_logprobs['paranormal'][token]
|
||||
paranormal_prob = word_logprobs['paranormal'][token]+1/(len(word_logprobs['paranormal']) + len(word_logprobs['skeptic']) + len(word_logprobs['paranormal']))
|
||||
else:
|
||||
paranormal_prob = 0
|
||||
total_s_prob += sceptic_prob
|
||||
total_p_prob += paranormal_prob
|
||||
paranormal_prob = 1/(len(word_logprobs['paranormal']) + len(word_logprobs['skeptic']) + len(word_logprobs['paranormal']))
|
||||
total_s_prob += math.log(sceptic_prob)
|
||||
total_p_prob += math.log(paranormal_prob)
|
||||
|
||||
#print(total_p_prob)
|
||||
#print(total_s_prob)
|
||||
if total_p_prob > total_s_prob:
|
||||
out_classes.append('1')
|
||||
out_classes.append(total_p_prob)
|
||||
else:
|
||||
out_classes.append('0')
|
||||
out_classes.append(total_s_prob)
|
||||
|
||||
return out_classes
|
||||
|
||||
@ -133,7 +133,9 @@ def predict_posts(path):
|
||||
classes = predict_post_class(posts, skeptic_class_logprob, paranormal_class_logprob, word_logprobs)
|
||||
with open(path+"/out.tsv", 'wt') as tsvfile:
|
||||
tsv_writer = csv.writer(tsvfile, delimiter='\t')
|
||||
tsv_writer.writerows(map(lambda x: [x], classes))
|
||||
# for i in classes:
|
||||
# tsv_writer.writerow(i)
|
||||
tsv_writer.writerows(map(lambda x: [-x], classes))
|
||||
|
||||
predict_posts("dev-0")
|
||||
predict_posts("test-A")
|
||||
|
10304
test-A/out.tsv
10304
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user