33 lines
965 B
Python
33 lines
965 B
Python
#!/usr/bin/python3
|
|
|
|
import sys
|
|
import pickle
|
|
from math import log
|
|
from tokenizer import tokenize
|
|
|
|
# Load the trained Naive Bayes model. The context manager guarantees the file
# handle is closed as soon as unpickling finishes.
# NOTE(security): pickle can execute arbitrary code while loading; only point
# this script at a model.pkl that you produced yourself or otherwise trust.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# The model is a 6-tuple: the prior probability of the "skeptic" class, the
# vocabulary size, the per-class totals, and the per-class term -> count
# mappings used in the scoring loop below.
(pskeptic, vocabulary_size, skeptic_words_total, paranormal_words_total,
 skeptic_count, paranormal_count) = model

# The add-one smoothing denominators do not depend on the term or the
# document, so compute them once instead of once per term.
skeptic_denominator = skeptic_words_total + vocabulary_size
paranormal_denominator = paranormal_words_total + vocabulary_size

# Classify every input line: print 'S' when the skeptic class has the strictly
# higher log-probability, otherwise print 'P'.
for line in sys.stdin:
    # Only the first tab-separated field of the line is the document text.
    document = line.rstrip().split('\t')[0]
    terms = tokenize(document)

    # Start from the class priors and accumulate log-likelihoods, working in
    # log space so that long documents do not underflow to zero.
    log_prob_skeptic = log(pskeptic)
    log_prob_paranormal = log(1 - pskeptic)

    for term in terms:
        # Terms missing from a class count as zero occurrences. Using .get()
        # avoids inserting them into the model dictionaries, which would
        # otherwise grow with every previously unseen term read from stdin.
        log_prob_skeptic += log(
            (skeptic_count.get(term, 0) + 1) / skeptic_denominator)
        log_prob_paranormal += log(
            (paranormal_count.get(term, 0) + 1) / paranormal_denominator)

    if log_prob_skeptic > log_prob_paranormal:
        print('S')
    else:
        print('P')