paranormal-or-skeptic/predict.py

33 lines
965 B
Python

#!/usr/bin/python3
import sys
import pickle
from math import log
from tokenizer import tokenize
def classify_terms(terms, pskeptic, vocabulary_size, skeptic_words_total,
                   paranormal_words_total, skeptic_count, paranormal_count):
    """Return 'S' or 'P' for a tokenized document.

    Two log scores are accumulated: each starts from the log of its class
    prior (``pskeptic`` and ``1 - pskeptic``) and adds, for every term, the
    log of the add-one smoothed term probability
    ``(count + 1) / (class_words_total + vocabulary_size)``.

    Args:
        terms: Iterable of tokens of the document.
        pskeptic: Prior probability of the skeptic class; must lie strictly
            between 0 and 1 because its logarithm (and that of its
            complement) is taken.
        vocabulary_size: Value added to each class total in the smoothing
            denominator.
        skeptic_words_total: Total word count for the skeptic class.
        paranormal_words_total: Total word count for the paranormal class.
        skeptic_count: Mapping term -> count in the skeptic class.
        paranormal_count: Mapping term -> count in the paranormal class.

    Returns:
        'S' when the skeptic score is strictly greater than the paranormal
        score, otherwise 'P' (ties therefore yield 'P').
    """
    log_prob_skeptic = log(pskeptic)
    log_prob_paranormal = log(1 - pskeptic)

    # The denominators do not depend on the term, so compute them once.
    skeptic_denominator = skeptic_words_total + vocabulary_size
    paranormal_denominator = paranormal_words_total + vocabulary_size

    for term in terms:
        # .get(term, 0) treats unseen terms as count 0 without inserting
        # them into the model dictionaries (which would grow with the input).
        log_prob_skeptic += log(
            (skeptic_count.get(term, 0) + 1) / skeptic_denominator)
        log_prob_paranormal += log(
            (paranormal_count.get(term, 0) + 1) / paranormal_denominator)

    return 'S' if log_prob_skeptic > log_prob_paranormal else 'P'


def main():
    """Load the model from model.pkl and print one label per stdin line.

    Only the first tab-separated field of each input line is classified.
    """
    # NOTE: pickle.load can execute arbitrary code while unpickling; only
    # use a model.pkl that comes from a trusted source.
    with open("model.pkl", "rb") as model_file:
        model = pickle.load(model_file)
    (pskeptic, vocabulary_size, skeptic_words_total, paranormal_words_total,
     skeptic_count, paranormal_count) = model

    for line in sys.stdin:
        # Drop trailing whitespace, then keep the text before the first tab.
        document = line.rstrip().split('\t')[0]
        print(classify_terms(tokenize(document), pskeptic, vocabulary_size,
                             skeptic_words_total, paranormal_words_total,
                             skeptic_count, paranormal_count))


if __name__ == "__main__":
    main()