2020-04-02 15:45:53 +02:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
|
|
|
import sys
|
|
|
|
import pickle
|
|
|
|
from math import log
|
|
|
|
from tokenizer import tokenize
|
|
|
|
|
|
|
|
# Load the trained model.
# NOTE: unpickling can execute arbitrary code, so model.pkl must come from a
# trusted source. The context manager guarantees the file handle is closed.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# The pickled object unpacks into three parts: an indexable collection of
# weights, a word -> weight-index mapping, and a word -> count mapping.
weights, word_to_index_mapping, word_count = model

# Decision boundary: scores at or below it are reported as 0, above it as 1.
DECISION_THRESHOLD = 0.63

# Loop-invariant denominator used to scale every word count; computed once
# instead of once per token.
vocabulary_size = len(word_count)

# Classify every line read from standard input, printing one label per line.
for line in sys.stdin:
    # Only the first tab-separated field of the line is treated as the text.
    document = line.rstrip().split('\t')[0]
    terms = tokenize(document)

    # The score starts from weights[0] (presumably the intercept term --
    # confirm against the training script).
    y_predicted = weights[0]
    for word in terms:
        # Words missing from the index mapping fall back to index 0; words
        # missing from word_count get a count of 0 and so contribute
        # weights[...] * 0 to the score.
        weight = weights[word_to_index_mapping.get(word, 0)]
        y_predicted += weight * (word_count.get(word, 0) / vocabulary_size)

    print(0 if y_predicted <= DECISION_THRESHOLD else 1)
|