paranormal-or-skeptic/linearpred.py

#!/usr/bin/env python3
import math
import pickle
import sys
from tokenize import tokenize

# Decision threshold on the linear score: strictly greater predicts 1.
THRESHOLD = 0.65


def predict_line(line, word_to_index, vocabulary, weights, words_count,
                 threshold=THRESHOLD):
    """Classify one tab-separated input line and return 1 or 0.

    The line is split on tabs; the second field is taken as the document
    and split into terms on single spaces.  Every term of the document is
    first added to ``words_count`` (which is mutated in place, so counts
    accumulate across successive calls).  The score then starts at
    ``weights[0]`` and, for every term occurrence that is in
    ``vocabulary``, grows by
    ``words_count[term] / len(words_count) * weights[word_to_index[term]]``.

    Args:
        line: Raw input line; trailing whitespace is stripped.
        word_to_index: Mapping from term to its position in ``weights``.
        vocabulary: Container of the terms that contribute to the score.
        weights: Sequence of weights; ``weights[0]`` is the starting score.
        words_count: Mutable mapping term -> count, updated in place.
        threshold: Score that must be strictly exceeded to predict 1.

    Returns:
        1 if the score is greater than ``threshold``, otherwise 0.
    """
    fields = line.rstrip().split('\t')
    # rstrip() also removes a trailing tab, so a row whose document is empty
    # (or a blank line) arrives here with a single field.  Treat it as an
    # empty document instead of failing with IndexError, so that exactly one
    # prediction is still produced for every input line.
    document = fields[1] if len(fields) > 1 else ''
    terms = document.split(' ')

    # Update the running term counts with this document before scoring.
    for term in terms:
        words_count[term] = words_count.get(term, 0) + 1

    # Constant while scoring: the counts were fully updated above.
    distinct_terms = len(words_count)
    score = weights[0]
    for term in terms:
        if term in vocabulary:
            score += (words_count[term] / distinct_terms
                      * weights[word_to_index[term]])
    return 1 if score > threshold else 0


def main():
    """Load ``model.pkl`` and print one 0/1 prediction per stdin line."""
    # NOTE: unpickling can execute arbitrary code; only load a model.pkl
    # that comes from a trusted source.
    with open("model.pkl", "rb") as model_file:
        model = pickle.load(model_file)
    word_to_index, vocabulary, weights, words_count = model

    for line in sys.stdin:
        print(predict_line(line, word_to_index, vocabulary, weights,
                           words_count))


if __name__ == "__main__":
    main()