Linear regression first try

This commit is contained in:
Th3NiKo 2020-04-06 13:07:14 +02:00
parent d6158fa514
commit d7040c9bc6
3 changed files with 1793 additions and 1785 deletions

View File

@ -2,7 +2,7 @@
import sys import sys
import pickle import pickle
from math import log from math import log, exp
from tokenizer import tokenize from tokenizer import tokenize
#Load model #Load model
@ -19,8 +19,7 @@ for line in sys.stdin:
for word in terms: for word in terms:
y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count)) y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
if y_predicted <= 0.5:
if y_predicted <= 0.63:
print(0) print(0)
else: else:
print(1) print(1)

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,17 @@
#!/usr/bin/python3 #!/usr/bin/python3
'''
Linear regression for paranormal and sceptic challenge 2.0.0
In order to use train.py, you need to pass two columns:
label document
separated by \t
Commands used: xzcat, paste
'''
import sys import sys
import pickle import pickle
import random import random
from math import log, exp
import collections import collections
from tokenizer import tokenize from tokenizer import tokenize
@ -76,7 +85,7 @@ def train():
#We will stop after loss reach some value #We will stop after loss reach some value
if Loss_sum_counter % 10000 == 0: if Loss_sum_counter % 10000 == 0:
print(Loss_sum / 10000) print(str(Loss_sum_counter) + " " + str(Loss_sum / 10000))
Loss_sum = 0.0 Loss_sum = 0.0
Loss_sum_counter += 1 Loss_sum_counter += 1
@ -87,13 +96,13 @@ def train():
if word in word_to_index_mapping: if word in word_to_index_mapping:
weights[word_to_index_mapping[word]] -= ((word_count[word] / len(word_count)) * delta) weights[word_to_index_mapping[word]] -= ((word_count[word] / len(word_count)) * delta)
if Loss_sum_counter > 50000000: if Loss_sum_counter > 10000000:
break break
#We save only things we need for predicion #We save only things we need for prediction
model = (weights, word_to_index_mapping, word_count) model = (weights, word_to_index_mapping, word_count)
pickle.dump(model, open("model.pkl", "wb")) pickle.dump(model, open("model.pkl", "wb"))