Linear regression first try

This commit is contained in:
Th3NiKo 2020-04-06 13:07:14 +02:00
parent d6158fa514
commit d7040c9bc6
3 changed files with 1793 additions and 1785 deletions

View File

@ -2,7 +2,7 @@
import sys
import pickle
from math import log
from math import log, exp
from tokenizer import tokenize
#Load model
@ -19,8 +19,7 @@ for line in sys.stdin:
for word in terms:
y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
if y_predicted <= 0.63:
if y_predicted <= 0.5:
print(0)
else:
print(1)
print(1)

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,17 @@
#!/usr/bin/python3
'''
Linear regression for paranormal and sceptic challenge 2.0.0
In order to use train.py you need to pass two columns
label document
separated by \t
Commands used: xzcat, paste
'''
import sys
import pickle
import random
from math import log, exp
import collections
from tokenizer import tokenize
@ -76,7 +85,7 @@ def train():
#We will stop after the loss reaches some value
if Loss_sum_counter % 10000 == 0:
print(Loss_sum / 10000)
print(str(Loss_sum_counter) + " " + str(Loss_sum / 10000))
Loss_sum = 0.0
Loss_sum_counter += 1
@ -87,13 +96,13 @@ def train():
if word in word_to_index_mapping:
weights[word_to_index_mapping[word]] -= ((word_count[word] / len(word_count)) * delta)
if Loss_sum_counter > 50000000:
if Loss_sum_counter > 10000000:
break
#We save only things we need for predicion
#We save only things we need for prediction
model = (weights, word_to_index_mapping, word_count)
pickle.dump(model, open("model.pkl", "wb"))