Linear regression first try
This commit is contained in:
parent
d6158fa514
commit
d7040c9bc6
@ -2,7 +2,7 @@
|
||||
|
||||
import sys
|
||||
import pickle
|
||||
from math import log
|
||||
from math import log, exp
|
||||
from tokenizer import tokenize
|
||||
|
||||
#Load model
|
||||
@ -19,8 +19,7 @@ for line in sys.stdin:
|
||||
for word in terms:
|
||||
y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
|
||||
|
||||
|
||||
if y_predicted <= 0.63:
|
||||
if y_predicted <= 0.5:
|
||||
print(0)
|
||||
else:
|
||||
print(1)
|
3556
test-A/out.tsv
3556
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
15
train.py
15
train.py
@ -1,8 +1,17 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
'''
|
||||
Linear regression for paranormal and sceptic challenge 2.0.0
|
||||
In order to use train.py you need to pass two columns
|
||||
label document
|
||||
separated by \t
|
||||
Commands used: xzcat, paste
|
||||
'''
|
||||
|
||||
import sys
|
||||
import pickle
|
||||
import random
|
||||
from math import log, exp
|
||||
import collections
|
||||
from tokenizer import tokenize
|
||||
|
||||
@ -76,7 +85,7 @@ def train():
|
||||
#We will stop after the loss reaches some value
|
||||
|
||||
if Loss_sum_counter % 10000 == 0:
|
||||
print(Loss_sum / 10000)
|
||||
print(str(Loss_sum_counter) + " " + str(Loss_sum / 10000))
|
||||
Loss_sum = 0.0
|
||||
Loss_sum_counter += 1
|
||||
|
||||
@ -87,13 +96,13 @@ def train():
|
||||
if word in word_to_index_mapping:
|
||||
weights[word_to_index_mapping[word]] -= ((word_count[word] / len(word_count)) * delta)
|
||||
|
||||
if Loss_sum_counter > 50000000:
|
||||
if Loss_sum_counter > 10000000:
|
||||
break
|
||||
|
||||
|
||||
|
||||
|
||||
#We save only things we need for predicion
|
||||
#We save only things we need for prediction
|
||||
model = (weights, word_to_index_mapping, word_count)
|
||||
pickle.dump(model, open("model.pkl", "wb"))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user