Linear regression first try
parent d6158fa514
commit d7040c9bc6
@@ -2,7 +2,7 @@
 import sys
 import pickle
-from math import log
+from math import log, exp
 from tokenizer import tokenize
 
 #Load model
@@ -19,8 +19,7 @@ for line in sys.stdin:
     for word in terms:
         y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
 
-    if y_predicted <= 0.5:
+    if y_predicted <= 0.63:
         print(0)
     else:
         print(1)
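For readers without the rest of the file, here is a minimal sketch of the prediction flow these hunks belong to, assuming the model pickle holds the (weights, word_to_index_mapping, word_count) tuple that train.py saves and that terms comes from tokenize(); the file name is not shown in this excerpt, and everything outside the quoted diff lines is an assumption, not code from this commit.

#!/usr/bin/python3
# Sketch only: the real file's name and full contents are not part of this diff.
import sys
import pickle
from math import log, exp   # exp is newly imported by this commit; its use is outside the shown hunks
from tokenizer import tokenize

# Load model saved by train.py: (weights, word_to_index_mapping, word_count)
weights, word_to_index_mapping, word_count = pickle.load(open("model.pkl", "rb"))

for line in sys.stdin:
    terms = tokenize(line)
    y_predicted = 0.0   # any bias handling in the real file is not visible in this diff
    for word in terms:
        y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
    # this commit raises the decision threshold from 0.5 to 0.63
    if y_predicted <= 0.63:
        print(0)
    else:
        print(1)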
test-A/out.tsv: 3556 changed lines
File diff suppressed because it is too large
train.py: 15 changed lines
@@ -1,8 +1,17 @@
 #!/usr/bin/python3
 
+'''
+Linear regression for the paranormal and sceptic challenge 2.0.0
+In order to use train.py you need to pass two columns
+label document
+split by \t
+Commands used: xzcat, paste
+'''
+
 import sys
 import pickle
 import random
+from math import log, exp
 import collections
 from tokenizer import tokenize
 
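The new docstring says train.py expects one example per line with two tab-separated columns, label and document, assembled with xzcat and paste. As a rough illustration (not the actual parsing code in train.py, which is not shown in this commit), reading that format could look like:

import sys

# "label<TAB>document" per line; the xzcat/paste pipeline mentioned in the
# docstring produces this layout, exact file paths are not given in the commit.
for line in sys.stdin:
    parts = line.rstrip("\n").split("\t", 1)
    if len(parts) != 2:
        continue                # skip malformed lines in this sketch
    label, document = parts
    y = int(label)              # assumed 0/1 class labels
    # document would then be tokenized and fed to the training loop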
@@ -76,7 +85,7 @@ def train():
         #We will stop after the loss reaches some value
 
         if Loss_sum_counter % 10000 == 0:
-            print(Loss_sum / 10000)
+            print(str(Loss_sum_counter) + " " + str(Loss_sum / 10000))
             Loss_sum = 0.0
         Loss_sum_counter += 1
 
@@ -87,13 +96,13 @@ def train():
             if word in word_to_index_mapping:
                 weights[word_to_index_mapping[word]] -= ((word_count[word] / len(word_count)) * delta)
 
-        if Loss_sum_counter > 50000000:
+        if Loss_sum_counter > 10000000:
             break
 
 
 
 
-    #We save only things we need for predicion
+    #We save only things we need for prediction
     model = (weights, word_to_index_mapping, word_count)
     pickle.dump(model, open("model.pkl", "wb"))
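Taken together, the train.py hunks change two things in the training loop: the running loss report every 10000 updates now also prints the update counter, and the hard stop drops from 50000000 to 10000000 iterations. Below is a self-contained toy sketch of that report-and-stop pattern; the data, loss, and learning_rate are invented for illustration, since the full loop is not part of this diff.

#!/usr/bin/python3
# Toy illustration of the control flow only; only the reporting and the
# stopping condition mirror train.py.
import random

weights = [0.0, 0.0]
learning_rate = 0.01            # assumed value, not taken from train.py
Loss_sum = 0.0
Loss_sum_counter = 0

while True:
    x = random.random()
    y = 1.0 if x > 0.5 else 0.0                 # toy target
    y_predicted = weights[0] + weights[1] * x
    delta = y_predicted - y
    Loss_sum += delta ** 2

    if Loss_sum_counter % 10000 == 0:
        # commit change: prefix the averaged loss with the update counter
        print(str(Loss_sum_counter) + " " + str(Loss_sum / 10000))
        Loss_sum = 0.0
    Loss_sum_counter += 1

    weights[0] -= learning_rate * delta
    weights[1] -= learning_rate * delta * x

    if Loss_sum_counter > 100000:               # train.py now stops at 10000000
        break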