better code
This commit is contained in:
parent
ef7d13af8b
commit
6eb349dc94
@ -4,7 +4,6 @@ import random
|
||||
import json
|
||||
from math import sqrt
|
||||
|
||||
# Prints ['Hey', 'you', 'what', 'are', 'you', 'doing', 'here']
|
||||
def make_dict(path):
|
||||
dict = {}
|
||||
with open(path) as in_file:
|
||||
@ -14,8 +13,8 @@ def make_dict(path):
|
||||
if not word in dict:
|
||||
weight = round(random.random()%0.2-0.1,2)
|
||||
dict[word] = weight
|
||||
|
||||
return dict
|
||||
with open('dict.txt', 'w') as file:
|
||||
json.dump(dict, file)
|
||||
|
||||
def make_posts_list(in_file):
|
||||
posts = []
|
||||
@ -35,7 +34,8 @@ def make_exp_list(exp_file):
|
||||
return exp_list
|
||||
|
||||
def train_model(in_path, exp_path):
|
||||
dict = make_dict(in_path)
|
||||
with open('dict.txt', 'r') as file:
|
||||
dict = json.load(file)
|
||||
posts = make_posts_list(in_path)
|
||||
exp = make_exp_list(exp_path)
|
||||
w0 = 0.1
|
||||
@ -53,22 +53,11 @@ def train_model(in_path, exp_path):
|
||||
post = (in_line.split('\t')[0])
|
||||
error_rate = 1
|
||||
y = int(exp_line)
|
||||
#loop_counter = 0
|
||||
#while (error_rate > 0.2 and loop_counter < 10000):
|
||||
#loop_counter +=1
|
||||
y_hat = w0
|
||||
for word in re.findall(r"[\w']+", post):
|
||||
#dict[word] -= (y_hat - y)*lr
|
||||
y_hat += dict[word]
|
||||
loss = (y_hat - y)**2
|
||||
loss_sum += loss
|
||||
#error_rate = (y_hat - y)**2
|
||||
# if loop_counter%1000 == 0:
|
||||
# print(error_rate)
|
||||
# loss_cost += error_rate
|
||||
# if loss_counter%1000==0:
|
||||
# print(loss_sum/1000)
|
||||
# loss_sum = 0
|
||||
|
||||
# learning step
|
||||
delta = (y_hat - y) * lr
|
||||
@ -86,19 +75,20 @@ def train_model(in_path, exp_path):
|
||||
last_sum = real_loss
|
||||
loss_sum = 0
|
||||
loss_counter = 0
|
||||
with open('dict.txt', 'w') as file:
|
||||
dict["w0"] = w0
|
||||
with open('dict2.txt', 'w') as file:
|
||||
json.dump(dict, file)
|
||||
|
||||
def predict(path):
|
||||
results = []
|
||||
with open('dict.txt', 'r') as file:
|
||||
with open('dict2.txt', 'r') as file:
|
||||
dict = json.load(file)
|
||||
|
||||
with open(path+"/in.tsv") as in_file:
|
||||
for in_line in in_file:
|
||||
print("new post" + str(random.randint(0,10)))
|
||||
post = (in_line.split('\t')[0])
|
||||
y=0
|
||||
y=dict["w0"]
|
||||
for word in re.findall(r"[\w']+", post):
|
||||
if word in dict:
|
||||
y += dict[word]
|
||||
@ -112,8 +102,8 @@ def predict(path):
|
||||
for i in results:
|
||||
tsv_writer.writerow(i)
|
||||
|
||||
|
||||
# train_model("train/in.tsv", "train/expected.tsv")
|
||||
make_dict("train/in.tsv")
|
||||
train_model("train/in.tsv", "train/expected.tsv")
|
||||
|
||||
def check_dev():
|
||||
with open("dev-0/out.tsv") as out_file, open("dev-0/expected.tsv") as exp_file:
|
||||
@ -125,4 +115,5 @@ def check_dev():
|
||||
positive += 1
|
||||
print(positive/counter)
|
||||
|
||||
predict("test-A")
|
||||
predict("dev-0")
|
||||
check_dev()
|
||||
|
Loading…
Reference in New Issue
Block a user