better code

2020-04-20 16:14:42 +02:00 · 2020-04-20 16:14:42 +02:00 · 6eb349dc94
commit 6eb349dc94
parent ef7d13af8b
1 changed files with 12 additions and 21 deletions
--- a/linear_regression.py
+++ b/linear_regression.py
@ -4,7 +4,6 @@ import random
 import json
 from math import sqrt

-# Prints ['Hey', 'you', 'what', 'are', 'you', 'doing', 'here']
 def make_dict(path):
    dict = {}
    with open(path) as in_file:
@ -14,8 +13,8 @@ def make_dict(path):
                if not word in dict:
                    weight = round(random.random()%0.2-0.1,2)
                    dict[word] = weight
-    
-    return dict
+    with open('dict.txt', 'w') as file:
+        json.dump(dict, file)

 def make_posts_list(in_file):
    posts = []
@ -35,7 +34,8 @@ def make_exp_list(exp_file):
    return exp_list

 def train_model(in_path, exp_path):
-    dict = make_dict(in_path)
+    with open('dict.txt', 'r') as file:
+        dict = json.load(file)
    posts = make_posts_list(in_path)
    exp = make_exp_list(exp_path)
    w0 = 0.1
@ -53,22 +53,11 @@ def train_model(in_path, exp_path):
            post = (in_line.split('\t')[0])
            error_rate = 1
            y = int(exp_line)
-            #loop_counter = 0
-            #while (error_rate > 0.2 and loop_counter < 10000):
-                #loop_counter +=1
            y_hat = w0
            for word in re.findall(r"[\w']+", post):
-                #dict[word] -= (y_hat - y)*lr
                y_hat += dict[word]
            loss = (y_hat - y)**2
            loss_sum += loss
-            #error_rate = (y_hat - y)**2
-            # if loop_counter%1000 == 0:
-            #     print(error_rate)
-            # loss_cost += error_rate
-            # if loss_counter%1000==0:
-            #     print(loss_sum/1000)
-            #     loss_sum = 0

            #uczenie
            delta = (y_hat - y) * lr
@ -86,19 +75,20 @@ def train_model(in_path, exp_path):
            last_sum = real_loss
        loss_sum = 0
        loss_counter = 0
-    with open('dict.txt', 'w') as file:
+    dict["w0"] = w0
+    with open('dict2.txt', 'w') as file:
        json.dump(dict, file)

 def predict(path):
    results = []
-    with open('dict.txt', 'r') as file:
+    with open('dict2.txt', 'r') as file:
        dict = json.load(file)

    with open(path+"/in.tsv") as in_file:
        for in_line in in_file:
            print("new post" + str(random.randint(0,10)))
            post = (in_line.split('\t')[0])
-            y=0
+            y=dict["w0"]
            for word in re.findall(r"[\w']+", post):
                if word in dict:
                    y += dict[word]
@ -112,8 +102,8 @@ def predict(path):
            for i in results:
                tsv_writer.writerow(i)

-
-# train_model("train/in.tsv", "train/expected.tsv")
+make_dict("train/in.tsv")
+train_model("train/in.tsv", "train/expected.tsv")

 def check_dev():
    with open("dev-0/out.tsv") as out_file, open("dev-0/expected.tsv") as exp_file:
@ -125,4 +115,5 @@ def check_dev():
                positive += 1
        print(positive/counter)

-predict("test-A")
+predict("dev-0")
+check_dev()