laptop commit linear regression

Bartosz Ogonowski 2020-05-02 21:24:44 +02:00
parent dfa4304d9c
commit 5df01c9b41
3 changed files with 2004 additions and 2154 deletions


@@ -53,7 +53,7 @@ def read_words(input_path):
return vocabulary
def train(vocabulary,input_train,expected_train):
-learning_rate=0.0001
+learning_rate=0.000001
#learning_precision=0.0000001
words_vocabulary={}
with open(input_train,encoding='utf-8') as input_file, open(expected_train,encoding='utf-8') as expected_file:
@@ -65,7 +65,7 @@ def train(vocabulary,input_train,expected_train):
iteration=0
loss_sum=0.0
error=10.0
-max_iteration=len(vocabulary)
+max_iteration=len(vocabulary) + 1000
for i in vocabulary['count'].keys():
weights[i]=random.uniform(-0.01,0.01)
# delta>learning_precision and
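For context, the two hunks above tune a per-example stochastic gradient descent loop for linear regression over bag-of-words counts. A minimal sketch of the update being tuned, with hypothetical names (`weights` as a dict keyed by vocabulary index with the bias at `weights[0]`, `features` mapping index to word count) inferred from the surrounding code rather than taken from it:

```python
def sgd_step(weights, features, y, learning_rate=0.000001):
    # weights must hold an entry for key 0 (bias) and for every
    # feature index present; names here are hypothetical.
    # Prediction: y_hat = w0 + sum_i w_i * x_i over the words present.
    y_hat = weights[0] + sum(weights[i] * x for i, x in features.items())
    # Gradient of the squared error (y_hat - y)**2, scaled by the rate.
    delta = (y_hat - y) * learning_rate
    weights[0] -= delta                 # bias update
    for i, x in features.items():
        weights[i] -= x * delta         # per-feature update
    return (y_hat - y) ** 2             # squared loss, for monitoring
```

With raw word counts as features, gradient magnitudes grow with document length, so the hundredfold smaller rate presumably trades convergence speed for stability, and the extra 1000 iterations give the slower updates room to converge.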
@@ -121,153 +121,3 @@ main()
# from collections import defaultdict
# import math
# import pickle
# import re
#
# from pip._vendor.msgpack.fallback import xrange
# import random
#
# vocabulary = []
#
# file_to_save = open("test.tsv", "w", encoding='utf-8')
#
#
# def define_vocabulary(file_to_learn_new_words):
# word_counts = {'count': defaultdict(int)}
# with open(file_to_learn_new_words, encoding='utf-8') as in_file:
# for line in in_file:
# text, timestamp = line.rstrip('\n').split('\t')
# tokens = text.lower().split(' ')
# for token in tokens:
# word_counts['count'][token] += 1
# return word_counts
#
#
# def read_input(file_path):
# read_word_counts = {'count': defaultdict(int)}
# with open(file_path, encoding='utf-8') as in_file:
# for line in in_file:
# text, timestamp = line.rstrip('\n').split('\t')
# tokens = text.lower().split(' ')
# for token in tokens:
# read_word_counts['count'][token] += 1
# return read_word_counts
#
#
# def training(vocabulary, read_input, expected):
# file_to_write = open(expected, 'w+', encoding='utf8')
# file_to_write2 = open('out_y_hat.tsv', 'w+', encoding='utf8')
# learning_rate = 0.00001
# learning_precision = 0.0001
# weights = []
# iteration = 0
# loss_sum = 0.0
# ix = 1
# readed_words_values = []
# for word in read_input['count']:
# if word not in vocabulary['count']:
# read_input['count'][word] = 0
# readed_words_values.append(read_input['count'][word])
# for ix in range(0, len(vocabulary['count']) + 1):
# weights.append(random.uniform(-0.001, 0.001))
# # max_iteration=len(vocabulary['count'])+1
# max_iteration = 10000
# delta = 1
# while delta>learning_precision and iteration<max_iteration:
# d, y = random.choice(list(read_input['count'].items())) # d-word, y-value of
# y_hat = weights[0]
# i = 0
# for word_d in d:
# if word_d in vocabulary['count'].keys():
# # print(vocabulary['count'][d])
# y_hat += weights[vocabulary['count'][word_d]] * readed_words_values[i]
# i += 1
# print(f'Y_hat: {y_hat}')
# file_to_write2.write(f'Y_hat: {y_hat}\n')
# if y_hat > 0.5:
# file_to_write.write('1\n')
# else:
# file_to_write.write('0\n')
# i = 0
# delta = (y_hat - y) * learning_rate
# weights[0] = weights[0] - delta
# for word_w in d:
# if word_w in vocabulary['count'].keys():
# weights[vocabulary['count'][word_w]] -= readed_words_values[i] * delta
# i += 1
# # print(weights)
# #print(f'Y: {y}')
# loss = (y_hat - y) ** 2.0
# # loss=(y_hat-y)*(y_hat-y)
# loss_sum += loss
# if (iteration % 1000 == 0):
# #print(loss_sum / 1000)
# iteration = 0
# loss_sum = 0.0
# iteration += 1
# file_to_write.close
#
# def main():
# vocabulary = define_vocabulary('train/in.tsv')
# readed_words = read_input('dev-0/in.tsv')
# readed_words_test_a = read_input('test-A/in.tsv/in.tsv')
# training(vocabulary, readed_words, 'dev-0/out.tsv')
# training(vocabulary, readed_words_test_a, 'test-A/out.tsv')
#
#
# # def cost_function(y_hat,y):
# # loss=(y_hat-y)**2.0
# # loss_sum+=loss
# # if loss_counter%1000==0:
# # print(loss_sum/1000)
# # loss_counter=0
# # loss_sum=0.0
#
#
# # def main():
# # --------------- initialization ---------------------------------
# # vocabulary = define_vocabulary('train/in.tsv')
# # readed_words=read_input('dev-0/in.tsv')
# # i=1;
# # weights=[]
# # readed_words_values=[]
# # rangeVocabulary=len(vocabulary['count'])+1
# # for i in range(rangeVocabulary):
# # weights.append(random.randrange(0,len(vocabulary['count'])+1))
# # for word in readed_words['count']:
# # if word not in vocabulary['count']:
# # readed_words['count'][word]=0
# # readed_words_values.append(readed_words['count'][word])
# # precision=0.00001
# # learning_rate=0.00001
# # delta=1
# # max_iterations=len(vocabulary['count'])+1
# # current_iteration=0
# # rangeReadedValues=len(readed_words['count'])+1
# # --------------- prediction -------------------------------------
# # while (delta>precision and current_iteration<max_iterations):
# # y=random.choice(readed_words_values)
# # y_hat=weights[0]
# # i=0
# # j=0
# # for i in range(rangeReadedValues):
# # y_hat+=weights[i]*y
# # delta=abs(y_hat-y)*learning_rate
# # weights[0]=weights[0]-delta
# # for j in range(rangeVocabulary):
# # weights[j]-=y*delta
# # print(delta)
# # current_iteration+=1
#
#
# main()
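The 150-odd lines removed above are a commented-out earlier draft of the same trainer. For reference, a self-contained toy run of the full technique that draft implemented (vocabulary indexing, per-example SGD on squared loss, output thresholded at 0.5); the data and names here are hypothetical stand-ins, not the course files:

```python
import random
from collections import defaultdict

random.seed(0)

# Toy corpus standing in for train/in.tsv (hypothetical data).
texts = ["good great fun", "bad awful boring", "great fun", "awful bad"]
labels = [1.0, 0.0, 1.0, 0.0]

# Index every word, reserving index 0 for the bias term.
vocab = {w: i + 1 for i, w in enumerate({t for s in texts for t in s.split()})}
# Small random initial weights, as in the script above.
weights = defaultdict(lambda: random.uniform(-0.01, 0.01))

learning_rate = 0.01  # toy-scale rate; the real script uses 0.000001
for _ in range(2000):
    k = random.randrange(len(texts))
    counts = defaultdict(int)
    for w in texts[k].split():
        counts[vocab[w]] += 1
    y_hat = weights[0] + sum(weights[i] * c for i, c in counts.items())
    delta = (y_hat - labels[k]) * learning_rate
    weights[0] -= delta
    for i, c in counts.items():
        weights[i] -= c * delta

# Predict by thresholding the linear score at 0.5, as the script does.
for s in texts:
    counts = defaultdict(int)
    for w in s.split():
        counts[vocab[w]] += 1
    score = weights[0] + sum(weights[i] * c for i, c in counts.items())
    print(f'{s!r} -> {1 if score > 0.5 else 0}')
```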

File diff suppressed because it is too large

File diff suppressed because it is too large