From 72f56d6b424c4fb846f2af2244e00f7a10f661af Mon Sep 17 00:00:00 2001 From: Bartusiak Date: Sun, 5 Apr 2020 20:10:04 +0200 Subject: [PATCH] Regression --- code_regression.py | 129 +++++++++++++++++++++++++++++++++------------ 1 file changed, 95 insertions(+), 34 deletions(-) diff --git a/code_regression.py b/code_regression.py index 874e0aa..017da6b 100644 --- a/code_regression.py +++ b/code_regression.py @@ -7,23 +7,6 @@ from pip._vendor.msgpack.fallback import xrange import random vocabulary=[] -#word_to_index_mapping=[] -#index_to_word_mapping=[] - -#file_to_save=open("test.tsv","w",encoding='utf-8') -#def define_vocabulary(file_to_learn_new_words,expected_path): -# word_counts = {'paranormal': defaultdict(int), 'skeptic': defaultdict(int)} -# with open(file_to_learn_new_words, encoding='utf-8') as in_file, open(expected_path, encoding='utf-8') as expected_file: -# for line, exp in zip(in_file, expected_file): -# class_ = exp.rstrip('\n').replace(' ', '') -# text, timestamp = line.rstrip('\n').split('\t') -# tokens = text.lower().split(' ') -# for token in tokens: -# if class_ == 'P': -# word_counts['paranormal'][token] += 1 -# elif class_ == 'S': -# word_counts['skeptic'][token] += 1 -# return word_counts file_to_save=open("test.tsv","w",encoding='utf-8') def define_vocabulary(file_to_learn_new_words): @@ -37,31 +20,109 @@ def define_vocabulary(file_to_learn_new_words): return word_counts def read_input(file_path): - word_counts={'count': defaultdict(int)} + read_word_counts={'count': defaultdict(int)} with open(file_path, encoding='utf-8') as in_file: for line in in_file: text, timestamp = line.rstrip('\n').split('\t') tokens = text.lower().split(' ') for token in tokens: - word_counts['count'][token]+=1 - return word_counts + read_word_counts['count'][token]+=1 + return read_word_counts -def main(): - # --------------- initialization --------------------------------- - vocabulary = define_vocabulary('train/in.tsv') - i=1; +def training(vocabulary,read_input,expected): + learning_rate=0.00001 + learning_precision=0.0000001 weights=[] - testFuckingPython=len(vocabulary['count'])+1 - for i in range(testFuckingPython): - weights.append(random.randrange(0,len(vocabulary['count'])+1)) - precision=0.00001 - learning_rate=0.001 - prev_step_size=1 - max_iterations=len(vocabulary['count']) - current_iteration=0 - readed_words=read_input("train/in.tsv") + iteration=0 + loss_sum=0.0 + ix=1 + readed_words_values = [] + for word in read_input['count']: + if word not in vocabulary['count']: + read_input['count'][word]=0 + readed_words_values.append(read_input['count'][word]) + for ix in range(0,len(vocabulary['count'])+1): + weights.append(random.uniform(-0.001,0.001)) + #max_iteration=len(vocabulary['count'])+1 + max_iteration=1000 + delta=1 + while (delta>learning_precision and iterationprecision and current_iterationprecision and current_iteration