def read_input(file_path):
    """Count token occurrences in the text column of a tab-separated file.

    Each line is read as ``text<TAB>timestamp``. Only the part before the
    first tab is used: it is lower-cased and split on single spaces, and
    every resulting token is tallied.

    Args:
        file_path: Path of the UTF-8 encoded input file.

    Returns:
        A dict with the single key ``'count'`` whose value is a
        ``defaultdict(int)`` mapping each token to its number of occurrences.
    """
    word_counts = {'count': defaultdict(int)}
    counts = word_counts['count']
    with open(file_path, encoding='utf-8') as in_file:
        for raw_line in in_file:
            line = raw_line.rstrip('\n')
            if not line:
                # Blank lines carry no text; a strict two-field unpack
                # would raise ValueError on them.
                continue
            # partition() tolerates a missing timestamp or extra tab-separated
            # fields; the timestamp itself is not needed for counting.
            text, _, _ = line.partition('\t')
            for token in text.lower().split(' '):
                counts[token] += 1
    return word_counts