from collections import defaultdict
import math
import pickle
import random
import re

# NOTE(review): the original did `from pip._vendor.msgpack.fallback import xrange`.
# pip._vendor is a private namespace and breaks across pip versions; on Python 3
# the builtin range is the equivalent, so alias it to keep the name available
# for the (truncated) remainder of the file.
xrange = range

# Module-level state.
vocabulary = []
# NOTE(review): opened at import time and never closed in the visible code —
# presumably flushed/closed at interpreter exit or elsewhere; confirm.
file_to_save = open("test.tsv", "w", encoding='utf-8')


def define_vocabulary(file_to_learn_new_words):
    """Tally word occurrences from a TSV file of `text<TAB>timestamp` lines.

    Each line's text column is lower-cased and split on single spaces; every
    token is counted.

    Args:
        file_to_learn_new_words: path to a UTF-8 TSV file whose first column
            is text and whose second column is a timestamp (ignored).

    Returns:
        dict with one key 'count' mapping to a defaultdict(int) of
        token -> occurrence count (missing tokens read as 0).
    """
    word_counts = {'count': defaultdict(int)}
    with open(file_to_learn_new_words, encoding='utf-8') as in_file:
        for line in in_file:
            text, timestamp = line.rstrip('\n').split('\t')
            for token in text.lower().split(' '):
                word_counts['count'][token] += 1
    return word_counts


def read_input(file_path):
    """Tally word occurrences of an input TSV file.

    Same contract as define_vocabulary(); the original body was a verbatim
    copy of it, so this now delegates instead of duplicating the loop.
    """
    return define_vocabulary(file_path)


def training(vocabulary, read_input, expected):
    """Fit linear weights to observed word counts by gradient descent.

    Args:
        vocabulary: {'count': defaultdict(int)} from define_vocabulary().
        read_input: {'count': defaultdict(int)} for the document being fit.
            NOTE(review): mutated in place — out-of-vocabulary words are
            zeroed in the caller's dict.
        expected: the fitting target (unused in the surviving fragment;
            presumably the regression target — confirm once the lost loop
            body is restored).
    """
    learning_rate = 0.00001
    learning_precision = 0.0000001
    weights = []
    iteration = 0
    loss_sum = 0.0
    # Feature vector: one count per word of the input document, with words
    # absent from the vocabulary forced to 0 (mutates read_input in place).
    readed_words_values = []
    for word in read_input['count']:
        if word not in vocabulary['count']:
            read_input['count'][word] = 0
        readed_words_values.append(read_input['count'][word])
    # One weight per vocabulary word plus a bias term, small random init.
    for _ in range(len(vocabulary['count']) + 1):
        weights.append(random.uniform(-0.001, 0.001))
    # max_iteration = len(vocabulary['count']) + 1
    max_iteration = 1000
    delta = 1
    # NOTE(review): everything past this condition was lost — the mangled
    # source reads "...and iterationprecision and current_iteration", i.e.
    # '<' characters were stripped (HTML-unsafe pipeline) and the file was
    # truncated. The condition is reconstructed as the obvious
    # `iteration < max_iteration`; the descent update step must be restored
    # from version control. Raising here instead of silently looping forever.
    while delta > learning_precision and iteration < max_iteration:
        raise NotImplementedError(
            "training-loop body was truncated in the source; restore it")