"""Collect every whitespace-separated token of a text file and save the list.

Run as a script, this reads ``train/in.tsv`` and writes the collected
vocabulary to ``test.tsv``.
"""
from collections import defaultdict
import math
import pickle
import re

# Default locations used when the module is executed as a script.
TRAIN_FILE = 'train/in.tsv'
OUTPUT_FILE = "test.tsv"

# Module-level accumulator. define_vocabulary() appends to this list in
# place, so tokens from successive calls pile up here (duplicates included).
vocabulary = []


def define_vocabulary(file_to_learn_new_words: str) -> list:
    """Append every token found in *file_to_learn_new_words* to ``vocabulary``.

    The file is read as UTF-8 and each line is split on runs of whitespace
    (``str.split()`` with no arguments). Tokens are kept in file order and
    are NOT de-duplicated.

    Args:
        file_to_learn_new_words: Path of the text file to read.

    Returns:
        The module-level ``vocabulary`` list (the same object, mutated in
        place), now also containing the tokens of this file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_to_learn_new_words, encoding='utf-8') as file:
        for line in file:
            # Plain whitespace tokenisation. A letters-and-hyphens-only
            # alternative, re.findall(r"([a-zA-Z\-]+)", line), was left
            # disabled in the original and stays disabled here.
            vocabulary.extend(line.split())
    return vocabulary


def main(input_path: str = TRAIN_FILE, output_path: str = OUTPUT_FILE) -> None:
    """Build the vocabulary from *input_path* and save it to *output_path*.

    The previous version opened the output file at import time, never closed
    it, and then merely rebound a local name to the vocabulary, so nothing
    was ever written. The file is now opened only after the input has been
    read successfully and is closed deterministically by the ``with`` block.

    Args:
        input_path: Text file to learn words from (default ``train/in.tsv``).
        output_path: Destination file (default ``test.tsv``); overwritten.

    Raises:
        OSError: If either file cannot be opened.
    """
    words = define_vocabulary(input_path)
    # NOTE(review): the pickle format is inferred from the original's binary
    # ("wb") open mode together with its otherwise-unused ``pickle`` import;
    # confirm with whatever consumes this file.
    with open(output_path, "wb") as file_to_save:
        pickle.dump(words, file_to_save)


# Guarded so that importing this module performs no file I/O.
if __name__ == "__main__":
    main()