Created vocabulary

This commit is contained in:
Bartusiak 2020-04-02 18:29:06 +02:00
parent 65fbcd275f
commit a546cd9958

22
code_regression.py Normal file
View File

@ -0,0 +1,22 @@
from collections import defaultdict
import math
import pickle
import re
# Shared word list; define_vocabulary() appends to it and returns it.
vocabulary = list()
# Binary output handle, opened (truncating any existing file) as soon as
# the module is loaded.
file_to_save = open("test.tsv", mode="wb")
def define_vocabulary(file_to_learn_new_words):
    """Collect every whitespace-separated token of a UTF-8 text file.

    Tokens are appended, in file order and with duplicates kept, to the
    module-level ``vocabulary`` list, so repeated calls keep growing it.

    Args:
        file_to_learn_new_words: Path of the text file to read.

    Returns:
        The module-level ``vocabulary`` list itself (not a copy).
    """
    with open(file_to_learn_new_words, encoding='utf-8') as source:
        for text_line in source:
            # Tokens are split on whitespace only, so punctuation stays
            # attached to the neighbouring word.
            vocabulary.extend(text_line.split())
    return vocabulary
def main():
    """Build the vocabulary from ``train/in.tsv`` and persist it.

    The word list returned by ``define_vocabulary`` is serialized with
    ``pickle`` into the module-level ``file_to_save`` handle.

    NOTE(review): the previous code only rebound a local ``file_to_save``
    name to the list, so nothing was ever written to the opened file.
    pickle is assumed to be the intended format because this file imports
    ``pickle`` and opens the handle in binary mode -- confirm.
    """
    words = define_vocabulary('train/in.tsv')
    pickle.dump(words, file_to_save)
    # The handle is module-level and is not closed here, so push the
    # buffered bytes out explicitly.
    file_to_save.flush()
# Run the pipeline only when executed as a script, so importing this
# module does not trigger the training-file read.
if __name__ == "__main__":
    main()