23 lines
522 B
Python
23 lines
522 B
Python
from collections import defaultdict
|
|
import math
|
|
import pickle
|
|
import re
|
|
|
|
# Module-wide word list; define_vocabulary() appends to it in place.
vocabulary = list()

# Binary-mode handle on the output file; "wb" creates/truncates it at import.
file_to_save = open("test.tsv", mode="wb")
|
|
|
|
def define_vocabulary(file_to_learn_new_words):
    """Collect every whitespace-separated token of a text file.

    The file is read as UTF-8, one line at a time. Tokens are added to the
    module-level ``vocabulary`` list in file order (duplicates included),
    and that same list object is returned.

    Args:
        file_to_learn_new_words: Path of the text file to read.

    Returns:
        The module-level ``vocabulary`` list, extended with the file's tokens.
    """
    with open(file_to_learn_new_words, encoding='utf-8') as source:
        for text_line in source:
            # split() with no argument breaks on any run of whitespace, so
            # punctuation stays attached to its neighbouring word.
            vocabulary.extend(text_line.split())
    return vocabulary
|
|
|
|
def main():
    """Build the vocabulary from the training file and save it to disk.

    Reads ``train/in.tsv`` through ``define_vocabulary`` and pickles the
    resulting word list into the module-level ``file_to_save`` handle.
    """
    words = define_vocabulary('train/in.tsv')
    # Assigning the list to a local name called file_to_save would write
    # nothing; serialize it into the already-open binary file instead.
    pickle.dump(words, file_to_save)
    # The handle is owned by the module, so flush it rather than close it.
    file_to_save.flush()
|
|
|
|
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|