Created vocabulary
This commit is contained in:
parent
65fbcd275f
commit
a546cd9958
22
code_regression.py
Normal file
22
code_regression.py
Normal file
@ -0,0 +1,22 @@
|
||||
from collections import defaultdict
|
||||
import math
|
||||
import pickle
|
||||
import re
|
||||
|
||||
# Shared token list; define_vocabulary() appends to it and returns it.
vocabulary = list()

# Binary-mode output handle. Opening with "wb" creates or truncates the file
# as soon as this module is loaded.
file_to_save = open("test.tsv", mode="wb")
|
||||
|
||||
def define_vocabulary(file_to_learn_new_words):
    """Add every whitespace-separated token of a text file to ``vocabulary``.

    The file is read as UTF-8, one line at a time. Tokens are added in reading
    order and duplicates are kept. The module-level ``vocabulary`` list is
    extended in place, so tokens added by earlier calls remain in it.

    Args:
        file_to_learn_new_words: Path of the text file to read.

    Returns:
        The module-level ``vocabulary`` list, including the tokens just added.
    """
    with open(file_to_learn_new_words, encoding='utf-8') as source:
        for text_line in source:
            # split() without arguments breaks on any run of whitespace, so
            # punctuation stays attached to the token it touches.
            vocabulary.extend(text_line.split())
    return vocabulary
|
||||
|
||||
def main():
    """Build the vocabulary from the training file and save it.

    Reads 'train/in.tsv' through define_vocabulary() and pickles the resulting
    token list into the module-level ``file_to_save`` handle, which is closed
    afterwards.
    """
    words = define_vocabulary('train/in.tsv')
    # Previously the list was merely assigned to a local name called
    # file_to_save, so nothing ever reached the opened file. Serialise it into
    # the binary handle instead, and close the handle so the data is flushed.
    try:
        pickle.dump(words, file_to_save)
    finally:
        file_to_save.close()


# Run only when executed as a script, not when the module is imported.
if __name__ == "__main__":
    main()
|
||||
|
Loading…
Reference in New Issue
Block a user