Created vocabulary
This commit is contained in:
parent
65fbcd275f
commit
a546cd9958
22
code_regression.py
Normal file
22
code_regression.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
from collections import defaultdict
|
||||||
|
import math
|
||||||
|
import pickle
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Module-level word list; define_vocabulary() appends tokens to it and returns it.
vocabulary=[]
|
||||||
|
# Opens (and truncates) test.tsv for binary writing as soon as the module loads.
# NOTE(review): nothing in the visible code writes to or closes this handle -- confirm intended use.
file_to_save=open("test.tsv","wb")
|
||||||
|
|
||||||
|
def define_vocabulary(file_to_learn_new_words):
    """Collect every whitespace-separated token from a UTF-8 text file.

    Tokens are added, in reading order and including repeats, to the
    module-level ``vocabulary`` list, so successive calls keep growing
    the same list.

    Args:
        file_to_learn_new_words: Path of the text file to read.

    Returns:
        The module-level ``vocabulary`` list after the file's tokens
        have been appended.
    """
    with open(file_to_learn_new_words, encoding='utf-8') as source:
        for row in source:
            # A regex tokeniser was left disabled here by the author:
            #   re.findall(r"([a-zA-Z\-]+)", row)
            vocabulary.extend(row.split())
    return vocabulary
|
||||||
|
|
||||||
|
def main():
    """Build the vocabulary from ``train/in.tsv`` and pickle it to ``test.tsv``.

    The word list returned by ``define_vocabulary`` is serialised with
    ``pickle.dump`` into ``test.tsv``, opened in binary mode (the same
    path and mode as the module-level ``file_to_save`` handle).

    Returns:
        None.
    """
    words = define_vocabulary('train/in.tsv')
    # Previously the list was merely bound to a local name ``file_to_save``,
    # which shadowed the module-level file handle and wrote nothing to disk.
    # NOTE(review): pickle is assumed to be the intended format because the
    # file is opened in binary mode and ``pickle`` is imported -- confirm.
    with open("test.tsv", "wb") as output:
        pickle.dump(words, output)
|
||||||
|
|
||||||
|
# Script entry point.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing this module also runs main().
main()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user