save test.py

This commit is contained in:
Jakub Konieczny 2022-01-18 23:27:23 +00:00
parent f9ad7efe63
commit 51435aace9

26
test.py Normal file
View File

@@ -0,0 +1,26 @@
import time
import nltk
from nltk.stem import WordNetLemmatizer
# nltk.download('omw-1.4')
# nltk.download('punkt')
# Download the WordNet corpus used by the WordNetLemmatizer created below.
nltk.download('wordnet')
wl = WordNetLemmatizer()

# Time only the tokenise + lemmatise pass. A monotonic clock is used so the
# measurement cannot be skewed by wall-clock adjustments during a long run.
start_time = time.perf_counter_ns()
filex = []
# Explicit encoding so decoding does not depend on the platform locale.
# NOTE(review): assumes the corpus file is UTF-8 -- confirm.
with open('mt-summit-corpora/train/in.tsv', 'r', encoding='utf-8') as file:
    for line in file:
        # Progress indicator: rewrite the same console line every 50000 lines.
        if len(filex) % 50000 == 0:
            print(len(filex), end='\r')
        tokens = nltk.word_tokenize(line)
        # Store each input line as its lemmatised tokens joined by spaces.
        filex.append(' '.join([wl.lemmatize(x) for x in tokens]))
stop = time.perf_counter_ns()
timex = (stop - start_time) / 1000000000  # nanoseconds -> seconds
print(timex)

# Write one lemmatised line per input line. The context manager guarantees
# the output is flushed and the handle closed, even if a write fails.
with open('temp', 'w', encoding='utf-8') as f:
    f.writelines(line + '\n' for line in filex)