save test.py
This commit is contained in:
parent
f9ad7efe63
commit
51435aace9
26
test.py
Normal file
26
test.py
Normal file
@ -0,0 +1,26 @@
|
||||
import time
|
||||
import nltk
|
||||
from nltk.stem import WordNetLemmatizer
|
||||
|
||||
# Fetch the WordNet package through NLTK's downloader before building the
# WordNetLemmatizer below. Two further downloads are kept here, disabled:
#   nltk.download('omw-1.4')
#   nltk.download('punkt')
nltk.download("wordnet")

# Single lemmatizer instance, created once and reused for every token
# processed later in the script.
wl = WordNetLemmatizer()
|
||||
|
||||
# Lemmatize every line of the training corpus, report how long that took in
# seconds, and save the result to the file 'temp' (one lemmatized line per
# output line).

# perf_counter_ns() is monotonic, so the measured duration cannot be skewed
# by system clock adjustments the way a wall-clock time_ns() difference can.
start_time = time.perf_counter_ns()
filex = []
# The encoding is pinned so the corpus is decoded identically on every
# platform instead of depending on the locale default.
# NOTE(review): assumes in.tsv is UTF-8 encoded -- confirm against the corpus.
with open('mt-summit-corpora/train/in.tsv', 'r', encoding='utf-8') as file:
    for line in file:
        # Lightweight progress indicator: the '\r' terminator makes each
        # report (every 50,000 processed lines) overwrite the previous one.
        if len(filex) % 50000 == 0:
            print(len(filex), end='\r')
        tokens = nltk.word_tokenize(line)
        # Tokens are re-joined with single spaces, one string per input line.
        filex.append(' '.join([wl.lemmatize(token) for token in tokens]))

stop = time.perf_counter_ns()
timex = (stop - start_time) / 1000000000  # nanoseconds -> seconds
print(timex)

# The context manager guarantees the output is flushed and closed even if a
# write fails; previously the handle was opened and never closed. The same
# encoding as the input is used so text round-trips unchanged.
with open('temp', 'w', encoding='utf-8') as f:
    f.writelines(f'{text}\n' for text in filex)
|
Loading…
Reference in New Issue
Block a user