save test.py
This commit is contained in:
parent
f9ad7efe63
commit
51435aace9
26
test.py
Normal file
26
test.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import time
|
||||||
|
import nltk
|
||||||
|
from nltk.stem import WordNetLemmatizer
|
||||||
|
|
||||||
|
# nltk.download('omw-1.4')
|
||||||
|
# nltk.download('punkt')
|
||||||
|
# Make sure the WordNet data used by WordNetLemmatizer is available locally.
nltk.download('wordnet')

wl = WordNetLemmatizer()

# Time only the tokenize + lemmatize pass over the corpus.
start_time = time.time_ns()
filex = []
# Explicit encoding so decoding does not depend on the platform locale.
# NOTE(review): assumes the corpus is UTF-8 -- confirm.
with open('mt-summit-corpora/train/in.tsv', 'r', encoding='utf-8') as file:
    for line in file:
        # Lightweight progress indicator: overwrite the same console line
        # every 50000 processed lines.
        if len(filex) % 50000 == 0:
            print(len(filex), end='\r')
        line = nltk.word_tokenize(line)
        # Store each input line as its lemmatized tokens joined by spaces.
        filex.append(' '.join([wl.lemmatize(x) for x in line]))

stop = time.time_ns()
# time_ns() reports nanoseconds; convert the difference to seconds.
timex = (stop - start_time) / 1000000000
print(timex)

# Use a context manager so the output file is flushed and closed even if a
# write fails (the handle was previously left open).
with open('temp', 'w', encoding='utf-8') as f:
    for line in filex:
        f.write(line + '\n')
|
Loading…
Reference in New Issue
Block a user