"""Tokenize and lemmatize every line of the training corpus.

Reads 'mt-summit-corpora/train/in.tsv' line by line, tokenizes each line
with NLTK, lemmatizes every token with the WordNet lemmatizer, prints the
elapsed wall-clock time in seconds, and writes the lemmatized lines (tokens
joined by single spaces, one line per input line) to the file 'temp'.
"""
import time

import nltk
from nltk.stem import WordNetLemmatizer

# Optional resources, left disabled as in the original script; enable them
# if NLTK reports a missing resource at run time.
# nltk.download('omw-1.4')
# nltk.download('punkt')
nltk.download('wordnet')

wl = WordNetLemmatizer()

start_time = time.time_ns()
filex = []
# NOTE(review): both files use the platform default text encoding, exactly as
# before -- confirm the corpus encoding and pass encoding=... if it differs.
with open('mt-summit-corpora/train/in.tsv', 'r') as file:
    for line in file:
        # Progress indicator: number of lines processed so far, rewritten in
        # place on the same terminal line every 50000 lines.
        if len(filex) % 50000 == 0:
            print(len(filex), end='\r')
        tokens = nltk.word_tokenize(line)
        filex.append(' '.join([wl.lemmatize(token) for token in tokens]))

stop = time.time_ns()
# Nanoseconds -> seconds.
timex = (stop - start_time) / 1_000_000_000
print(timex)

# Write inside a context manager so the output file is flushed and closed
# even if a write fails (the original never closed the handle).
with open('temp', 'w') as f:
    f.writelines(lemmatized + '\n' for lemmatized in filex)