"""Add a lemmatized copy of the source terms to a tab-separated glossary.

Usage: pass the glossary path as the first command-line argument. The path is
joined onto the user's home directory (an absolute path is used as-is, per
``os.path.join``). The input is read as two header-less, tab-separated
columns (``source``, ``result``); the output is written next to it with a
``.lemmatized`` suffix and the columns ``source``, ``source_lem``, ``result``.
"""
import os
import sys

import nltk
import pandas as pd
from nltk.stem import WordNetLemmatizer

# 'wordnet' backs WordNetLemmatizer; nltk.word_tokenize additionally needs the
# Punkt tokenizer models ('punkt' on older NLTK releases, 'punkt_tab' on newer
# ones). Requesting a resource the installed NLTK does not know about only
# reports an error, so asking for both names is safe.
for _resource in ('wordnet', 'punkt', 'punkt_tab'):
    nltk.download(_resource)


def read_arguments():
    """Return ``sys.argv`` after checking that a glossary path was supplied.

    Returns:
        list[str]: the unmodified argument vector; index 0 is the script
        name and index 1 is the glossary path.

    Exits:
        Prints an error message and exits with status 1 when no argument
        follows the script name.
    """
    # Reading sys.argv never raises, so a missing argument has to be
    # detected explicitly rather than with try/except.
    if len(sys.argv) < 2:
        print("ERROR: Wrong argument.")
        sys.exit(1)
    return sys.argv


def _lemmatize_phrase(lemmatizer, phrase):
    """Tokenize *phrase* and return its lemmatized tokens joined by spaces."""
    tokens = nltk.word_tokenize(phrase)
    return ' '.join(lemmatizer.lemmatize(token) for token in tokens)


wl = WordNetLemmatizer()

# argv[0] is the script itself; the glossary path is the first real argument.
glossary_path = os.path.join(os.path.expanduser('~'), read_arguments()[1])

# dtype=str + keep_default_na=False keep every cell as literal text, so terms
# such as "NA", "null" or "2020" are not converted to NaN/numbers (which would
# break tokenization and silently blank them in the output).
glossary = pd.read_csv(
    glossary_path,
    sep='\t',
    header=None,
    names=['source', 'result'],
    dtype=str,
    keep_default_na=False,
)

source_lemmatized = [_lemmatize_phrase(wl, term) for term in glossary['source']]
glossary['source_lem'] = source_lemmatized
glossary = glossary[['source', 'source_lem', 'result']]

# The index is intentionally not written; only the three columns are emitted.
glossary.to_csv(glossary_path + '.lemmatized', sep='\t', index=False)