concordia-preprocessor/dictionaries/generate_dict.py

18 lines
425 B
Python
Executable File

#!/usr/bin/python3
import pickle, lzma
# Build a lookup table that maps each lower-cased surface form to its
# lower-cased lemma, read from the xz-compressed, tab-separated dump
# (columns used: 0 = form, 1 = lemma, 2 = tag), then pickle the table.
d = {}

# Decode explicitly as UTF-8 rather than with the locale's default encoding,
# which lzma.open() falls back to in text mode when no encoding is given.
# NOTE(review): assumes the PoliMorf dump is UTF-8 encoded -- confirm.
with lzma.open('polimorf-20190617.tab.xz', mode='rt', encoding='utf-8') as in_file:
    for line in in_file:
        if not line.rstrip('\r\n'):
            # An empty line has no columns; indexing it would raise IndexError.
            continue
        fields = line.split('\t')
        form = fields[0].lower()
        lemma = fields[1].lower()
        tag = fields[2].lower()
        # Entries whose tag starts with 'brev' are left out of the table.
        if not tag.startswith('brev'):
            # A form that occurs on several lines keeps the lemma of the
            # last such line, because later assignments overwrite earlier ones.
            d[form] = lemma

# The context manager closes dict.pickle even if pickle.dump() raises.
with open("dict.pickle", "wb") as pickle_out:
    pickle.dump(d, pickle_out)