concordia-server/fast-aligner/collect_dict.py

20 lines
596 B
Python
Raw Normal View History

2019-02-04 15:27:56 +01:00
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import sys, os, bz2
# Default location of the dictionary collection, relative to the current
# working directory (unchanged from the original script).
DICTIONARIES_PATH = '../dictionaries'


def collect_dict(src_lang, trg_lang, weight,
                 dictionaries_path=DICTIONARIES_PATH, out=None):
    """Write weighted source-language dictionary entries to *out*.

    Every entry of ``dictionaries_path`` is treated as a dictionary
    directory.  A dictionary is used only when it contains both
    ``<src_lang>.bz2`` and ``<trg_lang>.bz2``; each line of the source file
    is then stripped of surrounding whitespace and written ``weight`` times.
    Only the source-language file is read -- the target-language file merely
    has to exist.

    Args:
        src_lang: Language code naming the file whose lines are emitted.
        trg_lang: Language code whose file must also be present.
        weight: Number of times each line is repeated; zero or a negative
            value produces no output.
        dictionaries_path: Directory holding one sub-directory per
            dictionary.
        out: Writable text stream; defaults to ``sys.stdout``.

    Raises:
        OSError: If ``dictionaries_path`` cannot be listed or a dictionary
            file cannot be read.
    """
    if out is None:
        out = sys.stdout

    # os.listdir() returns names in arbitrary order; sorting makes the
    # output reproducible from run to run and machine to machine.
    for dname in sorted(os.listdir(dictionaries_path)):
        src_path = os.path.join(dictionaries_path, dname, src_lang + '.bz2')
        trg_path = os.path.join(dictionaries_path, dname, trg_lang + '.bz2')

        # Skip dictionaries that do not cover both languages.
        if not (os.path.isfile(src_path) and os.path.isfile(trg_path)):
            continue

        # NOTE(review): text is decoded with the platform default encoding,
        # exactly as before -- confirm the dictionaries match it.
        with bz2.open(src_path, 'rt') as src_dict_file:
            for line in src_dict_file:
                # Strip once, then emit the entry `weight` times in one write.
                out.write((line.strip() + '\n') * weight)


def main(argv=None):
    """Command-line entry point: ``collect_dict.py SRC_LANG TRG_LANG WEIGHT``.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.  Arguments beyond the third are ignored.

    Raises:
        SystemExit: If fewer than three arguments are given or WEIGHT is
            not an integer.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    usage = 'usage: collect_dict.py SRC_LANG TRG_LANG WEIGHT'
    if len(args) < 3:
        sys.exit(usage)
    try:
        weight = int(args[2])
    except ValueError:
        sys.exit('%s\nWEIGHT must be an integer, got %r' % (usage, args[2]))
    collect_dict(args[0], args[1], weight)


if __name__ == '__main__':
    main()