This commit is contained in:
Rafał Jaworski 2019-06-13 12:54:33 +02:00
parent 193ad4b01d
commit 946e77cd09

View File

@@ -37,6 +37,8 @@ lem_output_name = sys.argv[4]
sentences_buffer = [] sentences_buffer = []
with open(file_name) as in_file, open(norm_output_name, 'w') as out_norm, open(lem_output_name, 'w') as out_lem: with open(file_name) as in_file, open(norm_output_name, 'w') as out_norm, open(lem_output_name, 'w') as out_lem:
for line in in_file: for line in in_file:
if language_code == 'en':
print('sending: '+line.rstrip())
sentences_buffer.append(line.rstrip()) sentences_buffer.append(line.rstrip())
if len(sentences_buffer) == BUFFER_SIZE: if len(sentences_buffer) == BUFFER_SIZE:
write_result(lemmatize_sentences(language_code,sentences_buffer), out_norm, out_lem) write_result(lemmatize_sentences(language_code,sentences_buffer), out_norm, out_lem)