concordia-server/fast-aligner/sentence_lemmatizer.py
2019-02-04 15:27:56 +01:00

38 lines
875 B
Python
Executable File

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import json
import requests
import sys
# Number of input sentences collected before one lemmatization request is sent.
BUFFER_SIZE = 500
def lemmatize_sentences(language_code, sentences, address='http://localhost:8800'):
    """Lemmatize a batch of sentences through the lemmatizer HTTP service.

    A single 'lemmatizeAll' request is POSTed with the JSON-encoded payload
    as the request body, and the lemmatized sentences from the reply are
    returned as one newline-separated string.

    Args:
        language_code: Value sent to the service as 'languageCode'.
        sentences: List of sentence strings sent as 'sentences'.
        address: URL of the lemmatizer service. Defaults to the local
            instance the script has always used.

    Returns:
        The entries of the reply's 'lemmatizedSentences' list joined
        with '\\n' (no trailing newline).

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: If the request itself fails.
        ValueError: If the reply body is not valid JSON.
        KeyError: If the reply has no 'lemmatizedSentences' field.
    """
    data = {
        'operation': 'lemmatizeAll',
        'languageCode': language_code,
        'sentences': sentences,
    }
    response = requests.post(address, data=json.dumps(data))
    # Surface server-side failures as an HTTP error instead of letting them
    # show up later as an unrelated JSON parsing or missing-key error.
    response.raise_for_status()
    # Force UTF-8 decoding of the body regardless of the reply headers.
    response.encoding = 'utf-8'
    response_json = json.loads(response.text)
    return '\n'.join(response_json['lemmatizedSentences'])
# Script driver: the language code is the first command-line argument and
# the sentences are read from standard input, one per line.
language_code = sys.argv[1]
pending = []
for raw_line in sys.stdin:
    pending.append(raw_line.rstrip())
    if len(pending) != BUFFER_SIZE:
        continue
    # The batch is full: lemmatize it, print the result, and start a new one.
    print(lemmatize_sentences(language_code, pending))
    pending = []
# Flush whatever is left over after the input ends.
if pending:
    print(lemmatize_sentences(language_code, pending))