concordia-server/fast-aligner/sentence_lemmatizer.py

#!/usr/bin/python3
# -*- coding: utf-8 -*-

import json
import requests
import sys

BUFFER_SIZE = 500

def lemmatize_sentences(language_code, sentences,
                        address='http://localhost:8800', timeout=None):
    """Lemmatize a batch of sentences through the lemmatizer HTTP service.

    Sends a single ``lemmatizeAll`` request whose JSON body carries the
    language code and all sentences, then joins the returned lemmatized
    sentences into one newline-separated string.

    Args:
        language_code: Language code forwarded to the service as
            ``languageCode``.
        sentences: List of sentences to lemmatize in one request.
        address: URL of the lemmatizer service. Defaults to the local
            instance on port 8800.
        timeout: Optional timeout (seconds) passed to ``requests.post``.
            ``None`` waits indefinitely, which is what happens when no
            timeout is given at all.

    Returns:
        The entries of the response's ``lemmatizedSentences`` list joined
        with ``'\\n'`` (no trailing newline).

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        KeyError: If the response JSON has no ``lemmatizedSentences`` key.
    """
    payload = {
        'operation': 'lemmatizeAll',
        'languageCode': language_code,
        'sentences': sentences,
    }

    response = requests.post(address, data=json.dumps(payload),
                             timeout=timeout)
    # Surface HTTP-level failures directly instead of letting an error page
    # blow up later as an unrelated JSON decoding or KeyError.
    response.raise_for_status()
    # Decode the body as UTF-8 regardless of what the response headers say.
    response.encoding = 'utf-8'

    response_json = json.loads(response.text)
    return '\n'.join(response_json['lemmatizedSentences'])


# Script driver: the language code is the first command-line argument and the
# sentences are read one per line from standard input.
language_code = sys.argv[1]
sentences_buffer = []
for line in sys.stdin:
    # Trailing whitespace (including the newline) is dropped before sending.
    sentences_buffer.append(line.rstrip())
    if len(sentences_buffer) != BUFFER_SIZE:
        continue
    # A full batch is lemmatized in a single request, then a new batch starts.
    print(lemmatize_sentences(language_code, sentences_buffer))
    sentences_buffer = []

# Lemmatize whatever is left over once the input is exhausted.
if sentences_buffer:
    print(lemmatize_sentences(language_code, sentences_buffer))
lemmatize all 2019-02-04 15:27:56 +01:00			`#!/usr/bin/python3`
			`# -*- coding: utf-8 -*-`

			`import json`
			`import requests`
			`import sys`

			`BUFFER_SIZE = 500`

			`def lemmatize_sentences(language_code, sentences):`
			`data = {`
			`'operation': 'lemmatizeAll',`
			`'languageCode':language_code,`
			`'sentences':sentences`
			`}`

			`address = 'http://localhost:8800'`

			`response = requests.post(address, data = json.dumps(data))`
			`response.encoding = 'utf-8'`

			`response_json = json.loads(response.text)`
			`return '\n'.join(response_json['lemmatizedSentences'])`


			`language_code = sys.argv[1]`
			`sentences_buffer = []`
			`for line in sys.stdin:`
			`sentences_buffer.append(line.rstrip())`
			`if len(sentences_buffer) == BUFFER_SIZE:`
			`print(lemmatize_sentences(language_code,sentences_buffer))`
			`sentences_buffer = []`

			`if len(sentences_buffer) > 0:`
			`print(lemmatize_sentences(language_code,sentences_buffer))`