#!/usr/bin/python3
"""Check that the lemmatizer service preserves the token count of corpus lines.

Usage: SCRIPT CORPUS_FILE LANGUAGE_CODE

Every line of CORPUS_FILE is sent to the lemmatizer service. Whenever the
whitespace-separated token count of the lemmatized sentence differs from that
of the original line, both are printed to stdout. A progress message is
written to stderr every 1000 lines.
"""

import json
import sys
import unittest

import requests

# Endpoint of the lemmatizer service queried for every corpus line.
LEMMATIZER_ADDRESS = 'http://localhost:8800'

# Seconds to wait for the service before giving up on a request; without a
# timeout a stalled server would block the whole run indefinitely.
REQUEST_TIMEOUT_SECONDS = 60

# How often (in processed lines) a progress message is written to stderr.
PROGRESS_INTERVAL = 1000


def lemmatizeSentence(lang, sentence, address=LEMMATIZER_ADDRESS,
                      timeout=REQUEST_TIMEOUT_SECONDS):
    """Return the lemmatized form of *sentence* as reported by the service.

    Args:
        lang: Language code sent as the ``languageCode`` field of the request.
        sentence: Text sent as the ``sentence`` field of the request.
        address: URL the request is POSTed to.
        timeout: Seconds to wait for the service (passed to ``requests.post``).

    Returns:
        The value of the ``lemmatizedSentence`` key of the JSON response.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        KeyError: If the JSON response has no ``lemmatizedSentence`` key.
    """
    data = {
        'operation': 'lemmatize',
        'languageCode': lang,
        'sentence': sentence,
    }
    # The payload is sent as a pre-serialized JSON string in the request body.
    response = requests.post(address, data=json.dumps(data), timeout=timeout)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError later.
    response.raise_for_status()
    return response.json()['lemmatizedSentence']


def main(argv):
    """Run the token-count check for the corpus and language given in *argv*.

    Args:
        argv: Command-line argument list; ``argv[1]`` is the corpus file path
            and ``argv[2]`` is the language code.
    """
    if len(argv) < 3:
        sys.exit("Usage: %s CORPUS_FILE LANGUAGE_CODE" % argv[0])

    corpus_file_path = argv[1]
    lang = argv[2]

    with open(corpus_file_path) as corpus_file:
        for line_count, line in enumerate(corpus_file, start=1):
            orig = line.rstrip()
            lemmatized = lemmatizeSentence(lang, orig)
            # Lemmatization is expected to keep tokens one-to-one; report
            # every sentence where the whitespace-split token counts differ.
            if len(orig.split()) != len(lemmatized.split()):
                print("Different length in:")
                print(orig)
                print(lemmatized)
            if line_count % PROGRESS_INTERVAL == 0:
                sys.stderr.write("Done %d lines\n" % line_count)


if __name__ == "__main__":
    main(sys.argv)