diff --git a/tests/addFastAlignedTM.py b/tests/addFastAlignedTM.py
index 2459925..a29f254 100755
--- a/tests/addFastAlignedTM.py
+++ b/tests/addFastAlignedTM.py
@@ -1,12 +1,11 @@
-#!/usr/bin/python3
+#!/usr/bin/python
 # -*- coding: utf-8 -*-
 
 import json
-import requests
+import urllib2
 import sys
 import host
 import time
-import codecs
 
 BUFFER_SIZE = 500
 LEAVE_OUT = 1 # that does not leave out anything
@@ -14,8 +13,6 @@ LEAVE_OUT = 1 # that does not leave out anything
 address = 'http://'+host.concordia_host
 if len(host.concordia_port) > 0:
     address += ':'+host.concordia_port
-headers = {"content-type" : "application/json;charset=UTF-8" }
-
 
 
 def file_len(fname):
@@ -25,7 +22,10 @@ def file_len(fname):
         return i + 1
 
 def add_examples(examplesData):
-    response = requests.post(address, data=json.dumps(examplesData, ensure_ascii=False).encode('utf-8'), headers=headers).json()
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData), timeout = 3600).read())
+    print(response)
     if response['status'] == 'error':
         raise Exception(response['message'])
 
@@ -62,10 +62,12 @@ data = {
     'tmLemmatized':True
 }
 
-response = requests.post(address, json=data, headers=headers).json()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data), timeout = 3600).read())
 print(response)
 tmId = int(response['newTmId'])
-print("Added new tm: %d" % tmId)
+print "Added new tm: %d" % tmId
 
 data = {
     'operation': 'addSentences',
@@ -74,7 +76,7 @@ data = {
 examples = []
 start = time.time()
 
-with codecs.open(sourceFile, "r", "utf-8", errors='replace') as source_file, codecs.open(lemmatizedSourceFile, "r", "utf-8", errors='replace') as lemmatized_source_file, codecs.open(targetFile, "r", "utf-8", errors='replace') as target_file, open(alignmentsFile) as alignments_file, open(sourceIdsFile) as source_ids_file:
+with open(sourceFile) as source_file, open(lemmatizedSourceFile) as lemmatized_source_file, open(targetFile) as target_file, open(alignmentsFile) as alignments_file, open(sourceIdsFile) as source_ids_file:
     addedCount = 0
     for lineNumber in range(sourceFileLength):
         if lineNumber % LEAVE_OUT == 0:
@@ -90,7 +92,7 @@ with codecs.open(sourceFile, "r", "utf-8", errors='replace') as source_file, cod
                 data['examples'] = examples
                 add_examples(data)
                 mark = time.time()
-                print("Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % (addedCount, totalExamples, mark-start, addedCount/(mark-start)))
+                print "Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % (addedCount, totalExamples, mark-start, addedCount/(mark-start))
                 examples = []
 
 
@@ -99,15 +101,17 @@
 if len(examples) > 0:
     add_examples(data)
 end = time.time()
-print("Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (addedCount, end-start, addedCount/(end-start)))
+print "Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (addedCount, end-start, addedCount/(end-start))
 
-print("Generating index...")
+print "Generating index..."
 start = time.time()
 
 data = {
     'operation': 'refreshIndex',
     'tmId' : tmId
 }
-requests.post(address, json=data, headers=headers)
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data), timeout = 3600).read()
 end = time.time()
-print("Index regeneration complete. The operation took %.4f s" % (end - start))
+print "Index regeneration complete. The operation took %.4f s" % (end - start)