diff --git a/concordia-server/Dockerfile b/concordia-server/Dockerfile index 9601ce0..d8b63ce 100644 --- a/concordia-server/Dockerfile +++ b/concordia-server/Dockerfile @@ -1,7 +1,14 @@ FROM ubuntu:16.04 -RUN apt-get update && apt-get install -y git libfcgi-dev libpq-dev python3-psycopg2 nginx php apache2 libapache2-mod-php spawn-fcgi cmake g++ libboost-dev libboost-serialization-dev libboost-test-dev libboost-filesystem-dev libboost-system-dev libboost-program-options-dev libboost-iostreams-dev libboost-regex-dev libboost-locale-dev liblog4cpp5-dev libconfig++-dev libconfig-dev libpcre3-dev +# Set the locale +RUN apt-get update && apt-get install -y locales +RUN sed -i -e 's/# pl_PL.UTF-8 UTF-8/pl_PL.UTF-8 UTF-8/' /etc/locale.gen && locale-gen +ENV LANG pl_PL.UTF-8 +ENV LANGUAGE pl_PL:pl +ENV LC_ALL pl_PL.UTF-8 + +RUN apt-get update && apt-get install -y git libfcgi-dev libpq-dev python3-psycopg2 nginx php apache2 libapache2-mod-php spawn-fcgi cmake g++ libboost-dev libboost-serialization-dev libboost-test-dev libboost-filesystem-dev libboost-system-dev libboost-program-options-dev libboost-iostreams-dev libboost-regex-dev libboost-locale-dev liblog4cpp5-dev libconfig++-dev libconfig-dev libpcre3-dev RUN git clone https://git.code.sf.net/p/tmconcordia/code RUN cd code && mkdir build && cd build && ../cmake.sh && make && make install && ldconfig RUN git clone https://git.wmi.amu.edu.pl/rjawor/concordia-server.git diff --git a/docker-compose.yml b/docker-compose.yml index 532e64a..92aeecb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,6 +17,7 @@ services: concordia-server: build: ./concordia-server container_name: concordia-server + restart: always ports: - "10001:8800" depends_on: diff --git a/lemmagen/Dockerfile b/lemmagen/Dockerfile index 83bad8b..2ea81a2 100644 --- a/lemmagen/Dockerfile +++ b/lemmagen/Dockerfile @@ -1,6 +1,13 @@ FROM ubuntu:16.04 - RUN apt-get update && apt-get -y install git mono-runtime +# Set the locale +RUN apt-get update && apt-get install 
-y locales +RUN sed -i -e 's/# pl_PL.UTF-8 UTF-8/pl_PL.UTF-8 UTF-8/' /etc/locale.gen && locale-gen +ENV LANG pl_PL.UTF-8 +ENV LANGUAGE pl_PL:pl +ENV LC_ALL pl_PL.UTF-8 + + RUN apt-get -y install git mono-complete RUN git clone https://git.wmi.amu.edu.pl/rjawor/lemmagen-wrapper.git CMD mono /lemmagen-wrapper/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe \ No newline at end of file diff --git a/tests/.gitignore b/tests/.gitignore index 6842736..b6cea80 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -1,2 +1,3 @@ +__pycache__ host.py host.pyc diff --git a/tests/addFastAlignedTM.py b/tests/addFastAlignedTM.py index 0620610..2459925 100755 --- a/tests/addFastAlignedTM.py +++ b/tests/addFastAlignedTM.py @@ -1,8 +1,8 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: utf-8 -*- import json -import urllib2 +import requests import sys import host import time @@ -14,6 +14,8 @@ LEAVE_OUT = 1 # that does not leave out anything address = 'http://'+host.concordia_host if len(host.concordia_port) > 0: address += ':'+host.concordia_port +headers = {"content-type" : "application/json;charset=UTF-8" } + def file_len(fname): @@ -23,11 +25,7 @@ def file_len(fname): return i + 1 def add_examples(examplesData): - req = urllib2.Request(address) - req.add_header('Content-Type', 'application/json; charset=utf-8') - encodedData = json.dumps(examplesData, ensure_ascii=False).encode('utf-8', 'ignore') - response = json.loads(urllib2.urlopen(req, encodedData, timeout = 3600).read()) - print(response) + response = requests.post(address, data=json.dumps(examplesData, ensure_ascii=False).encode('utf-8'), headers=headers, timeout=3600).json() if response['status'] == 'error': raise Exception(response['message']) @@ -64,9 +62,7 @@ data = { 'tmLemmatized':True } -req = urllib2.Request(address) -req.add_header('Content-Type', 'application/json') -response = json.loads(urllib2.urlopen(req, json.dumps(data), timeout = 3600).read()) +response = requests.post(address, json=data, headers=headers, timeout=3600).json() 
print(response) tmId = int(response['newTmId']) print("Added new tm: %d" % tmId) @@ -78,19 +74,16 @@ data = { examples = [] start = time.time() -with codecs.open(sourceFile, "r", "utf-8") as source_file, codecs.open(lemmatizedSourceFile, "r", "utf-8") as lemmatized_source_file, codecs.open(targetFile, "r", "utf-8") as target_file, open(alignmentsFile) as alignments_file, open(sourceIdsFile) as source_ids_file: +with codecs.open(sourceFile, "r", "utf-8", errors='replace') as source_file, codecs.open(lemmatizedSourceFile, "r", "utf-8", errors='replace') as lemmatized_source_file, codecs.open(targetFile, "r", "utf-8", errors='replace') as target_file, open(alignmentsFile) as alignments_file, open(sourceIdsFile) as source_ids_file: addedCount = 0 for lineNumber in range(sourceFileLength): if lineNumber % LEAVE_OUT == 0: - sourceSentence = source_file.readline().strip().encode('utf-8') - lemmatizedSourceSentence = lemmatized_source_file.readline().strip().encode('utf-8') - targetSentence = target_file.readline().strip().encode('utf-8') + sourceSentence = source_file.readline().strip() + lemmatizedSourceSentence = lemmatized_source_file.readline().strip() + targetSentence = target_file.readline().strip() alignment = json.loads(alignments_file.readline().strip()) sourceId = int(source_ids_file.readline().strip()) - #print(sourceSentence) - #print(lemmatizedSourceSentence) - #print(targetSentence) examples.append([sourceSentence, lemmatizedSourceSentence, targetSentence, alignment, sourceId]) addedCount += 1 if len(examples) >= BUFFER_SIZE: @@ -114,9 +107,7 @@ data = { 'operation': 'refreshIndex', 'tmId' : tmId } -req = urllib2.Request(address) -req.add_header('Content-Type', 'application/json') -urllib2.urlopen(req, json.dumps(data), timeout = 3600).read() +requests.post(address, json=data, headers=headers, timeout=3600).raise_for_status() end = time.time() print("Index regeneration complete. 
The operation took %.4f s" % (end - start)) diff --git a/tests/concordiaSearch.py b/tests/concordiaSearch.py index 9880d94..63cabc9 100755 --- a/tests/concordiaSearch.py +++ b/tests/concordiaSearch.py @@ -1,9 +1,9 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: utf-8 -*- import unittest import json -import urllib2 +import requests import sys import time import host @@ -20,11 +20,9 @@ data = { } start = time.time() -req = urllib2.Request(address) -req.add_header('Content-Type', 'application/json') -response = urllib2.urlopen(req, json.dumps(data)).read() +response = requests.post(address, json=data).json() end = time.time() -print "Execution time: %.4f seconds." % (end-start) -print "Result: " -print response +print("Execution time: %.4f seconds." % (end-start)) +print("Result: ") +print(response) diff --git a/tests/getTmsInfo3.py b/tests/getTmsInfo3.py new file mode 100755 index 0000000..00c47a3 --- /dev/null +++ b/tests/getTmsInfo3.py @@ -0,0 +1,23 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +import json +import requests +import sys +import time +import host + +address = 'http://'+host.concordia_host +if len(host.concordia_port) > 0: + address += ':'+host.concordia_port + + +print("Trying getTmsInfo on %s" % address) + +data = { + 'operation': 'getTmsInfo' +} + +response = requests.post(address, json=data) + +print(response.content)