From dab669729f294f9660ae8c17adae666a61a99d58 Mon Sep 17 00:00:00 2001
From: rjawor
Date: Thu, 16 May 2019 18:28:30 +0200
Subject: [PATCH] added tests, fixed network

---
 docker-compose.yml                 |   7 ++
 tests/.gitignore                   |   2 +
 tests/addAlignedFile.py            |  98 ++++++++++++++++++++++++
 tests/addAlignedFileToTM.py        |  80 ++++++++++++++++++++
 tests/addAlignedLemmatizedTM.py    | 111 +++++++++++++++++++++++++++
 tests/addFastAlignedTM.py          | 117 +++++++++++++++++++++++++++++
 tests/addFastAlignedTM.sh          |   8 ++
 tests/addFile.py                   |  97 ++++++++++++++++++++++++
 tests/addJrc.sh                    |   4 +
 tests/addLemmatizedTM.sh           |   7 ++
 tests/addLemmatizedTMfromParams.sh |   7 ++
 tests/addSentence.py               |  33 ++++++++
 tests/addSources.py                |  49 ++++++++++++
 tests/addStocznia.sh               |  12 +++
 tests/addTm.py                     |  27 +++++++
 tests/build.sh                     |   8 ++
 tests/concordiaSearch.py           |  30 ++++++++
 tests/fullSearch.py                |  31 ++++++++
 tests/generateIndex.py             |  28 +++++++
 tests/getTmsInfo.py                |  25 ++++++
 tests/host.py_example              |   2 +
 tests/lemmatizeSentence.py         |  29 +++++++
 tests/lemmatizeSentences.py        |  29 +++++++
 tests/lexiconSearch.py             |  29 +++++++
 tests/simpleSearch.py              |  29 +++++++
 tests/testCurl.sh                  |  12 +++
 26 files changed, 911 insertions(+)
 create mode 100644 tests/.gitignore
 create mode 100755 tests/addAlignedFile.py
 create mode 100755 tests/addAlignedFileToTM.py
 create mode 100755 tests/addAlignedLemmatizedTM.py
 create mode 100755 tests/addFastAlignedTM.py
 create mode 100755 tests/addFastAlignedTM.sh
 create mode 100755 tests/addFile.py
 create mode 100755 tests/addJrc.sh
 create mode 100755 tests/addLemmatizedTM.sh
 create mode 100755 tests/addLemmatizedTMfromParams.sh
 create mode 100755 tests/addSentence.py
 create mode 100755 tests/addSources.py
 create mode 100755 tests/addStocznia.sh
 create mode 100755 tests/addTm.py
 create mode 100755 tests/build.sh
 create mode 100755 tests/concordiaSearch.py
 create mode 100755 tests/fullSearch.py
 create mode 100755 tests/generateIndex.py
 create mode 100755 tests/getTmsInfo.py
 create mode 100644 tests/host.py_example
 create mode 100755 tests/lemmatizeSentence.py
 create mode 100755 tests/lemmatizeSentences.py
 create mode 100755 tests/lexiconSearch.py
 create mode 100755 tests/simpleSearch.py
 create mode 100755 tests/testCurl.sh

diff --git a/docker-compose.yml b/docker-compose.yml
index d2ea4c6..b5fa1cb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -6,6 +6,10 @@ services:
     build: ./concordia-postgres
     container_name: concordia-postgres
     restart: always
+    environment:
+      POSTGRES_DB: concordia_db
+      POSTGRES_USER: concordia
+      POSTGRES_PASSWORD: concordia
   lemmagen:
     build: ./lemmagen
     container_name: lemmagen
@@ -15,3 +19,6 @@ services:
     container_name: concordia-server
     ports:
       - "10001:80"
+    depends_on:
+      - "concordia-postgres"
+      - "lemmagen"
\ No newline at end of file
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000..6842736
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1,2 @@
+host.py
+host.pyc
diff --git a/tests/addAlignedFile.py b/tests/addAlignedFile.py
new file mode 100755
index 0000000..22846f2
--- /dev/null
+++ b/tests/addAlignedFile.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import host
+import time
+
+BUFFER_SIZE = 500
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_data(data):
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+
+sourceFile = sys.argv[1]
+sourceLangId = int(sys.argv[2])
+targetLangId = int(sys.argv[3])
+name = sys.argv[4]
+
+totalLines = file_len(sourceFile)
+
+data = {
+    'operation': 'addTm',
+    'sourceLangId':sourceLangId,
+    'targetLangId':targetLangId,
+    'name':name
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+tmId = int(response['newTmId'])
+print "Added new tm: %d" % tmId
+
+data = {
+    'operation': 'addAlignedSentences',
+    'tmId':tmId
+}
+
+sentences = []
+currSentence = []
+start = time.time()
+with open(sourceFile) as sourceLines:
+    lineNumber = 0
+    for line in sourceLines:
+        line = line.strip()
+        if lineNumber % 3 == 1:
+            currSentence.append(line)
+        elif lineNumber % 3 == 2:
+            currSentence.append(line)
+            currSentence.reverse()
+            sentences.append(currSentence)
+            currSentence = []
+            if len(sentences) >= BUFFER_SIZE:
+                data['sentences'] = sentences
+                add_data(data)
+                mark = time.time()
+                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ( (lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start)))
+                sentences = []
+        lineNumber += 1
+
+
+if len(sentences) > 0:
+    data['sentences'] = sentences
+    add_data(data)
+
+end = time.time()
+print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start)))
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex',
+    'tmId' : tmId
+}
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data)).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
+
+
+
diff --git a/tests/addAlignedFileToTM.py b/tests/addAlignedFileToTM.py
new file mode 100755
index 0000000..b450ac6
--- /dev/null
+++ b/tests/addAlignedFileToTM.py
@@ -0,0 +1,80 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import host
+import time
+
+BUFFER_SIZE = 500
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_data(data):
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+
+sourceFile = sys.argv[1]
+tmId = int(sys.argv[2])
+
+totalLines = file_len(sourceFile)
+
+data = {
+    'operation': 'addAlignedSentences',
+    'tmId':tmId
+}
+
+sentences = []
+currSentence = []
+start = time.time()
+with open(sourceFile) as sourceLines:
+    lineNumber = 0
+    for line in sourceLines:
+        line = line.strip()
+        if lineNumber % 3 == 1:
+            currSentence.append(line)
+        elif lineNumber % 3 == 2:
+            currSentence.append(line)
+            currSentence.reverse()
+            sentences.append(currSentence)
+            currSentence = []
+            if len(sentences) >= BUFFER_SIZE:
+                data['sentences'] = sentences
+                add_data(data)
+                mark = time.time()
+                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ( (lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start)))
+                sentences = []
+        lineNumber += 1
+
+
+if len(sentences) > 0:
+    data['sentences'] = sentences
+    add_data(data)
+
+end = time.time()
+print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start)))
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex',
+    'tmId' : tmId
+}
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data)).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
diff --git a/tests/addAlignedLemmatizedTM.py b/tests/addAlignedLemmatizedTM.py
new file mode 100755
index 0000000..26f2960
--- /dev/null
+++ b/tests/addAlignedLemmatizedTM.py
@@ -0,0 +1,111 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import host
+import time
+
+BUFFER_SIZE = 500
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_examples(examplesData):
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData)).read())
+    if response['status'] == 'error':
+        raise Exception(response['message'])
+
+if len(sys.argv) != 7:
+    raise Exception("wrong number of arguments")
+
+name = sys.argv[1]
+sourceFile = sys.argv[2]
+sourceLangId = int(sys.argv[3])
+targetFile = sys.argv[4]
+targetLangId = int(sys.argv[5])
+alignmentsFile = sys.argv[6]
+
+if (file_len(sourceFile) != file_len(targetFile)):
+    raise Exception("source and target files are not of the same length!")
+
+if (file_len(alignmentsFile) != 3*file_len(sourceFile)):
+    raise Exception("alignments file is not exactly 3 times as long as the source and target files")
+
+
+totalExamples = file_len(sourceFile)
+
+data = {
+    'operation': 'addTm',
+    'sourceLangId':sourceLangId,
+    'targetLangId':targetLangId,
+    'name':name,
+    'tmLemmatized':True
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+print(response)
+tmId = int(response['newTmId'])
+print "Added new tm: %d" % tmId
+
+data = {
+    'operation': 'addAlignedLemmatizedSentences',
+    'tmId':tmId
+}
+
+examples = []
+start = time.time()
+with open(sourceFile) as sf, open(targetFile) as tf, open(alignmentsFile) as af:
+    for lineNumber in range(totalExamples):
+        sourceSentence = sf.readline().strip()
+        targetSentence = tf.readline().strip()
+
+        # skip two lines of the alignments file; these are lemmatized, and we need the raw sentences from the source and target files.
+        af.readline()
+        af.readline()
+
+        alignmentString = af.readline().strip()
+
+        examples.append([sourceSentence, targetSentence, alignmentString])
+
+        if len(examples) >= BUFFER_SIZE:
+            data['examples'] = examples
+            add_examples(data)
+            mark = time.time()
+            print "Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % ( (lineNumber+1), totalExamples, mark-start, (lineNumber+1)/(mark-start))
+            examples = []
+
+
+if len(examples) > 0:
+    data['examples'] = examples
+    add_examples(data)
+
+end = time.time()
+print "Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1), end-start, (lineNumber+1)/(end-start))
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex',
+    'tmId' : tmId
+}
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data)).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
diff --git a/tests/addFastAlignedTM.py b/tests/addFastAlignedTM.py
new file mode 100755
index 0000000..a29f254
--- /dev/null
+++ b/tests/addFastAlignedTM.py
@@ -0,0 +1,117 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import json
+import urllib2
+import sys
+import host
+import time
+
+BUFFER_SIZE = 500
+LEAVE_OUT = 1 # a value of 1 leaves nothing out
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_examples(examplesData):
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData), timeout = 3600).read())
+    print(response)
+    if response['status'] == 'error':
+        raise Exception(response['message'])
+
+if len(sys.argv) != 9:
+    raise Exception("wrong number of arguments")
+
+name = sys.argv[1]
+sourceFile = sys.argv[2]
+lemmatizedSourceFile = sys.argv[3]
+sourceLangId = int(sys.argv[4])
+targetFile = sys.argv[5]
+targetLangId = int(sys.argv[6])
+alignmentsFile = sys.argv[7]
+sourceIdsFile = sys.argv[8]
+
+sourceFileLength = file_len(sourceFile)
+lemmatizedSourceFileLength = file_len(lemmatizedSourceFile)
+targetFileLength = file_len(targetFile)
+alignmentsFileLength = file_len(alignmentsFile)
+sourceIdsFileLength = file_len(sourceIdsFile)
+
+if not (sourceFileLength == lemmatizedSourceFileLength and lemmatizedSourceFileLength == targetFileLength and targetFileLength == alignmentsFileLength and alignmentsFileLength == sourceIdsFileLength):
+    print("File lengths:")
+    print("source file: %d\nlemmatized source file: %d\ntarget file: %d\nalignments file: %d\nsource ids file: %d" % (sourceFileLength, lemmatizedSourceFileLength, targetFileLength, alignmentsFileLength, sourceIdsFileLength))
+    raise Exception("files are not of the same length!")
+
+totalExamples = sourceFileLength / LEAVE_OUT
+
+data = {
+    'operation': 'addTm',
+    'sourceLangId':sourceLangId,
+    'targetLangId':targetLangId,
+    'name':name,
+    'tmLemmatized':True
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data), timeout = 3600).read())
+print(response)
+tmId = int(response['newTmId'])
+print "Added new tm: %d" % tmId
+
+data = {
+    'operation': 'addSentences',
+    'tmId':tmId
+}
+
+examples = []
+start = time.time()
+with open(sourceFile) as source_file, open(lemmatizedSourceFile) as lemmatized_source_file, open(targetFile) as target_file, open(alignmentsFile) as alignments_file, open(sourceIdsFile) as source_ids_file:
+    addedCount = 0
+    for lineNumber in range(sourceFileLength):
+        if lineNumber % LEAVE_OUT == 0:
+            sourceSentence = source_file.readline().strip()
+            lemmatizedSourceSentence = lemmatized_source_file.readline().strip()
+            targetSentence = target_file.readline().strip()
+            alignment = json.loads(alignments_file.readline().strip())
+            sourceId = int(source_ids_file.readline().strip())
+
+            examples.append([sourceSentence, lemmatizedSourceSentence, targetSentence, alignment, sourceId])
+            addedCount += 1
+            if len(examples) >= BUFFER_SIZE:
+                data['examples'] = examples
+                add_examples(data)
+                mark = time.time()
+                print "Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % (addedCount, totalExamples, mark-start, addedCount/(mark-start))
+                examples = []
+
+
+if len(examples) > 0:
+    data['examples'] = examples
+    add_examples(data)
+
+end = time.time()
+print "Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (addedCount, end-start, addedCount/(end-start))
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex',
+    'tmId' : tmId
+}
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data), timeout = 3600).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
diff --git a/tests/addFastAlignedTM.sh b/tests/addFastAlignedTM.sh
new file mode 100755
index 0000000..5a079a6
--- /dev/null
+++ b/tests/addFastAlignedTM.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+CORPUS_NAME=opensubtitles
+CORPUS_PATH=../fast-aligner/corpora/$CORPUS_NAME
+SRC_LANG_ID=1
+TRG_LANG_ID=2
+
+./addFastAlignedTM.py $CORPUS_NAME $CORPUS_PATH/src_clean.txt $CORPUS_PATH/src_clean.lem $SRC_LANG_ID $CORPUS_PATH/trg_clean.txt $TRG_LANG_ID $CORPUS_PATH/alignments.txt $CORPUS_PATH/ids_clean.txt
diff --git a/tests/addFile.py b/tests/addFile.py
new file mode 100755
index 0000000..c484100
--- /dev/null
+++ b/tests/addFile.py
@@ -0,0 +1,97 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import host
+import time
+
+BUFFER_SIZE = 500
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_data(data):
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    urllib2.urlopen(req, json.dumps(data)).read()
+
+sourceFile = sys.argv[1]
+sourceLangId = int(sys.argv[2])
+targetFile = sys.argv[3]
+targetLangId = int(sys.argv[4])
+name = sys.argv[5]
+
+totalLines = file_len(sourceFile)
+if file_len(targetFile) != totalLines:
+    print "File lengths do not match"
+    sys.exit(1)
+
+data = {
+    'operation': 'addTm',
+    'sourceLangId':sourceLangId,
+    'targetLangId':targetLangId,
+    'name':name
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+tmId = int(response['newTmId'])
+print "Added new tm: %d" % tmId
+
+
+data = {
+    'operation': 'addSentences',
+    'tmId':tmId
+}
+
+sentences = []
+start = time.time()
+with open(sourceFile) as sourceSentences:
+    with open(targetFile) as targetSentences:
+        lineNumber = 0
+        for sourceSentence in sourceSentences:
+            lineNumber += 1
+            targetSentence = targetSentences.readline()
+            sentences.append([sourceSentence, targetSentence])
+            if lineNumber % BUFFER_SIZE == 0:
+                data['sentences'] = sentences
+                sentences = []
+                add_data(data)
+                mark = time.time()
+                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % (lineNumber, totalLines, mark-start, lineNumber/(mark-start))
"Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % (lineNumber, totalLines, mark-start, lineNumber/(mark-start)) + + +if len(sentences) > 0: + data['sentences'] = sentences + add_data(data) + +end = time.time() +print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (lineNumber, end-start, lineNumber/(end-start)) + +print "Generating index..." +start = time.time() +data = { + 'operation': 'refreshIndex', + 'tmId' : tmId +} +req = urllib2.Request(address) +req.add_header('Content-Type', 'application/json') +urllib2.urlopen(req, json.dumps(data)).read() + +end = time.time() +print "Index regeneration complete. The operation took %.4f s" % (end - start) + + + diff --git a/tests/addJrc.sh b/tests/addJrc.sh new file mode 100755 index 0000000..3526eff --- /dev/null +++ b/tests/addJrc.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +./addFile.py ~/projects/corpora/jrc/jrc_pl.txt ~/projects/corpora/jrc/jrc_en.txt 1 + diff --git a/tests/addLemmatizedTM.sh b/tests/addLemmatizedTM.sh new file mode 100755 index 0000000..8a83148 --- /dev/null +++ b/tests/addLemmatizedTM.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +CORPUS_NAME="stocznia_plen" +SRC_LANG_ID=1 +TRG_LANG_ID=2 + +./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt diff --git a/tests/addLemmatizedTMfromParams.sh b/tests/addLemmatizedTMfromParams.sh new file mode 100755 index 0000000..1c61582 --- /dev/null +++ b/tests/addLemmatizedTMfromParams.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +CORPUS_NAME=$1 +SRC_LANG_ID=$2 +TRG_LANG_ID=$3 + +./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt diff --git a/tests/addSentence.py b/tests/addSentence.py new file mode 100755 index 0000000..6ad3f63 --- /dev/null +++ b/tests/addSentence.py @@ -0,0 +1,33 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import unittest +import json +import urllib2 +import sys +import time +import host + +address = 'http://'+host.concordia_host +if len(host.concordia_port) > 0: + address += ':'+host.concordia_port + + +data = { + 'operation': 'addSentence', + 'sourceSentence':sys.argv[1], + 'targetSentence':sys.argv[2], + 'tmId':int(sys.argv[3]) +} + +start = time.time() +req = urllib2.Request(address) +req.add_header('Content-Type', 'application/json') +response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) +end = time.time() + +print "Execution time: %.4f seconds." 
+print "Result: "
+print response
+
+
diff --git a/tests/addSources.py b/tests/addSources.py
new file mode 100755
index 0000000..0c1da7f
--- /dev/null
+++ b/tests/addSources.py
@@ -0,0 +1,49 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import json
+import urllib2
+import sys
+import time
+import host
+
+BUFFER_SIZE = 500
+
+
+def addSources(sources_buffer):
+    data = {
+        'operation': 'addSources',
+        'sources':sources_buffer
+    }
+
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    urllib2.urlopen(req, json.dumps(data))
+
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+with open(sys.argv[1]) as sources_file:
+    counter = 0
+    sources_buffer = []
+    for line in sources_file:
+        counter += 1
+        id_raw, link, name = line.rstrip().split('\t')
+
+        sources_buffer.append([int(id_raw), name, link])
+        if len(sources_buffer) == BUFFER_SIZE:
+            addSources(sources_buffer)
+            sources_buffer = []
+            print("Added %d sources" % counter)
+
+
+if len(sources_buffer) > 0:
+    addSources(sources_buffer)
+
+print("Added all %d sources" % counter)
+
+
+
diff --git a/tests/addStocznia.sh b/tests/addStocznia.sh
new file mode 100755
index 0000000..b68a82e
--- /dev/null
+++ b/tests/addStocznia.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+./addTm.py 1 2 placeholder 1
+
+./addAlignedLemmatizedTM.py stocznia_plen ../mgiza-aligner/corpora/stocznia_plen/src_final.txt 1 ../mgiza-aligner/corpora/stocznia_plen/trg_final.txt 2 ../mgiza-aligner/corpora/stocznia_plen/aligned_final.txt
+
+./addTm.py 1 2 placeholder 1
+
+./addTm.py 1 2 placeholder 1
+
+./addAlignedLemmatizedTM.py stocznia_enpl ../mgiza-aligner/corpora/stocznia_enpl/src_final.txt 2 ../mgiza-aligner/corpora/stocznia_enpl/trg_final.txt 1 ../mgiza-aligner/corpora/stocznia_enpl/aligned_final.txt
+
diff --git a/tests/addTm.py b/tests/addTm.py
new file mode 100755
index 0000000..c36e791
--- /dev/null
+++ b/tests/addTm.py
@@ -0,0 +1,27 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import json
+import urllib2
+import sys
+import time
+import host
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+data = {
+    'operation': 'addTm',
+    'sourceLangId':int(sys.argv[1]),
+    'targetLangId':int(sys.argv[2]),
+    'name':sys.argv[3],
+    'tmLemmatized':bool(int(sys.argv[4]))
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+
+print response
diff --git a/tests/build.sh b/tests/build.sh
new file mode 100755
index 0000000..4985b50
--- /dev/null
+++ b/tests/build.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+./addLemmatizedTMfromParams.sh tmrepository_enhr 2 6
+./addTm.py 1 2 placeholder 1
+./addLemmatizedTMfromParams.sh icd_dictionary 1 2
+./addLemmatizedTMfromParams.sh icd_filtered 1 2
+./addLemmatizedTMfromParams.sh emea_plen 1 2
+./addLemmatizedTMfromParams.sh jrc_enes 2 4
diff --git a/tests/concordiaSearch.py b/tests/concordiaSearch.py
new file mode 100755
index 0000000..9880d94
--- /dev/null
+++ b/tests/concordiaSearch.py
@@ -0,0 +1,30 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+data = {
+    'operation': 'concordiaSearch',
+    'pattern':sys.argv[1],
+    'tmId':int(sys.argv[2])
+}
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = urllib2.urlopen(req, json.dumps(data)).read()
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/fullSearch.py b/tests/fullSearch.py
new file mode 100755
index 0000000..a64688c
--- /dev/null
+++ b/tests/fullSearch.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+    'operation': 'fullSearch',
+    'pattern':sys.argv[1],
+    'tmId':int(sys.argv[2]),
+    'limit':int(sys.argv[3]),
+    'offset':int(sys.argv[4])
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = urllib2.urlopen(req, json.dumps(data)).read()
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/generateIndex.py b/tests/generateIndex.py
new file mode 100755
index 0000000..51e3f93
--- /dev/null
+++ b/tests/generateIndex.py
@@ -0,0 +1,28 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import host
+import time
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex',
+    'tmId' : 1
+}
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data)).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
diff --git a/tests/getTmsInfo.py b/tests/getTmsInfo.py
new file mode 100755
index 0000000..e871068
--- /dev/null
+++ b/tests/getTmsInfo.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import json
+import urllib2
+import sys
+import time
+import host
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+print("Trying getTmsInfo on %s" % address)
+
+data = {
+    'operation': 'getTmsInfo'
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+
+print response
diff --git a/tests/host.py_example b/tests/host.py_example
new file mode 100644
index 0000000..26bce0b
--- /dev/null
+++ b/tests/host.py_example
@@ -0,0 +1,2 @@
+concordia_host = 'localhost'
+concordia_port = ''
diff --git a/tests/lemmatizeSentence.py b/tests/lemmatizeSentence.py
new file mode 100755
index 0000000..4874f7c
--- /dev/null
+++ b/tests/lemmatizeSentence.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+    'operation': 'lemmatize',
+    'languageCode':sys.argv[1],
+    'sentence':sys.argv[2]
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/lemmatizeSentences.py b/tests/lemmatizeSentences.py
new file mode 100755
index 0000000..0ef29a6
--- /dev/null
+++ b/tests/lemmatizeSentences.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+    'operation': 'lemmatizeAll',
+    'languageCode':sys.argv[1],
+    'sentences':["ona poszła do sklepu", "powiedziałem to Tomkowi"]
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/lexiconSearch.py b/tests/lexiconSearch.py
new file mode 100755
index 0000000..37ccdf4
--- /dev/null
+++ b/tests/lexiconSearch.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+    'operation': 'lexiconSearch',
+    'pattern':sys.argv[1],
+    'tmId':int(sys.argv[2])
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = urllib2.urlopen(req, json.dumps(data)).read()
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/simpleSearch.py b/tests/simpleSearch.py
new file mode 100755
index 0000000..e7bdaee
--- /dev/null
+++ b/tests/simpleSearch.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+    'operation': 'simpleSearch',
+    'pattern':sys.argv[1],
+    'tmId':int(sys.argv[2])
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = urllib2.urlopen(req, json.dumps(data)).read()
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/testCurl.sh b/tests/testCurl.sh
new file mode 100755
index 0000000..6e1d388
--- /dev/null
+++ b/tests/testCurl.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+# add sentence
+#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"I jeszcze jedno zdanie testowe", "targetSentence":"Yet another test sentence", "tmId":1}' http://localhost
+
+# add sentences
+#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentences", "sentences":[[1,"test source one", "test target one"],[4,"test source two", "test target two"],[9,"test source three", "test target three"],[13,"test source four", "test target four"]]}' http://localhost
+
+# simple search ("tmId" added to match the payload sent by simpleSearch.py)
+curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"test source", "tmId":1}' http://localhost
+
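
Note for reviewers: every script added by this patch speaks the same protocol — a JSON POST to the concordia-server address built from tests/host.py (created by copying host.py_example), with an 'operation' field selecting the action. The scripts are Python 2 (urllib2, print statements). Below is a minimal sketch of the same call in Python 3, for reference only; it assumes the host value from host.py_example and the host port 10001 published in docker-compose.yml, and reuses the simpleSearch payload from testCurl.sh (a tmId of 1 is assumed to exist):

    import json
    import urllib.request

    # Build the server address the same way the test scripts do.
    concordia_host = 'localhost'  # from host.py_example
    concordia_port = '10001'      # assumption: the host port mapped in docker-compose.yml
    address = 'http://' + concordia_host
    if concordia_port:
        address += ':' + concordia_port

    # Every operation in this patch is a JSON POST with an 'operation' field.
    payload = {'operation': 'simpleSearch', 'pattern': 'test source', 'tmId': 1}
    req = urllib.request.Request(address,
                                 data=json.dumps(payload).encode('utf-8'),
                                 headers={'Content-Type': 'application/json'})
    with urllib.request.urlopen(req) as resp:
        print(json.loads(resp.read().decode('utf-8')))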