From dab669729f294f9660ae8c17adae666a61a99d58 Mon Sep 17 00:00:00 2001
From: rjawor
Date: Thu, 16 May 2019 18:28:30 +0200
Subject: [PATCH] added tests, fixed network

---
 docker-compose.yml                 |   7 ++
 tests/.gitignore                   |   2 +
 tests/addAlignedFile.py            |  98 ++++++++++++++++++++++++
 tests/addAlignedFileToTM.py        |  80 ++++++++++++++++++++
 tests/addAlignedLemmatizedTM.py    | 111 +++++++++++++++++++++++++++
 tests/addFastAlignedTM.py          | 117 +++++++++++++++++++++++++++++
 tests/addFastAlignedTM.sh          |   8 ++
 tests/addFile.py                   |  97 ++++++++++++++++++++++++
 tests/addJrc.sh                    |   4 +
 tests/addLemmatizedTM.sh           |   7 ++
 tests/addLemmatizedTMfromParams.sh |   7 ++
 tests/addSentence.py               |  33 ++++++++
 tests/addSources.py                |  49 ++++++++++++
 tests/addStocznia.sh               |  12 +++
 tests/addTm.py                     |  27 +++++++
 tests/build.sh                     |   8 ++
 tests/concordiaSearch.py           |  30 ++++++++
 tests/fullSearch.py                |  31 ++++++++
 tests/generateIndex.py             |  28 +++++++
 tests/getTmsInfo.py                |  25 ++++++
 tests/host.py_example              |   2 +
 tests/lemmatizeSentence.py         |  29 +++++++
 tests/lemmatizeSentences.py        |  29 +++++++
 tests/lexiconSearch.py             |  29 +++++++
 tests/simpleSearch.py              |  29 +++++++
 tests/testCurl.sh                  |  12 +++
 26 files changed, 911 insertions(+)
 create mode 100644 tests/.gitignore
 create mode 100755 tests/addAlignedFile.py
 create mode 100755 tests/addAlignedFileToTM.py
 create mode 100755 tests/addAlignedLemmatizedTM.py
 create mode 100755 tests/addFastAlignedTM.py
 create mode 100755 tests/addFastAlignedTM.sh
 create mode 100755 tests/addFile.py
 create mode 100755 tests/addJrc.sh
 create mode 100755 tests/addLemmatizedTM.sh
 create mode 100755 tests/addLemmatizedTMfromParams.sh
 create mode 100755 tests/addSentence.py
 create mode 100755 tests/addSources.py
 create mode 100755 tests/addStocznia.sh
 create mode 100755 tests/addTm.py
 create mode 100755 tests/build.sh
 create mode 100755 tests/concordiaSearch.py
 create mode 100755 tests/fullSearch.py
 create mode 100755 tests/generateIndex.py
 create mode 100755 tests/getTmsInfo.py
 create mode 100644 tests/host.py_example
 create mode 100755 tests/lemmatizeSentence.py
 create mode 100755 tests/lemmatizeSentences.py
 create mode 100755 tests/lexiconSearch.py
 create mode 100755 tests/simpleSearch.py
 create mode 100755 tests/testCurl.sh

diff --git a/docker-compose.yml b/docker-compose.yml
index d2ea4c6..b5fa1cb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -6,6 +6,10 @@ services:
     build: ./concordia-postgres
     container_name: concordia-postgres
     restart: always
+    environment:
+      POSTGRES_DB: concordia_db
+      POSTGRES_USER: concordia
+      POSTGRES_PASSWORD: concordia
   lemmagen:
     build: ./lemmagen
     container_name: lemmagen
@@ -15,3 +19,6 @@ services:
     container_name: concordia-server
     ports:
       - "10001:80"
+    depends_on:
+      - "concordia-postgres"
+      - "lemmagen"
\ No newline at end of file
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000..6842736
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1,2 @@
+host.py
+host.pyc
diff --git a/tests/addAlignedFile.py b/tests/addAlignedFile.py
new file mode 100755
index 0000000..22846f2
--- /dev/null
+++ b/tests/addAlignedFile.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import host
+import time
+
+BUFFER_SIZE = 500
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_data(data):
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+
+sourceFile = sys.argv[1]
+sourceLangId = int(sys.argv[2])
+targetLangId = int(sys.argv[3])
+name = sys.argv[4]
+
+totalLines = file_len(sourceFile)
+
+data = {
+    'operation': 'addTm',
+    'sourceLangId':sourceLangId,
+    'targetLangId':targetLangId,
+    'name':name
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+tmId = int(response['newTmId'])
+print "Added new tm: %d" % tmId
+
+data = {
+    'operation': 'addAlignedSentences',
+    'tmId':tmId
+}
+
+sentences = []
+currSentence = []
+start = time.time()
+with open(sourceFile) as sourceLines:
+    lineNumber = 0
+    for line in sourceLines:
+        line = line.strip()
+        if lineNumber % 3 == 1:
+            currSentence.append(line)
+        elif lineNumber % 3 == 2:
+            currSentence.append(line)
+            currSentence.reverse()
+            sentences.append(currSentence)
+            currSentence = []
+            if len(sentences) >= BUFFER_SIZE:
+                data['sentences'] = sentences
+                add_data(data)
+                mark = time.time()
+                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ( (lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start)))
+                sentences = []
+        lineNumber += 1
+
+
+if len(sentences) > 0:
+    data['sentences'] = sentences
+    add_data(data)
+
+end = time.time()
+print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start)))
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex',
+    'tmId' : tmId
+}
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data)).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
+
+
+
diff --git a/tests/addAlignedFileToTM.py b/tests/addAlignedFileToTM.py
new file mode 100755
index 0000000..b450ac6
--- /dev/null
+++ b/tests/addAlignedFileToTM.py
@@ -0,0 +1,80 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import host
+import time
+
+BUFFER_SIZE = 500
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_data(data):
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+
+sourceFile = sys.argv[1]
+tmId = int(sys.argv[2])
+
+totalLines = file_len(sourceFile)
+
+data = {
+    'operation': 'addAlignedSentences',
+    'tmId':tmId
+}
+
+sentences = []
+currSentence = []
+start = time.time()
+with open(sourceFile) as sourceLines:
+    lineNumber = 0
+    for line in sourceLines:
+        line = line.strip()
+        if lineNumber % 3 == 1:
+            currSentence.append(line)
+        elif lineNumber % 3 == 2:
+            currSentence.append(line)
+            currSentence.reverse()
+            sentences.append(currSentence)
+            currSentence = []
+            if len(sentences) >= BUFFER_SIZE:
+                data['sentences'] = sentences
+                add_data(data)
+                mark = time.time()
+                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ( (lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start)))
+                sentences = []
+        lineNumber += 1
+
+
+if len(sentences) > 0:
+    data['sentences'] = sentences
+    add_data(data)
+
+end = time.time()
+print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start)))
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex',
+    'tmId' : tmId
+}
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data)).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
diff --git a/tests/addAlignedLemmatizedTM.py b/tests/addAlignedLemmatizedTM.py
new file mode 100755
index 0000000..26f2960
--- /dev/null
+++ b/tests/addAlignedLemmatizedTM.py
@@ -0,0 +1,111 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import host
+import time
+
+BUFFER_SIZE = 500
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_examples(examplesData):
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData)).read())
+    if response['status'] == 'error':
+        raise Exception(response['message'])
+
+if len(sys.argv) != 7:
+    raise Exception("wrong number of arguments")
+
+name = sys.argv[1]
+sourceFile = sys.argv[2]
+sourceLangId = int(sys.argv[3])
+targetFile = sys.argv[4]
+targetLangId = int(sys.argv[5])
+alignmentsFile = sys.argv[6]
+
+if (file_len(sourceFile) != file_len(targetFile)):
+    raise Exception("source and target files are not of the same length!")
+
+if (file_len(alignmentsFile) != 3*file_len(sourceFile)):
+    raise Exception("alignments file is not exactly 3 times as long as the source and target files")
+
+
+totalExamples = file_len(sourceFile)
+
+data = {
+    'operation': 'addTm',
+    'sourceLangId':sourceLangId,
+    'targetLangId':targetLangId,
+    'name':name,
+    'tmLemmatized':True
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+print(response)
+tmId = int(response['newTmId'])
+print "Added new tm: %d" % tmId
+
+data = {
+    'operation': 'addAlignedLemmatizedSentences',
+    'tmId':tmId
+}
+
+examples = []
+start = time.time()
+with open(sourceFile) as sf, open(targetFile) as tf, open(alignmentsFile) as af:
+    for lineNumber in range(totalExamples):
+        sourceSentence = sf.readline().strip()
+        targetSentence = tf.readline().strip()
+
+        # skip two lines of the alignments file; these are lemmatized, and we need the raw sentences from the source and target files.
+        af.readline()
+        af.readline()
+
+        alignmentString = af.readline().strip()
+
+        examples.append([sourceSentence, targetSentence, alignmentString])
+
+        if len(examples) >= BUFFER_SIZE:
+            data['examples'] = examples
+            add_examples(data)
+            mark = time.time()
+            print "Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % ( (lineNumber+1), totalExamples, mark-start, (lineNumber+1)/(mark-start))
+            examples = []
+
+
+if len(examples) > 0:
+    data['examples'] = examples
+    add_examples(data)
+
+end = time.time()
+print "Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1), end-start, (lineNumber+1)/(end-start))
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex',
+    'tmId' : tmId
+}
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data)).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
diff --git a/tests/addFastAlignedTM.py b/tests/addFastAlignedTM.py
new file mode 100755
index 0000000..a29f254
--- /dev/null
+++ b/tests/addFastAlignedTM.py
@@ -0,0 +1,117 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import json
+import urllib2
+import sys
+import host
+import time
+
+BUFFER_SIZE = 500
+LEAVE_OUT = 1 # a value of 1 leaves nothing out
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_examples(examplesData):
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData), timeout = 3600).read())
+    print(response)
+    if response['status'] == 'error':
+        raise Exception(response['message'])
+
+if len(sys.argv) != 9:
+    raise Exception("wrong number of arguments")
+
+name = sys.argv[1]
+sourceFile = sys.argv[2]
+lemmatizedSourceFile = sys.argv[3]
+sourceLangId = int(sys.argv[4])
+targetFile = sys.argv[5]
+targetLangId = int(sys.argv[6])
+alignmentsFile = sys.argv[7]
+sourceIdsFile = sys.argv[8]
+
+sourceFileLength = file_len(sourceFile)
+lemmatizedSourceFileLength = file_len(lemmatizedSourceFile)
+targetFileLength = file_len(targetFile)
+alignmentsFileLength = file_len(alignmentsFile)
+sourceIdsFileLength = file_len(sourceIdsFile)
+
+if not (sourceFileLength == lemmatizedSourceFileLength and lemmatizedSourceFileLength == targetFileLength and targetFileLength == alignmentsFileLength and alignmentsFileLength == sourceIdsFileLength):
+    print("File lengths:")
+    print("source file: %d\nlemmatized source file: %d\ntarget file: %d\nalignments file: %d\nsource ids file: %d" % (sourceFileLength, lemmatizedSourceFileLength, targetFileLength, alignmentsFileLength, sourceIdsFileLength))
+    raise Exception("files are not of the same length!")
+
+totalExamples = sourceFileLength / LEAVE_OUT
+
+data = {
+    'operation': 'addTm',
+    'sourceLangId':sourceLangId,
+    'targetLangId':targetLangId,
+    'name':name,
+    'tmLemmatized':True
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data), timeout = 3600).read())
+print(response)
+tmId = int(response['newTmId'])
+print "Added new tm: %d" % tmId
+
+data = {
+    'operation': 'addSentences',
+    'tmId':tmId
+}
+
+examples = []
+start = time.time()
+with open(sourceFile) as source_file, open(lemmatizedSourceFile) as lemmatized_source_file, open(targetFile) as target_file, open(alignmentsFile) as alignments_file, open(sourceIdsFile) as source_ids_file:
+    addedCount = 0
+    for lineNumber in range(sourceFileLength):
+        if lineNumber % LEAVE_OUT == 0:
+            sourceSentence = source_file.readline().strip()
+            lemmatizedSourceSentence = lemmatized_source_file.readline().strip()
+            targetSentence = target_file.readline().strip()
+            alignment = json.loads(alignments_file.readline().strip())
+            sourceId = int(source_ids_file.readline().strip())
+
+            examples.append([sourceSentence, lemmatizedSourceSentence, targetSentence, alignment, sourceId])
+            addedCount += 1
+            if len(examples) >= BUFFER_SIZE:
+                data['examples'] = examples
+                add_examples(data)
+                mark = time.time()
+                print "Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % (addedCount, totalExamples, mark-start, addedCount/(mark-start))
+                examples = []
+
+
+if len(examples) > 0:
+    data['examples'] = examples
+    add_examples(data)
+
+end = time.time()
+print "Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (addedCount, end-start, addedCount/(end-start))
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex',
+    'tmId' : tmId
+}
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data), timeout = 3600).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
diff --git a/tests/addFastAlignedTM.sh b/tests/addFastAlignedTM.sh
new file mode 100755
index 0000000..5a079a6
--- /dev/null
+++ b/tests/addFastAlignedTM.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+CORPUS_NAME=opensubtitles
+CORPUS_PATH=../fast-aligner/corpora/$CORPUS_NAME
+SRC_LANG_ID=1
+TRG_LANG_ID=2
+
+./addFastAlignedTM.py $CORPUS_NAME $CORPUS_PATH/src_clean.txt $CORPUS_PATH/src_clean.lem $SRC_LANG_ID $CORPUS_PATH/trg_clean.txt $TRG_LANG_ID $CORPUS_PATH/alignments.txt $CORPUS_PATH/ids_clean.txt
diff --git a/tests/addFile.py b/tests/addFile.py
new file mode 100755
index 0000000..c484100
--- /dev/null
+++ b/tests/addFile.py
@@ -0,0 +1,97 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import host
+import time
+
+BUFFER_SIZE = 500
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+def file_len(fname):
+    with open(fname) as f:
+        for i, l in enumerate(f):
+            pass
+    return i + 1
+
+def add_data(data):
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    urllib2.urlopen(req, json.dumps(data)).read()
+
+sourceFile = sys.argv[1]
+sourceLangId = int(sys.argv[2])
+targetFile = sys.argv[3]
+targetLangId = int(sys.argv[4])
+name = sys.argv[5]
+
+totalLines = file_len(sourceFile)
+if file_len(targetFile) != totalLines:
+    print "File lengths do not match"
+    sys.exit(1)
+
+data = {
+    'operation': 'addTm',
+    'sourceLangId':sourceLangId,
+    'targetLangId':targetLangId,
+    'name':name
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+tmId = int(response['newTmId'])
+print "Added new tm: %d" % tmId
+
+
+data = {
+    'operation': 'addSentences',
+    'tmId':tmId
+}
+
+sentences = []
+start = time.time()
+with open(sourceFile) as sourceSentences:
+    with open(targetFile) as targetSentences:
+        lineNumber = 0
+        for sourceSentence in sourceSentences:
+            lineNumber += 1
+            targetSentence = targetSentences.readline()
+            sentences.append([sourceSentence, targetSentence])
+            if lineNumber % BUFFER_SIZE == 0:
+                data['sentences'] = sentences
+                sentences = []
+                add_data(data)
+                mark = time.time()
+                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % (lineNumber, totalLines, mark-start, lineNumber/(mark-start))
"Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % (lineNumber, totalLines, mark-start, lineNumber/(mark-start)) + + +if len(sentences) > 0: + data['sentences'] = sentences + add_data(data) + +end = time.time() +print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (lineNumber, end-start, lineNumber/(end-start)) + +print "Generating index..." +start = time.time() +data = { + 'operation': 'refreshIndex', + 'tmId' : tmId +} +req = urllib2.Request(address) +req.add_header('Content-Type', 'application/json') +urllib2.urlopen(req, json.dumps(data)).read() + +end = time.time() +print "Index regeneration complete. The operation took %.4f s" % (end - start) + + + diff --git a/tests/addJrc.sh b/tests/addJrc.sh new file mode 100755 index 0000000..3526eff --- /dev/null +++ b/tests/addJrc.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +./addFile.py ~/projects/corpora/jrc/jrc_pl.txt ~/projects/corpora/jrc/jrc_en.txt 1 + diff --git a/tests/addLemmatizedTM.sh b/tests/addLemmatizedTM.sh new file mode 100755 index 0000000..8a83148 --- /dev/null +++ b/tests/addLemmatizedTM.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +CORPUS_NAME="stocznia_plen" +SRC_LANG_ID=1 +TRG_LANG_ID=2 + +./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt diff --git a/tests/addLemmatizedTMfromParams.sh b/tests/addLemmatizedTMfromParams.sh new file mode 100755 index 0000000..1c61582 --- /dev/null +++ b/tests/addLemmatizedTMfromParams.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +CORPUS_NAME=$1 +SRC_LANG_ID=$2 +TRG_LANG_ID=$3 + +./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt diff --git a/tests/addSentence.py b/tests/addSentence.py new file mode 100755 index 0000000..6ad3f63 --- /dev/null +++ b/tests/addSentence.py @@ -0,0 +1,33 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import unittest +import json +import urllib2 +import sys +import time +import host + +address = 'http://'+host.concordia_host +if len(host.concordia_port) > 0: + address += ':'+host.concordia_port + + +data = { + 'operation': 'addSentence', + 'sourceSentence':sys.argv[1], + 'targetSentence':sys.argv[2], + 'tmId':int(sys.argv[3]) +} + +start = time.time() +req = urllib2.Request(address) +req.add_header('Content-Type', 'application/json') +response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) +end = time.time() + +print "Execution time: %.4f seconds." 
+print "Result: "
+print response
+
+
diff --git a/tests/addSources.py b/tests/addSources.py
new file mode 100755
index 0000000..0c1da7f
--- /dev/null
+++ b/tests/addSources.py
@@ -0,0 +1,49 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import json
+import urllib2
+import sys
+import time
+import host
+
+BUFFER_SIZE = 500
+
+
+def addSources(sources_buffer):
+    data = {
+        'operation': 'addSources',
+        'sources':sources_buffer
+    }
+
+    req = urllib2.Request(address)
+    req.add_header('Content-Type', 'application/json')
+    urllib2.urlopen(req, json.dumps(data))
+
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+with open(sys.argv[1]) as sources_file:
+    counter = 0
+    sources_buffer = []
+    for line in sources_file:
+        counter += 1
+        id_raw, link, name = line.rstrip().split('\t')
+
+        sources_buffer.append([int(id_raw), name, link])
+        if len(sources_buffer) == BUFFER_SIZE:
+            addSources(sources_buffer)
+            sources_buffer = []
+            print("Added %d sources" % counter)
+
+
+if len(sources_buffer) > 0:
+    addSources(sources_buffer)
+
+print("Added all %d sources" % counter)
+
+
+
diff --git a/tests/addStocznia.sh b/tests/addStocznia.sh
new file mode 100755
index 0000000..b68a82e
--- /dev/null
+++ b/tests/addStocznia.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+./addTm.py 1 2 placeholder 1
+
+./addAlignedLemmatizedTM.py stocznia_plen ../mgiza-aligner/corpora/stocznia_plen/src_final.txt 1 ../mgiza-aligner/corpora/stocznia_plen/trg_final.txt 2 ../mgiza-aligner/corpora/stocznia_plen/aligned_final.txt
+
+./addTm.py 1 2 placeholder 1
+
+./addTm.py 1 2 placeholder 1
+
+./addAlignedLemmatizedTM.py stocznia_enpl ../mgiza-aligner/corpora/stocznia_enpl/src_final.txt 2 ../mgiza-aligner/corpora/stocznia_enpl/trg_final.txt 1 ../mgiza-aligner/corpora/stocznia_enpl/aligned_final.txt
+
diff --git a/tests/addTm.py b/tests/addTm.py
new file mode 100755
index 0000000..c36e791
--- /dev/null
+++ b/tests/addTm.py
@@ -0,0 +1,27 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import json
+import urllib2
+import sys
+import time
+import host
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+data = {
+    'operation': 'addTm',
+    'sourceLangId':int(sys.argv[1]),
+    'targetLangId':int(sys.argv[2]),
+    'name':sys.argv[3],
+    'tmLemmatized':bool(int(sys.argv[4]))
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+
+print response
diff --git a/tests/build.sh b/tests/build.sh
new file mode 100755
index 0000000..4985b50
--- /dev/null
+++ b/tests/build.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+./addLemmatizedTMfromParams.sh tmrepository_enhr 2 6
+./addTm.py 1 2 placeholder 1
+./addLemmatizedTMfromParams.sh icd_dictionary 1 2
+./addLemmatizedTMfromParams.sh icd_filtered 1 2
+./addLemmatizedTMfromParams.sh emea_plen 1 2
+./addLemmatizedTMfromParams.sh jrc_enes 2 4
diff --git a/tests/concordiaSearch.py b/tests/concordiaSearch.py
new file mode 100755
index 0000000..9880d94
--- /dev/null
+++ b/tests/concordiaSearch.py
@@ -0,0 +1,30 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+data = {
+    'operation': 'concordiaSearch',
+    'pattern':sys.argv[1],
+    'tmId':int(sys.argv[2])
+}
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = urllib2.urlopen(req, json.dumps(data)).read()
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/fullSearch.py b/tests/fullSearch.py
new file mode 100755
index 0000000..a64688c
--- /dev/null
+++ b/tests/fullSearch.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+    'operation': 'fullSearch',
+    'pattern':sys.argv[1],
+    'tmId':int(sys.argv[2]),
+    'limit':int(sys.argv[3]),
+    'offset':int(sys.argv[4])
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = urllib2.urlopen(req, json.dumps(data)).read()
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/generateIndex.py b/tests/generateIndex.py
new file mode 100755
index 0000000..51e3f93
--- /dev/null
+++ b/tests/generateIndex.py
@@ -0,0 +1,28 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import host
+import time
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+
+print "Generating index..."
+start = time.time()
+data = {
+    'operation': 'refreshIndex',
+    'tmId' : 1
+}
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+urllib2.urlopen(req, json.dumps(data)).read()
+
+end = time.time()
+print "Index regeneration complete. The operation took %.4f s" % (end - start)
diff --git a/tests/getTmsInfo.py b/tests/getTmsInfo.py
new file mode 100755
index 0000000..e871068
--- /dev/null
+++ b/tests/getTmsInfo.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import json
+import urllib2
+import sys
+import time
+import host
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+
+print("Trying getTmsInfo on %s" % address)
+
+data = {
+    'operation': 'getTmsInfo'
+}
+
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+
+print response
diff --git a/tests/host.py_example b/tests/host.py_example
new file mode 100644
index 0000000..26bce0b
--- /dev/null
+++ b/tests/host.py_example
@@ -0,0 +1,2 @@
+concordia_host = 'localhost'
+concordia_port = ''
diff --git a/tests/lemmatizeSentence.py b/tests/lemmatizeSentence.py
new file mode 100755
index 0000000..4874f7c
--- /dev/null
+++ b/tests/lemmatizeSentence.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+    'operation': 'lemmatize',
+    'languageCode':sys.argv[1],
+    'sentence':sys.argv[2]
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/lemmatizeSentences.py b/tests/lemmatizeSentences.py
new file mode 100755
index 0000000..0ef29a6
--- /dev/null
+++ b/tests/lemmatizeSentences.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+    'operation': 'lemmatizeAll',
+    'languageCode':sys.argv[1],
+    'sentences':["ona poszła do sklepu", "powiedziałem to Tomkowi"]
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/lexiconSearch.py b/tests/lexiconSearch.py
new file mode 100755
index 0000000..37ccdf4
--- /dev/null
+++ b/tests/lexiconSearch.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+    'operation': 'lexiconSearch',
+    'pattern':sys.argv[1],
+    'tmId':int(sys.argv[2])
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = urllib2.urlopen(req, json.dumps(data)).read()
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/simpleSearch.py b/tests/simpleSearch.py
new file mode 100755
index 0000000..e7bdaee
--- /dev/null
+++ b/tests/simpleSearch.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+    'operation': 'simpleSearch',
+    'pattern':sys.argv[1],
+    'tmId':int(sys.argv[2])
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+    address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = urllib2.urlopen(req, json.dumps(data)).read()
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/tests/testCurl.sh b/tests/testCurl.sh
new file mode 100755
index 0000000..6e1d388
--- /dev/null
+++ b/tests/testCurl.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+# add sentence
+#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"I jeszcze jedno zdanie testowe", "targetSentence":"Yet another test sentence", "tmId":1}' http://localhost
+
+# add sentences
+#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentences", "sentences":[[1,"test source one", "test target one"],[4,"test source two", "test target two"],[9,"test source three", "test target three"],[13,"test source four", "test target four"]]}' http://localhost
+
+# simple search ("tmId" added to match the payload sent by simpleSearch.py)
+curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"test source", "tmId":1}' http://localhost
+
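
Note for reviewers: every script added by this patch speaks the same protocol — a JSON POST to the concordia-server address built from tests/host.py (created by copying host.py_example), with an 'operation' field selecting the action. The scripts are Python 2 (urllib2, print statements). Below is a minimal sketch of the same call in Python 3, for reference only; it assumes the host value from host.py_example and the host port 10001 published in docker-compose.yml, and reuses the simpleSearch payload from testCurl.sh (a tmId of 1 is assumed to exist):

    import json
    import urllib.request

    # Build the server address the same way the test scripts do.
    concordia_host = 'localhost'  # from host.py_example
    concordia_port = '10001'      # assumption: the host port mapped in docker-compose.yml
    address = 'http://' + concordia_host
    if concordia_port:
        address += ':' + concordia_port

    # Every operation in this patch is a JSON POST with an 'operation' field.
    payload = {'operation': 'simpleSearch', 'pattern': 'test source', 'tmId': 1}
    req = urllib.request.Request(address,
                                 data=json.dumps(payload).encode('utf-8'),
                                 headers={'Content-Type': 'application/json'})
    with urllib.request.urlopen(req) as resp:
        print(json.loads(resp.read().decode('utf-8')))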