added tests, fixed network
commit dab669729f (parent 943911209d)
@@ -6,6 +6,10 @@ services:
     build: ./concordia-postgres
     container_name: concordia-postgres
     restart: always
+    environment:
+      POSTGRES_DB: concordia_db
+      POSTGRES_USER: concordia
+      POSTGRES_PASSWORD: concordia
   lemmagen:
     build: ./lemmagen
     container_name: lemmagen
@@ -15,3 +19,6 @@ services:
     container_name: concordia-server
     ports:
       - "10001:80"
+    depends_on:
+      - "concordia-postgres"
+      - "lemmagen"
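The depends_on entries are presumably the "fixed network" part of the commit message: they make Compose start the database and the lemmatizer before concordia-server, and the POSTGRES_* variables give the server a known database to connect to. A quick smoke check after applying the change (hypothetical, not part of the commit):

    docker-compose up -d
    docker-compose ps    # concordia-postgres, lemmagen and concordia-server should all be Up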
2  tests/.gitignore  vendored  Normal file
@@ -0,0 +1,2 @@
host.py
host.pyc
98  tests/addAlignedFile.py  Executable file
@@ -0,0 +1,98 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import host
import time

BUFFER_SIZE = 500

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


def file_len(fname):
    with open(fname) as f:
        for i, l in enumerate(f):
            pass
    return i + 1


def add_data(data):
    req = urllib2.Request(address)
    req.add_header('Content-Type', 'application/json')
    json.loads(urllib2.urlopen(req, json.dumps(data)).read())


sourceFile = sys.argv[1]
sourceLangId = int(sys.argv[2])
targetLangId = int(sys.argv[3])
name = sys.argv[4]

totalLines = file_len(sourceFile)

# Create a new translation memory and remember its id.
data = {
    'operation': 'addTm',
    'sourceLangId': sourceLangId,
    'targetLangId': targetLangId,
    'name': name
}

req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
tmId = int(response['newTmId'])
print "Added new tm: %d" % tmId

data = {
    'operation': 'addAlignedSentences',
    'tmId': tmId
}

# The input holds three lines per sentence pair; the first line of each group
# is skipped, and the remaining two are stored in reversed order.
sentences = []
currSentence = []
start = time.time()
with open(sourceFile) as sourceLines:
    lineNumber = 0
    for line in sourceLines:
        line = line.strip()
        if lineNumber % 3 == 1:
            currSentence.append(line)
        elif lineNumber % 3 == 2:
            currSentence.append(line)
            currSentence.reverse()
            sentences.append(currSentence)
            currSentence = []
            if len(sentences) >= BUFFER_SIZE:
                data['sentences'] = sentences
                add_data(data)
                mark = time.time()
                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ((lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start)))
                sentences = []
        lineNumber += 1


# Flush whatever is left in the buffer.
if len(sentences) > 0:
    data['sentences'] = sentences
    add_data(data)

end = time.time()
print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start)))

print "Generating index..."
start = time.time()
data = {
    'operation': 'refreshIndex',
    'tmId': tmId
}
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
urllib2.urlopen(req, json.dumps(data)).read()

end = time.time()
print "Index regeneration complete. The operation took %.4f s" % (end - start)
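A usage sketch for the script above; the corpus path, language ids and TM name are made-up examples, not values from the commit:

    ./addAlignedFile.py corpus_aligned.txt 1 2 sample_tm

The input file is expected to hold three lines per sentence pair, of which the first is skipped, so a 3000-line file yields 1000 pairs.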
80  tests/addAlignedFileToTM.py  Executable file
@@ -0,0 +1,80 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import host
import time

BUFFER_SIZE = 500

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


def file_len(fname):
    with open(fname) as f:
        for i, l in enumerate(f):
            pass
    return i + 1


def add_data(data):
    req = urllib2.Request(address)
    req.add_header('Content-Type', 'application/json')
    json.loads(urllib2.urlopen(req, json.dumps(data)).read())


sourceFile = sys.argv[1]
tmId = int(sys.argv[2])

totalLines = file_len(sourceFile)

data = {
    'operation': 'addAlignedSentences',
    'tmId': tmId
}

sentences = []
currSentence = []
start = time.time()
with open(sourceFile) as sourceLines:
    lineNumber = 0
    for line in sourceLines:
        line = line.strip()
        if lineNumber % 3 == 1:
            currSentence.append(line)
        elif lineNumber % 3 == 2:
            currSentence.append(line)
            currSentence.reverse()
            sentences.append(currSentence)
            currSentence = []
            if len(sentences) >= BUFFER_SIZE:
                data['sentences'] = sentences
                add_data(data)
                mark = time.time()
                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ((lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start)))
                sentences = []
        lineNumber += 1


if len(sentences) > 0:
    data['sentences'] = sentences
    add_data(data)

end = time.time()
print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start)))

print "Generating index..."
start = time.time()
data = {
    'operation': 'refreshIndex',
    'tmId': tmId
}
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
urllib2.urlopen(req, json.dumps(data)).read()

end = time.time()
print "Index regeneration complete. The operation took %.4f s" % (end - start)
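Same input format as addAlignedFile.py, but appending to an existing TM instead of creating one; the file path and TM id below are illustrative:

    ./addAlignedFileToTM.py corpus_aligned.txt 5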
111  tests/addAlignedLemmatizedTM.py  Executable file
@@ -0,0 +1,111 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import host
import time

BUFFER_SIZE = 500

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


def file_len(fname):
    with open(fname) as f:
        for i, l in enumerate(f):
            pass
    return i + 1


def add_examples(examplesData):
    req = urllib2.Request(address)
    req.add_header('Content-Type', 'application/json')
    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData)).read())
    if response['status'] == 'error':
        raise Exception(response['message'])


if len(sys.argv) != 7:
    raise Exception("wrong number of arguments")

name = sys.argv[1]
sourceFile = sys.argv[2]
sourceLangId = int(sys.argv[3])
targetFile = sys.argv[4]
targetLangId = int(sys.argv[5])
alignmentsFile = sys.argv[6]

if file_len(sourceFile) != file_len(targetFile):
    raise Exception("source and target files are not of the same length!")

if file_len(alignmentsFile) != 3 * file_len(sourceFile):
    raise Exception("alignments file is not exactly 3 times longer than source and target")


totalExamples = file_len(sourceFile)

data = {
    'operation': 'addTm',
    'sourceLangId': sourceLangId,
    'targetLangId': targetLangId,
    'name': name,
    'tmLemmatized': True
}

req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
print(response)
tmId = int(response['newTmId'])
print "Added new tm: %d" % tmId

data = {
    'operation': 'addAlignedLemmatizedSentences',
    'tmId': tmId
}

examples = []
start = time.time()
with open(sourceFile) as sf, open(targetFile) as tf, open(alignmentsFile) as af:
    for lineNumber in range(totalExamples):
        sourceSentence = sf.readline().strip()
        targetSentence = tf.readline().strip()

        # Skip two lines of the alignments file: these are lemmatized and we
        # need the raw sentences from the source and target files.
        af.readline()
        af.readline()

        alignmentString = af.readline().strip()

        examples.append([sourceSentence, targetSentence, alignmentString])

        if len(examples) >= BUFFER_SIZE:
            data['examples'] = examples
            add_examples(data)
            mark = time.time()
            print "Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % ( (lineNumber+1), totalExamples, mark-start, (lineNumber+1)/(mark-start))
            examples = []


if len(examples) > 0:
    data['examples'] = examples
    add_examples(data)

end = time.time()
print "Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1), end-start, (lineNumber+1)/(end-start))

print "Generating index..."
start = time.time()
data = {
    'operation': 'refreshIndex',
    'tmId': tmId
}
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
urllib2.urlopen(req, json.dumps(data)).read()

end = time.time()
print "Index regeneration complete. The operation took %.4f s" % (end - start)
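The expected call shape, mirroring addLemmatizedTM.sh further down; the TM name and paths here are placeholders:

    ./addAlignedLemmatizedTM.py sample_tm src_final.txt 1 trg_final.txt 2 aligned_final.txt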
117  tests/addFastAlignedTM.py  Executable file
@@ -0,0 +1,117 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import json
import urllib2
import sys
import host
import time

BUFFER_SIZE = 500
LEAVE_OUT = 1  # 1 does not leave out anything

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


def file_len(fname):
    with open(fname) as f:
        for i, l in enumerate(f):
            pass
    return i + 1


def add_examples(examplesData):
    req = urllib2.Request(address)
    req.add_header('Content-Type', 'application/json')
    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData), timeout=3600).read())
    print(response)
    if response['status'] == 'error':
        raise Exception(response['message'])


if len(sys.argv) != 9:
    raise Exception("wrong number of arguments")

name = sys.argv[1]
sourceFile = sys.argv[2]
lemmatizedSourceFile = sys.argv[3]
sourceLangId = int(sys.argv[4])
targetFile = sys.argv[5]
targetLangId = int(sys.argv[6])
alignmentsFile = sys.argv[7]
sourceIdsFile = sys.argv[8]

sourceFileLength = file_len(sourceFile)
lemmatizedSourceFileLength = file_len(lemmatizedSourceFile)
targetFileLength = file_len(targetFile)
alignmentsFileLength = file_len(alignmentsFile)
sourceIdsFileLength = file_len(sourceIdsFile)

if not (sourceFileLength == lemmatizedSourceFileLength and lemmatizedSourceFileLength == targetFileLength and targetFileLength == alignmentsFileLength and alignmentsFileLength == sourceIdsFileLength):
    print("File lengths:")
    print("source file: %d\nlemmatized source file: %d\ntarget file: %d\nalignments file: %d\nsource ids file: %d" % (sourceFileLength, lemmatizedSourceFileLength, targetFileLength, alignmentsFileLength, sourceIdsFileLength))
    raise Exception("files are not of the same length!")

totalExamples = sourceFileLength / LEAVE_OUT

data = {
    'operation': 'addTm',
    'sourceLangId': sourceLangId,
    'targetLangId': targetLangId,
    'name': name,
    'tmLemmatized': True
}

req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = json.loads(urllib2.urlopen(req, json.dumps(data), timeout=3600).read())
print(response)
tmId = int(response['newTmId'])
print "Added new tm: %d" % tmId

data = {
    'operation': 'addSentences',
    'tmId': tmId
}

examples = []
start = time.time()
with open(sourceFile) as source_file, open(lemmatizedSourceFile) as lemmatized_source_file, open(targetFile) as target_file, open(alignmentsFile) as alignments_file, open(sourceIdsFile) as source_ids_file:
    addedCount = 0
    # Note: lines are only consumed on kept iterations, so with LEAVE_OUT > 1
    # this takes the first 1/LEAVE_OUT of the corpus, not every n-th example.
    for lineNumber in range(sourceFileLength):
        if lineNumber % LEAVE_OUT == 0:
            sourceSentence = source_file.readline().strip()
            lemmatizedSourceSentence = lemmatized_source_file.readline().strip()
            targetSentence = target_file.readline().strip()
            alignment = json.loads(alignments_file.readline().strip())
            sourceId = int(source_ids_file.readline().strip())

            examples.append([sourceSentence, lemmatizedSourceSentence, targetSentence, alignment, sourceId])
            addedCount += 1
            if len(examples) >= BUFFER_SIZE:
                data['examples'] = examples
                add_examples(data)
                mark = time.time()
                print "Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % (addedCount, totalExamples, mark-start, addedCount/(mark-start))
                examples = []


if len(examples) > 0:
    data['examples'] = examples
    add_examples(data)

end = time.time()
print "Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (addedCount, end-start, addedCount/(end-start))

print "Generating index..."
start = time.time()
data = {
    'operation': 'refreshIndex',
    'tmId': tmId
}
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
urllib2.urlopen(req, json.dumps(data), timeout=3600).read()

end = time.time()
print "Index regeneration complete. The operation took %.4f s" % (end - start)
8  tests/addFastAlignedTM.sh  Executable file
@@ -0,0 +1,8 @@
#!/bin/sh

CORPUS_NAME=opensubtitles
CORPUS_PATH=../fast-aligner/corpora/$CORPUS_NAME
SRC_LANG_ID=1
TRG_LANG_ID=2

./addFastAlignedTM.py $CORPUS_NAME $CORPUS_PATH/src_clean.txt $CORPUS_PATH/src_clean.lem $SRC_LANG_ID $CORPUS_PATH/trg_clean.txt $TRG_LANG_ID $CORPUS_PATH/alignments.txt $CORPUS_PATH/ids_clean.txt
97  tests/addFile.py  Executable file
@@ -0,0 +1,97 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import host
import time

BUFFER_SIZE = 500

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


def file_len(fname):
    with open(fname) as f:
        for i, l in enumerate(f):
            pass
    return i + 1


def add_data(data):
    req = urllib2.Request(address)
    req.add_header('Content-Type', 'application/json')
    urllib2.urlopen(req, json.dumps(data)).read()


sourceFile = sys.argv[1]
sourceLangId = int(sys.argv[2])
targetFile = sys.argv[3]
targetLangId = int(sys.argv[4])
name = sys.argv[5]

totalLines = file_len(sourceFile)
if file_len(targetFile) != totalLines:
    print "File lengths do not match"
    sys.exit(1)

data = {
    'operation': 'addTm',
    'sourceLangId': sourceLangId,
    'targetLangId': targetLangId,
    'name': name
}

req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
tmId = int(response['newTmId'])
print "Added new tm: %d" % tmId


data = {
    'operation': 'addSentences',
    'tmId': tmId
}

sentences = []
start = time.time()
with open(sourceFile) as sourceSentences:
    with open(targetFile) as targetSentences:
        lineNumber = 0
        for sourceSentence in sourceSentences:
            lineNumber += 1
            targetSentence = targetSentences.readline()
            sentences.append([sourceSentence, targetSentence])
            if lineNumber % BUFFER_SIZE == 0:
                data['sentences'] = sentences
                sentences = []
                add_data(data)
                mark = time.time()
                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % (lineNumber, totalLines, mark-start, lineNumber/(mark-start))


if len(sentences) > 0:
    data['sentences'] = sentences
    add_data(data)

end = time.time()
print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (lineNumber, end-start, lineNumber/(end-start))

print "Generating index..."
start = time.time()
data = {
    'operation': 'refreshIndex',
    'tmId': tmId
}
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
urllib2.urlopen(req, json.dumps(data)).read()

end = time.time()
print "Index regeneration complete. The operation took %.4f s" % (end - start)
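A hypothetical invocation of the script above (paths, language ids and TM name are examples, not from the commit):

    ./addFile.py src.txt 1 trg.txt 2 sample_tm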
4  tests/addJrc.sh  Executable file
@@ -0,0 +1,4 @@
#!/bin/sh

# NOTE: addFile.py expects five arguments (sourceFile, sourceLangId, targetFile,
# targetLangId, name); this call passes only three and would fail as written.
./addFile.py ~/projects/corpora/jrc/jrc_pl.txt ~/projects/corpora/jrc/jrc_en.txt 1
7  tests/addLemmatizedTM.sh  Executable file
@@ -0,0 +1,7 @@
#!/bin/sh

CORPUS_NAME="stocznia_plen"
SRC_LANG_ID=1
TRG_LANG_ID=2

./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt
7  tests/addLemmatizedTMfromParams.sh  Executable file
@@ -0,0 +1,7 @@
#!/bin/sh

CORPUS_NAME=$1
SRC_LANG_ID=$2
TRG_LANG_ID=$3

./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt
33  tests/addSentence.py  Executable file
@@ -0,0 +1,33 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import time
import host

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


data = {
    'operation': 'addSentence',
    'sourceSentence': sys.argv[1],
    'targetSentence': sys.argv[2],
    'tmId': int(sys.argv[3])
}

start = time.time()
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
end = time.time()

print "Execution time: %.4f seconds." % (end-start)
print "Result: "
print response
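A usage sketch; the sentence pair and TM id are illustrative:

    ./addSentence.py "jeszcze jedno zdanie" "one more sentence" 1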
49  tests/addSources.py  Executable file
@@ -0,0 +1,49 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import json
import urllib2
import sys
import time
import host

BUFFER_SIZE = 500


def addSources(sources_buffer):
    data = {
        'operation': 'addSources',
        'sources': sources_buffer
    }

    req = urllib2.Request(address)
    req.add_header('Content-Type', 'application/json')
    urllib2.urlopen(req, json.dumps(data))


address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


with open(sys.argv[1]) as sources_file:
    counter = 0
    sources_buffer = []
    for line in sources_file:
        counter += 1
        id_raw, link, name = line.rstrip().split('\t')

        sources_buffer.append([int(id_raw), name, link])
        if len(sources_buffer) == BUFFER_SIZE:
            addSources(sources_buffer)
            sources_buffer = []
            print("Added %d sources" % counter)


if len(sources_buffer) > 0:
    addSources(sources_buffer)

print("Added all %d sources" % counter)
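The sources file is expected to hold one source per line as tab-separated id, link and name. A hypothetical input and invocation:

    printf '1\thttp://example.com/doc\tExample document\n' > sources.tsv
    ./addSources.py sources.tsv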
12  tests/addStocznia.sh  Executable file
@@ -0,0 +1,12 @@
#!/bin/sh

./addTm.py 1 2 placeholder 1

./addAlignedLemmatizedTM.py stocznia_plen ../mgiza-aligner/corpora/stocznia_plen/src_final.txt 1 ../mgiza-aligner/corpora/stocznia_plen/trg_final.txt 2 ../mgiza-aligner/corpora/stocznia_plen/aligned_final.txt

./addTm.py 1 2 placeholder 1

./addTm.py 1 2 placeholder 1

./addAlignedLemmatizedTM.py stocznia_enpl ../mgiza-aligner/corpora/stocznia_enpl/src_final.txt 2 ../mgiza-aligner/corpora/stocznia_enpl/trg_final.txt 1 ../mgiza-aligner/corpora/stocznia_enpl/aligned_final.txt
27  tests/addTm.py  Executable file
@@ -0,0 +1,27 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import json
import urllib2
import sys
import time
import host

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


data = {
    'operation': 'addTm',
    'sourceLangId': int(sys.argv[1]),
    'targetLangId': int(sys.argv[2]),
    'name': sys.argv[3],
    'tmLemmatized': bool(int(sys.argv[4]))
}

req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())

print response
8  tests/build.sh  Executable file
@@ -0,0 +1,8 @@
#!/bin/sh

./addLemmatizedTMfromParams.sh tmrepository_enhr 2 6
./addTm.py 1 2 placeholder 1
./addLemmatizedTMfromParams.sh icd_dictionary 1 2
./addLemmatizedTMfromParams.sh icd_filtered 1 2
./addLemmatizedTMfromParams.sh emea_plen 1 2
./addLemmatizedTMfromParams.sh jrc_enes 2 4
30  tests/concordiaSearch.py  Executable file
@@ -0,0 +1,30 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import time
import host

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


data = {
    'operation': 'concordiaSearch',
    'pattern': sys.argv[1],
    'tmId': int(sys.argv[2])
}

start = time.time()
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = urllib2.urlopen(req, json.dumps(data)).read()
end = time.time()

print "Execution time: %.4f seconds." % (end-start)
print "Result: "
print response
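A usage sketch (pattern and TM id are illustrative); simpleSearch.py and lexiconSearch.py below take the same two arguments:

    ./concordiaSearch.py "zdanie testowe" 1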
31  tests/fullSearch.py  Executable file
@@ -0,0 +1,31 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import time
import host

data = {
    'operation': 'fullSearch',
    'pattern': sys.argv[1],
    'tmId': int(sys.argv[2]),
    'limit': int(sys.argv[3]),
    'offset': int(sys.argv[4])
}

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port

start = time.time()
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = urllib2.urlopen(req, json.dumps(data)).read()
end = time.time()

print "Execution time: %.4f seconds." % (end-start)
print "Result: "
print response
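Same shape as above plus paging; a hypothetical call asking for the first 10 results:

    ./fullSearch.py "zdanie testowe" 1 10 0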
28  tests/generateIndex.py  Executable file
@@ -0,0 +1,28 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import host
import time

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


print "Generating index..."
start = time.time()
data = {
    'operation': 'refreshIndex',
    'tmId': 1
}
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
urllib2.urlopen(req, json.dumps(data)).read()

end = time.time()
print "Index regeneration complete. The operation took %.4f s" % (end - start)
25  tests/getTmsInfo.py  Executable file
@@ -0,0 +1,25 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import json
import urllib2
import sys
import time
import host

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port


print("Trying getTmsInfo on %s" % address)

data = {
    'operation': 'getTmsInfo'
}

req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())

print response
2  tests/host.py_example  Normal file
@@ -0,0 +1,2 @@
concordia_host = 'localhost'
concordia_port = ''
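Every test script imports host, and tests/.gitignore above keeps the real host.py out of version control; a plausible setup step is:

    cp host.py_example host.py    # then point concordia_host / concordia_port at your deployment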
29  tests/lemmatizeSentence.py  Executable file
@@ -0,0 +1,29 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import time
import host

data = {
    'operation': 'lemmatize',
    'languageCode': sys.argv[1],
    'sentence': sys.argv[2]
}

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port

start = time.time()
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
end = time.time()

print "Execution time: %.4f seconds." % (end-start)
print "Result: "
print response
29  tests/lemmatizeSentences.py  Executable file
@@ -0,0 +1,29 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import time
import host

data = {
    'operation': 'lemmatizeAll',
    'languageCode': sys.argv[1],
    'sentences': ["ona poszła do sklepu", "powiedziałem to Tomkowi"]
}

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port

start = time.time()
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
end = time.time()

print "Execution time: %.4f seconds." % (end-start)
print "Result: "
print response
29  tests/lexiconSearch.py  Executable file
@@ -0,0 +1,29 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import time
import host

data = {
    'operation': 'lexiconSearch',
    'pattern': sys.argv[1],
    'tmId': int(sys.argv[2])
}

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port

start = time.time()
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = urllib2.urlopen(req, json.dumps(data)).read()
end = time.time()

print "Execution time: %.4f seconds." % (end-start)
print "Result: "
print response
29  tests/simpleSearch.py  Executable file
@@ -0,0 +1,29 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import unittest
import json
import urllib2
import sys
import time
import host

data = {
    'operation': 'simpleSearch',
    'pattern': sys.argv[1],
    'tmId': int(sys.argv[2])
}

address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port

start = time.time()
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
response = urllib2.urlopen(req, json.dumps(data)).read()
end = time.time()

print "Execution time: %.4f seconds." % (end-start)
print "Result: "
print response
12  tests/testCurl.sh  Executable file
@@ -0,0 +1,12 @@
#!/bin/sh

# add sentence
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"I jeszcze jedno zdanie testowe", "targetSentence":"Yet another test sentence", "tmId":1}' http://localhost

# add sentences
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentences", "sentences":[[1,"test source one", "test target one"],[4,"test source two", "test target two"],[9,"test source three", "test target three"],[13,"test source four", "test target four"]]}' http://localhost

# simple search
curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"test source"}' http://localhost