diff --git a/tests/addAlignedFile.py b/tests/addAlignedFile.py
deleted file mode 100755
index 22846f2..0000000
--- a/tests/addAlignedFile.py
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import unittest
-import json
-import urllib2
-import sys
-import host
-import time
-
-BUFFER_SIZE = 500
-
-address = 'http://'+host.concordia_host
-if len(host.concordia_port) > 0:
-    address += ':'+host.concordia_port
-
-
-def file_len(fname):
-    with open(fname) as f:
-        for i, l in enumerate(f):
-            pass
-    return i + 1
-
-def add_data(data):
-    req = urllib2.Request(address)
-    req.add_header('Content-Type', 'application/json')
-    json.loads(urllib2.urlopen(req, json.dumps(data)).read())
-
-sourceFile = sys.argv[1]
-sourceLangId = int(sys.argv[2])
-targetLangId = int(sys.argv[3])
-name = sys.argv[4]
-
-totalLines = file_len(sourceFile)
-
-data = {
-    'operation': 'addTm',
-    'sourceLangId':sourceLangId,
-    'targetLangId':targetLangId,
-    'name':name
-}
-
-req = urllib2.Request(address)
-req.add_header('Content-Type', 'application/json')
-response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
-tmId = int(response['newTmId'])
-print "Added new tm: %d" % tmId
-
-data = {
-    'operation': 'addAlignedSentences',
-    'tmId':tmId
-}
-
-sentences = []
-currSentence = []
-start = time.time()
-with open(sourceFile) as sourceLines:
-    lineNumber = 0
-    for line in sourceLines:
-        line = line.strip()
-        if lineNumber % 3 == 1:
-            currSentence.append(line)
-        elif lineNumber % 3 == 2:
-            currSentence.append(line)
-            currSentence.reverse()
-            sentences.append(currSentence)
-            currSentence = []
-            if len(sentences) >= BUFFER_SIZE:
-                data['sentences'] = sentences
-                add_data(data)
-                mark = time.time()
-                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ( (lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start)))
-                sentences = []
-        lineNumber += 1
-
-
-if len(sentences) > 0:
-    data['sentences'] = sentences
-    add_data(data)
-
-end = time.time()
-print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start)))
-
-print "Generating index..."
-start = time.time()
-data = {
-    'operation': 'refreshIndex',
-    'tmId' : tmId
-}
-req = urllib2.Request(address)
-req.add_header('Content-Type', 'application/json')
-urllib2.urlopen(req, json.dumps(data)).read()
-
-end = time.time()
-print "Index regeneration complete. The operation took %.4f s" % (end - start)
The operation took %.4f s" % (end - start) - - - diff --git a/tests/addAlignedFileToTM.py b/tests/addAlignedFileToTM.py deleted file mode 100755 index b450ac6..0000000 --- a/tests/addAlignedFileToTM.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -import unittest -import json -import urllib2 -import sys -import host -import time - -BUFFER_SIZE = 500 - -address = 'http://'+host.concordia_host -if len(host.concordia_port) > 0: - address += ':'+host.concordia_port - - -def file_len(fname): - with open(fname) as f: - for i, l in enumerate(f): - pass - return i + 1 - -def add_data(data): - req = urllib2.Request(address) - req.add_header('Content-Type', 'application/json') - json.loads(urllib2.urlopen(req, json.dumps(data)).read()) - -sourceFile = sys.argv[1] -tmId = int(sys.argv[2]) - -totalLines = file_len(sourceFile) - -data = { - 'operation': 'addAlignedSentences', - 'tmId':tmId -} - -sentences = [] -currSentence = [] -start = time.time() -with open(sourceFile) as sourceLines: - lineNumber = 0 - for line in sourceLines: - line = line.strip() - if lineNumber % 3 == 1: - currSentence.append(line) - elif lineNumber % 3 == 2: - currSentence.append(line) - currSentence.reverse() - sentences.append(currSentence) - currSentence = [] - if len(sentences) >= BUFFER_SIZE: - data['sentences'] = sentences - add_data(data) - mark = time.time() - print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ( (lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start))) - sentences = [] - lineNumber += 1 - - -if len(sentences) > 0: - data['sentences'] = sentences - add_data(data) - -end = time.time() -print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start))) - -print "Generating index..." -start = time.time() -data = { - 'operation': 'refreshIndex', - 'tmId' : tmId -} -req = urllib2.Request(address) -req.add_header('Content-Type', 'application/json') -urllib2.urlopen(req, json.dumps(data)).read() - -end = time.time() -print "Index regeneration complete. 
The operation took %.4f s" % (end - start) diff --git a/tests/addAlignedLemmatizedTM.py b/tests/addAlignedLemmatizedTM.py deleted file mode 100755 index 26f2960..0000000 --- a/tests/addAlignedLemmatizedTM.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -import unittest -import json -import urllib2 -import sys -import host -import time - -BUFFER_SIZE = 500 - -address = 'http://'+host.concordia_host -if len(host.concordia_port) > 0: - address += ':'+host.concordia_port - - -def file_len(fname): - with open(fname) as f: - for i, l in enumerate(f): - pass - return i + 1 - -def add_examples(examplesData): - req = urllib2.Request(address) - req.add_header('Content-Type', 'application/json') - response = json.loads(urllib2.urlopen(req, json.dumps(examplesData)).read()) - if response['status'] == 'error': - raise Exception(response['message']) - -if len(sys.argv) != 7: - raise Exception("wrong number of arguments") - -name = sys.argv[1] -sourceFile = sys.argv[2] -sourceLangId = int(sys.argv[3]) -targetFile = sys.argv[4] -targetLangId = int(sys.argv[5]) -alignmentsFile = sys.argv[6] - -if (file_len(sourceFile) != file_len(targetFile)): - raise Exception("source and target files are not of the same length!") - -if (file_len(alignmentsFile) != 3*file_len(sourceFile)): - raise Exception("alignments file is not exactly 3 times longer than source and target") - - -totalExamples = file_len(sourceFile) - -data = { - 'operation': 'addTm', - 'sourceLangId':sourceLangId, - 'targetLangId':targetLangId, - 'name':name, - 'tmLemmatized':True -} - -req = urllib2.Request(address) -req.add_header('Content-Type', 'application/json') -response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) -print(response) -tmId = int(response['newTmId']) -print "Added new tm: %d" % tmId - -data = { - 'operation': 'addAlignedLemmatizedSentences', - 'tmId':tmId -} - -examples = [] -start = time.time() -with open(sourceFile) as sf, open(targetFile) as tf, open(alignmentsFile) as af: - for lineNumber in range(totalExamples): - sourceSentence = sf.readline().strip() - targetSentence = tf.readline().strip() - - # skip to lines of the alignments file, these are lemmatized and we need the raw sentences from the source and target files. - af.readline() - af.readline() - - alignmentString = af.readline().strip() - - examples.append([sourceSentence, targetSentence, alignmentString]) - - if len(examples) >= BUFFER_SIZE: - data['examples'] = examples - add_examples(data) - mark = time.time() - print "Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % ( (lineNumber+1), totalExamples, mark-start, (lineNumber+1)/(mark-start)) - examples = [] - - -if len(examples) > 0: - data['examples'] = examples - add_examples(data) - -end = time.time() -print "Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1), end-start, (lineNumber+1)/(end-start)) - -print "Generating index..." -start = time.time() -data = { - 'operation': 'refreshIndex', - 'tmId' : tmId -} -req = urllib2.Request(address) -req.add_header('Content-Type', 'application/json') -urllib2.urlopen(req, json.dumps(data)).read() - -end = time.time() -print "Index regeneration complete. 
The operation took %.4f s" % (end - start) diff --git a/tests/addFile.py b/tests/addFile.py deleted file mode 100755 index c484100..0000000 --- a/tests/addFile.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -import unittest -import json -import urllib2 -import sys -import host -import time - -BUFFER_SIZE = 500 - -address = 'http://'+host.concordia_host -if len(host.concordia_port) > 0: - address += ':'+host.concordia_port - - -def file_len(fname): - with open(fname) as f: - for i, l in enumerate(f): - pass - return i + 1 - -def add_data(data): - req = urllib2.Request(address) - req.add_header('Content-Type', 'application/json') - urllib2.urlopen(req, json.dumps(data)).read() - -sourceFile = sys.argv[1] -sourceLangId = int(sys.argv[2]) -targetFile = sys.argv[3] -targetLangId = int(sys.argv[4]) -name = sys.argv[5] - -totalLines = file_len(sourceFile) -if file_len(targetFile) != totalLines: - print "File lengths do not match" - sys.exit(1) - -data = { - 'operation': 'addTm', - 'sourceLangId':sourceLangId, - 'targetLangId':targetLangId, - 'name':name -} - -req = urllib2.Request(address) -req.add_header('Content-Type', 'application/json') -response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) -tmId = int(response['newTmId']) -print "Added new tm: %d" % tmId - - -data = { - 'operation': 'addSentences', - 'tmId':tmId -} - -sentences = [] -start = time.time() -with open(sourceFile) as sourceSentences: - with open(targetFile) as targetSentences: - lineNumber = 0 - for sourceSentence in sourceSentences: - lineNumber += 1 - targetSentence = targetSentences.readline() - sentences.append([sourceSentence, targetSentence]) - if lineNumber % BUFFER_SIZE == 0: - data['sentences'] = sentences - sentences = [] - add_data(data) - mark = time.time() - print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % (lineNumber, totalLines, mark-start, lineNumber/(mark-start)) - - -if len(sentences) > 0: - data['sentences'] = sentences - add_data(data) - -end = time.time() -print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (lineNumber, end-start, lineNumber/(end-start)) - -print "Generating index..." -start = time.time() -data = { - 'operation': 'refreshIndex', - 'tmId' : tmId -} -req = urllib2.Request(address) -req.add_header('Content-Type', 'application/json') -urllib2.urlopen(req, json.dumps(data)).read() - -end = time.time() -print "Index regeneration complete. 
The operation took %.4f s" % (end - start) - - - diff --git a/tests/addJrc.sh b/tests/addJrc.sh deleted file mode 100755 index 3526eff..0000000 --- a/tests/addJrc.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -./addFile.py ~/projects/corpora/jrc/jrc_pl.txt ~/projects/corpora/jrc/jrc_en.txt 1 - diff --git a/tests/addLemmatizedTM.sh b/tests/addLemmatizedTM.sh deleted file mode 100755 index 8a83148..0000000 --- a/tests/addLemmatizedTM.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -CORPUS_NAME="stocznia_plen" -SRC_LANG_ID=1 -TRG_LANG_ID=2 - -./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt diff --git a/tests/addLemmatizedTMfromParams.sh b/tests/addLemmatizedTMfromParams.sh deleted file mode 100755 index 1c61582..0000000 --- a/tests/addLemmatizedTMfromParams.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -CORPUS_NAME=$1 -SRC_LANG_ID=$2 -TRG_LANG_ID=$3 - -./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt diff --git a/tests/addOpenSubtitlesError.sh b/tests/addOpenSubtitlesError.sh deleted file mode 100755 index c65a124..0000000 --- a/tests/addOpenSubtitlesError.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh - -CORPUS_NAME=error_sample -CORPUS_PATH=/root/opensubtitles_pack/error_sample -SRC_LANG_ID=1 -TRG_LANG_ID=2 - -./addFastAlignedTM.py $CORPUS_NAME $CORPUS_PATH/src_clean.txt $CORPUS_PATH/src_clean.lem $SRC_LANG_ID $CORPUS_PATH/trg_clean.txt $TRG_LANG_ID $CORPUS_PATH/alignments.txt $CORPUS_PATH/ids_clean.txt - diff --git a/tests/addSentence.py b/tests/addSentence.py deleted file mode 100755 index 6ad3f63..0000000 --- a/tests/addSentence.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -import unittest -import json -import urllib2 -import sys -import time -import host - -address = 'http://'+host.concordia_host -if len(host.concordia_port) > 0: - address += ':'+host.concordia_port - - -data = { - 'operation': 'addSentence', - 'sourceSentence':sys.argv[1], - 'targetSentence':sys.argv[2], - 'tmId':int(sys.argv[3]) -} - -start = time.time() -req = urllib2.Request(address) -req.add_header('Content-Type', 'application/json') -response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) -end = time.time() - -print "Execution time: %.4f seconds." 
-print "Result: "
-print response
-
-
diff --git a/tests/addStocznia.sh b/tests/addStocznia.sh
deleted file mode 100755
index b68a82e..0000000
--- a/tests/addStocznia.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh
-
-./addTm.py 1 2 placeholder 1
-
-./addAlignedLemmatizedTM.py stocznia_plen ../mgiza-aligner/corpora/stocznia_plen/src_final.txt 1 ../mgiza-aligner/corpora/stocznia_plen/trg_final.txt 2 ../mgiza-aligner/corpora/stocznia_plen/aligned_final.txt
-
-./addTm.py 1 2 placeholder 1
-
-./addTm.py 1 2 placeholder 1
-
-./addAlignedLemmatizedTM.py stocznia_enpl ../mgiza-aligner/corpora/stocznia_enpl/src_final.txt 2 ../mgiza-aligner/corpora/stocznia_enpl/trg_final.txt 1 ../mgiza-aligner/corpora/stocznia_enpl/aligned_final.txt
-
diff --git a/tests/build.sh b/tests/build.sh
deleted file mode 100755
index 4985b50..0000000
--- a/tests/build.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-
-./addLemmatizedTMfromParams.sh tmrepository_enhr 2 6
-./addTm.py 1 2 placeholder 1
-./addLemmatizedTMfromParams.sh icd_dictionary 1 2
-./addLemmatizedTMfromParams.sh icd_filtered 1 2
-./addLemmatizedTMfromParams.sh emea_plen 1 2
-./addLemmatizedTMfromParams.sh jrc_enes 2 4
diff --git a/tests/.gitignore b/tools/.gitignore
similarity index 100%
rename from tests/.gitignore
rename to tools/.gitignore
diff --git a/tests/addFastAlignedTM.py b/tools/addFastAlignedTM.py
similarity index 100%
rename from tests/addFastAlignedTM.py
rename to tools/addFastAlignedTM.py
diff --git a/tests/addFastAlignedTM.sh b/tools/addFastAlignedTM.sh
similarity index 100%
rename from tests/addFastAlignedTM.sh
rename to tools/addFastAlignedTM.sh
diff --git a/tests/addOpenSubtitles.sh b/tools/addOpenSubtitles.sh
similarity index 100%
rename from tests/addOpenSubtitles.sh
rename to tools/addOpenSubtitles.sh
diff --git a/tests/addSources.py b/tools/addSources.py
similarity index 100%
rename from tests/addSources.py
rename to tools/addSources.py
diff --git a/tests/addTm.py b/tools/addTm.py
similarity index 100%
rename from tests/addTm.py
rename to tools/addTm.py
diff --git a/tests/concordiaSearch.py b/tools/concordiaSearch.py
similarity index 100%
rename from tests/concordiaSearch.py
rename to tools/concordiaSearch.py
diff --git a/tests/fullSearch.py b/tools/fullSearch.py
similarity index 100%
rename from tests/fullSearch.py
rename to tools/fullSearch.py
diff --git a/tests/generateIndex.py b/tools/generateIndex.py
similarity index 100%
rename from tests/generateIndex.py
rename to tools/generateIndex.py
diff --git a/tests/getTmsInfo.py b/tools/getTmsInfo.py
similarity index 100%
rename from tests/getTmsInfo.py
rename to tools/getTmsInfo.py
diff --git a/tests/getTmsInfo3.py b/tools/getTmsInfo3.py
similarity index 100%
rename from tests/getTmsInfo3.py
rename to tools/getTmsInfo3.py
diff --git a/tests/host.py_example b/tools/host.py_example
similarity index 100%
rename from tests/host.py_example
rename to tools/host.py_example
diff --git a/tests/lemmatizeSentence.py b/tools/lemmatizeSentence.py
similarity index 100%
rename from tests/lemmatizeSentence.py
rename to tools/lemmatizeSentence.py
diff --git a/tests/lemmatizeSentences.py b/tools/lemmatizeSentences.py
similarity index 100%
rename from tests/lemmatizeSentences.py
rename to tools/lemmatizeSentences.py
diff --git a/tests/lexiconSearch.py b/tools/lexiconSearch.py
similarity index 100%
rename from tests/lexiconSearch.py
rename to tools/lexiconSearch.py
diff --git a/tests/opensubtitles_sample/alignments.txt b/tools/opensubtitles_sample/alignments.txt
similarity index 100%
rename from tests/opensubtitles_sample/alignments.txt
rename to tools/opensubtitles_sample/alignments.txt
diff --git a/tests/opensubtitles_sample/falign_corpus.txt b/tools/opensubtitles_sample/falign_corpus.txt
similarity index 100%
rename from tests/opensubtitles_sample/falign_corpus.txt
rename to tools/opensubtitles_sample/falign_corpus.txt
diff --git a/tests/opensubtitles_sample/falign_result.txt b/tools/opensubtitles_sample/falign_result.txt
similarity index 100%
rename from tests/opensubtitles_sample/falign_result.txt
rename to tools/opensubtitles_sample/falign_result.txt
diff --git a/tests/opensubtitles_sample/ids.txt b/tools/opensubtitles_sample/ids.txt
similarity index 100%
rename from tests/opensubtitles_sample/ids.txt
rename to tools/opensubtitles_sample/ids.txt
diff --git a/tests/opensubtitles_sample/ids_clean.txt b/tools/opensubtitles_sample/ids_clean.txt
similarity index 100%
rename from tests/opensubtitles_sample/ids_clean.txt
rename to tools/opensubtitles_sample/ids_clean.txt
diff --git a/tests/opensubtitles_sample/ids_sources_censored.txt b/tools/opensubtitles_sample/ids_sources_censored.txt
similarity index 100%
rename from tests/opensubtitles_sample/ids_sources_censored.txt
rename to tools/opensubtitles_sample/ids_sources_censored.txt
diff --git a/tests/opensubtitles_sample/src.dict b/tools/opensubtitles_sample/src.dict
similarity index 100%
rename from tests/opensubtitles_sample/src.dict
rename to tools/opensubtitles_sample/src.dict
diff --git a/tests/opensubtitles_sample/src.lem b/tools/opensubtitles_sample/src.lem
similarity index 100%
rename from tests/opensubtitles_sample/src.lem
rename to tools/opensubtitles_sample/src.lem
diff --git a/tests/opensubtitles_sample/src.txt b/tools/opensubtitles_sample/src.txt
similarity index 100%
rename from tests/opensubtitles_sample/src.txt
rename to tools/opensubtitles_sample/src.txt
diff --git a/tests/opensubtitles_sample/src_clean.lem b/tools/opensubtitles_sample/src_clean.lem
similarity index 100%
rename from tests/opensubtitles_sample/src_clean.lem
rename to tools/opensubtitles_sample/src_clean.lem
diff --git a/tests/opensubtitles_sample/src_clean.txt b/tools/opensubtitles_sample/src_clean.txt
similarity index 100%
rename from tests/opensubtitles_sample/src_clean.txt
rename to tools/opensubtitles_sample/src_clean.txt
diff --git a/tests/opensubtitles_sample/trg.dict b/tools/opensubtitles_sample/trg.dict
similarity index 100%
rename from tests/opensubtitles_sample/trg.dict
rename to tools/opensubtitles_sample/trg.dict
diff --git a/tests/opensubtitles_sample/trg.lem b/tools/opensubtitles_sample/trg.lem
similarity index 100%
rename from tests/opensubtitles_sample/trg.lem
rename to tools/opensubtitles_sample/trg.lem
diff --git a/tests/opensubtitles_sample/trg.txt b/tools/opensubtitles_sample/trg.txt
similarity index 100%
rename from tests/opensubtitles_sample/trg.txt
rename to tools/opensubtitles_sample/trg.txt
diff --git a/tests/opensubtitles_sample/trg_clean.txt b/tools/opensubtitles_sample/trg_clean.txt
similarity index 100%
rename from tests/opensubtitles_sample/trg_clean.txt
rename to tools/opensubtitles_sample/trg_clean.txt
diff --git a/tests/removeDockerContainers.sh b/tools/removeDockerContainers.sh
similarity index 100%
rename from tests/removeDockerContainers.sh
rename to tools/removeDockerContainers.sh
diff --git a/tests/removeDockerImages.sh b/tools/removeDockerImages.sh
similarity index 100%
rename from tests/removeDockerImages.sh
rename to tools/removeDockerImages.sh
diff --git a/tests/simpleSearch.py b/tools/simpleSearch.py
similarity index 100%
rename from tests/simpleSearch.py
rename to tools/simpleSearch.py
diff --git a/tests/testCurl.sh b/tools/testCurl.sh
similarity index 100%
rename from tests/testCurl.sh
rename to tools/testCurl.sh