cleaned tests

2019-05-30 11:25:12 +02:00 · 2019-05-30 11:25:12 +02:00 · 6b59e1b0f3
commit 6b59e1b0f3
parent 441c6fffb5
45 changed files with 0 additions and 466 deletions
--- a/tests/addAlignedFile.py
+++ b/tests/addAlignedFile.py
@ -1,98 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import unittest
-import json
-import urllib2
-import sys
-import host
-import time
-
-BUFFER_SIZE = 500
-
-address = 'http://'+host.concordia_host
-if len(host.concordia_port) > 0:
-    address += ':'+host.concordia_port
-
-
-def file_len(fname):
-    with open(fname) as f:
-        for i, l in enumerate(f):
-            pass
-    return i + 1
-
-def add_data(data):
-    req = urllib2.Request(address)
-    req.add_header('Content-Type', 'application/json')
-    json.loads(urllib2.urlopen(req, json.dumps(data)).read())
-    
-sourceFile = sys.argv[1]
-sourceLangId = int(sys.argv[2])
-targetLangId = int(sys.argv[3])
-name = sys.argv[4]
-
-totalLines = file_len(sourceFile)
-
-data = {
-    'operation': 'addTm',
-    'sourceLangId':sourceLangId,
-    'targetLangId':targetLangId,
-    'name':name
-}
-
-req = urllib2.Request(address)
-req.add_header('Content-Type', 'application/json')
-response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
-tmId = int(response['newTmId'])
-print "Added new tm: %d" % tmId
-
-data = {
-    'operation': 'addAlignedSentences',
-    'tmId':tmId
-}
-
-sentences = []
-currSentence = []
-start = time.time()
-with open(sourceFile) as sourceLines:
-    lineNumber = 0
-    for line in sourceLines:
-        line = line.strip()
-        if lineNumber % 3 == 1:
-            currSentence.append(line)
-        elif lineNumber % 3 == 2:
-            currSentence.append(line)
-            currSentence.reverse()
-            sentences.append(currSentence)
-            currSentence = []
-            if len(sentences) >= BUFFER_SIZE:
-                data['sentences'] = sentences
-                add_data(data)
-                mark = time.time()
-                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ( (lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start)))
-                sentences = []
-        lineNumber += 1
-                
-
-if len(sentences) > 0:
-    data['sentences'] = sentences
-    add_data(data)
-    
-end = time.time()
-print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start)))
-
-print "Generating index..."
-start = time.time()
-data = {
-    'operation': 'refreshIndex',
-    'tmId' : tmId
-}
-req = urllib2.Request(address)
-req.add_header('Content-Type', 'application/json')
-urllib2.urlopen(req, json.dumps(data)).read()
-
-end = time.time()
-print "Index regeneration complete. The operation took %.4f s" % (end - start)
-
-
-
--- a/tests/addAlignedFileToTM.py
+++ b/tests/addAlignedFileToTM.py
@ -1,80 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import unittest
-import json
-import urllib2
-import sys
-import host
-import time
-
-BUFFER_SIZE = 500
-
-address = 'http://'+host.concordia_host
-if len(host.concordia_port) > 0:
-    address += ':'+host.concordia_port
-
-
-def file_len(fname):
-    with open(fname) as f:
-        for i, l in enumerate(f):
-            pass
-    return i + 1
-
-def add_data(data):
-    req = urllib2.Request(address)
-    req.add_header('Content-Type', 'application/json')
-    json.loads(urllib2.urlopen(req, json.dumps(data)).read())
-
-sourceFile = sys.argv[1]
-tmId = int(sys.argv[2])
-
-totalLines = file_len(sourceFile)
-
-data = {
-    'operation': 'addAlignedSentences',
-    'tmId':tmId
-}
-
-sentences = []
-currSentence = []
-start = time.time()
-with open(sourceFile) as sourceLines:
-    lineNumber = 0
-    for line in sourceLines:
-        line = line.strip()
-        if lineNumber % 3 == 1:
-            currSentence.append(line)
-        elif lineNumber % 3 == 2:
-            currSentence.append(line)
-            currSentence.reverse()
-            sentences.append(currSentence)
-            currSentence = []
-            if len(sentences) >= BUFFER_SIZE:
-                data['sentences'] = sentences
-                add_data(data)
-                mark = time.time()
-                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ( (lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start)))
-                sentences = []
-        lineNumber += 1
-
-
-if len(sentences) > 0:
-    data['sentences'] = sentences
-    add_data(data)
-
-end = time.time()
-print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start)))
-
-print "Generating index..."
-start = time.time()
-data = {
-    'operation': 'refreshIndex',
-    'tmId' : tmId
-}
-req = urllib2.Request(address)
-req.add_header('Content-Type', 'application/json')
-urllib2.urlopen(req, json.dumps(data)).read()
-
-end = time.time()
-print "Index regeneration complete. The operation took %.4f s" % (end - start)
--- a/tests/addAlignedLemmatizedTM.py
+++ b/tests/addAlignedLemmatizedTM.py
@ -1,111 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import unittest
-import json
-import urllib2
-import sys
-import host
-import time
-
-BUFFER_SIZE = 500
-
-address = 'http://'+host.concordia_host
-if len(host.concordia_port) > 0:
-    address += ':'+host.concordia_port
-
-
-def file_len(fname):
-    with open(fname) as f:
-        for i, l in enumerate(f):
-            pass
-    return i + 1
-
-def add_examples(examplesData):
-    req = urllib2.Request(address)
-    req.add_header('Content-Type', 'application/json')
-    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData)).read())
-    if response['status'] == 'error':
-        raise Exception(response['message'])
-
-if len(sys.argv) != 7:
-    raise Exception("wrong number of arguments")
-
-name = sys.argv[1]
-sourceFile = sys.argv[2]
-sourceLangId = int(sys.argv[3])
-targetFile = sys.argv[4]
-targetLangId = int(sys.argv[5])
-alignmentsFile = sys.argv[6]
-
-if (file_len(sourceFile) != file_len(targetFile)):
-    raise Exception("source and target files are not of the same length!")
-
-if (file_len(alignmentsFile) != 3*file_len(sourceFile)):
-    raise Exception("alignments file is not exactly 3 times longer than source and target")
-
-
-totalExamples = file_len(sourceFile)
-
-data = {
-    'operation': 'addTm',
-    'sourceLangId':sourceLangId,
-    'targetLangId':targetLangId,
-    'name':name,
-    'tmLemmatized':True
-}
-
-req = urllib2.Request(address)
-req.add_header('Content-Type', 'application/json')
-response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
-print(response)
-tmId = int(response['newTmId'])
-print "Added new tm: %d" % tmId
-
-data = {
-    'operation': 'addAlignedLemmatizedSentences',
-    'tmId':tmId
-}
-
-examples = []
-start = time.time()
-with open(sourceFile) as sf, open(targetFile) as tf, open(alignmentsFile) as af:
-    for lineNumber in range(totalExamples):
-        sourceSentence = sf.readline().strip()
-        targetSentence = tf.readline().strip()
-
-        # skip to lines of the alignments file, these are lemmatized and we need the raw sentences from the source and target files.
-        af.readline()
-        af.readline()
-
-        alignmentString = af.readline().strip()
-
-        examples.append([sourceSentence, targetSentence, alignmentString])
-
-        if len(examples) >= BUFFER_SIZE:
-            data['examples'] = examples
-            add_examples(data)
-            mark = time.time()
-            print "Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % ( (lineNumber+1), totalExamples, mark-start, (lineNumber+1)/(mark-start))
-            examples = []
-
-
-if len(examples) > 0:
-    data['examples'] = examples
-    add_examples(data)
-
-end = time.time()
-print "Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1), end-start, (lineNumber+1)/(end-start))
-
-print "Generating index..."
-start = time.time()
-data = {
-    'operation': 'refreshIndex',
-    'tmId' : tmId
-}
-req = urllib2.Request(address)
-req.add_header('Content-Type', 'application/json')
-urllib2.urlopen(req, json.dumps(data)).read()
-
-end = time.time()
-print "Index regeneration complete. The operation took %.4f s" % (end - start)
--- a/tests/addFile.py
+++ b/tests/addFile.py
@ -1,97 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import unittest
-import json
-import urllib2
-import sys
-import host
-import time
-
-BUFFER_SIZE = 500
-
-address = 'http://'+host.concordia_host
-if len(host.concordia_port) > 0:
-    address += ':'+host.concordia_port
-
-
-def file_len(fname):
-    with open(fname) as f:
-        for i, l in enumerate(f):
-            pass
-    return i + 1
-
-def add_data(data):
-    req = urllib2.Request(address)
-    req.add_header('Content-Type', 'application/json')
-    urllib2.urlopen(req, json.dumps(data)).read()
-
-sourceFile = sys.argv[1]
-sourceLangId = int(sys.argv[2])
-targetFile = sys.argv[3]
-targetLangId = int(sys.argv[4])
-name = sys.argv[5]
-
-totalLines = file_len(sourceFile)
-if file_len(targetFile) != totalLines:
-    print "File lengths do not match"
-    sys.exit(1)
-    
-data = {
-    'operation': 'addTm',
-    'sourceLangId':sourceLangId,
-    'targetLangId':targetLangId,
-    'name':name
-}
-
-req = urllib2.Request(address)
-req.add_header('Content-Type', 'application/json')
-response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
-tmId = int(response['newTmId'])
-print "Added new tm: %d" % tmId
-
-
-data = {
-    'operation': 'addSentences',
-    'tmId':tmId
-}
-
-sentences = []
-start = time.time()
-with open(sourceFile) as sourceSentences:
-    with open(targetFile) as targetSentences:
-        lineNumber = 0
-        for sourceSentence in sourceSentences:
-            lineNumber += 1
-            targetSentence = targetSentences.readline()
-            sentences.append([sourceSentence, targetSentence])
-            if lineNumber % BUFFER_SIZE == 0:
-                data['sentences'] = sentences
-                sentences = []
-                add_data(data)
-                mark = time.time()
-                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % (lineNumber, totalLines, mark-start, lineNumber/(mark-start))
-                
-
-if len(sentences) > 0:
-    data['sentences'] = sentences
-    add_data(data)
-    
-end = time.time()
-print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (lineNumber, end-start, lineNumber/(end-start))
-
-print "Generating index..."
-start = time.time()
-data = {
-    'operation': 'refreshIndex',
-    'tmId' : tmId
-}
-req = urllib2.Request(address)
-req.add_header('Content-Type', 'application/json')
-urllib2.urlopen(req, json.dumps(data)).read()
-
-end = time.time()
-print "Index regeneration complete. The operation took %.4f s" % (end - start)
-
-
-
--- a/tests/addJrc.sh
+++ b/tests/addJrc.sh
@ -1,4 +0,0 @@
-#!/bin/sh
-
-./addFile.py ~/projects/corpora/jrc/jrc_pl.txt ~/projects/corpora/jrc/jrc_en.txt 1
-
--- a/tests/addLemmatizedTM.sh
+++ b/tests/addLemmatizedTM.sh
@ -1,7 +0,0 @@
-#!/bin/sh
-
-CORPUS_NAME="stocznia_plen"
-SRC_LANG_ID=1
-TRG_LANG_ID=2
-
-./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt
--- a/tests/addLemmatizedTMfromParams.sh
+++ b/tests/addLemmatizedTMfromParams.sh
@ -1,7 +0,0 @@
-#!/bin/sh
-
-CORPUS_NAME=$1
-SRC_LANG_ID=$2
-TRG_LANG_ID=$3
-
-./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt
--- a/tests/addOpenSubtitlesError.sh
+++ b/tests/addOpenSubtitlesError.sh
@ -1,9 +0,0 @@
-#!/bin/sh
-
-CORPUS_NAME=error_sample
-CORPUS_PATH=/root/opensubtitles_pack/error_sample
-SRC_LANG_ID=1
-TRG_LANG_ID=2
-
-./addFastAlignedTM.py $CORPUS_NAME $CORPUS_PATH/src_clean.txt $CORPUS_PATH/src_clean.lem $SRC_LANG_ID  $CORPUS_PATH/trg_clean.txt $TRG_LANG_ID  $CORPUS_PATH/alignments.txt  $CORPUS_PATH/ids_clean.txt
-
--- a/tests/addSentence.py
+++ b/tests/addSentence.py
@ -1,33 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import unittest
-import json
-import urllib2
-import sys
-import time
-import host
-
-address = 'http://'+host.concordia_host
-if len(host.concordia_port) > 0:
-    address += ':'+host.concordia_port
-
-
-data = {
-    'operation': 'addSentence',
-    'sourceSentence':sys.argv[1],
-    'targetSentence':sys.argv[2],
-    'tmId':int(sys.argv[3])
-}
-
-start = time.time()
-req = urllib2.Request(address)
-req.add_header('Content-Type', 'application/json')
-response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
-end = time.time()
-
-print "Execution time: %.4f seconds." % (end-start)
-print "Result: "
-print response
-
-
--- a/tests/addStocznia.sh
+++ b/tests/addStocznia.sh
@ -1,12 +0,0 @@
-#!/bin/sh
-
-./addTm.py 1 2 placeholder 1
-
-./addAlignedLemmatizedTM.py stocznia_plen ../mgiza-aligner/corpora/stocznia_plen/src_final.txt 1 ../mgiza-aligner/corpora/stocznia_plen/trg_final.txt 2 ../mgiza-aligner/corpora/stocznia_plen/aligned_final.txt
-
-./addTm.py 1 2 placeholder 1
-
-./addTm.py 1 2 placeholder 1
-
-./addAlignedLemmatizedTM.py stocznia_enpl ../mgiza-aligner/corpora/stocznia_enpl/src_final.txt 2 ../mgiza-aligner/corpora/stocznia_enpl/trg_final.txt 1 ../mgiza-aligner/corpora/stocznia_enpl/aligned_final.txt
-
--- a/tests/build.sh
+++ b/tests/build.sh
@ -1,8 +0,0 @@
-#!/bin/sh
-
-./addLemmatizedTMfromParams.sh tmrepository_enhr 2 6
-./addTm.py 1 2 placeholder 1
-./addLemmatizedTMfromParams.sh icd_dictionary 1 2
-./addLemmatizedTMfromParams.sh icd_filtered 1 2
-./addLemmatizedTMfromParams.sh emea_plen 1 2
-./addLemmatizedTMfromParams.sh jrc_enes 2 4
--- a/tools/.gitignore
+++ b/tools/.gitignore
--- a/tools/addFastAlignedTM.py
+++ b/tools/addFastAlignedTM.py
--- a/tools/addFastAlignedTM.sh
+++ b/tools/addFastAlignedTM.sh
--- a/tools/addOpenSubtitles.sh
+++ b/tools/addOpenSubtitles.sh
--- a/tools/addSources.py
+++ b/tools/addSources.py
--- a/tools/addTm.py
+++ b/tools/addTm.py
--- a/tools/concordiaSearch.py
+++ b/tools/concordiaSearch.py
--- a/tools/fullSearch.py
+++ b/tools/fullSearch.py
--- a/tools/generateIndex.py
+++ b/tools/generateIndex.py
--- a/tools/getTmsInfo.py
+++ b/tools/getTmsInfo.py
--- a/tools/getTmsInfo3.py
+++ b/tools/getTmsInfo3.py
--- a/tools/host.py_example
+++ b/tools/host.py_example
--- a/tools/lemmatizeSentence.py
+++ b/tools/lemmatizeSentence.py
--- a/tools/lemmatizeSentences.py
+++ b/tools/lemmatizeSentences.py
--- a/tools/lexiconSearch.py
+++ b/tools/lexiconSearch.py
--- a/tools/opensubtitles_sample/alignments.txt
+++ b/tools/opensubtitles_sample/alignments.txt
--- a/tools/opensubtitles_sample/falign_corpus.txt
+++ b/tools/opensubtitles_sample/falign_corpus.txt
--- a/tools/opensubtitles_sample/falign_result.txt
+++ b/tools/opensubtitles_sample/falign_result.txt
--- a/tools/opensubtitles_sample/ids.txt
+++ b/tools/opensubtitles_sample/ids.txt
--- a/tools/opensubtitles_sample/ids_clean.txt
+++ b/tools/opensubtitles_sample/ids_clean.txt
--- a/tools/opensubtitles_sample/ids_sources_censored.txt
+++ b/tools/opensubtitles_sample/ids_sources_censored.txt
--- a/tools/opensubtitles_sample/src.dict
+++ b/tools/opensubtitles_sample/src.dict
--- a/tools/opensubtitles_sample/src.lem
+++ b/tools/opensubtitles_sample/src.lem
--- a/tools/opensubtitles_sample/src.txt
+++ b/tools/opensubtitles_sample/src.txt
--- a/tools/opensubtitles_sample/src_clean.lem
+++ b/tools/opensubtitles_sample/src_clean.lem
--- a/tools/opensubtitles_sample/src_clean.txt
+++ b/tools/opensubtitles_sample/src_clean.txt
--- a/tools/opensubtitles_sample/trg.dict
+++ b/tools/opensubtitles_sample/trg.dict
--- a/tools/opensubtitles_sample/trg.lem
+++ b/tools/opensubtitles_sample/trg.lem
--- a/tools/opensubtitles_sample/trg.txt
+++ b/tools/opensubtitles_sample/trg.txt
--- a/tools/opensubtitles_sample/trg_clean.txt
+++ b/tools/opensubtitles_sample/trg_clean.txt
--- a/tools/removeDockerContainers.sh
+++ b/tools/removeDockerContainers.sh
--- a/tools/removeDockerImages.sh
+++ b/tools/removeDockerImages.sh
--- a/tools/simpleSearch.py
+++ b/tools/simpleSearch.py
--- a/tools/testCurl.sh
+++ b/tools/testCurl.sh