From 0a4a9b2ce8ee5bc8eac1bad95287cf8bb5cb3f63 Mon Sep 17 00:00:00 2001
From: rjawor
Date: Wed, 26 Jun 2019 10:19:11 +0200
Subject: [PATCH] convenience scripts

---
Reviewer notes (free text in this section is ignored when the patch is applied):
- add_corpus.sh: the interpreter changes from /bin/sh to /bin/bash and the
  script now uses `source`, which is a bash builtin and is not guaranteed to
  exist under a plain POSIX sh. The corpus settings that used to be
  hard-coded here (CORPUS_NAME, CORPUS_PATH, SRC_LANG_ID, TRG_LANG_ID) are
  presumably provided by corpus.cfg, which is not part of this patch --
  confirm before applying.
- quick_search.py: Python 2 only (urllib2, print statements).
  Usage: quick_search.py PATTERN TM_ID
  It sends a 'fullSearch' request (limit 10, offset 0) as a JSON body to
  http://127.0.0.1:10001 and prints the elapsed wall-clock time followed by
  the raw response body. Both arguments are required; TM_ID must parse as
  an integer.

 concordia-aligner/add_corpus.sh   |  9 +++------
 concordia-aligner/quick_search.py | 27 +++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 6 deletions(-)
 create mode 100755 concordia-aligner/quick_search.py

diff --git a/concordia-aligner/add_corpus.sh b/concordia-aligner/add_corpus.sh
index 5a079a6..fcccf07 100755
--- a/concordia-aligner/add_corpus.sh
+++ b/concordia-aligner/add_corpus.sh
@@ -1,8 +1,5 @@
-#!/bin/sh
+#!/bin/bash
 
-CORPUS_NAME=opensubtitles
-CORPUS_PATH=../fast-aligner/corpora/$CORPUS_NAME
-SRC_LANG_ID=1
-TRG_LANG_ID=2
+source corpus.cfg
 
-./addFastAlignedTM.py $CORPUS_NAME $CORPUS_PATH/src_clean.txt $CORPUS_PATH/src_clean.lem $SRC_LANG_ID $CORPUS_PATH/trg_clean.txt $TRG_LANG_ID $CORPUS_PATH/alignments.txt $CORPUS_PATH/ids_clean.txt
+./add_fast_aligned_TM.py $CORPUS_NAME $CORPUS_PATH/src_clean.txt $CORPUS_PATH/src_clean.lem $SRC_LANG_ID $CORPUS_PATH/trg_clean.txt $TRG_LANG_ID $CORPUS_PATH/alignments.txt $CORPUS_PATH/ids_clean.txt
diff --git a/concordia-aligner/quick_search.py b/concordia-aligner/quick_search.py
new file mode 100755
index 0000000..b4b83ff
--- /dev/null
+++ b/concordia-aligner/quick_search.py
@@ -0,0 +1,27 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import json
+import urllib2
+import sys
+import time
+
+data = {
+    'operation': 'fullSearch',
+    'pattern':sys.argv[1],
+    'tmId':int(sys.argv[2]),
+    'limit':10,
+    'offset':0
+}
+
+address = 'http://127.0.0.1:10001'
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = urllib2.urlopen(req, json.dumps(data)).read()
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response