Patch summary (text before the first "diff --git" line is ignored by patch tools):

  concordia-aligner/add_corpus.sh
    * Interpreter changes from /bin/sh to /bin/bash; the script now uses the
      `source` builtin.
    * The hard-coded CORPUS_NAME, CORPUS_PATH, SRC_LANG_ID and TRG_LANG_ID
      assignments are removed; the script sources corpus.cfg instead
      (presumably that file defines the same four variables; it is not part
      of this patch, so confirm).
    * The invoked loader is renamed from addFastAlignedTM.py to
      add_fast_aligned_TM.py; its argument list is unchanged.

  concordia-aligner/quick_search.py (new file, mode 100755, Python 2)
    * Builds a 'fullSearch' JSON request from argv[1] (pattern) and argv[2]
      (tmId, converted to int) with limit 10 and offset 0, POSTs it to
      http://127.0.0.1:10001, then prints the elapsed time and the raw
      response body.

NOTE(review): the line structure was reconstructed from the hunk headers
(-1,8 +1,5 and -0,0 +1,27). The indentation of the dict entries in
quick_search.py is assumed to be four spaces; confirm against the original
blob (b4b83ff).

diff --git a/concordia-aligner/add_corpus.sh b/concordia-aligner/add_corpus.sh
index 5a079a6..fcccf07 100755
--- a/concordia-aligner/add_corpus.sh
+++ b/concordia-aligner/add_corpus.sh
@@ -1,8 +1,5 @@
-#!/bin/sh
+#!/bin/bash
 
-CORPUS_NAME=opensubtitles
-CORPUS_PATH=../fast-aligner/corpora/$CORPUS_NAME
-SRC_LANG_ID=1
-TRG_LANG_ID=2
+source corpus.cfg
 
-./addFastAlignedTM.py $CORPUS_NAME $CORPUS_PATH/src_clean.txt $CORPUS_PATH/src_clean.lem $SRC_LANG_ID $CORPUS_PATH/trg_clean.txt $TRG_LANG_ID $CORPUS_PATH/alignments.txt $CORPUS_PATH/ids_clean.txt
+./add_fast_aligned_TM.py $CORPUS_NAME $CORPUS_PATH/src_clean.txt $CORPUS_PATH/src_clean.lem $SRC_LANG_ID $CORPUS_PATH/trg_clean.txt $TRG_LANG_ID $CORPUS_PATH/alignments.txt $CORPUS_PATH/ids_clean.txt
diff --git a/concordia-aligner/quick_search.py b/concordia-aligner/quick_search.py
new file mode 100755
index 0000000..b4b83ff
--- /dev/null
+++ b/concordia-aligner/quick_search.py
@@ -0,0 +1,27 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import json
+import urllib2
+import sys
+import time
+
+data = {
+    'operation': 'fullSearch',
+    'pattern':sys.argv[1],
+    'tmId':int(sys.argv[2]),
+    'limit':10,
+    'offset':0
+}
+
+address = 'http://127.0.0.1:10001'
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = urllib2.urlopen(req, json.dumps(data)).read()
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response