diff --git a/concordia-server/concordia_server.cpp b/concordia-server/concordia_server.cpp index 9426db1..c548e57 100644 --- a/concordia-server/concordia_server.cpp +++ b/concordia-server/concordia_server.cpp @@ -57,6 +57,8 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) { } } _indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmIds); + } else if (operation == REFRESH_INDEX_OP) { + _indexController->refreshIndexFromRAM(jsonWriter); } else if (operation == SIMPLE_SEARCH_OP) { std::string pattern = d[PATTERN_PARAM].GetString(); _searcherController->simpleSearch(jsonWriter, pattern); diff --git a/concordia-server/config.hpp b/concordia-server/config.hpp index 4b9119f..0eacf57 100644 --- a/concordia-server/config.hpp +++ b/concordia-server/config.hpp @@ -17,6 +17,7 @@ #define ADD_SENTENCE_OP "addSentence" #define ADD_SENTENCES_OP "addSentences" +#define REFRESH_INDEX_OP "refreshIndex" #define SIMPLE_SEARCH_OP "simpleSearch" #define CONCORDIA_SEARCH_OP "concordiaSearch" diff --git a/concordia-server/config.hpp.in b/concordia-server/config.hpp.in index 300e892..2e20ba1 100644 --- a/concordia-server/config.hpp.in +++ b/concordia-server/config.hpp.in @@ -17,6 +17,7 @@ #define ADD_SENTENCE_OP "addSentence" #define ADD_SENTENCES_OP "addSentences" +#define REFRESH_INDEX_OP "refreshIndex" #define SIMPLE_SEARCH_OP "simpleSearch" #define CONCORDIA_SEARCH_OP "concordiaSearch" diff --git a/concordia-server/index_controller.cpp b/concordia-server/index_controller.cpp index d2d470b..9994257 100644 --- a/concordia-server/index_controller.cpp +++ b/concordia-server/index_controller.cpp @@ -45,7 +45,6 @@ void IndexController::addSentences( std::vector tokenizedSentences = _concordia->tokenizeAll(sourceSentences); std::vector sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmIds); _concordia->addAllTokenizedExamples(tokenizedSentences, sentenceIds); - _concordia->refreshSAfromRAM(); jsonWriter.StartObject(); jsonWriter.String("status"); @@ -58,3 +57,20 @@ void IndexController::addSentences( } } +void IndexController::refreshIndexFromRAM(rapidjson::Writer & jsonWriter) { + try { + _concordia->refreshSAfromRAM(); + + jsonWriter.StartObject(); + jsonWriter.String("status"); + jsonWriter.String("success"); + jsonWriter.EndObject(); + } catch (ConcordiaException & e) { + std::stringstream errorstream; + errorstream << "concordia error: " << e.what(); + JsonGenerator::signalError(jsonWriter, errorstream.str()); + } + +} + + diff --git a/concordia-server/index_controller.hpp b/concordia-server/index_controller.hpp index 33ce7c6..28bbbd3 100644 --- a/concordia-server/index_controller.hpp +++ b/concordia-server/index_controller.hpp @@ -30,6 +30,8 @@ public: const std::vector & targetSentences, const std::vector & tmIds); + void refreshIndexFromRAM(rapidjson::Writer & jsonWriter); + private: boost::shared_ptr _concordia; diff --git a/concordia-server/unit_dao.cpp b/concordia-server/unit_dao.cpp index afb3a3c..6b0f2f1 100644 --- a/concordia-server/unit_dao.cpp +++ b/concordia-server/unit_dao.cpp @@ -64,6 +64,9 @@ std::vector UnitDAO::getSearchResults(const std::vector 0: + data['sentences'] = sentences + add_data(data) + +end = time.time() +print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % (lineNumber, end-start, lineNumber/(end-start)) + +print "Generating index..." +start = time.time() +data = { + 'operation': 'refreshIndex' +} +req = urllib2.Request('http://localhost') +req.add_header('Content-Type', 'application/json') +urllib2.urlopen(req, json.dumps(data)).read() + +end = time.time() +print "Index regeneration complete. The operation took %.4f s" % (end - start) + + + diff --git a/tests/addSentence.py b/tests/addSentence.py new file mode 100755 index 0000000..09bebda --- /dev/null +++ b/tests/addSentence.py @@ -0,0 +1,27 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import unittest +import json +import urllib2 +import sys +import time + +data = { + 'operation': 'addSentence', + 'sourceSentence':sys.argv[1], + 'targetSentence':sys.argv[2], + 'tmId':sys.argv[3] +} + +start = time.time() +req = urllib2.Request('http://localhost') +req.add_header('Content-Type', 'application/json') +response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) +end = time.time() + +print "Execution time: %.4f seconds." % (end-start) +print "Result: " +print response + + diff --git a/tests/addingError.txt b/tests/addingError.txt new file mode 100644 index 0000000..cbae3a1 --- /dev/null +++ b/tests/addingError.txt @@ -0,0 +1,20 @@ +Added 1101000 of 1254468 sentences. Time elapsed: 984.7707 s, current speed: 1118.0268 sentences/second +Traceback (most recent call last): + File "./addFile.py", line 48, in + add_data(data) + File "./addFile.py", line 19, in add_data + urllib2.urlopen(req, json.dumps(data)).read() + File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen + return _opener.open(url, data, timeout) + File "/usr/lib/python2.7/urllib2.py", line 410, in open + response = meth(req, response) + File "/usr/lib/python2.7/urllib2.py", line 523, in http_response + 'http', request, response, code, msg, hdrs) + File "/usr/lib/python2.7/urllib2.py", line 448, in error + return self._call_chain(*args) + File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain + result = func(*args) + File "/usr/lib/python2.7/urllib2.py", line 531, in http_error_default + raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) +urllib2.HTTPError: HTTP Error 413: Request Entity Too Large + diff --git a/tests/simpleSearch.py b/tests/simpleSearch.py new file mode 100755 index 0000000..74ee094 --- /dev/null +++ b/tests/simpleSearch.py @@ -0,0 +1,25 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import unittest +import json +import urllib2 +import sys +import time + +data = { + 'operation': 'simpleSearch', + 'pattern':sys.argv[1] +} + +start = time.time() +req = urllib2.Request('http://localhost') +req.add_header('Content-Type', 'application/json') +response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) +end = time.time() + +print "Execution time: %.4f seconds." % (end-start) +print "Result: " +print response + +