From 5eff07d4b8e4fbc3ace5afaf6a069f41cea45bac Mon Sep 17 00:00:00 2001 From: rjawor Date: Tue, 20 Oct 2015 20:16:00 +0200 Subject: [PATCH] mutliple translation memories --- CMakeLists.txt | 4 -- cat/index.html | 41 ------------ cat/index.html_pattern | 34 ++++++++++ cat/js/cat.js | 3 +- cat/publish.py | 55 ++++++++++++++++ cat/versions/jrc_enes.cfg | 7 ++ cat/versions/setimes_hren.cfg | 6 ++ concordia-server/CMakeLists.txt | 1 + concordia-server/concordia_server.cpp | 66 +++++++++++++++---- concordia-server/concordia_server.hpp | 11 +++- concordia-server/config.hpp.in | 5 ++ concordia-server/db_connection.cpp | 10 ++- concordia-server/db_connection.hpp | 2 + concordia-server/index_controller.cpp | 75 ++++++++++++++------- concordia-server/index_controller.hpp | 15 +++-- concordia-server/searcher_controller.cpp | 84 ++++++++++++++---------- concordia-server/searcher_controller.hpp | 15 +++-- concordia-server/tm_dao.cpp | 55 ++++++++++++++++ concordia-server/tm_dao.hpp | 27 ++++++++ concordia-server/unit_dao.cpp | 4 +- concordia-server/unit_dao.hpp | 2 +- concordia.cfg.in | 11 ---- tests/.gitignore | 2 + tests/addFile.py | 42 +++++++++--- tests/addSentence.py | 10 ++- tests/addTm.py | 26 ++++++++ tests/concordiaSearch.py | 8 ++- tests/host.py_example | 2 + tests/simpleSearch.py | 10 ++- 29 files changed, 472 insertions(+), 161 deletions(-) delete mode 100644 cat/index.html create mode 100644 cat/index.html_pattern create mode 100755 cat/publish.py create mode 100644 cat/versions/jrc_enes.cfg create mode 100644 cat/versions/setimes_hren.cfg create mode 100644 concordia-server/tm_dao.cpp create mode 100644 concordia-server/tm_dao.hpp create mode 100644 tests/.gitignore create mode 100755 tests/addTm.py create mode 100644 tests/host.py_example diff --git a/CMakeLists.txt b/CMakeLists.txt index ef3f14f..9360488 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,10 +18,6 @@ set (STOP_WORDS_ENABLED "false") # ================================================ set (INDEX_DIRECTORY "${concordia-server_SOURCE_DIR}/index") -set (WORD_MAP_FILE "word_map.bin") -set (HASHED_INDEX_FILE "hashed_index.bin") -set (MARKERS_FILE "markers.bin") - set (RESOURCES_DIRECTORY "${concordia-server_SOURCE_DIR}/resources") configure_file ( diff --git a/cat/index.html b/cat/index.html deleted file mode 100644 index 2999b53..0000000 --- a/cat/index.html +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - -
- Banner -


-

- Welcome to the interactive Concordia demo. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory is 200 000 sentences taken from English-Spanish corpus of European Law. Please enter an English sentence in the field below and press Enter (or use the search button). This instance of Concordia works best with law sentences, but is very likely to output some results for any English sentence. You can also use predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences. -

-

- Enjoy your work with the system! -

- - show/hide samples -

-
-
    -
  • Every ship in the European Union must have a crew of 50 or more workers. apply
  • -
  • It is impossible to abolish the customs duties on fruit and vegetables. apply
  • -
  • The convention on human rights was held in Geneva. apply
  • - -
-

-
- -

- -



-
- -
-
- - diff --git a/cat/index.html_pattern b/cat/index.html_pattern new file mode 100644 index 0000000..3f31986 --- /dev/null +++ b/cat/index.html_pattern @@ -0,0 +1,34 @@ + + + + + + + + + +
+ Banner +


+

@desc@

+

+ Enjoy your work with the system! +

+ + show/hide samples +

+
+
    @suggestions@
+

+
+ +

+ +



+
+ +
+
+ + diff --git a/cat/js/cat.js b/cat/js/cat.js index cb8e286..d7b5466 100644 --- a/cat/js/cat.js +++ b/cat/js/cat.js @@ -11,9 +11,10 @@ $(document).ready(function() { }); }); -function searchHandle() { +function searchHandle(tmid) { var concordiaRequest = { operation: 'concordiaSearch', + tmId: tmid, pattern:$("#searchInput").val() } diff --git a/cat/publish.py b/cat/publish.py new file mode 100755 index 0000000..fb9d216 --- /dev/null +++ b/cat/publish.py @@ -0,0 +1,55 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import sys, os, shutil, re + +root_dir = sys.argv[1] + +if not os.path.exists(root_dir): + print "%s does not exist!" % root_dir + sys.exit(1) + +if not os.path.isdir(root_dir): + print "%s is not a directory!" % root_dir + sys.exit(1) + +if len(os.listdir(root_dir))>0: + print "%s is not empty!" % root_dir + sys.exit(1) + +shutil.copytree('js', root_dir+'/js') +shutil.copytree('css', root_dir+'/css') +shutil.copytree('images', root_dir+'/images') + + +versions_dir = 'versions' + +versions = [] + +for version_file in os.listdir(versions_dir): + version = {'suggestions':[]} + with open(versions_dir+'/'+version_file) as v: + for line in v: + + field, value = line.strip().split('@#@') + if field == 'suggestion': + version['suggestions'].append(value) + else: + version[field] = value + versions.append(version) + +for version in versions: + version_dir = root_dir+'/'+version['dir'] + os.mkdir(version_dir) + with open('index.html_pattern', 'r') as pattern_file: + with open(version_dir+'/index.html', 'w') as index_file: + for line in pattern_file: + for field, value in version.iteritems(): + if field == 'suggestions': + suggestions_html = '' + for suggestion in value: + suggestions_html+='
  • '+suggestion+'apply
  • ' + line = re.sub('@suggestions@', suggestions_html, line) + else: + line = re.sub('@'+field+'@', value, line) + index_file.write(line) diff --git a/cat/versions/jrc_enes.cfg b/cat/versions/jrc_enes.cfg new file mode 100644 index 0000000..6400912 --- /dev/null +++ b/cat/versions/jrc_enes.cfg @@ -0,0 +1,7 @@ +dir@#@jrc_enes +tmid@#@1 +desc@#@Welcome to the interactive Concordia demo. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory is 200 000 sentences taken from English-Spanish corpus of European Law. Please enter an English sentence in the field below and press Enter (or use the search button). This instance of Concordia works best with law sentences, but is very likely to output some results for any English sentence. You can also use predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences. +prompt@#@Enter search pattern (English sentence): +suggestion@#@Every ship in the European Union must have a crew of 50 or more workers. +suggestion@#@It is impossible to abolish the customs duties on fruit and vegetables. +suggestion@#@The convention on human rights was held in Geneva. diff --git a/cat/versions/setimes_hren.cfg b/cat/versions/setimes_hren.cfg new file mode 100644 index 0000000..964c185 --- /dev/null +++ b/cat/versions/setimes_hren.cfg @@ -0,0 +1,6 @@ +dir@#@setimes_hren +tmid@#@2 +desc@#@Welcome to Concordia. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory is 200 000 sentences taken from the SETIMES2 Croatian-English corpus (link). Please enter a Croatian sentence in the field below and press Enter (or use the search button). You can test the system on predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences. +prompt@#@Enter search pattern (Croatian sentence): +suggestion@#@Kazna medijskom mogulu obnovila raspravu u Makedoniji +suggestion@#@Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci diff --git a/concordia-server/CMakeLists.txt b/concordia-server/CMakeLists.txt index 675380c..1caa41f 100644 --- a/concordia-server/CMakeLists.txt +++ b/concordia-server/CMakeLists.txt @@ -13,6 +13,7 @@ add_executable(concordia_server_process int_array_param.cpp simple_search_result.cpp complete_concordia_search_result.cpp + tm_dao.cpp ) target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case) diff --git a/concordia-server/concordia_server.cpp b/concordia-server/concordia_server.cpp index a9eb967..810f21a 100644 --- a/concordia-server/concordia_server.cpp +++ b/concordia-server/concordia_server.cpp @@ -7,12 +7,21 @@ #include "config.hpp" #include "logger.hpp" #include "rapidjson/rapidjson.h" +#include +#include +#include ConcordiaServer::ConcordiaServer(const std::string & configFilePath) - throw(ConcordiaException) { - boost::shared_ptr concordia(new Concordia(configFilePath)); - _indexController = boost::shared_ptr (new IndexController(concordia)); - _searcherController = boost::shared_ptr (new SearcherController(concordia)); + throw(ConcordiaException) : + _configFilePath(configFilePath) { + std::vector tmIds = _tmDAO.getTmIds(); + _concordiasMap = boost::shared_ptr >(new boost::ptr_map()); + + BOOST_FOREACH(int & tmId, tmIds) { + _addTm(tmId); + } + _indexController = boost::shared_ptr (new IndexController(_concordiasMap)); + _searcherController = boost::shared_ptr (new SearcherController(_concordiasMap)); } ConcordiaServer::~ConcordiaServer() { @@ -44,29 +53,48 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) { } else if (operation == ADD_SENTENCES_OP) { std::vector sourceSentences; std::vector targetSentences; - std::vector tmIds; + int tmId = d[TM_ID_PARAM].GetInt(); // loading data from json const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM]; + Logger::log("addSentences"); + Logger::logInt("sentences to add", sentencesArray.Size()); + Logger::logInt("tm id", tmId); for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) { - if (sentencesArray[i].Size() != 3) { - JsonGenerator::signalError(jsonWriter, "sentence should be an array of 3 elements"); + if (sentencesArray[i].Size() != 2) { + JsonGenerator::signalError(jsonWriter, "sentence should be an array of 2 elements"); break; } else { - tmIds.push_back(sentencesArray[i][0].GetInt()); - sourceSentences.push_back(sentencesArray[i][1].GetString()); - targetSentences.push_back(sentencesArray[i][2].GetString()); + sourceSentences.push_back(sentencesArray[i][0].GetString()); + targetSentences.push_back(sentencesArray[i][1].GetString()); } } - _indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmIds); + _indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmId); } else if (operation == REFRESH_INDEX_OP) { - _indexController->refreshIndexFromRAM(jsonWriter); + int tmId = d[TM_ID_PARAM].GetInt(); + _indexController->refreshIndexFromRAM(jsonWriter, tmId); } else if (operation == SIMPLE_SEARCH_OP) { std::string pattern = d[PATTERN_PARAM].GetString(); - _searcherController->simpleSearch(jsonWriter, pattern); + int tmId = d[TM_ID_PARAM].GetInt(); + _searcherController->simpleSearch(jsonWriter, pattern, tmId); } else if (operation == CONCORDIA_SEARCH_OP) { std::string pattern = d[PATTERN_PARAM].GetString(); + int tmId = d[TM_ID_PARAM].GetInt(); Logger::logString("concordia search pattern", pattern); - _searcherController->concordiaSearch(jsonWriter, pattern); + _searcherController->concordiaSearch(jsonWriter, pattern, tmId); + } else if (operation == ADD_TM_OP) { + int sourceLangId = d[SOURCE_LANG_PARAM].GetInt(); + int targetLangId = d[TARGET_LANG_PARAM].GetInt(); + std::string name = d[NAME_PARAM].GetString(); + int newId = _tmDAO.addTm(sourceLangId, targetLangId, name); + _addTm(newId); + + jsonWriter.StartObject(); + jsonWriter.String("status"); + jsonWriter.String("success"); + jsonWriter.String("newTmId"); + jsonWriter.Int(newId); + jsonWriter.EndObject(); + } else { JsonGenerator::signalError(jsonWriter, "no such operation"); } @@ -83,3 +111,13 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) { return outputString.str(); } + +void ConcordiaServer::_addTm(int tmId) { + std::stringstream indexPath; + indexPath << INDEX_DIRECTORY << "/tm_" << tmId; + if (!boost::filesystem::exists(indexPath.str())) { + boost::filesystem::create_directories(indexPath.str()); + } + _concordiasMap->insert(tmId, new Concordia(indexPath.str(), _configFilePath)); +} + diff --git a/concordia-server/concordia_server.hpp b/concordia-server/concordia_server.hpp index 59a66c3..edf57d9 100644 --- a/concordia-server/concordia_server.hpp +++ b/concordia-server/concordia_server.hpp @@ -11,6 +11,7 @@ #include "rapidjson/writer.h" #include "rapidjson/error/en.h" +#include "tm_dao.hpp" #include "index_controller.hpp" #include "searcher_controller.hpp" @@ -28,7 +29,15 @@ public: std::string handleRequest(std::string & requestString); -private: +private: + void _addTm(int tmId); + + std::string _configFilePath; + + boost::shared_ptr > _concordiasMap; + + TmDAO _tmDAO; + boost::shared_ptr _indexController; boost::shared_ptr _searcherController; diff --git a/concordia-server/config.hpp.in b/concordia-server/config.hpp.in index 12ef39e..6b0e004 100644 --- a/concordia-server/config.hpp.in +++ b/concordia-server/config.hpp.in @@ -1,5 +1,6 @@ #define CONFIG_FILE_PATH "@CONFIG_FILE_PATH@" #define LOG_FILE_PATH "@LOG_FILE_PATH@" +#define INDEX_DIRECTORY "@INDEX_DIRECTORY@" // database connection information #define DB_NAME "@DB_NAME@" @@ -15,10 +16,14 @@ #define TARGET_SENTENCE_PARAM "targetSentence" #define TM_ID_PARAM "tmId" #define SENTENCES_PARAM "sentences" +#define SOURCE_LANG_PARAM "sourceLangId" +#define TARGET_LANG_PARAM "targetLangId" +#define NAME_PARAM "name" #define ADD_SENTENCE_OP "addSentence" #define ADD_SENTENCES_OP "addSentences" #define REFRESH_INDEX_OP "refreshIndex" #define SIMPLE_SEARCH_OP "simpleSearch" #define CONCORDIA_SEARCH_OP "concordiaSearch" +#define ADD_TM_OP "addTm" diff --git a/concordia-server/db_connection.cpp b/concordia-server/db_connection.cpp index bced9c7..c46516c 100644 --- a/concordia-server/db_connection.cpp +++ b/concordia-server/db_connection.cpp @@ -144,5 +144,13 @@ std::string DBconnection::getStringValue(PGresult * result, int row, int col) t } } - +int DBconnection::getRowCount(PGresult * result) throw (ConcordiaException) { + try { + return PQntuples(result); + } catch (std::exception & e) { + std::stringstream ss; + ss << "Error getting int value. Message: " << e.what(); + throw ConcordiaException(ss.str()); + } +} diff --git a/concordia-server/db_connection.hpp b/concordia-server/db_connection.hpp index 666d704..5b821af 100644 --- a/concordia-server/db_connection.hpp +++ b/concordia-server/db_connection.hpp @@ -33,6 +33,8 @@ public: std::string getStringValue(PGresult * result, int row, int col) throw (ConcordiaException); + int getRowCount(PGresult * result) throw (ConcordiaException); + private: void close(); diff --git a/concordia-server/index_controller.cpp b/concordia-server/index_controller.cpp index 9994257..b127f11 100644 --- a/concordia-server/index_controller.cpp +++ b/concordia-server/index_controller.cpp @@ -3,10 +3,11 @@ #include #include "json_generator.hpp" +#include "logger.hpp" -IndexController::IndexController(boost::shared_ptr concordia) - throw(ConcordiaException): - _concordia(concordia) { +IndexController::IndexController(boost::shared_ptr >concordiasMap) + throw(ConcordiaException): + _concordiasMap(concordiasMap) { } IndexController::~IndexController() { @@ -20,19 +21,32 @@ void IndexController::addSentence( const int tmId) { try { - TokenizedSentence tokenizedSentence = _concordia->tokenize(sourceSentence); - int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId); - _concordia->addTokenizedExample(tokenizedSentence, sentenceId); - _concordia->refreshSAfromRAM(); + boost::ptr_map::iterator it = _concordiasMap->find(tmId); + if (it != _concordiasMap->end()) { + TokenizedSentence tokenizedSentence = (*_concordiasMap)[tmId].tokenize(sourceSentence); + int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId); + (*_concordiasMap)[tmId].addTokenizedExample(tokenizedSentence, sentenceId); + (*_concordiasMap)[tmId].refreshSAfromRAM(); - jsonWriter.StartObject(); - jsonWriter.String("status"); - jsonWriter.String("success"); - jsonWriter.EndObject(); + jsonWriter.StartObject(); + jsonWriter.String("status"); + jsonWriter.String("success"); + jsonWriter.EndObject(); + } else { + JsonGenerator::signalError(jsonWriter, "no such tm!"); + } } catch (ConcordiaException & e) { std::stringstream errorstream; errorstream << "concordia error: " << e.what(); JsonGenerator::signalError(jsonWriter, errorstream.str()); + } catch (std::exception & e) { + std::stringstream errorstream; + errorstream << "general error: " << e.what(); + JsonGenerator::signalError(jsonWriter, errorstream.str()); + } catch (...) { + std::stringstream errorstream; + errorstream << "unexpected error occurred"; + JsonGenerator::signalError(jsonWriter, errorstream.str()); } } @@ -40,16 +54,21 @@ void IndexController::addSentences( rapidjson::Writer & jsonWriter, const std::vector & sourceSentences, const std::vector & targetSentences, - const std::vector & tmIds) { + const int tmId) { try { - std::vector tokenizedSentences = _concordia->tokenizeAll(sourceSentences); - std::vector sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmIds); - _concordia->addAllTokenizedExamples(tokenizedSentences, sentenceIds); + boost::ptr_map::iterator it = _concordiasMap->find(tmId); + if (it != _concordiasMap->end()) { + std::vector tokenizedSentences = (*_concordiasMap)[tmId].tokenizeAll(sourceSentences); + std::vector sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmId); + (*_concordiasMap)[tmId].addAllTokenizedExamples(tokenizedSentences, sentenceIds); - jsonWriter.StartObject(); - jsonWriter.String("status"); - jsonWriter.String("success"); - jsonWriter.EndObject(); + jsonWriter.StartObject(); + jsonWriter.String("status"); + jsonWriter.String("success"); + jsonWriter.EndObject(); + } else { + JsonGenerator::signalError(jsonWriter, "no such tm!"); + } } catch (ConcordiaException & e) { std::stringstream errorstream; errorstream << "concordia error: " << e.what(); @@ -57,14 +76,19 @@ void IndexController::addSentences( } } -void IndexController::refreshIndexFromRAM(rapidjson::Writer & jsonWriter) { +void IndexController::refreshIndexFromRAM(rapidjson::Writer & jsonWriter, + const int tmId) { try { - _concordia->refreshSAfromRAM(); + if (it != _concordiasMap->end()) { + (*_concordiasMap)[tmId].refreshSAfromRAM(); - jsonWriter.StartObject(); - jsonWriter.String("status"); - jsonWriter.String("success"); - jsonWriter.EndObject(); + jsonWriter.StartObject(); + jsonWriter.String("status"); + jsonWriter.String("success"); + jsonWriter.EndObject(); + } else { + JsonGenerator::signalError(jsonWriter, "no such tm!"); + } } catch (ConcordiaException & e) { std::stringstream errorstream; errorstream << "concordia error: " << e.what(); @@ -74,3 +98,4 @@ void IndexController::refreshIndexFromRAM(rapidjson::Writer #include #include +#include + #include "unit_dao.hpp" @@ -14,8 +16,8 @@ class IndexController { public: /*! Constructor. */ - explicit IndexController(boost::shared_ptr concordia) - throw(ConcordiaException); + explicit IndexController(boost::shared_ptr >concordiasMap) + throw(ConcordiaException); /*! Destructor. */ virtual ~IndexController(); @@ -28,12 +30,13 @@ public: void addSentences(rapidjson::Writer & jsonWriter, const std::vector & sourceSentences, const std::vector & targetSentences, - const std::vector & tmIds); - - void refreshIndexFromRAM(rapidjson::Writer & jsonWriter); + const int tmId); + void refreshIndexFromRAM(rapidjson::Writer & jsonWriter, + const int tmId); + private: - boost::shared_ptr _concordia; + boost::shared_ptr > _concordiasMap; UnitDAO _unitDAO; }; diff --git a/concordia-server/searcher_controller.cpp b/concordia-server/searcher_controller.cpp index de33ff9..bf5b0a3 100644 --- a/concordia-server/searcher_controller.cpp +++ b/concordia-server/searcher_controller.cpp @@ -5,50 +5,64 @@ #include "json_generator.hpp" -SearcherController::SearcherController(boost::shared_ptr concordia) - throw(ConcordiaException): - _concordia(concordia) { +SearcherController::SearcherController(boost::shared_ptr >concordiasMap) + throw(ConcordiaException): + _concordiasMap(concordiasMap) { } SearcherController::~SearcherController() { } -void SearcherController::simpleSearch(rapidjson::Writer & jsonWriter, std::string & pattern) { - std::vector results = _unitDAO.getSearchResults(_concordia->simpleSearch(pattern)); +void SearcherController::simpleSearch(rapidjson::Writer & jsonWriter, + std::string & pattern, + const int tmId) { + boost::ptr_map::iterator it = _concordiasMap->find(tmId); + if (it != _concordiasMap->end()) { + std::vector results = _unitDAO.getSearchResults((*_concordiasMap)[tmId].simpleSearch(pattern)); - jsonWriter.StartObject(); - jsonWriter.String("status"); - jsonWriter.String("success"); - jsonWriter.String("results"); - jsonWriter.StartArray(); - BOOST_FOREACH(SimpleSearchResult & result, results) { - JsonGenerator::writeSearchResult(jsonWriter, result); - } - jsonWriter.EndArray(); - jsonWriter.EndObject(); + jsonWriter.StartObject(); + jsonWriter.String("status"); + jsonWriter.String("success"); + jsonWriter.String("results"); + jsonWriter.StartArray(); + BOOST_FOREACH(SimpleSearchResult & result, results) { + JsonGenerator::writeSearchResult(jsonWriter, result); + } + jsonWriter.EndArray(); + jsonWriter.EndObject(); + } else { + JsonGenerator::signalError(jsonWriter, "no such tm!"); + } } -void SearcherController::concordiaSearch(rapidjson::Writer & jsonWriter, std::string & pattern) { +void SearcherController::concordiaSearch(rapidjson::Writer & jsonWriter, + std::string & pattern, + const int tmId) { - CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult(_concordia->concordiaSearch(pattern)); - - jsonWriter.StartObject(); - jsonWriter.String("status"); - jsonWriter.String("success"); - jsonWriter.String("result"); - jsonWriter.StartObject(); - jsonWriter.String("bestOverlayScore"); - jsonWriter.Double(result.getBestOverlayScore()); - jsonWriter.String("bestOverlay"); - jsonWriter.StartArray(); - BOOST_FOREACH(SimpleSearchResult & simpleResult, result.getBestOverlay()) { - JsonGenerator::writeSearchResult(jsonWriter, simpleResult); - } - jsonWriter.EndArray(); - jsonWriter.EndObject(); - - - jsonWriter.EndObject(); + boost::ptr_map::iterator it = _concordiasMap->find(tmId); + if (it != _concordiasMap->end()) { + CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult((*_concordiasMap)[tmId].concordiaSearch(pattern)); + + jsonWriter.StartObject(); + jsonWriter.String("status"); + jsonWriter.String("success"); + jsonWriter.String("result"); + jsonWriter.StartObject(); + jsonWriter.String("bestOverlayScore"); + jsonWriter.Double(result.getBestOverlayScore()); + jsonWriter.String("bestOverlay"); + jsonWriter.StartArray(); + BOOST_FOREACH(SimpleSearchResult & simpleResult, result.getBestOverlay()) { + JsonGenerator::writeSearchResult(jsonWriter, simpleResult); + } + jsonWriter.EndArray(); + jsonWriter.EndObject(); + + + jsonWriter.EndObject(); + } else { + JsonGenerator::signalError(jsonWriter, "no such tm!"); + } } diff --git a/concordia-server/searcher_controller.hpp b/concordia-server/searcher_controller.hpp index 3d52cbd..74f88ef 100644 --- a/concordia-server/searcher_controller.hpp +++ b/concordia-server/searcher_controller.hpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -15,19 +16,23 @@ class SearcherController { public: /*! Constructor. */ - explicit SearcherController(boost::shared_ptr concordia) - throw(ConcordiaException); + explicit SearcherController(boost::shared_ptr >concordiasMap) + throw(ConcordiaException); /*! Destructor. */ virtual ~SearcherController(); - void simpleSearch(rapidjson::Writer & jsonWriter, std::string & pattern); + void simpleSearch(rapidjson::Writer & jsonWriter, + std::string & pattern, + const int tmId); - void concordiaSearch(rapidjson::Writer & jsonWriter, std::string & pattern); + void concordiaSearch(rapidjson::Writer & jsonWriter, + std::string & pattern, + const int tmId); private: - boost::shared_ptr _concordia; + boost::shared_ptr > _concordiasMap; UnitDAO _unitDAO; }; diff --git a/concordia-server/tm_dao.cpp b/concordia-server/tm_dao.cpp new file mode 100644 index 0000000..1319907 --- /dev/null +++ b/concordia-server/tm_dao.cpp @@ -0,0 +1,55 @@ +#include "tm_dao.hpp" + +#include "query_param.hpp" +#include "string_param.hpp" +#include "int_param.hpp" +#include "int_array_param.hpp" +#include "logger.hpp" + +#include +#include + +TmDAO::TmDAO() { +} + +TmDAO::~TmDAO() { +} + +std::vector TmDAO::getTmIds() { + std::vector result; + DBconnection connection; + connection.startTransaction(); + std::string query = "SELECT id FROM tm;"; + PGresult * dbResult = connection.execute(query); + for (int i=0;i params; + params.push_back(new IntParam(sourceLangId)); + params.push_back(new IntParam(targetLangId)); + params.push_back(new StringParam(name)); + + PGresult * result = connection.execute(query, params); + int newId = connection.getIntValue(result, 0, 0); + connection.clearResult(result); + connection.endTransaction(); + BOOST_FOREACH (QueryParam * param, params) { + delete param; + } + + return newId; + +} + diff --git a/concordia-server/tm_dao.hpp b/concordia-server/tm_dao.hpp new file mode 100644 index 0000000..e43822a --- /dev/null +++ b/concordia-server/tm_dao.hpp @@ -0,0 +1,27 @@ +#ifndef TM_DAO_HDR +#define TM_DAO_HDR + +#include +#include + +#include +#include "db_connection.hpp" + +class TmDAO { +public: + /*! Constructor. + */ + TmDAO(); + /*! Destructor. + */ + virtual ~TmDAO(); + + int addTm(const int sourceLangId, const int targetLangId, const std::string name); + + std::vector getTmIds(); + +private: + +}; + +#endif diff --git a/concordia-server/unit_dao.cpp b/concordia-server/unit_dao.cpp index db9d807..56a0c8f 100644 --- a/concordia-server/unit_dao.cpp +++ b/concordia-server/unit_dao.cpp @@ -31,13 +31,13 @@ int UnitDAO::addSentence( std::vector UnitDAO::addSentences( const std::vector & sourceSentences, const std::vector & targetSentences, - const std::vector & tmIds) { + const int tmId) { DBconnection connection; std::vector newIds; connection.startTransaction(); int index = 0; BOOST_FOREACH(const TokenizedSentence & sourceSentence, sourceSentences) { - newIds.push_back(_addSingleSentence(connection, sourceSentence, targetSentences.at(index), tmIds.at(index))); + newIds.push_back(_addSingleSentence(connection, sourceSentence, targetSentences.at(index), tmId)); index++; } connection.endTransaction(); diff --git a/concordia-server/unit_dao.hpp b/concordia-server/unit_dao.hpp index 475ac05..7daab4f 100644 --- a/concordia-server/unit_dao.hpp +++ b/concordia-server/unit_dao.hpp @@ -32,7 +32,7 @@ public: std::vector addSentences( const std::vector & sourceSentences, const std::vector & targetSentences, - const std::vector & tmIds); + const int tmId); std::vector getSearchResults(const std::vector & fragments); diff --git a/concordia.cfg.in b/concordia.cfg.in index c4d53ce..3be19ab 100644 --- a/concordia.cfg.in +++ b/concordia.cfg.in @@ -3,17 +3,6 @@ #--------------------------- # -#------------------------------------------------------------------------------- -# The below set the paths for hashed index, markers array and word map files. -# If all the files pointed by these paths exist, Concordia reads them to its -# RAM index. When none of these files exist, a new empty index is created. -# However, if any of these files exist and any other is missing, the index -# is considered corrupt and Concordia does not start. - -hashed_index_path = "@INDEX_DIRECTORY@/@HASHED_INDEX_FILE@" -markers_path = "@INDEX_DIRECTORY@/@MARKERS_FILE@" -word_map_path = "@INDEX_DIRECTORY@/@WORD_MAP_FILE@" - #------------------------------------------------------------------------------- # The following settings control the sentence anonymizer mechanism. It is used to # remove unnecessary symbols and possibly words from sentences added to index diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..6842736 --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1,2 @@ +host.py +host.pyc diff --git a/tests/addFile.py b/tests/addFile.py index fe17005..c484100 100755 --- a/tests/addFile.py +++ b/tests/addFile.py @@ -5,10 +5,16 @@ import unittest import json import urllib2 import sys +import host import time BUFFER_SIZE = 500 +address = 'http://'+host.concordia_host +if len(host.concordia_port) > 0: + address += ':'+host.concordia_port + + def file_len(fname): with open(fname) as f: for i, l in enumerate(f): @@ -16,32 +22,49 @@ def file_len(fname): return i + 1 def add_data(data): - req = urllib2.Request('http://localhost') + req = urllib2.Request(address) req.add_header('Content-Type', 'application/json') urllib2.urlopen(req, json.dumps(data)).read() sourceFile = sys.argv[1] -targetFile = sys.argv[2] -tmId = int(sys.argv[3]) +sourceLangId = int(sys.argv[2]) +targetFile = sys.argv[3] +targetLangId = int(sys.argv[4]) +name = sys.argv[5] totalLines = file_len(sourceFile) if file_len(targetFile) != totalLines: print "File lengths do not match" sys.exit(1) + +data = { + 'operation': 'addTm', + 'sourceLangId':sourceLangId, + 'targetLangId':targetLangId, + 'name':name +} + +req = urllib2.Request(address) +req.add_header('Content-Type', 'application/json') +response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) +tmId = int(response['newTmId']) +print "Added new tm: %d" % tmId + data = { - 'operation': 'addSentences' + 'operation': 'addSentences', + 'tmId':tmId } sentences = [] start = time.time() -with open(sys.argv[1]) as sourceSentences: - with open(sys.argv[2]) as targetSentences: +with open(sourceFile) as sourceSentences: + with open(targetFile) as targetSentences: lineNumber = 0 for sourceSentence in sourceSentences: lineNumber += 1 targetSentence = targetSentences.readline() - sentences.append([tmId, sourceSentence, targetSentence]) + sentences.append([sourceSentence, targetSentence]) if lineNumber % BUFFER_SIZE == 0: data['sentences'] = sentences sentences = [] @@ -60,9 +83,10 @@ print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentenc print "Generating index..." start = time.time() data = { - 'operation': 'refreshIndex' + 'operation': 'refreshIndex', + 'tmId' : tmId } -req = urllib2.Request('http://localhost') +req = urllib2.Request(address) req.add_header('Content-Type', 'application/json') urllib2.urlopen(req, json.dumps(data)).read() diff --git a/tests/addSentence.py b/tests/addSentence.py index 09bebda..6ad3f63 100755 --- a/tests/addSentence.py +++ b/tests/addSentence.py @@ -6,16 +6,22 @@ import json import urllib2 import sys import time +import host + +address = 'http://'+host.concordia_host +if len(host.concordia_port) > 0: + address += ':'+host.concordia_port + data = { 'operation': 'addSentence', 'sourceSentence':sys.argv[1], 'targetSentence':sys.argv[2], - 'tmId':sys.argv[3] + 'tmId':int(sys.argv[3]) } start = time.time() -req = urllib2.Request('http://localhost') +req = urllib2.Request(address) req.add_header('Content-Type', 'application/json') response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) end = time.time() diff --git a/tests/addTm.py b/tests/addTm.py new file mode 100755 index 0000000..e3bfaa3 --- /dev/null +++ b/tests/addTm.py @@ -0,0 +1,26 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import json +import urllib2 +import sys +import time +import host + +address = 'http://'+host.concordia_host +if len(host.concordia_port) > 0: + address += ':'+host.concordia_port + + +data = { + 'operation': 'addTm', + 'sourceLangId':int(sys.argv[1]), + 'targetLangId':int(sys.argv[2]), + 'name':sys.argv[3] +} + +req = urllib2.Request(address) +req.add_header('Content-Type', 'application/json') +response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) + +print response diff --git a/tests/concordiaSearch.py b/tests/concordiaSearch.py index 2de0f1e..de02cb0 100755 --- a/tests/concordiaSearch.py +++ b/tests/concordiaSearch.py @@ -6,6 +6,12 @@ import json import urllib2 import sys import time +import host + +address = 'http://'+host.concordia_host +if len(host.concordia_port) > 0: + address += ':'+host.concordia_port + data = { 'operation': 'concordiaSearch', @@ -13,7 +19,7 @@ data = { } start = time.time() -req = urllib2.Request('http://localhost') +req = urllib2.Request(address) req.add_header('Content-Type', 'application/json') response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) end = time.time() diff --git a/tests/host.py_example b/tests/host.py_example new file mode 100644 index 0000000..26bce0b --- /dev/null +++ b/tests/host.py_example @@ -0,0 +1,2 @@ +concordia_host = 'localhost' +concordia_port = '' diff --git a/tests/simpleSearch.py b/tests/simpleSearch.py index a0a18db..6bea521 100755 --- a/tests/simpleSearch.py +++ b/tests/simpleSearch.py @@ -6,14 +6,20 @@ import json import urllib2 import sys import time +import host data = { 'operation': 'simpleSearch', - 'pattern':sys.argv[1] + 'pattern':sys.argv[1], + 'tmId':int(sys.argv[2]) } +address = 'http://'+host.concordia_host +if len(host.concordia_port) > 0: + address += ':'+host.concordia_port + start = time.time() -req = urllib2.Request('http://localhost:8800') +req = urllib2.Request(address) req.add_header('Content-Type', 'application/json') response = json.loads(urllib2.urlopen(req, json.dumps(data)).read()) end = time.time()