From d900e806d94f501c07493edfd715a34298ca9077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Jaworski?= Date: Sat, 23 Feb 2019 21:58:54 +0100 Subject: [PATCH] add fast aligned sentences --- concordia-server/concordia_server.cpp | 42 +++++++++++++++++++-------- concordia-server/concordia_server.hpp | 2 ++ concordia-server/index_controller.cpp | 12 ++++---- concordia-server/index_controller.hpp | 3 ++ 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/concordia-server/concordia_server.cpp b/concordia-server/concordia_server.cpp index ff85849..13a2c94 100644 --- a/concordia-server/concordia_server.cpp +++ b/concordia-server/concordia_server.cpp @@ -64,25 +64,28 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) { _indexController->addSentence(jsonWriter, sourceSentence, targetSentence, tmId); } else if (operation == ADD_SENTENCES_OP) { std::vector sourceSentences; + std::vector lemmatizedSourceSentences; std::vector targetSentences; + std::vector > > alignments; + std::vector sourceIds; + int tmId = _getIntParameter(d, TM_ID_PARAM); // loading data from json - const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM]; - /* - Logger::log("addSentences"); - Logger::logInt("sentences to add", sentencesArray.Size()); - Logger::logInt("tm id", tmId); - */ - for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) { - if (sentencesArray[i].Size() != 2) { - JsonGenerator::signalError(jsonWriter, "sentence should be an array of 2 elements"); + const rapidjson::Value & examplesArray = d[EXAMPLES_PARAM]; + + for (rapidjson::SizeType i = 0; i < examplesArray.Size(); i++) { + if (examplesArray[i].Size() != 5) { + JsonGenerator::signalError(jsonWriter, "example should be an array of 5 elements"); break; } else { - sourceSentences.push_back(sentencesArray[i][0].GetString()); - targetSentences.push_back(sentencesArray[i][1].GetString()); + sourceSentences.push_back(examplesArray[i][0].GetString()); + lemmatizedSourceSentences.push_back(examplesArray[i][1].GetString()); + targetSentences.push_back(examplesArray[i][2].GetString()); + alignments.push_back(_getInt2DArray(examplesArray[i][3])); + sourceIds.push_back(examplesArray[i][4].GetInt()); } } - _indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmId); + _indexController->addSentences(jsonWriter, sourceSentences, lemmatizedSourceSentences, targetSentences, alignments, sourceIds, tmId); } else if (operation == ADD_ALIGNED_SENTENCES_OP) { std::vector sourceSentences; std::vector targetSentences; @@ -359,6 +362,21 @@ int ConcordiaServer::_getBoolParameter(rapidjson::Document & d, const char * nam } } +std::vector > ConcordiaServer::_getInt2DArray(const rapidjson::Value & v) + throw (ConcordiaException) { + std::vector > result; + for (rapidjson::SizeType i = 0; i < v.Size(); i++) { + std::vector innerArray; + for (rapidjson::SizeType j = 0; j < v[i].Size(); j++) { + innerArray.push_back(v[i][j].GetInt()); + } + result.push_back(innerArray); + } + + return result; +} + + void ConcordiaServer::_addTm(int tmId) { std::stringstream indexPath; indexPath << INDEX_DIRECTORY << "/tm_" << tmId; diff --git a/concordia-server/concordia_server.hpp b/concordia-server/concordia_server.hpp index 101965b..996e8fd 100644 --- a/concordia-server/concordia_server.hpp +++ b/concordia-server/concordia_server.hpp @@ -43,6 +43,8 @@ private: int _getBoolParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException); + std::vector > _getInt2DArray(const rapidjson::Value & v) throw (ConcordiaException); + void _addTm(int tmId); std::string _configFilePath; diff --git a/concordia-server/index_controller.cpp b/concordia-server/index_controller.cpp index ae4782f..2a9f5c9 100644 --- a/concordia-server/index_controller.cpp +++ b/concordia-server/index_controller.cpp @@ -62,11 +62,13 @@ void IndexController::addSentence( } } -void IndexController::addSentences( - rapidjson::Writer & jsonWriter, - const std::vector & sourceSentences, - const std::vector & targetSentences, - const int tmId) { +void IndexController::addSentences(rapidjson::Writer & jsonWriter, + const std::vector & sourceSentences, + const std::vector & lemmatizedSourceSentences, + const std::vector & targetSentences, + const std::vector > > & alignments, + const std::vector & sourceIds, + const int tmId) { try { boost::ptr_map::iterator it = _concordiasMap->find(tmId); if (it != _concordiasMap->end()) { diff --git a/concordia-server/index_controller.hpp b/concordia-server/index_controller.hpp index c75d9ab..09b50be 100644 --- a/concordia-server/index_controller.hpp +++ b/concordia-server/index_controller.hpp @@ -33,7 +33,10 @@ public: void addSentences(rapidjson::Writer & jsonWriter, const std::vector & sourceSentences, + const std::vector & lemmatizedSourceSentences, const std::vector & targetSentences, + const std::vector > > & alignments, + const std::vector & sourceIds, const int tmId); void addAlignedSentences(rapidjson::Writer & jsonWriter,