From 2690b1595832163e870c1828e7c1fa6921eb4206 Mon Sep 17 00:00:00 2001 From: rjawor Date: Tue, 23 Jul 2019 21:57:40 +0200 Subject: [PATCH] refactoring, lemmatizers --- concordia-server/concordia_server.cpp | 66 +-------- concordia-server/concordia_server.hpp | 11 +- concordia-server/config.hpp.in | 3 - concordia-server/db_connection.cpp | 20 +-- concordia-server/db_connection.hpp | 18 +-- concordia-server/index_controller.cpp | 163 +---------------------- concordia-server/index_controller.hpp | 25 +--- concordia-server/json_lemmatizer.cpp | 9 +- concordia-server/json_lemmatizer.hpp | 5 +- concordia-server/lemmatizer_facade.cpp | 6 +- concordia-server/lemmatizer_facade.hpp | 8 +- concordia-server/lemmatizer_result.cpp | 10 ++ concordia-server/lemmatizer_result.hpp | 31 +++++ concordia-server/searcher_controller.cpp | 4 +- concordia-server/searcher_controller.hpp | 6 +- concordia-server/socket_lemmatizer.cpp | 119 ----------------- concordia-server/socket_lemmatizer.hpp | 42 ------ concordia-server/tm_dao.cpp | 2 +- concordia-server/tm_dao.hpp | 2 +- concordia-server/unit_dao.cpp | 8 +- concordia-server/unit_dao.hpp | 8 +- 21 files changed, 105 insertions(+), 461 deletions(-) create mode 100644 concordia-server/lemmatizer_result.cpp create mode 100644 concordia-server/lemmatizer_result.hpp delete mode 100644 concordia-server/socket_lemmatizer.cpp delete mode 100644 concordia-server/socket_lemmatizer.hpp diff --git a/concordia-server/concordia_server.cpp b/concordia-server/concordia_server.cpp index be67fc1..2ebd2fd 100644 --- a/concordia-server/concordia_server.cpp +++ b/concordia-server/concordia_server.cpp @@ -20,8 +20,7 @@ #include #include -ConcordiaServer::ConcordiaServer(const std::string & configFilePath) - throw(ConcordiaException) : +ConcordiaServer::ConcordiaServer(const std::string & configFilePath): _configFilePath(configFilePath) { std::vector tmIds = _tmDAO.getTmIds(); @@ -57,12 +56,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) { JsonGenerator::signalError(jsonWriter, errorstream.str()); } else { // json parsed std::string operation = _getStringParameter(d, OPERATION_PARAM); - if (operation == ADD_SENTENCE_OP) { - std::string sourceSentence = _getStringParameter(d, SOURCE_SENTENCE_PARAM); - std::string targetSentence = _getStringParameter(d, TARGET_SENTENCE_PARAM); - int tmId = _getIntParameter(d, TM_ID_PARAM); - _indexController->addSentence(jsonWriter, sourceSentence, targetSentence, tmId); - } else if (operation == ADD_SENTENCES_OP) { + if (operation == ADD_SENTENCES_OP) { std::vector sourceSentences; std::vector concordiaSourceSentences; std::vector targetSentences; @@ -86,50 +80,6 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) { } } _indexController->addSentences(jsonWriter, sourceSentences, concordiaSourceSentences, targetSentences, alignments, sourceIds, tmId); - } else if (operation == ADD_ALIGNED_SENTENCES_OP) { - std::vector sourceSentences; - std::vector targetSentences; - int tmId = d[TM_ID_PARAM].GetInt(); - // loading data from json - const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM]; - /* - Logger::log("addAlignedSentences"); - Logger::logInt("sentences to add", sentencesArray.Size()); - Logger::logInt("tm id", tmId); - */ - for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) { - if (sentencesArray[i].Size() != 2) { - JsonGenerator::signalError(jsonWriter, "sentence should be an array of 2 elements"); - break; - } else { - sourceSentences.push_back(sentencesArray[i][0].GetString()); - targetSentences.push_back(sentencesArray[i][1].GetString()); - } - } - _indexController->addAlignedSentences(jsonWriter, sourceSentences, targetSentences, tmId); - } else if (operation == ADD_ALIGNED_LEMMATIZED_SENTENCES_OP) { - std::vector sourceSentences; - std::vector targetSentences; - std::vector alignmentStrings; - int tmId = d[TM_ID_PARAM].GetInt(); - // loading data from json - const rapidjson::Value & sentencesArray = d[EXAMPLES_PARAM]; - /* - Logger::log("addAlignedLemmatizedSentences"); - Logger::logInt("lemmatized sentences to add", sentencesArray.Size()); - Logger::logInt("tm id", tmId); - */ - for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) { - if (sentencesArray[i].Size() != 3) { - JsonGenerator::signalError(jsonWriter, "sentence should be an array of 3 elements"); - break; - } else { - sourceSentences.push_back(sentencesArray[i][0].GetString()); - targetSentences.push_back(sentencesArray[i][1].GetString()); - alignmentStrings.push_back(sentencesArray[i][2].GetString()); - } - } - _indexController->addAlignedLemmatizedSentences(jsonWriter, sourceSentences, targetSentences, alignmentStrings, tmId); } else if (operation == GET_TMS_INFO_OP) { std::vector tms = _tmDAO.getTms(); @@ -348,8 +298,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) { } -std::string ConcordiaServer::_getStringParameter(rapidjson::Document & d, const char * name) - throw (ConcordiaException) { +std::string ConcordiaServer::_getStringParameter(rapidjson::Document & d, const char * name) { rapidjson::Value::ConstMemberIterator itr = d.FindMember(name); if (itr != d.MemberEnd()) { std::string value = itr->value.GetString(); @@ -359,8 +308,7 @@ std::string ConcordiaServer::_getStringParameter(rapidjson::Document & d, const } } -int ConcordiaServer::_getIntParameter(rapidjson::Document & d, const char * name) - throw (ConcordiaException) { +int ConcordiaServer::_getIntParameter(rapidjson::Document & d, const char * name) { rapidjson::Value::ConstMemberIterator itr = d.FindMember(name); if (itr != d.MemberEnd()) { int value = itr->value.GetInt(); @@ -370,8 +318,7 @@ int ConcordiaServer::_getIntParameter(rapidjson::Document & d, const char * name } } -int ConcordiaServer::_getBoolParameter(rapidjson::Document & d, const char * name) - throw (ConcordiaException) { +int ConcordiaServer::_getBoolParameter(rapidjson::Document & d, const char * name) { rapidjson::Value::ConstMemberIterator itr = d.FindMember(name); if (itr != d.MemberEnd()) { bool value = itr->value.GetBool(); @@ -381,8 +328,7 @@ int ConcordiaServer::_getBoolParameter(rapidjson::Document & d, const char * nam } } -std::vector > ConcordiaServer::_getInt2DArray(const rapidjson::Value & v) - throw (ConcordiaException) { +std::vector > ConcordiaServer::_getInt2DArray(const rapidjson::Value & v) { std::vector > result; for (rapidjson::SizeType i = 0; i < v.Size(); i++) { std::vector innerArray; diff --git a/concordia-server/concordia_server.hpp b/concordia-server/concordia_server.hpp index 996e8fd..f832660 100644 --- a/concordia-server/concordia_server.hpp +++ b/concordia-server/concordia_server.hpp @@ -26,8 +26,7 @@ public: \param configFilePath path to the Concordia configuration file \throws ConcordiaException */ - explicit ConcordiaServer(const std::string & configFilePath) - throw(ConcordiaException); + explicit ConcordiaServer(const std::string & configFilePath); /*! Destructor. */ virtual ~ConcordiaServer(); @@ -37,13 +36,13 @@ public: private: void _logPhrase(std::string phraseString); - std::string _getStringParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException); + std::string _getStringParameter(rapidjson::Document & d, const char * name); - int _getIntParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException); + int _getIntParameter(rapidjson::Document & d, const char * name); - int _getBoolParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException); + int _getBoolParameter(rapidjson::Document & d, const char * name); - std::vector > _getInt2DArray(const rapidjson::Value & v) throw (ConcordiaException); + std::vector > _getInt2DArray(const rapidjson::Value & v); void _addTm(int tmId); diff --git a/concordia-server/config.hpp.in b/concordia-server/config.hpp.in index 6599031..8e5a4ab 100644 --- a/concordia-server/config.hpp.in +++ b/concordia-server/config.hpp.in @@ -32,12 +32,9 @@ #define LINK_PARAM "link" #define SOURCES_PARAM "sources" -#define ADD_SENTENCE_OP "addSentence" #define ADD_SENTENCES_OP "addSentences" #define LEMMATIZE_OP "lemmatize" #define LEMMATIZE_ALL_OP "lemmatizeAll" -#define ADD_ALIGNED_SENTENCES_OP "addAlignedSentences" -#define ADD_ALIGNED_LEMMATIZED_SENTENCES_OP "addAlignedLemmatizedSentences" #define ADD_REQUEST_OP "addRequest" #define GET_TMS_INFO_OP "getTmsInfo" #define GET_REQUESTS_INFO_OP "getRequestsInfo" diff --git a/concordia-server/db_connection.cpp b/concordia-server/db_connection.cpp index 8b26eeb..d114ff3 100644 --- a/concordia-server/db_connection.cpp +++ b/concordia-server/db_connection.cpp @@ -7,8 +7,8 @@ #include "config.hpp" #include "logger.hpp" -DBconnection::DBconnection() throw(ConcordiaException) { - std::string connectionInfo = "dbname="DB_NAME" user="DB_USER" password="DB_PASSWORD" host="DB_HOST" port="DB_PORT; +DBconnection::DBconnection() { + std::string connectionInfo = "dbname=" DB_NAME " user=" DB_USER " password=" DB_PASSWORD " host=" DB_HOST " port=" DB_PORT; _connection = PQconnectdb(connectionInfo.c_str()); if (PQstatus(_connection) != CONNECTION_OK) { close(); @@ -31,7 +31,7 @@ void DBconnection::close() { } } -void DBconnection::startTransaction() throw(ConcordiaException) { +void DBconnection::startTransaction() { if (_connection != NULL) { PGresult * result = PQexec(_connection, "BEGIN"); if (PQresultStatus(result) != PGRES_COMMAND_OK) { @@ -44,7 +44,7 @@ void DBconnection::startTransaction() throw(ConcordiaException) { } } -void DBconnection::endTransaction() throw(ConcordiaException) { +void DBconnection::endTransaction() { if (_connection != NULL) { PGresult * result = PQexec(_connection, "END"); if (PQresultStatus(result) != PGRES_COMMAND_OK) { @@ -57,7 +57,7 @@ void DBconnection::endTransaction() throw(ConcordiaException) { } } -PGresult * DBconnection::execute(std::string query) throw(ConcordiaException) { +PGresult * DBconnection::execute(std::string query) { if (_connection != NULL) { PGresult * result = PQexec(_connection, query.c_str()); if (PQresultStatus(result) != PGRES_COMMAND_OK && @@ -78,7 +78,7 @@ PGresult * DBconnection::execute(std::string query) throw(ConcordiaException) { } PGresult * DBconnection::execute(std::string query, - std::vector params) throw(ConcordiaException) { + std::vector params) { if (_connection != NULL) { const char * paramValues[params.size()]; int paramLengths[params.size()]; @@ -122,7 +122,7 @@ void DBconnection::clearResult(PGresult * result) { PQclear(result); } -int DBconnection::getIntValue(PGresult * result, int row, int col) throw (ConcordiaException) { +int DBconnection::getIntValue(PGresult * result, int row, int col) { try { char * valueStr = PQgetvalue(result,row,col); return strtol(valueStr, NULL, 10); @@ -133,7 +133,7 @@ int DBconnection::getIntValue(PGresult * result, int row, int col) throw (Concor } } -bool DBconnection::getBoolValue(PGresult * result, int row, int col) throw (ConcordiaException) { +bool DBconnection::getBoolValue(PGresult * result, int row, int col) { try { char * valueStr = PQgetvalue(result,row,col); return std::string(valueStr) == "t"; @@ -144,7 +144,7 @@ bool DBconnection::getBoolValue(PGresult * result, int row, int col) throw (Conc } } -std::string DBconnection::getStringValue(PGresult * result, int row, int col) throw (ConcordiaException) { +std::string DBconnection::getStringValue(PGresult * result, int row, int col) { try { char * valueStr = PQgetvalue(result,row,col); return std::string(valueStr); @@ -155,7 +155,7 @@ std::string DBconnection::getStringValue(PGresult * result, int row, int col) t } } -int DBconnection::getRowCount(PGresult * result) throw (ConcordiaException) { +int DBconnection::getRowCount(PGresult * result) { try { return PQntuples(result); } catch (std::exception & e) { diff --git a/concordia-server/db_connection.hpp b/concordia-server/db_connection.hpp index 9542fb8..386c13f 100644 --- a/concordia-server/db_connection.hpp +++ b/concordia-server/db_connection.hpp @@ -13,29 +13,29 @@ class DBconnection { public: /*! Constructor. */ - DBconnection() throw(ConcordiaException); + DBconnection(); /*! Destructor. */ virtual ~DBconnection(); - void startTransaction() throw(ConcordiaException); + void startTransaction(); - void endTransaction() throw(ConcordiaException); + void endTransaction(); - PGresult * execute(std::string query) throw(ConcordiaException); + PGresult * execute(std::string query); PGresult * execute(std::string query, - std::vector params) throw(ConcordiaException); + std::vector params); void clearResult(PGresult * result); - int getIntValue(PGresult * result, int row, int col) throw (ConcordiaException); + int getIntValue(PGresult * result, int row, int col); - bool getBoolValue(PGresult * result, int row, int col) throw (ConcordiaException); + bool getBoolValue(PGresult * result, int row, int col); - std::string getStringValue(PGresult * result, int row, int col) throw (ConcordiaException); + std::string getStringValue(PGresult * result, int row, int col); - int getRowCount(PGresult * result) throw (ConcordiaException); + int getRowCount(PGresult * result); private: void close(); diff --git a/concordia-server/index_controller.cpp b/concordia-server/index_controller.cpp index 2ca9007..cbaee11 100644 --- a/concordia-server/index_controller.cpp +++ b/concordia-server/index_controller.cpp @@ -15,8 +15,7 @@ #include "logger.hpp" IndexController::IndexController(boost::shared_ptr >concordiasMap, - boost::shared_ptr lemmatizerFacade) - throw(ConcordiaException): + boost::shared_ptr lemmatizerFacade): _concordiasMap(concordiasMap), _lemmatizerFacade(lemmatizerFacade) { } @@ -24,44 +23,6 @@ IndexController::IndexController(boost::shared_ptr IndexController::~IndexController() { } - -void IndexController::addSentence( - rapidjson::Writer & jsonWriter, - const std::string & sourceSentence, - const std::string & targetSentence, - const int tmId) { - - try { - boost::ptr_map::iterator it = _concordiasMap->find(tmId); - if (it != _concordiasMap->end()) { - TokenizedSentence tokenizedLemmatizedSentence = it->second->tokenize(_lemmatizerFacade->lemmatizeIfNeeded(sourceSentence, tmId)); - TokenizedSentence tokenizedSentence = it->second->tokenize(sourceSentence); - int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId); - it->second->addTokenizedExample(tokenizedLemmatizedSentence, sentenceId); - it->second->refreshSAfromRAM(); - - jsonWriter.StartObject(); - jsonWriter.String("status"); - jsonWriter.String("success"); - jsonWriter.EndObject(); - } else { - JsonGenerator::signalError(jsonWriter, "no such tm!"); - } - } catch (ConcordiaException & e) { - std::stringstream errorstream; - errorstream << "concordia error: " << e.what(); - JsonGenerator::signalError(jsonWriter, errorstream.str()); - } catch (std::exception & e) { - std::stringstream errorstream; - errorstream << "general error: " << e.what(); - JsonGenerator::signalError(jsonWriter, errorstream.str()); - } catch (...) { - std::stringstream errorstream; - errorstream << "unexpected error occurred"; - JsonGenerator::signalError(jsonWriter, errorstream.str()); - } -} - void IndexController::addSentences(rapidjson::Writer & jsonWriter, const std::vector & sourceSentences, const std::vector & concordiaSourceSentences, @@ -93,76 +54,6 @@ void IndexController::addSentences(rapidjson::Writer & } } -void IndexController::addAlignedSentences( - rapidjson::Writer & jsonWriter, - const std::vector & rawSourceSentences, - const std::vector & targetSentences, - const int tmId) { - try { - boost::ptr_map::iterator it = _concordiasMap->find(tmId); - if (it != _concordiasMap->end()) { - std::vector sourceSentences; - std::vector > > allAlignments; - _getSourceSentencesAndAlignments(sourceSentences, allAlignments, rawSourceSentences); - - std::vector tokenizedSourceSentences = it->second->tokenizeAll(sourceSentences, true, true); - std::vector tokenizedTargetSentences = it->second->tokenizeAll(targetSentences, true, false); - - std::vector sentenceIds = _unitDAO.addAlignedSentences(tokenizedSourceSentences, tokenizedTargetSentences, allAlignments, tmId); - for(int index = 0; index < tokenizedSourceSentences.size(); index++) { - it->second->addTokenizedExample(tokenizedSourceSentences.at(index), sentenceIds.at(index)); - } - jsonWriter.StartObject(); - jsonWriter.String("status"); - jsonWriter.String("success"); - jsonWriter.EndObject(); - } else { - JsonGenerator::signalError(jsonWriter, "no such tm!"); - } - } catch (ConcordiaException & e) { - std::stringstream errorstream; - errorstream << "concordia error: " << e.what(); - JsonGenerator::signalError(jsonWriter, errorstream.str()); - } -} - -void IndexController::addAlignedLemmatizedSentences( - rapidjson::Writer & jsonWriter, - const std::vector & sourceSentences, - const std::vector & targetSentences, - const std::vector & alignmentStrings, - const int tmId) { - try { - boost::ptr_map::iterator it = _concordiasMap->find(tmId); - if (it != _concordiasMap->end()) { - std::vector lemmatizedSourceSentences; - std::vector > > allAlignments; - _getSourceSentencesAndAlignments(lemmatizedSourceSentences, allAlignments, alignmentStrings); - - std::vector tokenizedLemmatizedSourceSentences = - it->second->tokenizeAll(lemmatizedSourceSentences, true, true); - std::vector tokenizedSourceSentences = it->second->tokenizeAll(sourceSentences, false, false); - std::vector tokenizedTargetSentences = it->second->tokenizeAll(targetSentences, false, false); - - std::vector sentenceIds = - _unitDAO.addAlignedSentences(tokenizedSourceSentences, tokenizedTargetSentences, allAlignments, tmId); - for(int index = 0; index < tokenizedLemmatizedSourceSentences.size(); index++) { - it->second->addTokenizedExample(tokenizedLemmatizedSourceSentences.at(index), sentenceIds.at(index)); - } - jsonWriter.StartObject(); - jsonWriter.String("status"); - jsonWriter.String("success"); - jsonWriter.EndObject(); - } else { - JsonGenerator::signalError(jsonWriter, "no such tm!"); - } - } catch (ConcordiaException & e) { - std::stringstream errorstream; - errorstream << "concordia error: " << e.what(); - JsonGenerator::signalError(jsonWriter, errorstream.str()); - } -} - void IndexController::refreshIndexFromRAM(rapidjson::Writer & jsonWriter, const int tmId) { @@ -184,54 +75,4 @@ void IndexController::refreshIndexFromRAM(rapidjson::Writer & sourceSentences, - std::vector > > & allAlignments, - const std::vector & rawSourceSentences) { - - for (int i = 0; i > alignments; - - UnicodeString s(rawSourceSentence.c_str()); - boost::u32regex_iterator begin( - boost::make_u32regex_iterator( - s, - boost::make_u32regex(UnicodeString("(\\S+) \\(\\{(( \\d+)*) \\}\\)"), boost::regex::icase) - ) - ); - boost::u32regex_iterator end; - - for (; begin != end; ++begin) { - UnicodeString tokenUTF8((*begin)[1].first, (*begin).length(1)); - std::string token; - tokenUTF8.toUTF8String(token); - - if (token != "NULL") { - std::string numbers((*begin)[2].first, (*begin)[2].second); - std::istringstream iss(numbers); - std::vector numberStrings; - std::copy(std::istream_iterator(iss), - std::istream_iterator(), - std::back_inserter(numberStrings)); - - std::vector tokenAlignments; - for (int j=0;j >concordiasMap, - boost::shared_ptr lemmatizerFacade) - throw(ConcordiaException); + boost::shared_ptr lemmatizerFacade); /*! Destructor. */ virtual ~IndexController(); - void addSentence(rapidjson::Writer & jsonWriter, - const std::string & sourceSentence, - const std::string & targetSentence, - const int tmId); - void addSentences(rapidjson::Writer & jsonWriter, const std::vector & sourceSentences, const std::vector & concordiaSourceSentences, @@ -39,27 +33,10 @@ public: const std::vector & sourceIds, const int tmId); - void addAlignedSentences(rapidjson::Writer & jsonWriter, - const std::vector & rawSourceSentences, - const std::vector & targetSentences, - const int tmId); - - void addAlignedLemmatizedSentences( - rapidjson::Writer & jsonWriter, - const std::vector & sourceSentences, - const std::vector & targetSentences, - const std::vector & alignmentStrings, - const int tmId); - void refreshIndexFromRAM(rapidjson::Writer & jsonWriter, const int tmId); private: - void _getSourceSentencesAndAlignments( - std::vector & sourceSentences, - std::vector > > & allAlignments, - const std::vector & rawSourceSentences); - boost::shared_ptr > _concordiasMap; boost::shared_ptr _lemmatizerFacade; diff --git a/concordia-server/json_lemmatizer.cpp b/concordia-server/json_lemmatizer.cpp index 2c2848e..14c1e49 100644 --- a/concordia-server/json_lemmatizer.cpp +++ b/concordia-server/json_lemmatizer.cpp @@ -8,14 +8,14 @@ #include "rapidjson/error/en.h" #include -JsonLemmatizer::JsonLemmatizer() throw(ConcordiaException) { +JsonLemmatizer::JsonLemmatizer() { } JsonLemmatizer::~JsonLemmatizer() { } -std::string JsonLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) { +LemmatizerResult JsonLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) { rapidjson::StringBuffer paramsJson; rapidjson::Writer jsonWriter(paramsJson); jsonWriter.StartObject(); @@ -31,6 +31,7 @@ std::string JsonLemmatizer::lemmatizeSentence(std::string languageCode, std::str RestClient::Response r = RestClient::post("http://concordia-preprocessor:9001/lemmatize", "application/json", paramsJson.GetString()); rapidjson::Document d; d.Parse(r.body.c_str()); - std::string lemmatized = d["processed_sentences"][0]["tokens"].GetString(); - return lemmatized; + std::string lemmatizedSentence = d["processed_sentences"][0]["tokens"].GetString(); + bool isFirstLemmatized = d["processed_sentences"][0]["isFirstLemmatized"].GetBool(); + return LemmatizerResult(lemmatizedSentence, isFirstLemmatized); } diff --git a/concordia-server/json_lemmatizer.hpp b/concordia-server/json_lemmatizer.hpp index 84e9efa..8229da3 100644 --- a/concordia-server/json_lemmatizer.hpp +++ b/concordia-server/json_lemmatizer.hpp @@ -5,18 +5,19 @@ #include +#include "lemmatizer_result.hpp" #include "logger.hpp" class JsonLemmatizer { public: /*! Constructor. */ - explicit JsonLemmatizer() throw(ConcordiaException); + explicit JsonLemmatizer(); /*! Destructor. */ virtual ~JsonLemmatizer(); - std::string lemmatizeSentence(std::string languageCode, std::string sentence); + LemmatizerResult lemmatizeSentence(std::string languageCode, std::string sentence); private: Logger _logger; }; diff --git a/concordia-server/lemmatizer_facade.cpp b/concordia-server/lemmatizer_facade.cpp index df7bc27..83141cf 100644 --- a/concordia-server/lemmatizer_facade.cpp +++ b/concordia-server/lemmatizer_facade.cpp @@ -3,7 +3,7 @@ #include -LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) { +LemmatizerFacade::LemmatizerFacade() { _lemmatizersMap = boost::ptr_map(); // todo: extract this to configuration, especially when new lemmatizers ConstMemberIterator @@ -18,7 +18,7 @@ LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) { LemmatizerFacade::~LemmatizerFacade() { } -std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) { +LemmatizerResult LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) { boost::ptr_map::iterator it = _lemmatizersMap.find(languageCode); if (it != _lemmatizersMap.end()) { @@ -29,7 +29,7 @@ std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::s } -std::vector LemmatizerFacade::lemmatizeSentences(std::string languageCode, std::vector sentences) { +LemmatizerResult LemmatizerFacade::lemmatizeSentences(std::string languageCode, std::vector sentences) { std::vector result; BOOST_FOREACH(std::string & sentence, sentences) { result.push_back(lemmatizeSentence(languageCode, sentence)); diff --git a/concordia-server/lemmatizer_facade.hpp b/concordia-server/lemmatizer_facade.hpp index 0d8d8aa..824abd4 100644 --- a/concordia-server/lemmatizer_facade.hpp +++ b/concordia-server/lemmatizer_facade.hpp @@ -1,8 +1,8 @@ #ifndef LEMMATIZER_FACADE_HDR #define LEMMATIZER_FACADE_HDR -#include "socket_lemmatizer.hpp" #include "json_lemmatizer.hpp" +#include "lemmatizer_result.hpp" #include "tm_dao.hpp" #include @@ -14,14 +14,14 @@ class LemmatizerFacade { public: /*! Constructor. */ - LemmatizerFacade() throw(ConcordiaException); + LemmatizerFacade(); /*! Destructor. */ virtual ~LemmatizerFacade(); - std::string lemmatizeSentence(std::string languageCode, std::string sentence); + LemmatizerResult lemmatizeSentence(std::string languageCode, std::string sentence); - std::vector lemmatizeSentences(std::string languageCode, std::vector sentences); + std::vector lemmatizeSentences(std::string languageCode, std::vector sentences); private: boost::ptr_map _lemmatizersMap; diff --git a/concordia-server/lemmatizer_result.cpp b/concordia-server/lemmatizer_result.cpp new file mode 100644 index 0000000..3babfcb --- /dev/null +++ b/concordia-server/lemmatizer_result.cpp @@ -0,0 +1,10 @@ +#include "lemmatizer_result.hpp" + +LemmatizerResutl::LemmatizerResult(const std::string & lemmatizedSentence, + const bool isFirstLemmatized): + _lemmatizedSentence(lemmatizedSentence), + _isFirstLemmatized(isFirstLemmatized) { + +} + +LemmatizerResutl::~LemmatizerResult(); diff --git a/concordia-server/lemmatizer_result.hpp b/concordia-server/lemmatizer_result.hpp new file mode 100644 index 0000000..488e363 --- /dev/null +++ b/concordia-server/lemmatizer_result.hpp @@ -0,0 +1,31 @@ +#ifndef LEMMATIZER_RESULT_HDR +#define LEMMATIZER_RESULT__HDR + +#include + +class LemmatizerResult { +public: + /*! Constructor. + */ + LemmatizerResult(const std::string & lemmatizedSentence, + const bool isFirstLemmatized); + + /*! Destructor. + */ + virtual ~LemmatizerResult(); + + const std::string & getLemmatizedSentence() const { + return _lemmatizedSentence; + } + + int isFirstLemmatized() const { + return _isFirstLemmatized; + } + +private: + std::string _lemmatizedSentence; + + bool _isFirstLemmatized; +}; + +#endif diff --git a/concordia-server/searcher_controller.cpp b/concordia-server/searcher_controller.cpp index a077f64..e2468e3 100644 --- a/concordia-server/searcher_controller.cpp +++ b/concordia-server/searcher_controller.cpp @@ -12,8 +12,7 @@ SearcherController::SearcherController(boost::shared_ptr >concordiasMap, - boost::shared_ptr lemmatizerFacade) - throw(ConcordiaException): + boost::shared_ptr lemmatizerFacade): _concordiasMap(concordiasMap), _lemmatizerFacade(lemmatizerFacade) { } @@ -26,6 +25,7 @@ void SearcherController::simpleSearch(rapidjson::Writer std::string & pattern, const int tmId) { boost::ptr_map::iterator it = _concordiasMap->find(tmId); + _tmDAO.getTm if (it != _concordiasMap->end()) { TokenizedSentence tokenizedPattern = it->second->tokenize(pattern, false, false); pattern = _lemmatizerFacade->lemmatizeIfNeeded(tokenizedPattern.getTokenizedSentence(), tmId); diff --git a/concordia-server/searcher_controller.hpp b/concordia-server/searcher_controller.hpp index 344b7b7..f8a3adc 100644 --- a/concordia-server/searcher_controller.hpp +++ b/concordia-server/searcher_controller.hpp @@ -9,6 +9,7 @@ #include #include "unit_dao.hpp" +#include "tm_dao.hpp" #include "simple_search_result.hpp" #include "lemmatizer_facade.hpp" #include "rapidjson/writer.h" @@ -19,8 +20,7 @@ public: /*! Constructor. */ explicit SearcherController(boost::shared_ptr > concordiasMap, - boost::shared_ptr LemmatizerFacade) - throw(ConcordiaException); + boost::shared_ptr LemmatizerFacade); /*! Destructor. */ virtual ~SearcherController(); @@ -56,6 +56,8 @@ private: boost::shared_ptr _lemmatizerFacade; UnitDAO _unitDAO; + + TmDAO _tmDAO; }; #endif diff --git a/concordia-server/socket_lemmatizer.cpp b/concordia-server/socket_lemmatizer.cpp deleted file mode 100644 index 137f4ab..0000000 --- a/concordia-server/socket_lemmatizer.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include "socket_lemmatizer.hpp" - -#include - -#include "config.hpp" - -#include - -SocketLemmatizer::SocketLemmatizer(int port) throw(ConcordiaException) : - _port(port) { -} - -SocketLemmatizer::~SocketLemmatizer() { -} - -/** - Connect to a host on a certain port number -*/ -bool SocketLemmatizer::_connect() { - - //Create socket - _sock = socket(AF_INET , SOCK_STREAM , 0); - if (_sock == -1) { - throw ConcordiaException("Could not create socket for the lemmatizer."); - } - - std::string address = "127.0.0.1"; - - //setup address structure - if(inet_addr(address.c_str()) == -1) { - struct hostent *he; - struct in_addr **addr_list; - - //resolve the hostname, its not an ip address - if ( (he = gethostbyname( address.c_str() ) ) == NULL) { - //gethostbyname failed - throw ConcordiaException("gethostbyname: Failed to resolve hostname"); - } - - //Cast the h_addr_list to in_addr , since h_addr_list also has the ip address in long format only - addr_list = (struct in_addr **) he->h_addr_list; - - for(int i = 0; addr_list[i] != NULL; i++) { - _server.sin_addr = *addr_list[i]; - break; - } - } else { //plain ip address - _server.sin_addr.s_addr = inet_addr(address.c_str()); - } - - _server.sin_family = AF_INET; - _server.sin_port = htons(_port); - - //Connect to remote server - if (connect(_sock , (struct sockaddr *) & _server , sizeof(_server)) < 0) { - throw ConcordiaException("Connect failed. Error on address: "+address+":"+boost::lexical_cast(_port)); - } - - return true; -} - -bool SocketLemmatizer::_disconnect() { - close(_sock); - _sock = -1; -} - -/** - Send data to the connected host -*/ -bool SocketLemmatizer::_send_data(std::string data) -{ - //Send some data - if(send(_sock , data.c_str() , strlen(data.c_str() ) , 0) < 0) { - throw ConcordiaException("Send failed"); - } - return true; -} - -/** - Receive data from the connected host -*/ -std::string SocketLemmatizer::_receive(int size=512) -{ - char buffer[size]; - std::string reply = ""; - - //Receive a reply from the server - bool dataAvailable = true; - while (dataAvailable) { - int amountReceived = recv(_sock , buffer , sizeof(buffer) , 0); - if (amountReceived < 0) { - throw ConcordiaException("Lemmatizer: recv failed"); - } else if (amountReceived == 0) { - dataAvailable = false; - } else { - buffer[amountReceived] = '\0'; - reply += buffer; - } - } - return reply; -} - -std::string SocketLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) { - for (int i=0;i<5;i++) { - try { - _connect(); - _send_data(languageCode+sentence+LEMMATIZER_DELIMITER); - std::string reply = _receive(512); - _disconnect(); - return reply.substr(0,reply.find(LEMMATIZER_DELIMITER)); - } catch (std::exception & e) { - _logger.logString("Problem with lemmatization of the sentence", sentence); - _logger.log("Waiting 2 seconds and retrying..."); - sleep(2); - } - } - - throw ConcordiaException("Can not lemmatize sentence: "+sentence); -} diff --git a/concordia-server/socket_lemmatizer.hpp b/concordia-server/socket_lemmatizer.hpp deleted file mode 100644 index 13d5949..0000000 --- a/concordia-server/socket_lemmatizer.hpp +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef SOCKET_LEMMATIZER_HDR -#define SOCKET_LEMMATIZER_HDR - -#include -#include //socket -#include //inet_addr -#include //hostent -#include - -#include - -#include "logger.hpp" - -class SocketLemmatizer { -public: - /*! Constructor. - */ - explicit SocketLemmatizer(int port) throw(ConcordiaException); - /*! Destructor. - */ - virtual ~SocketLemmatizer(); - - std::string lemmatizeSentence(std::string languageCode, std::string sentence); -private: - bool _connect(); - - bool _disconnect(); - - bool _send_data(std::string data); - - std::string _receive(int size); - - int _port; - - int _sock; - - struct sockaddr_in _server; - - Logger _logger; -}; - -#endif diff --git a/concordia-server/tm_dao.cpp b/concordia-server/tm_dao.cpp index 0a997e5..c19de76 100644 --- a/concordia-server/tm_dao.cpp +++ b/concordia-server/tm_dao.cpp @@ -88,7 +88,7 @@ int TmDAO::addTm(const int sourceLangId, const int targetLangId, const std::stri } -Tm TmDAO::getTmInfo(int tmId) { +Tm TmDAO::getTm(int tmId) { DBconnection connection; connection.startTransaction(); std::string query = "select tm.id, tm.name, tm.lemmatized, tm.paired_tm_id, source_language.code, target_language.code from tm inner join language as source_language on source_language.id = tm.source_lang_id inner join language as target_language on target_language.id = tm.target_lang_id where tm.id = $1::integer;"; diff --git a/concordia-server/tm_dao.hpp b/concordia-server/tm_dao.hpp index 1bd7227..238d77d 100644 --- a/concordia-server/tm_dao.hpp +++ b/concordia-server/tm_dao.hpp @@ -28,7 +28,7 @@ public: std::vector getTms(); - std::pair getTmInfo(int tmId); + std::pair getTm(int tmId); private: diff --git a/concordia-server/unit_dao.cpp b/concordia-server/unit_dao.cpp index 8600e63..7e9a81d 100644 --- a/concordia-server/unit_dao.cpp +++ b/concordia-server/unit_dao.cpp @@ -59,7 +59,7 @@ std::vector UnitDAO::addAlignedSentences( const std::vector & sourceSentences, const std::vector & targetSentences, const std::vector > > & allAlignments, - const int tmId) throw (ConcordiaException) { + const int tmId) { DBconnection connection; std::vector newIds; @@ -78,7 +78,7 @@ std::vector UnitDAO::addAlignedSentences( const std::vector & targetSentences, const std::vector > > & allAlignments, const std::vector & sourceIds, - const int tmId) throw (ConcordiaException) { + const int tmId) { DBconnection connection; std::vector newIds; @@ -270,7 +270,7 @@ int UnitDAO::_addAlignedUnit ( const TokenizedSentence & sourceSentence, const TokenizedSentence & targetSentence, const std::vector > & alignments, - const int tmId) throw(ConcordiaException) { + const int tmId) { if (sourceSentence.getTokens().size() != alignments.size()) { // Here we check if the source sentence, taken from src.tok, @@ -305,7 +305,7 @@ int UnitDAO::_addAlignedUnit ( const TokenizedSentence & targetSentence, const std::vector > & alignments, const int sourceId, - const int tmId) throw(ConcordiaException) { + const int tmId) { if (sourceSentence.getTokens().size() != alignments.size()) { // Here we check if the source sentence, taken from src.tok, diff --git a/concordia-server/unit_dao.hpp b/concordia-server/unit_dao.hpp index 245282b..ca43ff0 100644 --- a/concordia-server/unit_dao.hpp +++ b/concordia-server/unit_dao.hpp @@ -41,14 +41,14 @@ public: const std::vector & sourceSentences, const std::vector & targetSentences, const std::vector > > & allAlignments, - const int tmId) throw (ConcordiaException); + const int tmId); std::vector addAlignedSentences( const std::vector & sourceSentences, const std::vector & targetSentences, const std::vector > > & allAlignments, const std::vector & sourceIds, - const int tmId) throw (ConcordiaException); + const int tmId); SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment); @@ -83,7 +83,7 @@ private: const TokenizedSentence & sourceSentence, const TokenizedSentence & targetSentence, const std::vector > & alignments, - const int tmId) throw(ConcordiaException); + const int tmId); int _addAlignedUnit( DBconnection & connection, @@ -91,7 +91,7 @@ private: const TokenizedSentence & targetSentence, const std::vector > & alignments, const int sourceId, - const int tmId) throw(ConcordiaException); + const int tmId); std::vector _getArray(std::string arrayString);