diff --git a/TODO.txt b/TODO.txt index f82bd99..a6677de 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1 +1,2 @@ -- implement connection pooling with PgBouncer +DONE - check the parameters and return types (should be const ref) +DONE - implement connection pooling with PgBouncer diff --git a/concordia-server/concordia_server.cpp b/concordia-server/concordia_server.cpp index e7da983..9426db1 100644 --- a/concordia-server/concordia_server.cpp +++ b/concordia-server/concordia_server.cpp @@ -4,17 +4,9 @@ #include #include "json_generator.hpp" - -#define OPERATION_PARAM "operation" -#define PATTERN_PARAM "pattern" -#define SOURCE_SENTENCE_PARAM "sourceSentence" -#define TARGET_SENTENCE_PARAM "targetSentence" -#define TM_ID_PARAM "tmId" - -#define ADD_SENTENCE_OP "addSentence" -#define SIMPLE_SEARCH_OP "simpleSearch" -#define CONCORDIA_SEARCH_OP "concordiaSearch" - +#include "config.hpp" +#include "logger.hpp" +#include "rapidjson/rapidjson.h" ConcordiaServer::ConcordiaServer(const std::string & configFilePath) throw(ConcordiaException) { @@ -48,6 +40,23 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) { std::string targetSentence = d[TARGET_SENTENCE_PARAM].GetString(); int tmId = d[TM_ID_PARAM].GetInt(); _indexController->addSentence(jsonWriter, sourceSentence, targetSentence, tmId); + } else if (operation == ADD_SENTENCES_OP) { + std::vector sourceSentences; + std::vector targetSentences; + std::vector tmIds; + // loading data from json + const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM]; + for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) { + if (sentencesArray[i].Size() != 3) { + JsonGenerator::signalError(jsonWriter, "sentence should be an array of 3 elements"); + break; + } else { + tmIds.push_back(sentencesArray[i][0].GetInt()); + sourceSentences.push_back(sentencesArray[i][1].GetString()); + targetSentences.push_back(sentencesArray[i][2].GetString()); + } + } + _indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmIds); } else if (operation == SIMPLE_SEARCH_OP) { std::string pattern = d[PATTERN_PARAM].GetString(); _searcherController->simpleSearch(jsonWriter, pattern); diff --git a/concordia-server/config.hpp b/concordia-server/config.hpp index 58033a3..4b9119f 100644 --- a/concordia-server/config.hpp +++ b/concordia-server/config.hpp @@ -6,3 +6,17 @@ #define DB_PASSWORD "concordia" #define DB_HOST "localhost" #define DB_PORT "6543" + +// json syntax +#define OPERATION_PARAM "operation" +#define PATTERN_PARAM "pattern" +#define SOURCE_SENTENCE_PARAM "sourceSentence" +#define TARGET_SENTENCE_PARAM "targetSentence" +#define TM_ID_PARAM "tmId" +#define SENTENCES_PARAM "sentences" + +#define ADD_SENTENCE_OP "addSentence" +#define ADD_SENTENCES_OP "addSentences" +#define SIMPLE_SEARCH_OP "simpleSearch" +#define CONCORDIA_SEARCH_OP "concordiaSearch" + diff --git a/concordia-server/config.hpp.in b/concordia-server/config.hpp.in index da7c021..300e892 100644 --- a/concordia-server/config.hpp.in +++ b/concordia-server/config.hpp.in @@ -6,3 +6,17 @@ #define DB_PASSWORD "@DB_PASSWORD@" #define DB_HOST "@DB_HOST@" #define DB_PORT "@DB_PORT@" + +// json syntax +#define OPERATION_PARAM "operation" +#define PATTERN_PARAM "pattern" +#define SOURCE_SENTENCE_PARAM "sourceSentence" +#define TARGET_SENTENCE_PARAM "targetSentence" +#define TM_ID_PARAM "tmId" +#define SENTENCES_PARAM "sentences" + +#define ADD_SENTENCE_OP "addSentence" +#define ADD_SENTENCES_OP "addSentences" +#define SIMPLE_SEARCH_OP "simpleSearch" +#define CONCORDIA_SEARCH_OP "concordiaSearch" + diff --git a/concordia-server/db_connection.cpp b/concordia-server/db_connection.cpp index f184390..bced9c7 100644 --- a/concordia-server/db_connection.cpp +++ b/concordia-server/db_connection.cpp @@ -122,14 +122,26 @@ void DBconnection::clearResult(PGresult * result) { PQclear(result); } -int DBconnection::getIntValue(PGresult * result, int row, int col) { - char * valueStr = PQgetvalue(result,row,col); - return strtol(valueStr, NULL, 10); +int DBconnection::getIntValue(PGresult * result, int row, int col) throw (ConcordiaException) { + try { + char * valueStr = PQgetvalue(result,row,col); + return strtol(valueStr, NULL, 10); + } catch (std::exception & e) { + std::stringstream ss; + ss << "Error getting int value. Message: " << e.what(); + throw ConcordiaException(ss.str()); + } } -std::string DBconnection::getStringValue(PGresult * result, int row, int col) { - char * valueStr = PQgetvalue(result,row,col); - return std::string(valueStr); +std::string DBconnection::getStringValue(PGresult * result, int row, int col) throw (ConcordiaException) { + try { + char * valueStr = PQgetvalue(result,row,col); + return std::string(valueStr); + } catch (std::exception & e) { + std::stringstream ss; + ss << "Error getting string value. Message: " << e.what(); + throw ConcordiaException(ss.str()); + } } diff --git a/concordia-server/db_connection.hpp b/concordia-server/db_connection.hpp index dfb3344..666d704 100644 --- a/concordia-server/db_connection.hpp +++ b/concordia-server/db_connection.hpp @@ -29,9 +29,9 @@ public: void clearResult(PGresult * result); - int getIntValue(PGresult * result, int row, int col); + int getIntValue(PGresult * result, int row, int col) throw (ConcordiaException); - std::string getStringValue(PGresult * result, int row, int col); + std::string getStringValue(PGresult * result, int row, int col) throw (ConcordiaException); private: void close(); diff --git a/concordia-server/index_controller.cpp b/concordia-server/index_controller.cpp index 0506a05..d2d470b 100644 --- a/concordia-server/index_controller.cpp +++ b/concordia-server/index_controller.cpp @@ -1,5 +1,7 @@ #include "index_controller.hpp" +#include + #include "json_generator.hpp" IndexController::IndexController(boost::shared_ptr concordia) @@ -13,12 +15,12 @@ IndexController::~IndexController() { void IndexController::addSentence( rapidjson::Writer & jsonWriter, - std::string & sourceSentence, - std::string & targetSentence, - int tmId) { + const std::string & sourceSentence, + const std::string & targetSentence, + const int tmId) { try { - boost::shared_ptr tokenizedSentence = _concordia->tokenize(sourceSentence); + TokenizedSentence tokenizedSentence = _concordia->tokenize(sourceSentence); int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId); _concordia->addTokenizedExample(tokenizedSentence, sentenceId); _concordia->refreshSAfromRAM(); @@ -34,3 +36,25 @@ void IndexController::addSentence( } } +void IndexController::addSentences( + rapidjson::Writer & jsonWriter, + const std::vector & sourceSentences, + const std::vector & targetSentences, + const std::vector & tmIds) { + try { + std::vector tokenizedSentences = _concordia->tokenizeAll(sourceSentences); + std::vector sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmIds); + _concordia->addAllTokenizedExamples(tokenizedSentences, sentenceIds); + _concordia->refreshSAfromRAM(); + + jsonWriter.StartObject(); + jsonWriter.String("status"); + jsonWriter.String("success"); + jsonWriter.EndObject(); + } catch (ConcordiaException & e) { + std::stringstream errorstream; + errorstream << "concordia error: " << e.what(); + JsonGenerator::signalError(jsonWriter, errorstream.str()); + } +} + diff --git a/concordia-server/index_controller.hpp b/concordia-server/index_controller.hpp index 51a7692..33ce7c6 100644 --- a/concordia-server/index_controller.hpp +++ b/concordia-server/index_controller.hpp @@ -21,9 +21,14 @@ public: virtual ~IndexController(); void addSentence(rapidjson::Writer & jsonWriter, - std::string & sourceSentence, - std::string & targetSentence, - int tmId); + const std::string & sourceSentence, + const std::string & targetSentence, + const int tmId); + + void addSentences(rapidjson::Writer & jsonWriter, + const std::vector & sourceSentences, + const std::vector & targetSentences, + const std::vector & tmIds); private: boost::shared_ptr _concordia; diff --git a/concordia-server/logger.cpp b/concordia-server/logger.cpp index 77a0a48..aed26e2 100644 --- a/concordia-server/logger.cpp +++ b/concordia-server/logger.cpp @@ -1,31 +1,56 @@ #include "logger.hpp" -#include "log4cpp/Category.hh" #include "log4cpp/Appender.hh" #include "log4cpp/FileAppender.hh" #include "log4cpp/BasicLayout.hh" #include "log4cpp/Priority.hh" +#include + Logger::Logger() { } Logger::~Logger() { } -int Logger::initialized = 0; +int Logger::_initialized = 0; void Logger::log(std::string message) { log4cpp::Category & root = log4cpp::Category::getRoot(); - if (initialized == 0) { - log4cpp::Appender *appender = new log4cpp::FileAppender("default", "/tmp/concordia-server.log"); - appender->setLayout(new log4cpp::BasicLayout()); - - root.setPriority(log4cpp::Priority::INFO); - root.addAppender(appender); - - initialized = 1; - } + if (_initialized == 0) { + _initialize(root); + } root.info(message); } +void Logger::logInt(std::string name, int value) { + log4cpp::Category & root = log4cpp::Category::getRoot(); + if (_initialized == 0) { + _initialize(root); + } + std::stringstream ss; + ss << " " << name << ": " << value; + root.info(ss.str()); +} + +void Logger::logString(std::string name, std::string value) { + log4cpp::Category & root = log4cpp::Category::getRoot(); + if (_initialized == 0) { + _initialize(root); + } + std::stringstream ss; + ss << " " << name << ": " << value; + root.info(ss.str()); +} + +void Logger::_initialize(log4cpp::Category & root) { + log4cpp::Appender *appender = new log4cpp::FileAppender("default", "/tmp/concordia-server.log"); + appender->setLayout(new log4cpp::BasicLayout()); + + root.setPriority(log4cpp::Priority::INFO); + root.addAppender(appender); + + _initialized = 1; +} + diff --git a/concordia-server/logger.hpp b/concordia-server/logger.hpp index 9653c19..ddbd089 100644 --- a/concordia-server/logger.hpp +++ b/concordia-server/logger.hpp @@ -4,6 +4,9 @@ #include #include +#include "log4cpp/Category.hh" + + class Logger { public: /*! Constructor. @@ -14,8 +17,14 @@ public: virtual ~Logger(); static void log(std::string message); + + static void logInt(std::string name, int value); + + static void logString(std::string name, std::string value); private: - static int initialized; + static void _initialize(log4cpp::Category & root); + + static int _initialized; }; #endif diff --git a/concordia-server/unit_dao.cpp b/concordia-server/unit_dao.cpp index fc6ca59..afb3a3c 100644 --- a/concordia-server/unit_dao.cpp +++ b/concordia-server/unit_dao.cpp @@ -1,6 +1,5 @@ #include "unit_dao.hpp" -#include "db_connection.hpp" #include "query_param.hpp" #include "string_param.hpp" #include "int_param.hpp" @@ -18,46 +17,47 @@ UnitDAO::~UnitDAO() { } int UnitDAO::addSentence( - boost::shared_ptr sourceSentence, - std::string & targetSentence, - int tmId) { + const TokenizedSentence & sourceSentence, + const std::string & targetSentence, + const int tmId) { DBconnection connection; connection.startTransaction(); - std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens) values($1::text,$2::text,$3::integer,$4) RETURNING id"; - std::vector params; - params.push_back(new StringParam(sourceSentence->getSentence())); - params.push_back(new StringParam(targetSentence)); - params.push_back(new IntParam(tmId)); - params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence))); - - PGresult * result = connection.execute(query, params); - int newId = connection.getIntValue(result, 0, 0); - connection.clearResult(result); + int newId = _addSingleSentence(connection, sourceSentence, targetSentence, tmId); connection.endTransaction(); - - BOOST_FOREACH (QueryParam * param, params) { - delete param; - } - return newId; - } -std::vector UnitDAO::getSearchResults(std::vector concordiaResults) { +std::vector UnitDAO::addSentences( + const std::vector & sourceSentences, + const std::vector & targetSentences, + const std::vector & tmIds) { + DBconnection connection; + std::vector newIds; + connection.startTransaction(); + int index = 0; + BOOST_FOREACH(const TokenizedSentence & sourceSentence, sourceSentences) { + newIds.push_back(_addSingleSentence(connection, sourceSentence, targetSentences.at(index), tmIds.at(index))); + index++; + } + connection.endTransaction(); + return newIds; +} + + +std::vector UnitDAO::getSearchResults(const std::vector & concordiaResults) { std::vector results; DBconnection connection; connection.startTransaction(); - BOOST_FOREACH(MatchedPatternFragment & fragment, concordiaResults) { + BOOST_FOREACH(const MatchedPatternFragment & fragment, concordiaResults) { std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;"; std::vector params; params.push_back(new IntParam(2*fragment.getExampleOffset()+1)); params.push_back(new IntParam(2*(fragment.getExampleOffset()+fragment.getMatchedLength()))); params.push_back(new IntParam(fragment.getExampleId())); PGresult * result = connection.execute(query, params); - results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0), connection.getIntValue(result,0,3), connection.getIntValue(result,0,4), @@ -70,14 +70,37 @@ std::vector UnitDAO::getSearchResults(std::vector UnitDAO::_getTokenPositions(boost::shared_ptr ts) { +std::vector UnitDAO::_getTokenPositions(const TokenizedSentence & ts) { std::vector result; - BOOST_FOREACH(const TokenAnnotation & token, ts->getTokens()) { + BOOST_FOREACH(const TokenAnnotation & token, ts.getTokens()) { result.push_back(token.getStart()); result.push_back(token.getEnd()); } return result; } +int UnitDAO::_addSingleSentence( + DBconnection & connection, + const TokenizedSentence & sourceSentence, + const std::string & targetSentence, + const int tmId) { + std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens) values($1::text,$2::text,$3::integer,$4) RETURNING id"; + std::vector params; + params.push_back(new StringParam(sourceSentence.getSentence())); + params.push_back(new StringParam(targetSentence)); + params.push_back(new IntParam(tmId)); + params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence))); + + PGresult * result = connection.execute(query, params); + int newId = connection.getIntValue(result, 0, 0); + connection.clearResult(result); + BOOST_FOREACH (QueryParam * param, params) { + delete param; + } + + return newId; +} + + diff --git a/concordia-server/unit_dao.hpp b/concordia-server/unit_dao.hpp index 8b5f44e..dbe6062 100644 --- a/concordia-server/unit_dao.hpp +++ b/concordia-server/unit_dao.hpp @@ -4,12 +4,14 @@ #include #include +#include #include #include #include #include #include "simple_search_result.hpp" +#include "db_connection.hpp" class UnitDAO { public: @@ -21,14 +23,26 @@ public: virtual ~UnitDAO(); int addSentence( - boost::shared_ptr sourceSentence, - std::string & targetSentence, - int tmId); + const TokenizedSentence & sourceSentence, + const std::string & targetSentence, + const int tmId); - std::vector getSearchResults(std::vector concordiaResults); + std::vector addSentences( + const std::vector & sourceSentences, + const std::vector & targetSentences, + const std::vector & tmIds); + + std::vector getSearchResults(const std::vector & concordiaResults); private: - std::vector _getTokenPositions(boost::shared_ptr ts); + std::vector _getTokenPositions(const TokenizedSentence & ts); + + int _addSingleSentence( + DBconnection & connection, + const TokenizedSentence & sourceSentence, + const std::string & targetSentence, + const int tmId); + }; #endif diff --git a/tests/testCurl.sh b/tests/testCurl.sh index 9ebb6de..6e1d388 100755 --- a/tests/testCurl.sh +++ b/tests/testCurl.sh @@ -1,7 +1,12 @@ #!/bin/sh -#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"Marysia ma rysia", "targetSentence":"Mary has a bobcat", "tmId":1}' http://localhost +# add sentence +#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"I jeszcze jedno zdanie testowe", "targetSentence":"Yet another test sentence", "tmId":1}' http://localhost + +# add sentences +#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentences", "sentences":[[1,"test source one", "test target one"],[4,"test source two", "test target two"],[9,"test source three", "test target three"],[13,"test source four", "test target four"]]}' http://localhost + +# simple search +curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"test source"}' http://localhost -curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"ma rysia"}' http://localhost -