adding multiple sentences

This commit is contained in:
rjawor 2015-08-20 12:38:48 +02:00
parent 2067d1042c
commit 823c1fbdb2
13 changed files with 227 additions and 72 deletions

View File

@ -1 +1,2 @@
- implement connection pooling with PgBouncer DONE - check the parameters and return types (should be const ref)
DONE - implement connection pooling with PgBouncer

View File

@ -4,17 +4,9 @@
#include <string> #include <string>
#include "json_generator.hpp" #include "json_generator.hpp"
#include "config.hpp"
#define OPERATION_PARAM "operation" #include "logger.hpp"
#define PATTERN_PARAM "pattern" #include "rapidjson/rapidjson.h"
#define SOURCE_SENTENCE_PARAM "sourceSentence"
#define TARGET_SENTENCE_PARAM "targetSentence"
#define TM_ID_PARAM "tmId"
#define ADD_SENTENCE_OP "addSentence"
#define SIMPLE_SEARCH_OP "simpleSearch"
#define CONCORDIA_SEARCH_OP "concordiaSearch"
ConcordiaServer::ConcordiaServer(const std::string & configFilePath) ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
throw(ConcordiaException) { throw(ConcordiaException) {
@ -48,6 +40,23 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
std::string targetSentence = d[TARGET_SENTENCE_PARAM].GetString(); std::string targetSentence = d[TARGET_SENTENCE_PARAM].GetString();
int tmId = d[TM_ID_PARAM].GetInt(); int tmId = d[TM_ID_PARAM].GetInt();
_indexController->addSentence(jsonWriter, sourceSentence, targetSentence, tmId); _indexController->addSentence(jsonWriter, sourceSentence, targetSentence, tmId);
} else if (operation == ADD_SENTENCES_OP) {
std::vector<std::string> sourceSentences;
std::vector<std::string> targetSentences;
std::vector<int> tmIds;
// loading data from json
const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM];
for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
if (sentencesArray[i].Size() != 3) {
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 3 elements");
break;
} else {
tmIds.push_back(sentencesArray[i][0].GetInt());
sourceSentences.push_back(sentencesArray[i][1].GetString());
targetSentences.push_back(sentencesArray[i][2].GetString());
}
}
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmIds);
} else if (operation == SIMPLE_SEARCH_OP) { } else if (operation == SIMPLE_SEARCH_OP) {
std::string pattern = d[PATTERN_PARAM].GetString(); std::string pattern = d[PATTERN_PARAM].GetString();
_searcherController->simpleSearch(jsonWriter, pattern); _searcherController->simpleSearch(jsonWriter, pattern);

View File

@ -6,3 +6,17 @@
#define DB_PASSWORD "concordia" #define DB_PASSWORD "concordia"
#define DB_HOST "localhost" #define DB_HOST "localhost"
#define DB_PORT "6543" #define DB_PORT "6543"
// json syntax
#define OPERATION_PARAM "operation"
#define PATTERN_PARAM "pattern"
#define SOURCE_SENTENCE_PARAM "sourceSentence"
#define TARGET_SENTENCE_PARAM "targetSentence"
#define TM_ID_PARAM "tmId"
#define SENTENCES_PARAM "sentences"
#define ADD_SENTENCE_OP "addSentence"
#define ADD_SENTENCES_OP "addSentences"
#define SIMPLE_SEARCH_OP "simpleSearch"
#define CONCORDIA_SEARCH_OP "concordiaSearch"

View File

@ -6,3 +6,17 @@
#define DB_PASSWORD "@DB_PASSWORD@" #define DB_PASSWORD "@DB_PASSWORD@"
#define DB_HOST "@DB_HOST@" #define DB_HOST "@DB_HOST@"
#define DB_PORT "@DB_PORT@" #define DB_PORT "@DB_PORT@"
// json syntax
#define OPERATION_PARAM "operation"
#define PATTERN_PARAM "pattern"
#define SOURCE_SENTENCE_PARAM "sourceSentence"
#define TARGET_SENTENCE_PARAM "targetSentence"
#define TM_ID_PARAM "tmId"
#define SENTENCES_PARAM "sentences"
#define ADD_SENTENCE_OP "addSentence"
#define ADD_SENTENCES_OP "addSentences"
#define SIMPLE_SEARCH_OP "simpleSearch"
#define CONCORDIA_SEARCH_OP "concordiaSearch"

View File

@ -122,14 +122,26 @@ void DBconnection::clearResult(PGresult * result) {
PQclear(result); PQclear(result);
} }
// Side-by-side diff rows: the left part of a row is the pre-change text, the
// right part is the post-change text.
// DBconnection::getIntValue fetches the cell at (row, col) of `result` with
// PQgetvalue and converts it to an integer with strtol in base 10.
// The post-change version adds a throw (ConcordiaException) specification and
// converts any std::exception raised inside the body into a ConcordiaException
// whose message starts with "Error getting int value. Message: ".
// NOTE(review): PQgetvalue and strtol are C functions and presumably never
// raise a C++ exception, so the catch branch looks unreachable - confirm.
// strtol yields 0 for text that does not start with a number, so a malformed
// cell would be reported as 0 rather than as an error.
int DBconnection::getIntValue(PGresult * result, int row, int col) { int DBconnection::getIntValue(PGresult * result, int row, int col) throw (ConcordiaException) {
char * valueStr = PQgetvalue(result,row,col); try {
return strtol(valueStr, NULL, 10); char * valueStr = PQgetvalue(result,row,col);
return strtol(valueStr, NULL, 10);
} catch (std::exception & e) {
std::stringstream ss;
ss << "Error getting int value. Message: " << e.what();
throw ConcordiaException(ss.str());
}
} }
// Side-by-side diff rows: the left part of a row is the pre-change text, the
// right part is the post-change text.
// DBconnection::getStringValue fetches the cell at (row, col) of `result` with
// PQgetvalue and returns a std::string copy of it.
// The post-change version adds a throw (ConcordiaException) specification and
// converts any std::exception raised inside the body into a ConcordiaException
// whose message starts with "Error getting string value. Message: ".
// NOTE(review): constructing std::string from a null pointer is undefined
// behaviour, not an exception - confirm PQgetvalue cannot return NULL for the
// row/col values callers pass.
std::string DBconnection::getStringValue(PGresult * result, int row, int col) { std::string DBconnection::getStringValue(PGresult * result, int row, int col) throw (ConcordiaException) {
char * valueStr = PQgetvalue(result,row,col); try {
return std::string(valueStr); char * valueStr = PQgetvalue(result,row,col);
return std::string(valueStr);
} catch (std::exception & e) {
std::stringstream ss;
ss << "Error getting string value. Message: " << e.what();
throw ConcordiaException(ss.str());
}
} }

View File

@ -29,9 +29,9 @@ public:
void clearResult(PGresult * result); void clearResult(PGresult * result);
int getIntValue(PGresult * result, int row, int col); int getIntValue(PGresult * result, int row, int col) throw (ConcordiaException);
std::string getStringValue(PGresult * result, int row, int col); std::string getStringValue(PGresult * result, int row, int col) throw (ConcordiaException);
private: private:
void close(); void close();

View File

@ -1,5 +1,7 @@
#include "index_controller.hpp" #include "index_controller.hpp"
#include <concordia/common/config.hpp>
#include "json_generator.hpp" #include "json_generator.hpp"
IndexController::IndexController(boost::shared_ptr<Concordia> concordia) IndexController::IndexController(boost::shared_ptr<Concordia> concordia)
@ -13,12 +15,12 @@ IndexController::~IndexController() {
void IndexController::addSentence( void IndexController::addSentence(
rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
std::string & sourceSentence, const std::string & sourceSentence,
std::string & targetSentence, const std::string & targetSentence,
int tmId) { const int tmId) {
try { try {
boost::shared_ptr<TokenizedSentence> tokenizedSentence = _concordia->tokenize(sourceSentence); TokenizedSentence tokenizedSentence = _concordia->tokenize(sourceSentence);
int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId); int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId);
_concordia->addTokenizedExample(tokenizedSentence, sentenceId); _concordia->addTokenizedExample(tokenizedSentence, sentenceId);
_concordia->refreshSAfromRAM(); _concordia->refreshSAfromRAM();
@ -34,3 +36,25 @@ void IndexController::addSentence(
} }
} }
// Adds a batch of sentence pairs to the database and to the Concordia index.
//
// jsonWriter      - receives the response: {"status":"success"} when the whole
//                   batch was processed, otherwise whatever
//                   JsonGenerator::signalError emits for the error message.
// sourceSentences - source side of every pair.
// targetSentences - target side of every pair.
// tmIds           - translation memory id of every pair.
//
// The three vectors are parallel (element i of each describes pair i), so they
// must have the same length; a mismatch is reported as an error before
// anything is tokenized or stored.
void IndexController::addSentences(
        rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
        const std::vector<std::string> & sourceSentences,
        const std::vector<std::string> & targetSentences,
        const std::vector<int> & tmIds) {
    // Reject inconsistent input here: further down the DAO indexes the target
    // and tmId vectors by position, and an out-of-range access there would
    // raise an exception that the ConcordiaException handler below does not
    // catch.
    if (sourceSentences.size() != targetSentences.size()
            || sourceSentences.size() != tmIds.size()) {
        JsonGenerator::signalError(jsonWriter,
            "sourceSentences, targetSentences and tmIds must have the same length");
        return;
    }

    try {
        std::vector<TokenizedSentence> tokenizedSentences = _concordia->tokenizeAll(sourceSentences);
        std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmIds);
        _concordia->addAllTokenizedExamples(tokenizedSentences, sentenceIds);
        _concordia->refreshSAfromRAM();

        jsonWriter.StartObject();
        jsonWriter.String("status");
        jsonWriter.String("success");
        jsonWriter.EndObject();
    } catch (ConcordiaException & e) {
        std::stringstream errorstream;
        errorstream << "concordia error: " << e.what();
        JsonGenerator::signalError(jsonWriter, errorstream.str());
    }
}

View File

@ -21,9 +21,14 @@ public:
virtual ~IndexController(); virtual ~IndexController();
void addSentence(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, void addSentence(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
std::string & sourceSentence, const std::string & sourceSentence,
std::string & targetSentence, const std::string & targetSentence,
int tmId); const int tmId);
void addSentences(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const std::vector<std::string> & sourceSentences,
const std::vector<std::string> & targetSentences,
const std::vector<int> & tmIds);
private: private:
boost::shared_ptr<Concordia> _concordia; boost::shared_ptr<Concordia> _concordia;

View File

@ -1,31 +1,56 @@
#include "logger.hpp" #include "logger.hpp"
#include "log4cpp/Category.hh"
#include "log4cpp/Appender.hh" #include "log4cpp/Appender.hh"
#include "log4cpp/FileAppender.hh" #include "log4cpp/FileAppender.hh"
#include "log4cpp/BasicLayout.hh" #include "log4cpp/BasicLayout.hh"
#include "log4cpp/Priority.hh" #include "log4cpp/Priority.hh"
#include <sstream>
// Side-by-side diff rows: left is the pre-change text, right is the
// post-change text. Constructor and destructor are empty in both versions.
Logger::Logger() { Logger::Logger() {
} }
Logger::~Logger() { Logger::~Logger() {
} }
// Static flag that is 0 until the root log4cpp category has been configured
// and 1 afterwards; the change renames it from `initialized` to `_initialized`.
int Logger::initialized = 0; int Logger::_initialized = 0;
// Side-by-side diff rows: left is the pre-change text, right is the
// post-change text.
// Logger::log writes `message` to the root log4cpp category at INFO level.
// On the first call (flag still 0) the root category is configured first:
// the pre-change version does this inline (file appender "default" on
// /tmp/concordia-server.log, BasicLayout, INFO priority), the post-change
// version delegates the same work to _initialize(root).
// NOTE(review): the flag is read and written without synchronization -
// confirm whether log can be called from more than one thread.
void Logger::log(std::string message) { void Logger::log(std::string message) {
log4cpp::Category & root = log4cpp::Category::getRoot(); log4cpp::Category & root = log4cpp::Category::getRoot();
if (initialized == 0) { if (_initialized == 0) {
log4cpp::Appender *appender = new log4cpp::FileAppender("default", "/tmp/concordia-server.log"); _initialize(root);
appender->setLayout(new log4cpp::BasicLayout()); }
root.setPriority(log4cpp::Priority::INFO);
root.addAppender(appender);
initialized = 1;
}
root.info(message); root.info(message);
} }
// Writes one INFO entry of the form " <name>: <value>" for an integer value.
// The entry text is assembled here; looking up the root category, configuring
// it on first use and emitting the INFO record are left to Logger::log, which
// performs exactly those steps.
void Logger::logInt(std::string name, int value) {
    std::stringstream entry;
    entry << " ";
    entry << name;
    entry << ": ";
    entry << value;
    Logger::log(entry.str());
}
// Writes one INFO entry of the form " <name>: <value>" for a string value.
// The entry text is assembled here; looking up the root category, configuring
// it on first use and emitting the INFO record are left to Logger::log, which
// performs exactly those steps.
void Logger::logString(std::string name, std::string value) {
    std::stringstream entry;
    entry << " ";
    entry << name;
    entry << ": ";
    entry << value;
    Logger::log(entry.str());
}
// One-time configuration of the given log4cpp category: INFO priority plus a
// file appender named "default" that writes to /tmp/concordia-server.log using
// BasicLayout. Sets _initialized to 1 so callers do not configure it again.
void Logger::_initialize(log4cpp::Category & root) {
    root.setPriority(log4cpp::Priority::INFO);

    // Neither the appender nor the layout is deleted here; presumably log4cpp
    // takes ownership of both - confirm against the log4cpp documentation.
    log4cpp::Appender * fileAppender =
        new log4cpp::FileAppender("default", "/tmp/concordia-server.log");
    fileAppender->setLayout(new log4cpp::BasicLayout());
    root.addAppender(fileAppender);

    _initialized = 1;
}

View File

@ -4,6 +4,9 @@
#include <string> #include <string>
#include <sstream> #include <sstream>
#include "log4cpp/Category.hh"
class Logger { class Logger {
public: public:
/*! Constructor. /*! Constructor.
@ -14,8 +17,14 @@ public:
virtual ~Logger(); virtual ~Logger();
static void log(std::string message); static void log(std::string message);
static void logInt(std::string name, int value);
static void logString(std::string name, std::string value);
private: private:
static int initialized; static void _initialize(log4cpp::Category & root);
static int _initialized;
}; };
#endif #endif

View File

@ -1,6 +1,5 @@
#include "unit_dao.hpp" #include "unit_dao.hpp"
#include "db_connection.hpp"
#include "query_param.hpp" #include "query_param.hpp"
#include "string_param.hpp" #include "string_param.hpp"
#include "int_param.hpp" #include "int_param.hpp"
@ -18,46 +17,47 @@ UnitDAO::~UnitDAO() {
} }
// Side-by-side diff rows: left is the pre-change text, right is the
// post-change text.
// UnitDAO::addSentence stores one sentence pair and returns the id obtained
// for it. It opens its own DBconnection and wraps the insert in
// startTransaction() / endTransaction().
// Pre-change: takes the tokenized sentence as boost::shared_ptr and builds and
// runs the INSERT ... RETURNING id statement inline, deleting the query
// parameters afterwards.
// Post-change: takes const references and delegates the insert to
// _addSingleSentence(connection, sourceSentence, targetSentence, tmId).
// NOTE(review): if the insert throws, endTransaction() is not reached; what
// the DBconnection destructor does in that case is not visible here.
int UnitDAO::addSentence( int UnitDAO::addSentence(
boost::shared_ptr<TokenizedSentence> sourceSentence, const TokenizedSentence & sourceSentence,
std::string & targetSentence, const std::string & targetSentence,
int tmId) { const int tmId) {
DBconnection connection; DBconnection connection;
connection.startTransaction(); connection.startTransaction();
std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens) values($1::text,$2::text,$3::integer,$4) RETURNING id"; int newId = _addSingleSentence(connection, sourceSentence, targetSentence, tmId);
std::vector<QueryParam*> params;
params.push_back(new StringParam(sourceSentence->getSentence()));
params.push_back(new StringParam(targetSentence));
params.push_back(new IntParam(tmId));
params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));
PGresult * result = connection.execute(query, params);
int newId = connection.getIntValue(result, 0, 0);
connection.clearResult(result);
connection.endTransaction(); connection.endTransaction();
BOOST_FOREACH (QueryParam * param, params) {
delete param;
}
return newId; return newId;
} }
// Side-by-side diff rows: on the first row the left part is the pre-change
// signature of getSearchResults; everything else in this block is the newly
// added UnitDAO::addSentences.
// UnitDAO::addSentences stores a batch of sentence pairs inside a single
// startTransaction() / endTransaction() pair on one DBconnection and returns
// the ids produced by _addSingleSentence, in the order of sourceSentences.
// targetSentences and tmIds are read by position with at(), so element i of
// each must belong to sourceSentences[i]; at() throws std::out_of_range when
// either vector is shorter than sourceSentences.
// NOTE(review): if any insert throws, endTransaction() is not reached; what
// the DBconnection destructor does in that case is not visible here.
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(std::vector<MatchedPatternFragment> concordiaResults) { std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
const std::vector<TokenizedSentence> & sourceSentences,
const std::vector<std::string> & targetSentences,
const std::vector<int> & tmIds) {
DBconnection connection;
std::vector<SUFFIX_MARKER_TYPE> newIds;
connection.startTransaction();
int index = 0;
BOOST_FOREACH(const TokenizedSentence & sourceSentence, sourceSentences) {
newIds.push_back(_addSingleSentence(connection, sourceSentence, targetSentences.at(index), tmIds.at(index)));
index++;
}
connection.endTransaction();
return newIds;
}
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults) {
std::vector<SimpleSearchResult> results; std::vector<SimpleSearchResult> results;
DBconnection connection; DBconnection connection;
connection.startTransaction(); connection.startTransaction();
BOOST_FOREACH(MatchedPatternFragment & fragment, concordiaResults) { BOOST_FOREACH(const MatchedPatternFragment & fragment, concordiaResults) {
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;"; std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
std::vector<QueryParam*> params; std::vector<QueryParam*> params;
params.push_back(new IntParam(2*fragment.getExampleOffset()+1)); params.push_back(new IntParam(2*fragment.getExampleOffset()+1));
params.push_back(new IntParam(2*(fragment.getExampleOffset()+fragment.getMatchedLength()))); params.push_back(new IntParam(2*(fragment.getExampleOffset()+fragment.getMatchedLength())));
params.push_back(new IntParam(fragment.getExampleId())); params.push_back(new IntParam(fragment.getExampleId()));
PGresult * result = connection.execute(query, params); PGresult * result = connection.execute(query, params);
results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0), results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0),
connection.getIntValue(result,0,3), connection.getIntValue(result,0,3),
connection.getIntValue(result,0,4), connection.getIntValue(result,0,4),
@ -70,14 +70,37 @@ std::vector<SimpleSearchResult> UnitDAO::getSearchResults(std::vector<MatchedPat
} }
// Side-by-side diff rows: left is the pre-change text, right is the
// post-change text.
// UnitDAO::_getTokenPositions flattens the tokens of a tokenized sentence into
// one vector of integers: for every token, in getTokens() order, its
// getStart() value followed by its getEnd() value.
// The change only switches the parameter from boost::shared_ptr to a const
// reference (ts->getTokens() becomes ts.getTokens()).
std::vector<int> UnitDAO::_getTokenPositions(boost::shared_ptr<TokenizedSentence> ts) { std::vector<int> UnitDAO::_getTokenPositions(const TokenizedSentence & ts) {
std::vector<int> result; std::vector<int> result;
BOOST_FOREACH(const TokenAnnotation & token, ts->getTokens()) { BOOST_FOREACH(const TokenAnnotation & token, ts.getTokens()) {
result.push_back(token.getStart()); result.push_back(token.getStart());
result.push_back(token.getEnd()); result.push_back(token.getEnd());
} }
return result; return result;
} }
// Inserts one sentence pair into the `unit` table over an already opened
// connection and returns the id reported by the INSERT ... RETURNING clause.
//
// connection     - connection to run the statement on; transaction handling
//                  is left to the caller.
// sourceSentence - tokenized source sentence; its text and its token
//                  start/end positions are stored.
// targetSentence - target sentence text.
// tmId           - translation memory id stored with the pair.
//
// Exceptions raised by the connection are propagated; the query parameters and
// the query result are released on every exit path.
int UnitDAO::_addSingleSentence(
        DBconnection & connection,
        const TokenizedSentence & sourceSentence,
        const std::string & targetSentence,
        const int tmId) {
    std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens) values($1::text,$2::text,$3::integer,$4) RETURNING id";

    // The parameters only have to live for the duration of this call (they
    // used to be deleted right after the query), so automatic storage is
    // enough and nothing leaks when execute() or getIntValue() throws.
    StringParam sourceParam(sourceSentence.getSentence());
    StringParam targetParam(targetSentence);
    IntParam tmIdParam(tmId);
    IntArrayParam tokenPositionsParam(_getTokenPositions(sourceSentence));

    std::vector<QueryParam*> params;
    params.push_back(&sourceParam);
    params.push_back(&targetParam);
    params.push_back(&tmIdParam);
    params.push_back(&tokenPositionsParam);

    PGresult * result = connection.execute(query, params);
    int newId;
    try {
        newId = connection.getIntValue(result, 0, 0);
    } catch (...) {
        // Release the libpq result before passing the failure on.
        connection.clearResult(result);
        throw;
    }
    connection.clearResult(result);

    return newId;
}

View File

@ -4,12 +4,14 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <concordia/common/config.hpp>
#include <concordia/tokenized_sentence.hpp> #include <concordia/tokenized_sentence.hpp>
#include <concordia/substring_occurence.hpp> #include <concordia/substring_occurence.hpp>
#include <concordia/matched_pattern_fragment.hpp> #include <concordia/matched_pattern_fragment.hpp>
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include "simple_search_result.hpp" #include "simple_search_result.hpp"
#include "db_connection.hpp"
class UnitDAO { class UnitDAO {
public: public:
@ -21,14 +23,26 @@ public:
virtual ~UnitDAO(); virtual ~UnitDAO();
int addSentence( int addSentence(
boost::shared_ptr<TokenizedSentence> sourceSentence, const TokenizedSentence & sourceSentence,
std::string & targetSentence, const std::string & targetSentence,
int tmId); const int tmId);
std::vector<SimpleSearchResult> getSearchResults(std::vector<MatchedPatternFragment> concordiaResults); std::vector<SUFFIX_MARKER_TYPE> addSentences(
const std::vector<TokenizedSentence> & sourceSentences,
const std::vector<std::string> & targetSentences,
const std::vector<int> & tmIds);
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults);
private: private:
std::vector<int> _getTokenPositions(boost::shared_ptr<TokenizedSentence> ts); std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
int _addSingleSentence(
DBconnection & connection,
const TokenizedSentence & sourceSentence,
const std::string & targetSentence,
const int tmId);
}; };
#endif #endif

View File

@ -1,7 +1,12 @@
#!/bin/sh #!/bin/sh
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"Marysia ma rysia", "targetSentence":"Mary has a bobcat", "tmId":1}' http://localhost # add sentence
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"I jeszcze jedno zdanie testowe", "targetSentence":"Yet another test sentence", "tmId":1}' http://localhost
# add sentences
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentences", "sentences":[[1,"test source one", "test target one"],[4,"test source two", "test target two"],[9,"test source three", "test target three"],[13,"test source four", "test target four"]]}' http://localhost
# simple search
curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"test source"}' http://localhost
curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"ma rysia"}' http://localhost