adding multiple sentences

This commit is contained in:
rjawor 2015-08-20 12:38:48 +02:00
parent 2067d1042c
commit 823c1fbdb2
13 changed files with 227 additions and 72 deletions

View File

@ -1 +1,2 @@
- implement connection pooling with PgBouncer
DONE - check the parameters and return types (should be const ref)
DONE - implement connection pooling with PgBouncer

View File

@ -4,17 +4,9 @@
#include <string>
#include "json_generator.hpp"
#define OPERATION_PARAM "operation"
#define PATTERN_PARAM "pattern"
#define SOURCE_SENTENCE_PARAM "sourceSentence"
#define TARGET_SENTENCE_PARAM "targetSentence"
#define TM_ID_PARAM "tmId"
#define ADD_SENTENCE_OP "addSentence"
#define SIMPLE_SEARCH_OP "simpleSearch"
#define CONCORDIA_SEARCH_OP "concordiaSearch"
#include "config.hpp"
#include "logger.hpp"
#include "rapidjson/rapidjson.h"
ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
throw(ConcordiaException) {
@ -48,6 +40,23 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
std::string targetSentence = d[TARGET_SENTENCE_PARAM].GetString();
int tmId = d[TM_ID_PARAM].GetInt();
_indexController->addSentence(jsonWriter, sourceSentence, targetSentence, tmId);
} else if (operation == ADD_SENTENCES_OP) {
std::vector<std::string> sourceSentences;
std::vector<std::string> targetSentences;
std::vector<int> tmIds;
// loading data from json
const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM];
for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
if (sentencesArray[i].Size() != 3) {
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 3 elements");
break;
} else {
tmIds.push_back(sentencesArray[i][0].GetInt());
sourceSentences.push_back(sentencesArray[i][1].GetString());
targetSentences.push_back(sentencesArray[i][2].GetString());
}
}
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmIds);
} else if (operation == SIMPLE_SEARCH_OP) {
std::string pattern = d[PATTERN_PARAM].GetString();
_searcherController->simpleSearch(jsonWriter, pattern);

View File

@ -6,3 +6,17 @@
#define DB_PASSWORD "concordia"
#define DB_HOST "localhost"
#define DB_PORT "6543"
// json syntax
// Names of the fields looked up in an incoming JSON request.
#define OPERATION_PARAM "operation"
#define PATTERN_PARAM "pattern"
#define SOURCE_SENTENCE_PARAM "sourceSentence"
#define TARGET_SENTENCE_PARAM "targetSentence"
#define TM_ID_PARAM "tmId"
// Array field of the batch request; each element is expected to be a
// 3-element array: [tmId, source sentence, target sentence].
#define SENTENCES_PARAM "sentences"
// Values of the "operation" field, one per supported request type.
#define ADD_SENTENCE_OP "addSentence"
#define ADD_SENTENCES_OP "addSentences"
#define SIMPLE_SEARCH_OP "simpleSearch"
#define CONCORDIA_SEARCH_OP "concordiaSearch"

View File

@ -6,3 +6,17 @@
#define DB_PASSWORD "@DB_PASSWORD@"
#define DB_HOST "@DB_HOST@"
#define DB_PORT "@DB_PORT@"
// json syntax
// Names of the fields looked up in an incoming JSON request.
#define OPERATION_PARAM "operation"
#define PATTERN_PARAM "pattern"
#define SOURCE_SENTENCE_PARAM "sourceSentence"
#define TARGET_SENTENCE_PARAM "targetSentence"
#define TM_ID_PARAM "tmId"
// Array field of the batch request; each element is expected to be a
// 3-element array: [tmId, source sentence, target sentence].
#define SENTENCES_PARAM "sentences"
// Values of the "operation" field, one per supported request type.
#define ADD_SENTENCE_OP "addSentence"
#define ADD_SENTENCES_OP "addSentences"
#define SIMPLE_SEARCH_OP "simpleSearch"
#define CONCORDIA_SEARCH_OP "concordiaSearch"

View File

@ -122,14 +122,26 @@ void DBconnection::clearResult(PGresult * result) {
PQclear(result);
}
int DBconnection::getIntValue(PGresult * result, int row, int col) {
char * valueStr = PQgetvalue(result,row,col);
return strtol(valueStr, NULL, 10);
/**
 * Reads one field of a query result and converts it to an integer.
 *
 * \param result query result to read from
 * \param row zero-based row number
 * \param col zero-based column number
 * \returns the field parsed as a base-10 integer by strtol (text that does
 *          not start with a number, including an empty field, yields 0)
 * \throws ConcordiaException if no value can be read at (row, col)
 */
int DBconnection::getIntValue(PGresult * result, int row, int col) throw (ConcordiaException) {
    // PQgetvalue and strtol are C functions and never throw, so a try/catch
    // cannot detect a failed read. The failure signal is a null pointer, and
    // handing that to strtol would be undefined behaviour.
    const char * valueStr = PQgetvalue(result, row, col);
    if (valueStr == NULL) {
        std::stringstream ss;
        ss << "Error getting int value. Message: no value at row "
           << row << ", column " << col;
        throw ConcordiaException(ss.str());
    }
    return strtol(valueStr, NULL, 10);
}
std::string DBconnection::getStringValue(PGresult * result, int row, int col) {
char * valueStr = PQgetvalue(result,row,col);
return std::string(valueStr);
/**
 * Reads one field of a query result as a string.
 *
 * \param result query result to read from
 * \param row zero-based row number
 * \param col zero-based column number
 * \returns a copy of the field's text
 * \throws ConcordiaException if no value can be read at (row, col), or if
 *         building the string fails with a standard exception
 */
std::string DBconnection::getStringValue(PGresult * result, int row, int col) throw (ConcordiaException) {
    // PQgetvalue is a C function and never throws; a failed read shows up as
    // a null pointer, and constructing std::string from it would be
    // undefined behaviour.
    const char * valueStr = PQgetvalue(result, row, col);
    if (valueStr == NULL) {
        std::stringstream ss;
        ss << "Error getting string value. Message: no value at row "
           << row << ", column " << col;
        throw ConcordiaException(ss.str());
    }
    try {
        return std::string(valueStr);
    } catch (std::exception & e) {
        // Keep translating standard exceptions (e.g. allocation failure)
        // into the only exception type this function is declared to throw.
        std::stringstream ss;
        ss << "Error getting string value. Message: " << e.what();
        throw ConcordiaException(ss.str());
    }
}

View File

@ -29,9 +29,9 @@ public:
void clearResult(PGresult * result);
int getIntValue(PGresult * result, int row, int col);
int getIntValue(PGresult * result, int row, int col) throw (ConcordiaException);
std::string getStringValue(PGresult * result, int row, int col);
std::string getStringValue(PGresult * result, int row, int col) throw (ConcordiaException);
private:
void close();

View File

@ -1,5 +1,7 @@
#include "index_controller.hpp"
#include <concordia/common/config.hpp>
#include "json_generator.hpp"
IndexController::IndexController(boost::shared_ptr<Concordia> concordia)
@ -13,12 +15,12 @@ IndexController::~IndexController() {
void IndexController::addSentence(
rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
std::string & sourceSentence,
std::string & targetSentence,
int tmId) {
const std::string & sourceSentence,
const std::string & targetSentence,
const int tmId) {
try {
boost::shared_ptr<TokenizedSentence> tokenizedSentence = _concordia->tokenize(sourceSentence);
TokenizedSentence tokenizedSentence = _concordia->tokenize(sourceSentence);
int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId);
_concordia->addTokenizedExample(tokenizedSentence, sentenceId);
_concordia->refreshSAfromRAM();
@ -34,3 +36,25 @@ void IndexController::addSentence(
}
}
/**
 * Adds a batch of sentence pairs and reports the outcome as JSON.
 *
 * The source sentences are tokenized, stored through the unit DAO, added to
 * the concordia index under the ids returned by the DAO, and the index is
 * then refreshed via refreshSAfromRAM(). On success the object
 * {"status":"success"} is written to jsonWriter; a ConcordiaException is
 * reported through JsonGenerator::signalError instead.
 *
 * \param jsonWriter writer receiving the response
 * \param sourceSentences source-side sentences
 * \param targetSentences target-side sentences, passed to the DAO alongside the sources
 * \param tmIds translation memory ids, passed to the DAO alongside the sources
 */
void IndexController::addSentences(
        rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
        const std::vector<std::string> & sourceSentences,
        const std::vector<std::string> & targetSentences,
        const std::vector<int> & tmIds) {
    try {
        std::vector<TokenizedSentence> tokenized =
            _concordia->tokenizeAll(sourceSentences);
        std::vector<SUFFIX_MARKER_TYPE> storedIds =
            _unitDAO.addSentences(tokenized, targetSentences, tmIds);

        _concordia->addAllTokenizedExamples(tokenized, storedIds);
        _concordia->refreshSAfromRAM();

        // Response body: {"status":"success"}
        jsonWriter.StartObject();
        jsonWriter.String("status");
        jsonWriter.String("success");
        jsonWriter.EndObject();
    } catch (ConcordiaException & e) {
        const std::string errorMessage = std::string("concordia error: ") + e.what();
        JsonGenerator::signalError(jsonWriter, errorMessage);
    }
}

View File

@ -21,9 +21,14 @@ public:
virtual ~IndexController();
void addSentence(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
std::string & sourceSentence,
std::string & targetSentence,
int tmId);
const std::string & sourceSentence,
const std::string & targetSentence,
const int tmId);
void addSentences(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const std::vector<std::string> & sourceSentences,
const std::vector<std::string> & targetSentences,
const std::vector<int> & tmIds);
private:
boost::shared_ptr<Concordia> _concordia;

View File

@ -1,31 +1,56 @@
#include "logger.hpp"
#include "log4cpp/Category.hh"
#include "log4cpp/Appender.hh"
#include "log4cpp/FileAppender.hh"
#include "log4cpp/BasicLayout.hh"
#include "log4cpp/Priority.hh"
#include <sstream>
// Empty constructor: performs no setup of its own.
Logger::Logger() {
}
// Empty destructor: nothing is released here.
Logger::~Logger() {
}
int Logger::initialized = 0;
int Logger::_initialized = 0;
void Logger::log(std::string message) {
log4cpp::Category & root = log4cpp::Category::getRoot();
if (initialized == 0) {
log4cpp::Appender *appender = new log4cpp::FileAppender("default", "/tmp/concordia-server.log");
appender->setLayout(new log4cpp::BasicLayout());
root.setPriority(log4cpp::Priority::INFO);
root.addAppender(appender);
initialized = 1;
}
if (_initialized == 0) {
_initialize(root);
}
root.info(message);
}
void Logger::logInt(std::string name, int value) {
log4cpp::Category & root = log4cpp::Category::getRoot();
if (_initialized == 0) {
_initialize(root);
}
std::stringstream ss;
ss << " " << name << ": " << value;
root.info(ss.str());
}
void Logger::logString(std::string name, std::string value) {
log4cpp::Category & root = log4cpp::Category::getRoot();
if (_initialized == 0) {
_initialize(root);
}
std::stringstream ss;
ss << " " << name << ": " << value;
root.info(ss.str());
}
// One-time setup of the given category: INFO priority and a file appender
// with a basic layout writing to /tmp/concordia-server.log. Sets the
// _initialized flag so callers skip this on later log calls.
// NOTE(review): the appender and layout are not deleted here; presumably
// log4cpp takes ownership of both - confirm.
void Logger::_initialize(log4cpp::Category & root) {
    root.setPriority(log4cpp::Priority::INFO);

    log4cpp::Appender * fileAppender =
        new log4cpp::FileAppender("default", "/tmp/concordia-server.log");
    fileAppender->setLayout(new log4cpp::BasicLayout());
    root.addAppender(fileAppender);

    _initialized = 1;
}

View File

@ -4,6 +4,9 @@
#include <string>
#include <sstream>
#include "log4cpp/Category.hh"
class Logger {
public:
/*! Constructor.
@ -14,8 +17,14 @@ public:
virtual ~Logger();
static void log(std::string message);
static void logInt(std::string name, int value);
static void logString(std::string name, std::string value);
private:
static int initialized;
static void _initialize(log4cpp::Category & root);
static int _initialized;
};
#endif

View File

@ -1,6 +1,5 @@
#include "unit_dao.hpp"
#include "db_connection.hpp"
#include "query_param.hpp"
#include "string_param.hpp"
#include "int_param.hpp"
@ -18,46 +17,47 @@ UnitDAO::~UnitDAO() {
}
int UnitDAO::addSentence(
boost::shared_ptr<TokenizedSentence> sourceSentence,
std::string & targetSentence,
int tmId) {
const TokenizedSentence & sourceSentence,
const std::string & targetSentence,
const int tmId) {
DBconnection connection;
connection.startTransaction();
std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens) values($1::text,$2::text,$3::integer,$4) RETURNING id";
std::vector<QueryParam*> params;
params.push_back(new StringParam(sourceSentence->getSentence()));
params.push_back(new StringParam(targetSentence));
params.push_back(new IntParam(tmId));
params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));
PGresult * result = connection.execute(query, params);
int newId = connection.getIntValue(result, 0, 0);
connection.clearResult(result);
int newId = _addSingleSentence(connection, sourceSentence, targetSentence, tmId);
connection.endTransaction();
BOOST_FOREACH (QueryParam * param, params) {
delete param;
}
return newId;
}
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(std::vector<MatchedPatternFragment> concordiaResults) {
std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
const std::vector<TokenizedSentence> & sourceSentences,
const std::vector<std::string> & targetSentences,
const std::vector<int> & tmIds) {
DBconnection connection;
std::vector<SUFFIX_MARKER_TYPE> newIds;
connection.startTransaction();
int index = 0;
BOOST_FOREACH(const TokenizedSentence & sourceSentence, sourceSentences) {
newIds.push_back(_addSingleSentence(connection, sourceSentence, targetSentences.at(index), tmIds.at(index)));
index++;
}
connection.endTransaction();
return newIds;
}
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults) {
std::vector<SimpleSearchResult> results;
DBconnection connection;
connection.startTransaction();
BOOST_FOREACH(MatchedPatternFragment & fragment, concordiaResults) {
BOOST_FOREACH(const MatchedPatternFragment & fragment, concordiaResults) {
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
std::vector<QueryParam*> params;
params.push_back(new IntParam(2*fragment.getExampleOffset()+1));
params.push_back(new IntParam(2*(fragment.getExampleOffset()+fragment.getMatchedLength())));
params.push_back(new IntParam(fragment.getExampleId()));
PGresult * result = connection.execute(query, params);
results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0),
connection.getIntValue(result,0,3),
connection.getIntValue(result,0,4),
@ -70,14 +70,37 @@ std::vector<SimpleSearchResult> UnitDAO::getSearchResults(std::vector<MatchedPat
}
std::vector<int> UnitDAO::_getTokenPositions(boost::shared_ptr<TokenizedSentence> ts) {
std::vector<int> UnitDAO::_getTokenPositions(const TokenizedSentence & ts) {
std::vector<int> result;
BOOST_FOREACH(const TokenAnnotation & token, ts->getTokens()) {
BOOST_FOREACH(const TokenAnnotation & token, ts.getTokens()) {
result.push_back(token.getStart());
result.push_back(token.getEnd());
}
return result;
}
/**
 * Inserts a single unit row over an already opened connection.
 *
 * Transaction handling is left to the caller; this method only executes the
 * INSERT and reads back the generated id.
 *
 * \param connection connection to execute the statement on
 * \param sourceSentence tokenized source sentence (text and token positions are stored)
 * \param targetSentence target sentence text
 * \param tmId translation memory id
 * \returns the id produced by the statement's RETURNING clause
 */
int UnitDAO::_addSingleSentence(
        DBconnection & connection,
        const TokenizedSentence & sourceSentence,
        const std::string & targetSentence,
        const int tmId) {
    // Deletes the heap-allocated query parameters when the scope is left,
    // including when execute() or getIntValue() throws.
    struct ParamsOwner {
        std::vector<QueryParam*> items;
        ~ParamsOwner() {
            for (std::size_t i = 0; i < items.size(); ++i) {
                delete items[i];
            }
        }
    };
    // Clears the query result when the scope is left, on every exit path.
    struct ResultOwner {
        DBconnection & connection;
        PGresult * result;
        ResultOwner(DBconnection & c, PGresult * r) : connection(c), result(r) {}
        ~ResultOwner() {
            connection.clearResult(result);
        }
    };

    std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens) values($1::text,$2::text,$3::integer,$4) RETURNING id";

    ParamsOwner params;
    // Reserving up front keeps push_back from reallocating (and throwing)
    // between a successful 'new' and the pointer being stored.
    params.items.reserve(4);
    params.items.push_back(new StringParam(sourceSentence.getSentence()));
    params.items.push_back(new StringParam(targetSentence));
    params.items.push_back(new IntParam(tmId));
    params.items.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));

    // Declared after params, so the result is cleared before the parameters
    // are deleted - the same order as the explicit cleanup it replaces.
    ResultOwner queryResult(connection, connection.execute(query, params.items));
    return connection.getIntValue(queryResult.result, 0, 0);
}

View File

@ -4,12 +4,14 @@
#include <string>
#include <vector>
#include <concordia/common/config.hpp>
#include <concordia/tokenized_sentence.hpp>
#include <concordia/substring_occurence.hpp>
#include <concordia/matched_pattern_fragment.hpp>
#include <boost/shared_ptr.hpp>
#include "simple_search_result.hpp"
#include "db_connection.hpp"
class UnitDAO {
public:
@ -21,14 +23,26 @@ public:
virtual ~UnitDAO();
int addSentence(
boost::shared_ptr<TokenizedSentence> sourceSentence,
std::string & targetSentence,
int tmId);
const TokenizedSentence & sourceSentence,
const std::string & targetSentence,
const int tmId);
std::vector<SimpleSearchResult> getSearchResults(std::vector<MatchedPatternFragment> concordiaResults);
std::vector<SUFFIX_MARKER_TYPE> addSentences(
const std::vector<TokenizedSentence> & sourceSentences,
const std::vector<std::string> & targetSentences,
const std::vector<int> & tmIds);
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults);
private:
std::vector<int> _getTokenPositions(boost::shared_ptr<TokenizedSentence> ts);
std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
int _addSingleSentence(
DBconnection & connection,
const TokenizedSentence & sourceSentence,
const std::string & targetSentence,
const int tmId);
};
#endif

View File

@ -1,7 +1,12 @@
#!/bin/sh
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"Marysia ma rysia", "targetSentence":"Mary has a bobcat", "tmId":1}' http://localhost
# add sentence
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"I jeszcze jedno zdanie testowe", "targetSentence":"Yet another test sentence", "tmId":1}' http://localhost
# add sentences
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentences", "sentences":[[1,"test source one", "test target one"],[4,"test source two", "test target two"],[9,"test source three", "test target three"],[13,"test source four", "test target four"]]}' http://localhost
# simple search
curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"test source"}' http://localhost
curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"ma rysia"}' http://localhost