adding multiple sentences
This commit is contained in:
parent
2067d1042c
commit
823c1fbdb2
3
TODO.txt
3
TODO.txt
@ -1 +1,2 @@
|
||||
- implement connection pooling with PgBouncer
|
||||
DONE - check the parameters and return types (should be const ref)
|
||||
DONE - implement connection pooling with PgBouncer
|
||||
|
@ -4,17 +4,9 @@
|
||||
#include <string>
|
||||
|
||||
#include "json_generator.hpp"
|
||||
|
||||
#define OPERATION_PARAM "operation"
|
||||
#define PATTERN_PARAM "pattern"
|
||||
#define SOURCE_SENTENCE_PARAM "sourceSentence"
|
||||
#define TARGET_SENTENCE_PARAM "targetSentence"
|
||||
#define TM_ID_PARAM "tmId"
|
||||
|
||||
#define ADD_SENTENCE_OP "addSentence"
|
||||
#define SIMPLE_SEARCH_OP "simpleSearch"
|
||||
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
||||
|
||||
#include "config.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "rapidjson/rapidjson.h"
|
||||
|
||||
ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
|
||||
throw(ConcordiaException) {
|
||||
@ -48,6 +40,23 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
||||
std::string targetSentence = d[TARGET_SENTENCE_PARAM].GetString();
|
||||
int tmId = d[TM_ID_PARAM].GetInt();
|
||||
_indexController->addSentence(jsonWriter, sourceSentence, targetSentence, tmId);
|
||||
} else if (operation == ADD_SENTENCES_OP) {
|
||||
std::vector<std::string> sourceSentences;
|
||||
std::vector<std::string> targetSentences;
|
||||
std::vector<int> tmIds;
|
||||
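// Load the batch from JSON: each entry of "sentences" must be a 3-element
// array of the form [tmId, sourceSentence, targetSentence].
// NOTE(review): a malformed entry signals an error and stops loading, but the
// entries loaded before it are still passed to addSentences() below -- confirm
// that adding a partial batch after an error was signalled is intended.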
|
||||
const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM];
|
||||
for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
|
||||
if (sentencesArray[i].Size() != 3) {
|
||||
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 3 elements");
|
||||
break;
|
||||
} else {
|
||||
tmIds.push_back(sentencesArray[i][0].GetInt());
|
||||
sourceSentences.push_back(sentencesArray[i][1].GetString());
|
||||
targetSentences.push_back(sentencesArray[i][2].GetString());
|
||||
}
|
||||
}
|
||||
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmIds);
|
||||
} else if (operation == SIMPLE_SEARCH_OP) {
|
||||
std::string pattern = d[PATTERN_PARAM].GetString();
|
||||
_searcherController->simpleSearch(jsonWriter, pattern);
|
||||
|
@ -6,3 +6,17 @@
|
||||
#define DB_PASSWORD "concordia"
|
||||
#define DB_HOST "localhost"
|
||||
#define DB_PORT "6543"
|
||||
|
||||
// json syntax
|
||||
#define OPERATION_PARAM "operation"
|
||||
#define PATTERN_PARAM "pattern"
|
||||
#define SOURCE_SENTENCE_PARAM "sourceSentence"
|
||||
#define TARGET_SENTENCE_PARAM "targetSentence"
|
||||
#define TM_ID_PARAM "tmId"
|
||||
#define SENTENCES_PARAM "sentences"
|
||||
|
||||
#define ADD_SENTENCE_OP "addSentence"
|
||||
#define ADD_SENTENCES_OP "addSentences"
|
||||
#define SIMPLE_SEARCH_OP "simpleSearch"
|
||||
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
||||
|
||||
|
@ -6,3 +6,17 @@
|
||||
#define DB_PASSWORD "@DB_PASSWORD@"
|
||||
#define DB_HOST "@DB_HOST@"
|
||||
#define DB_PORT "@DB_PORT@"
|
||||
|
||||
// json syntax
|
||||
#define OPERATION_PARAM "operation"
|
||||
#define PATTERN_PARAM "pattern"
|
||||
#define SOURCE_SENTENCE_PARAM "sourceSentence"
|
||||
#define TARGET_SENTENCE_PARAM "targetSentence"
|
||||
#define TM_ID_PARAM "tmId"
|
||||
#define SENTENCES_PARAM "sentences"
|
||||
|
||||
#define ADD_SENTENCE_OP "addSentence"
|
||||
#define ADD_SENTENCES_OP "addSentences"
|
||||
#define SIMPLE_SEARCH_OP "simpleSearch"
|
||||
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
||||
|
||||
|
@ -122,14 +122,26 @@ void DBconnection::clearResult(PGresult * result) {
|
||||
PQclear(result);
|
||||
}
|
||||
|
||||
int DBconnection::getIntValue(PGresult * result, int row, int col) {
|
||||
// Reads the field at (row, col) of a query result and parses it as a base-10
// integer.
//
// result - result set to read from.
// row    - row index passed to PQgetvalue.
// col    - column index passed to PQgetvalue.
//
// Returns the parsed value narrowed to int. Text that does not start with a
// number (including the empty string) parses as 0, as strtol defines.
//
// Throws ConcordiaException when PQgetvalue yields no value at all.
int DBconnection::getIntValue(PGresult * result, int row, int col) throw (ConcordiaException) {
    // PQgetvalue and strtol are C functions and cannot throw, so a try/catch
    // around them never fires; a missing value has to be detected explicitly.
    // NOTE(review): libpq appears to return NULL for a NULL result or an
    // out-of-range row/column -- confirm against the PQgetvalue documentation.
    const char * valueStr = PQgetvalue(result, row, col);
    if (valueStr == NULL) {
        std::stringstream ss;
        ss << "Error getting int value. Message: no value at row " << row
           << ", column " << col;
        throw ConcordiaException(ss.str());
    }
    return static_cast<int>(strtol(valueStr, NULL, 10));
}
|
||||
|
||||
std::string DBconnection::getStringValue(PGresult * result, int row, int col) {
|
||||
// Reads the field at (row, col) of a query result and returns it as a
// std::string.
//
// result - result set to read from.
// row    - row index passed to PQgetvalue.
// col    - column index passed to PQgetvalue.
//
// Throws ConcordiaException when PQgetvalue yields no value at all, or when
// building the string raises a standard exception.
std::string DBconnection::getStringValue(PGresult * result, int row, int col) throw (ConcordiaException) {
    // Constructing std::string from a null pointer is undefined behaviour, so
    // the missing-value case is rejected before the string is built.
    // NOTE(review): libpq appears to return NULL for a NULL result or an
    // out-of-range row/column -- confirm against the PQgetvalue documentation.
    const char * valueStr = PQgetvalue(result, row, col);
    if (valueStr == NULL) {
        std::stringstream ss;
        ss << "Error getting string value. Message: no value at row " << row
           << ", column " << col;
        throw ConcordiaException(ss.str());
    }

    try {
        return std::string(valueStr);
    } catch (std::exception & e) {
        // Only ConcordiaException may leave this function (see the exception
        // specification), so standard exceptions are translated here.
        std::stringstream ss;
        ss << "Error getting string value. Message: " << e.what();
        throw ConcordiaException(ss.str());
    }
}
|
||||
|
||||
|
||||
|
@ -29,9 +29,9 @@ public:
|
||||
|
||||
void clearResult(PGresult * result);
|
||||
|
||||
int getIntValue(PGresult * result, int row, int col);
|
||||
int getIntValue(PGresult * result, int row, int col) throw (ConcordiaException);
|
||||
|
||||
std::string getStringValue(PGresult * result, int row, int col);
|
||||
std::string getStringValue(PGresult * result, int row, int col) throw (ConcordiaException);
|
||||
|
||||
private:
|
||||
void close();
|
||||
|
@ -1,5 +1,7 @@
|
||||
#include "index_controller.hpp"
|
||||
|
||||
#include <concordia/common/config.hpp>
|
||||
|
||||
#include "json_generator.hpp"
|
||||
|
||||
IndexController::IndexController(boost::shared_ptr<Concordia> concordia)
|
||||
@ -13,12 +15,12 @@ IndexController::~IndexController() {
|
||||
|
||||
void IndexController::addSentence(
|
||||
rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
std::string & sourceSentence,
|
||||
std::string & targetSentence,
|
||||
int tmId) {
|
||||
const std::string & sourceSentence,
|
||||
const std::string & targetSentence,
|
||||
const int tmId) {
|
||||
|
||||
try {
|
||||
boost::shared_ptr<TokenizedSentence> tokenizedSentence = _concordia->tokenize(sourceSentence);
|
||||
TokenizedSentence tokenizedSentence = _concordia->tokenize(sourceSentence);
|
||||
int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId);
|
||||
_concordia->addTokenizedExample(tokenizedSentence, sentenceId);
|
||||
_concordia->refreshSAfromRAM();
|
||||
@ -34,3 +36,25 @@ void IndexController::addSentence(
|
||||
}
|
||||
}
|
||||
|
||||
// Adds a batch of sentence pairs: tokenizes the source sentences, stores the
// units through the DAO, adds the tokenized examples to the Concordia index
// under the ids the DAO returned, and refreshes the suffix array from RAM.
//
// jsonWriter      - receives {"status":"success"} on success, or the error
//                   produced by JsonGenerator::signalError on failure.
// sourceSentences - source-side sentences to tokenize and add.
// targetSentences - target-side sentences handed to the DAO.
// tmIds           - tm ids handed to the DAO.
//
// A ConcordiaException raised by any step is reported through jsonWriter and
// is not rethrown.
void IndexController::addSentences(
    rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
    const std::vector<std::string> & sourceSentences,
    const std::vector<std::string> & targetSentences,
    const std::vector<int> & tmIds) {
    try {
        std::vector<TokenizedSentence> tokenized =
            _concordia->tokenizeAll(sourceSentences);
        std::vector<SUFFIX_MARKER_TYPE> storedIds =
            _unitDAO.addSentences(tokenized, targetSentences, tmIds);

        _concordia->addAllTokenizedExamples(tokenized, storedIds);
        _concordia->refreshSAfromRAM();

        // Success response: {"status":"success"}
        jsonWriter.StartObject();
        jsonWriter.String("status");
        jsonWriter.String("success");
        jsonWriter.EndObject();
    } catch (ConcordiaException & e) {
        const std::string errorMessage = std::string("concordia error: ") + e.what();
        JsonGenerator::signalError(jsonWriter, errorMessage);
    }
}
|
||||
|
||||
|
@ -21,9 +21,14 @@ public:
|
||||
virtual ~IndexController();
|
||||
|
||||
void addSentence(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
std::string & sourceSentence,
|
||||
std::string & targetSentence,
|
||||
int tmId);
|
||||
const std::string & sourceSentence,
|
||||
const std::string & targetSentence,
|
||||
const int tmId);
|
||||
|
||||
void addSentences(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const std::vector<std::string> & sourceSentences,
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const std::vector<int> & tmIds);
|
||||
|
||||
private:
|
||||
boost::shared_ptr<Concordia> _concordia;
|
||||
|
@ -1,31 +1,56 @@
|
||||
#include "logger.hpp"
|
||||
|
||||
#include "log4cpp/Category.hh"
|
||||
#include "log4cpp/Appender.hh"
|
||||
#include "log4cpp/FileAppender.hh"
|
||||
#include "log4cpp/BasicLayout.hh"
|
||||
#include "log4cpp/Priority.hh"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
Logger::Logger() {
|
||||
}
|
||||
|
||||
Logger::~Logger() {
|
||||
}
|
||||
|
||||
int Logger::initialized = 0;
|
||||
int Logger::_initialized = 0;
|
||||
|
||||
void Logger::log(std::string message) {
|
||||
log4cpp::Category & root = log4cpp::Category::getRoot();
|
||||
if (initialized == 0) {
|
||||
if (_initialized == 0) {
|
||||
_initialize(root);
|
||||
}
|
||||
root.info(message);
|
||||
}
|
||||
|
||||
void Logger::logInt(std::string name, int value) {
|
||||
log4cpp::Category & root = log4cpp::Category::getRoot();
|
||||
if (_initialized == 0) {
|
||||
_initialize(root);
|
||||
}
|
||||
std::stringstream ss;
|
||||
ss << " " << name << ": " << value;
|
||||
root.info(ss.str());
|
||||
}
|
||||
|
||||
void Logger::logString(std::string name, std::string value) {
|
||||
log4cpp::Category & root = log4cpp::Category::getRoot();
|
||||
if (_initialized == 0) {
|
||||
_initialize(root);
|
||||
}
|
||||
std::stringstream ss;
|
||||
ss << " " << name << ": " << value;
|
||||
root.info(ss.str());
|
||||
}
|
||||
|
||||
void Logger::_initialize(log4cpp::Category & root) {
|
||||
log4cpp::Appender *appender = new log4cpp::FileAppender("default", "/tmp/concordia-server.log");
|
||||
appender->setLayout(new log4cpp::BasicLayout());
|
||||
|
||||
root.setPriority(log4cpp::Priority::INFO);
|
||||
root.addAppender(appender);
|
||||
|
||||
initialized = 1;
|
||||
}
|
||||
root.info(message);
|
||||
_initialized = 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -4,6 +4,9 @@
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include "log4cpp/Category.hh"
|
||||
|
||||
|
||||
class Logger {
|
||||
public:
|
||||
/*! Constructor.
|
||||
@ -14,8 +17,14 @@ public:
|
||||
virtual ~Logger();
|
||||
|
||||
static void log(std::string message);
|
||||
|
||||
static void logInt(std::string name, int value);
|
||||
|
||||
static void logString(std::string name, std::string value);
|
||||
private:
|
||||
static int initialized;
|
||||
static void _initialize(log4cpp::Category & root);
|
||||
|
||||
static int _initialized;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,6 +1,5 @@
|
||||
#include "unit_dao.hpp"
|
||||
|
||||
#include "db_connection.hpp"
|
||||
#include "query_param.hpp"
|
||||
#include "string_param.hpp"
|
||||
#include "int_param.hpp"
|
||||
@ -18,46 +17,47 @@ UnitDAO::~UnitDAO() {
|
||||
}
|
||||
|
||||
int UnitDAO::addSentence(
|
||||
boost::shared_ptr<TokenizedSentence> sourceSentence,
|
||||
std::string & targetSentence,
|
||||
int tmId) {
|
||||
const TokenizedSentence & sourceSentence,
|
||||
const std::string & targetSentence,
|
||||
const int tmId) {
|
||||
|
||||
DBconnection connection;
|
||||
connection.startTransaction();
|
||||
std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens) values($1::text,$2::text,$3::integer,$4) RETURNING id";
|
||||
std::vector<QueryParam*> params;
|
||||
params.push_back(new StringParam(sourceSentence->getSentence()));
|
||||
params.push_back(new StringParam(targetSentence));
|
||||
params.push_back(new IntParam(tmId));
|
||||
params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));
|
||||
|
||||
PGresult * result = connection.execute(query, params);
|
||||
int newId = connection.getIntValue(result, 0, 0);
|
||||
connection.clearResult(result);
|
||||
int newId = _addSingleSentence(connection, sourceSentence, targetSentence, tmId);
|
||||
connection.endTransaction();
|
||||
|
||||
BOOST_FOREACH (QueryParam * param, params) {
|
||||
delete param;
|
||||
}
|
||||
|
||||
return newId;
|
||||
|
||||
}
|
||||
|
||||
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(std::vector<MatchedPatternFragment> concordiaResults) {
|
||||
std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
|
||||
const std::vector<TokenizedSentence> & sourceSentences,
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const std::vector<int> & tmIds) {
|
||||
DBconnection connection;
|
||||
std::vector<SUFFIX_MARKER_TYPE> newIds;
|
||||
connection.startTransaction();
|
||||
int index = 0;
|
||||
BOOST_FOREACH(const TokenizedSentence & sourceSentence, sourceSentences) {
|
||||
newIds.push_back(_addSingleSentence(connection, sourceSentence, targetSentences.at(index), tmIds.at(index)));
|
||||
index++;
|
||||
}
|
||||
connection.endTransaction();
|
||||
return newIds;
|
||||
}
|
||||
|
||||
|
||||
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults) {
|
||||
std::vector<SimpleSearchResult> results;
|
||||
|
||||
DBconnection connection;
|
||||
connection.startTransaction();
|
||||
|
||||
BOOST_FOREACH(MatchedPatternFragment & fragment, concordiaResults) {
|
||||
BOOST_FOREACH(const MatchedPatternFragment & fragment, concordiaResults) {
|
||||
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
|
||||
std::vector<QueryParam*> params;
|
||||
params.push_back(new IntParam(2*fragment.getExampleOffset()+1));
|
||||
params.push_back(new IntParam(2*(fragment.getExampleOffset()+fragment.getMatchedLength())));
|
||||
params.push_back(new IntParam(fragment.getExampleId()));
|
||||
PGresult * result = connection.execute(query, params);
|
||||
|
||||
results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0),
|
||||
connection.getIntValue(result,0,3),
|
||||
connection.getIntValue(result,0,4),
|
||||
@ -70,14 +70,37 @@ std::vector<SimpleSearchResult> UnitDAO::getSearchResults(std::vector<MatchedPat
|
||||
}
|
||||
|
||||
|
||||
std::vector<int> UnitDAO::_getTokenPositions(boost::shared_ptr<TokenizedSentence> ts) {
|
||||
std::vector<int> UnitDAO::_getTokenPositions(const TokenizedSentence & ts) {
|
||||
std::vector<int> result;
|
||||
BOOST_FOREACH(const TokenAnnotation & token, ts->getTokens()) {
|
||||
BOOST_FOREACH(const TokenAnnotation & token, ts.getTokens()) {
|
||||
result.push_back(token.getStart());
|
||||
result.push_back(token.getEnd());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Inserts one unit on the caller's connection and returns the id read from
// the INSERT ... RETURNING result.
//
// connection     - connection to run the INSERT on; this function does not
//                  start or end a transaction itself.
// sourceSentence - tokenized source segment; its text and the start/end
//                  positions of its tokens are stored.
// targetSentence - target segment text.
// tmId           - value stored in the tm_id column.
//
// The heap-allocated query parameters and the PGresult are released on every
// path. If execute() or getIntValue() throws, they are cleaned up and the
// exception is rethrown unchanged.
int UnitDAO::_addSingleSentence(
    DBconnection & connection,
    const TokenizedSentence & sourceSentence,
    const std::string & targetSentence,
    const int tmId) {
    std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens) values($1::text,$2::text,$3::integer,$4) RETURNING id";

    std::vector<QueryParam*> params;
    // Reserve up front so the push_back calls below cannot reallocate.
    params.reserve(4);

    PGresult * result = NULL;
    int newId = 0;
    try {
        params.push_back(new StringParam(sourceSentence.getSentence()));
        params.push_back(new StringParam(targetSentence));
        params.push_back(new IntParam(tmId));
        params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));

        result = connection.execute(query, params);
        newId = connection.getIntValue(result, 0, 0);
    } catch (...) {
        // result is still NULL when execute() itself failed.
        if (result != NULL) {
            connection.clearResult(result);
        }
        BOOST_FOREACH (QueryParam * param, params) {
            delete param;
        }
        throw;
    }

    connection.clearResult(result);
    BOOST_FOREACH (QueryParam * param, params) {
        delete param;
    }

    return newId;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -4,12 +4,14 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <concordia/common/config.hpp>
|
||||
#include <concordia/tokenized_sentence.hpp>
|
||||
#include <concordia/substring_occurence.hpp>
|
||||
#include <concordia/matched_pattern_fragment.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
#include "simple_search_result.hpp"
|
||||
#include "db_connection.hpp"
|
||||
|
||||
class UnitDAO {
|
||||
public:
|
||||
@ -21,14 +23,26 @@ public:
|
||||
virtual ~UnitDAO();
|
||||
|
||||
int addSentence(
|
||||
boost::shared_ptr<TokenizedSentence> sourceSentence,
|
||||
std::string & targetSentence,
|
||||
int tmId);
|
||||
const TokenizedSentence & sourceSentence,
|
||||
const std::string & targetSentence,
|
||||
const int tmId);
|
||||
|
||||
std::vector<SimpleSearchResult> getSearchResults(std::vector<MatchedPatternFragment> concordiaResults);
|
||||
std::vector<SUFFIX_MARKER_TYPE> addSentences(
|
||||
const std::vector<TokenizedSentence> & sourceSentences,
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const std::vector<int> & tmIds);
|
||||
|
||||
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults);
|
||||
|
||||
private:
|
||||
std::vector<int> _getTokenPositions(boost::shared_ptr<TokenizedSentence> ts);
|
||||
std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
|
||||
|
||||
int _addSingleSentence(
|
||||
DBconnection & connection,
|
||||
const TokenizedSentence & sourceSentence,
|
||||
const std::string & targetSentence,
|
||||
const int tmId);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,7 +1,12 @@
|
||||
#!/bin/sh
|
||||
|
||||
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"Marysia ma rysia", "targetSentence":"Mary has a bobcat", "tmId":1}' http://localhost
|
||||
# add sentence
|
||||
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"I jeszcze jedno zdanie testowe", "targetSentence":"Yet another test sentence", "tmId":1}' http://localhost
|
||||
|
||||
# add sentences
|
||||
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentences", "sentences":[[1,"test source one", "test target one"],[4,"test source two", "test target two"],[9,"test source three", "test target three"],[13,"test source four", "test target four"]]}' http://localhost
|
||||
|
||||
# simple search
|
||||
curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"test source"}' http://localhost
|
||||
|
||||
|
||||
curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"ma rysia"}' http://localhost
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user