aligned sentences - first mock

This commit is contained in:
rjawor 2015-12-13 19:38:08 +01:00
parent 96a00fe8c9
commit 7307c4612b
10 changed files with 121 additions and 2 deletions

View File

@ -14,6 +14,7 @@ add_executable(concordia_server_process
simple_search_result.cpp
complete_concordia_search_result.cpp
tm_dao.cpp
aligned_unit.cpp
)
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case)

View File

@ -0,0 +1,9 @@
#include "aligned_unit.hpp"
// Default constructor: every member is value-initialised, which leaves both
// sentence pointers empty and the alignment table without any rows.
AlignedUnit::AlignedUnit()
    : _sourceSentence(),
      _targetSentence(),
      _alignments() {}
// Destructor: no explicit cleanup; the members release themselves.
AlignedUnit::~AlignedUnit() {}

View File

@ -0,0 +1,30 @@
#ifndef ALIGNED_UNIT_HDR
#define ALIGNED_UNIT_HDR
#include <string>
#include <vector>

#include <boost/shared_ptr.hpp>
#include <concordia/tokenized_sentence.hpp>
/*!
  Holds a source sentence, a target sentence and the alignment data
  that links them. The sentences are kept as shared pointers to
  TokenizedSentence objects.
*/
class AlignedUnit {
public:
    /*! Constructor.
    */
    AlignedUnit();
    /*! Destructor.
    */
    virtual ~AlignedUnit();

    /*! Getter for the source sentence.
      Declared const so that it can be called on units reached through
      a const reference (e.g. const std::vector<AlignedUnit> &).
      \returns shared pointer to the tokenized source sentence;
               empty if no sentence has been assigned
    */
    boost::shared_ptr<TokenizedSentence> getSourceSentence() const {
        return _sourceSentence;
    }

    /*! Getter for the target sentence.
      \returns shared pointer to the tokenized target sentence;
               empty if no sentence has been assigned
    */
    boost::shared_ptr<TokenizedSentence> getTargetSentence() const {
        return _targetSentence;
    }

    /*! Getter for the alignment data.
      \returns read-only reference to the alignment table
    */
    const std::vector<std::vector<int> > & getAlignments() const {
        return _alignments;
    }

private:
    boost::shared_ptr<TokenizedSentence> _sourceSentence;

    boost::shared_ptr<TokenizedSentence> _targetSentence;

    // NOTE(review): presumably one row of target-token indices per
    // source token - confirm once the alignment loader is written.
    std::vector<std::vector<int> > _alignments;
};
#endif

View File

@ -69,6 +69,25 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
}
}
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmId);
} else if (operation == ADD_ALIGNED_SENTENCES_OP) {
    // Handles the "addAlignedSentences" operation: reads the tm id and an
    // array of [source, target] pairs from the request and forwards them
    // to the index controller.
    std::vector<std::string> sourceSentences;
    std::vector<std::string> targetSentences;
    int tmId = d[TM_ID_PARAM].GetInt();
    // loading data from json
    const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM];
    Logger::log("addAlignedSentences");
    Logger::logInt("sentences to add", sentencesArray.Size());
    Logger::logInt("tm id", tmId);
    // Set to false as soon as a malformed entry is found. The error
    // response has then already been written to jsonWriter, so the
    // controller must not be called: it would write a second response
    // and index only the entries collected before the bad one.
    bool inputValid = true;
    for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
        const rapidjson::Value & sentencePair = sentencesArray[i];
        // Size() is only meaningful for arrays, so check the type first.
        if (!sentencePair.IsArray() || sentencePair.Size() != 2) {
            JsonGenerator::signalError(jsonWriter, "sentence should be an array of 2 elements");
            inputValid = false;
            break;
        }
        sourceSentences.push_back(sentencePair[0].GetString());
        targetSentences.push_back(sentencePair[1].GetString());
    }
    if (inputValid) {
        _indexController->addAlignedSentences(jsonWriter, sourceSentences, targetSentences, tmId);
    }
} else if (operation == REFRESH_INDEX_OP) {
int tmId = d[TM_ID_PARAM].GetInt();
_indexController->refreshIndexFromRAM(jsonWriter, tmId);

View File

@ -22,6 +22,7 @@
#define ADD_SENTENCE_OP "addSentence"
#define ADD_SENTENCES_OP "addSentences"
// Operation name for which ConcordiaServer::handleRequest calls
// IndexController::addAlignedSentences.
#define ADD_ALIGNED_SENTENCES_OP "addAlignedSentences"
#define REFRESH_INDEX_OP "refreshIndex"
#define SIMPLE_SEARCH_OP "simpleSearch"
#define CONCORDIA_SEARCH_OP "concordiaSearch"

View File

@ -76,6 +76,36 @@ void IndexController::addSentences(
}
}
/*!
  Adds aligned sentence pairs to the translation memory with the given id.
  The pairs are converted to aligned units, handed to the unit DAO, and
  each unit's source sentence is then added to the Concordia instance
  registered for tmId under the id returned by the DAO.

  \param jsonWriter writer that receives either a {"status":"success"}
                    object or an error message
  \param sourceSentences source sentences
  \param targetSentences target sentences
  \param tmId id of the translation memory to add to
*/
void IndexController::addAlignedSentences(
                 rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                 const std::vector<std::string> & sourceSentences,
                 const std::vector<std::string> & targetSentences,
                 const int tmId) {
    try {
        boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
        if (it != _concordiasMap->end()) {
            std::vector<AlignedUnit> alignedUnits = _getAlignedUnits(sourceSentences, targetSentences);
            std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addAlignedUnits(alignedUnits, tmId);

            // Every unit needs an id. Without this check a short id list
            // would raise std::out_of_range from the indexing below, which
            // the ConcordiaException handler does not catch.
            if (sentenceIds.size() < alignedUnits.size()) {
                JsonGenerator::signalError(jsonWriter, "not every aligned unit received an id");
                return;
            }

            // Look the index up once instead of on every iteration.
            Concordia & concordia = (*_concordiasMap)[tmId];
            for (std::vector<AlignedUnit>::size_type i = 0; i < alignedUnits.size(); ++i) {
                concordia.addTokenizedExample(*(alignedUnits[i].getSourceSentence()), sentenceIds[i]);
            }

            jsonWriter.StartObject();
            jsonWriter.String("status");
            jsonWriter.String("success");
            jsonWriter.EndObject();
        } else {
            JsonGenerator::signalError(jsonWriter, "no such tm!");
        }
    } catch (ConcordiaException & e) {
        std::stringstream errorstream;
        errorstream << "concordia error: " << e.what();
        JsonGenerator::signalError(jsonWriter, errorstream.str());
    }
}
void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const int tmId) {
try {
@ -98,5 +128,11 @@ void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuf
}
std::vector<AlignedUnit> IndexController::_getAlignedUnits(const std::vector<std::string> & sourceSentences,
const std::vector<std::string> & targetSentences) {
//TODO
}

View File

@ -9,6 +9,7 @@
#include "unit_dao.hpp"
#include "aligned_unit.hpp"
#include "rapidjson/writer.h"
@ -32,10 +33,18 @@ public:
const std::vector<std::string> & targetSentences,
const int tmId);
/*! Adds aligned sentence pairs to the translation memory tmId.
  \param jsonWriter writer that receives the success status or an error message
  \param sourceSentences source sentences
  \param targetSentences target sentences
  \param tmId id of the translation memory; an error is written if it is unknown
*/
void addAlignedSentences(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const std::vector<std::string> & sourceSentences,
const std::vector<std::string> & targetSentences,
const int tmId);
void refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const int tmId);
private:
/*! Builds aligned units from source and target sentences.
  NOTE(review): the definition is still marked TODO.
  \param sourceSentences source sentences
  \param targetSentences target sentences
  \returns vector of aligned units
*/
std::vector<AlignedUnit> _getAlignedUnits(const std::vector<std::string> & sourceSentences,
const std::vector<std::string> & targetSentences);
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
UnitDAO _unitDAO;

View File

@ -44,6 +44,13 @@ std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
return newIds;
}
/*!
  Placeholder for storing aligned units in the translation memory tmId.
  Nothing is stored yet; the returned id list is always empty.
*/
std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addAlignedUnits(
                                     const std::vector<AlignedUnit> & alignedUnits,
                                     const int tmId) {
    //TODO
    return std::vector<SUFFIX_MARKER_TYPE>();
}
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & fragments) {
std::vector<SimpleSearchResult> results;

View File

@ -11,6 +11,7 @@
#include <concordia/concordia_search_result.hpp>
#include <boost/shared_ptr.hpp>
#include "aligned_unit.hpp"
#include "simple_search_result.hpp"
#include "complete_concordia_search_result.hpp"
#include "db_connection.hpp"
@ -33,7 +34,11 @@ public:
const std::vector<TokenizedSentence> & sourceSentences,
const std::vector<std::string> & targetSentences,
const int tmId);
/*! Adds aligned units to the translation memory tmId.
  NOTE(review): the definition is currently a TODO stub that returns
  an empty vector.
  \param alignedUnits units to add
  \param tmId id of the translation memory
  \returns ids for the added units
*/
std::vector<SUFFIX_MARKER_TYPE> addAlignedUnits(
const std::vector<AlignedUnit> & alignedUnits,
const int tmId);
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & fragments);
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);

View File

@ -19,6 +19,8 @@ CREATE TABLE unit (
tm_id integer,
source_segment text,
target_segment text,
source_tokens integer[]
source_tokens integer[],
target_tokens integer[],
alignments integer[][]
);