add fast aligned sentences

This commit is contained in:
Rafał Jaworski 2019-02-23 21:58:54 +01:00
parent a8e1117f27
commit d900e806d9
4 changed files with 42 additions and 17 deletions

View File

@ -64,25 +64,28 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
_indexController->addSentence(jsonWriter, sourceSentence, targetSentence, tmId);
} else if (operation == ADD_SENTENCES_OP) {
std::vector<std::string> sourceSentences;
std::vector<std::string> lemmatizedSourceSentences;
std::vector<std::string> targetSentences;
std::vector<std::vector<std::vector<int> > > alignments;
std::vector<int> sourceIds;
int tmId = _getIntParameter(d, TM_ID_PARAM);
// loading data from json
const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM];
/*
Logger::log("addSentences");
Logger::logInt("sentences to add", sentencesArray.Size());
Logger::logInt("tm id", tmId);
*/
for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
if (sentencesArray[i].Size() != 2) {
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 2 elements");
const rapidjson::Value & examplesArray = d[EXAMPLES_PARAM];
for (rapidjson::SizeType i = 0; i < examplesArray.Size(); i++) {
if (examplesArray[i].Size() != 5) {
JsonGenerator::signalError(jsonWriter, "example should be an array of 5 elements");
break;
} else {
sourceSentences.push_back(sentencesArray[i][0].GetString());
targetSentences.push_back(sentencesArray[i][1].GetString());
sourceSentences.push_back(examplesArray[i][0].GetString());
lemmatizedSourceSentences.push_back(examplesArray[i][1].GetString());
targetSentences.push_back(examplesArray[i][2].GetString());
alignments.push_back(_getInt2DArray(examplesArray[i][3]));
sourceIds.push_back(examplesArray[i][4].GetInt());
}
}
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmId);
_indexController->addSentences(jsonWriter, sourceSentences, lemmatizedSourceSentences, targetSentences, alignments, sourceIds, tmId);
} else if (operation == ADD_ALIGNED_SENTENCES_OP) {
std::vector<std::string> sourceSentences;
std::vector<std::string> targetSentences;
@ -359,6 +362,21 @@ int ConcordiaServer::_getBoolParameter(rapidjson::Document & d, const char * nam
}
}
// Converts a JSON array of integer arrays into a vector of int vectors.
//
// The value comes from a client request, so its shape is not trusted.
// rapidjson's Size(), operator[] and GetInt() assert on a type mismatch
// instead of reporting it, so every level is checked before it is accessed
// and a ConcordiaException is thrown for a malformed value.
//
// v       - JSON value expected to be an array whose elements are arrays of ints
// returns - the nested integers, in the same order as they appear in v
// throws  - ConcordiaException if v or one of its elements is not an array,
//           or if an innermost element is not an int
//
// NOTE(review): assumes ConcordiaException is constructible from a
// std::string message and that the request handler catches it - confirm.
std::vector<std::vector<int> > ConcordiaServer::_getInt2DArray(const rapidjson::Value & v)
                                         throw (ConcordiaException) {
    if (!v.IsArray()) {
        throw ConcordiaException(std::string("expected an array of integer arrays"));
    }

    std::vector<std::vector<int> > result;
    result.reserve(v.Size());
    for (rapidjson::SizeType i = 0; i < v.Size(); i++) {
        const rapidjson::Value & row = v[i];
        if (!row.IsArray()) {
            throw ConcordiaException(std::string("expected an array of integers"));
        }

        std::vector<int> innerArray;
        innerArray.reserve(row.Size());
        for (rapidjson::SizeType j = 0; j < row.Size(); j++) {
            const rapidjson::Value & cell = row[j];
            if (!cell.IsInt()) {
                throw ConcordiaException(std::string("expected an integer"));
            }
            innerArray.push_back(cell.GetInt());
        }
        result.push_back(innerArray);
    }
    return result;
}
void ConcordiaServer::_addTm(int tmId) {
std::stringstream indexPath;
indexPath << INDEX_DIRECTORY << "/tm_" << tmId;

View File

@ -43,6 +43,8 @@ private:
int _getBoolParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException);
// Reads a JSON value as an array of int arrays and returns it as nested
// vectors; the exception specification allows only ConcordiaException.
std::vector<std::vector<int> > _getInt2DArray(const rapidjson::Value & v) throw (ConcordiaException);
void _addTm(int tmId);
std::string _configFilePath;

View File

@ -62,10 +62,12 @@ void IndexController::addSentence(
}
}
void IndexController::addSentences(
rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
void IndexController::addSentences(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const std::vector<std::string> & sourceSentences,
const std::vector<std::string> & lemmatizedSourceSentences,
const std::vector<std::string> & targetSentences,
const std::vector<std::vector<std::vector<int> > > & alignments,
const std::vector<int> & sourceIds,
const int tmId) {
try {
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);

View File

@ -33,7 +33,10 @@ public:
// Batch counterpart of addSentence: takes several examples at once for the
// translation memory identified by tmId.
// The request handler in this change appends one entry per example to each
// of the five vectors, so they are presumably parallel (same length, same
// order) - confirm.
//   jsonWriter                - presumably receives the JSON result or error output - confirm
//   sourceSentences           - source-side sentence of each example
//   lemmatizedSourceSentences - lemmatized form of each source sentence (per the name)
//   targetSentences           - target-side sentence of each example
//   alignments                - one 2D int array per example; the meaning of the
//                               indices is not visible here - confirm
//   sourceIds                 - one int id per example
//   tmId                      - key used to look up the Concordia instance
void addSentences(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const std::vector<std::string> & sourceSentences,
const std::vector<std::string> & lemmatizedSourceSentences,
const std::vector<std::string> & targetSentences,
const std::vector<std::vector<std::vector<int> > > & alignments,
const std::vector<int> & sourceIds,
const int tmId);
void addAlignedSentences(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,