aligned sentences - first mock
This commit is contained in:
parent
96a00fe8c9
commit
7307c4612b
@ -14,6 +14,7 @@ add_executable(concordia_server_process
|
||||
simple_search_result.cpp
|
||||
complete_concordia_search_result.cpp
|
||||
tm_dao.cpp
|
||||
aligned_unit.cpp
|
||||
)
|
||||
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case)
|
||||
|
||||
|
9
concordia-server/aligned_unit.cpp
Normal file
9
concordia-server/aligned_unit.cpp
Normal file
@ -0,0 +1,9 @@
|
||||
#include "aligned_unit.hpp"
|
||||
|
||||
|
||||
// Default constructor: the body is intentionally empty, members are
// default-initialised.
AlignedUnit::AlignedUnit() {}
|
||||
|
||||
// Destructor: the body is intentionally empty, members clean up after
// themselves.
AlignedUnit::~AlignedUnit() {}
|
||||
|
30
concordia-server/aligned_unit.hpp
Normal file
30
concordia-server/aligned_unit.hpp
Normal file
@ -0,0 +1,30 @@
|
||||
#ifndef ALIGNED_UNIT_HDR
|
||||
#define ALIGNED_UNIT_HDR
|
||||
|
||||
#include <string>
#include <vector>

#include <boost/shared_ptr.hpp>
#include <concordia/tokenized_sentence.hpp>
|
||||
|
||||
class AlignedUnit {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
AlignedUnit();
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~AlignedUnit();
|
||||
|
||||
boost::shared_ptr<TokenizedSentence> getSourceSentence() {
|
||||
return _sourceSentence;
|
||||
}
|
||||
|
||||
private:
|
||||
boost::shared_ptr<TokenizedSentence> _sourceSentence;
|
||||
|
||||
boost::shared_ptr<TokenizedSentence> _targetSentence;
|
||||
|
||||
std::vector<std::vector<int> > _alignments;
|
||||
};
|
||||
|
||||
#endif
|
@ -69,6 +69,25 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
||||
}
|
||||
}
|
||||
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmId);
|
||||
} else if (operation == ADD_ALIGNED_SENTENCES_OP) {
|
||||
std::vector<std::string> sourceSentences;
|
||||
std::vector<std::string> targetSentences;
|
||||
int tmId = d[TM_ID_PARAM].GetInt();
|
||||
// loading data from json
|
||||
const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM];
|
||||
Logger::log("addAlignedSentences");
|
||||
Logger::logInt("sentences to add", sentencesArray.Size());
|
||||
Logger::logInt("tm id", tmId);
|
||||
for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
|
||||
if (sentencesArray[i].Size() != 2) {
|
||||
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 2 elements");
|
||||
break;
|
||||
} else {
|
||||
sourceSentences.push_back(sentencesArray[i][0].GetString());
|
||||
targetSentences.push_back(sentencesArray[i][1].GetString());
|
||||
}
|
||||
}
|
||||
_indexController->addAlignedSentences(jsonWriter, sourceSentences, targetSentences, tmId);
|
||||
} else if (operation == REFRESH_INDEX_OP) {
|
||||
int tmId = d[TM_ID_PARAM].GetInt();
|
||||
_indexController->refreshIndexFromRAM(jsonWriter, tmId);
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
// Operation names. ConcordiaServer::handleRequest picks the branch to run
// by comparing the request's operation value with these strings
// (e.g. ADD_ALIGNED_SENTENCES_OP, REFRESH_INDEX_OP).
#define ADD_SENTENCE_OP "addSentence"
|
||||
#define ADD_SENTENCES_OP "addSentences"
|
||||
#define ADD_ALIGNED_SENTENCES_OP "addAlignedSentences"
|
||||
#define REFRESH_INDEX_OP "refreshIndex"
|
||||
#define SIMPLE_SEARCH_OP "simpleSearch"
|
||||
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
||||
|
@ -76,6 +76,36 @@ void IndexController::addSentences(
|
||||
}
|
||||
}
|
||||
|
||||
/*! Adds aligned sentence pairs to the translation memory with the given id.

    The sentences are turned into aligned units, the units are stored
    through the unit DAO and the source sentence of every unit is then
    added to the Concordia index under the id returned by the DAO.
    On success an object with "status" set to "success" is written to
    jsonWriter; every failure is reported through
    JsonGenerator::signalError instead.

    \param jsonWriter writer receiving the JSON response
    \param sourceSentences source sentences, passed to _getAlignedUnits
    \param targetSentences target sentences, passed to _getAlignedUnits
    \param tmId id of the translation memory (key of _concordiasMap)
*/
void IndexController::addAlignedSentences(
        rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
        const std::vector<std::string> & sourceSentences,
        const std::vector<std::string> & targetSentences,
        const int tmId) {
    try {
        boost::ptr_map<int,Concordia>::iterator concordiaIt = _concordiasMap->find(tmId);
        if (concordiaIt == _concordiasMap->end()) {
            JsonGenerator::signalError(jsonWriter, "no such tm!");
            return;
        }

        std::vector<AlignedUnit> alignedUnits = _getAlignedUnits(sourceSentences, targetSentences);

        // Validate before anything is stored: a unit without a source
        // sentence cannot be indexed and would be dereferenced below.
        for (std::size_t i = 0; i < alignedUnits.size(); ++i) {
            if (!alignedUnits[i].getSourceSentence()) {
                JsonGenerator::signalError(jsonWriter, "aligned unit without a source sentence");
                return;
            }
        }

        std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addAlignedUnits(alignedUnits, tmId);

        // Each unit must have received exactly one id; otherwise pairing
        // units with ids by position would run past the end of sentenceIds.
        if (sentenceIds.size() != alignedUnits.size()) {
            JsonGenerator::signalError(jsonWriter,
                "number of stored unit ids does not match the number of aligned units");
            return;
        }

        // Look the index up once instead of on every loop iteration.
        Concordia & concordia = (*_concordiasMap)[tmId];
        for (std::size_t i = 0; i < alignedUnits.size(); ++i) {
            concordia.addTokenizedExample(*(alignedUnits[i].getSourceSentence()), sentenceIds[i]);
        }

        jsonWriter.StartObject();
        jsonWriter.String("status");
        jsonWriter.String("success");
        jsonWriter.EndObject();
    } catch (ConcordiaException & e) {
        std::stringstream errorstream;
        errorstream << "concordia error: " << e.what();
        JsonGenerator::signalError(jsonWriter, errorstream.str());
    }
}
|
||||
|
||||
void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const int tmId) {
|
||||
try {
|
||||
@ -98,5 +128,11 @@ void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuf
|
||||
|
||||
}
|
||||
|
||||
std::vector<AlignedUnit> IndexController::_getAlignedUnits(const std::vector<std::string> & sourceSentences,
|
||||
const std::vector<std::string> & targetSentences) {
|
||||
//TODO
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
|
||||
#include "unit_dao.hpp"
|
||||
#include "aligned_unit.hpp"
|
||||
|
||||
#include "rapidjson/writer.h"
|
||||
|
||||
@ -32,10 +33,18 @@ public:
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const int tmId);
|
||||
|
||||
/*! Adds aligned sentence pairs to the translation memory tmId.
    The definition looks tmId up in _concordiasMap, builds aligned
    units from the two sentence lists, stores them through _unitDAO,
    adds their source sentences to the Concordia index and writes
    either a success status or an error message to jsonWriter.
*/
void addAlignedSentences(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const std::vector<std::string> & sourceSentences,
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const int tmId);
|
||||
|
||||
void refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const int tmId);
|
||||
|
||||
private:
|
||||
/*! Builds the list of aligned units for the given source and target
    sentences. The definition is still a TODO stub.
*/
std::vector<AlignedUnit> _getAlignedUnits(const std::vector<std::string> & sourceSentences,
|
||||
const std::vector<std::string> & targetSentences);
|
||||
|
||||
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
||||
|
||||
UnitDAO _unitDAO;
|
||||
|
@ -44,6 +44,13 @@ std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
|
||||
return newIds;
|
||||
}
|
||||
|
||||
std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addAlignedUnits(
|
||||
const std::vector<AlignedUnit> & alignedUnits,
|
||||
const int tmId) {
|
||||
//TODO
|
||||
std::vector<SUFFIX_MARKER_TYPE> newIds;
|
||||
return newIds;
|
||||
}
|
||||
|
||||
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & fragments) {
|
||||
std::vector<SimpleSearchResult> results;
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <concordia/concordia_search_result.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
#include "aligned_unit.hpp"
|
||||
#include "simple_search_result.hpp"
|
||||
#include "complete_concordia_search_result.hpp"
|
||||
#include "db_connection.hpp"
|
||||
@ -34,6 +35,10 @@ public:
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const int tmId);
|
||||
|
||||
/*! Takes aligned units for the translation memory tmId and returns a
    list of SUFFIX_MARKER_TYPE ids. The definition is currently a mock
    that returns an empty list.
*/
std::vector<SUFFIX_MARKER_TYPE> addAlignedUnits(
|
||||
const std::vector<AlignedUnit> & alignedUnits,
|
||||
const int tmId);
|
||||
|
||||
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & fragments);
|
||||
|
||||
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
|
||||
|
@ -19,6 +19,8 @@ CREATE TABLE unit (
|
||||
tm_id integer,
|
||||
source_segment text,
|
||||
target_segment text,
|
||||
source_tokens integer[]
|
||||
source_tokens integer[],
|
||||
target_tokens integer[],
|
||||
alignments integer[][]
|
||||
);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user