aligned sentences - first mock
This commit is contained in:
parent
96a00fe8c9
commit
7307c4612b
@ -14,6 +14,7 @@ add_executable(concordia_server_process
|
|||||||
simple_search_result.cpp
|
simple_search_result.cpp
|
||||||
complete_concordia_search_result.cpp
|
complete_concordia_search_result.cpp
|
||||||
tm_dao.cpp
|
tm_dao.cpp
|
||||||
|
aligned_unit.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case)
|
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case)
|
||||||
|
|
||||||
|
9
concordia-server/aligned_unit.cpp
Normal file
9
concordia-server/aligned_unit.cpp
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#include "aligned_unit.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
/*! Default constructor.
    Does nothing explicitly; every member is default-constructed by its own type.
*/
AlignedUnit::AlignedUnit() {}
|
||||||
|
|
||||||
|
/*! Destructor.
    Has no work of its own; the members clean up after themselves.
*/
AlignedUnit::~AlignedUnit() {}
|
||||||
|
|
30
concordia-server/aligned_unit.hpp
Normal file
30
concordia-server/aligned_unit.hpp
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#ifndef ALIGNED_UNIT_HDR
|
||||||
|
#define ALIGNED_UNIT_HDR
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <concordia/tokenized_sentence.hpp>
|
||||||
|
|
||||||
|
class AlignedUnit {
|
||||||
|
public:
|
||||||
|
/*! Constructor.
|
||||||
|
*/
|
||||||
|
AlignedUnit();
|
||||||
|
/*! Destructor.
|
||||||
|
*/
|
||||||
|
virtual ~AlignedUnit();
|
||||||
|
|
||||||
|
boost::shared_ptr<TokenizedSentence> getSourceSentence() {
|
||||||
|
return _sourceSentence;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
boost::shared_ptr<TokenizedSentence> _sourceSentence;
|
||||||
|
|
||||||
|
boost::shared_ptr<TokenizedSentence> _targetSentence;
|
||||||
|
|
||||||
|
std::vector<std::vector<int> > _alignments;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -69,6 +69,25 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmId);
|
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmId);
|
||||||
|
} else if (operation == ADD_ALIGNED_SENTENCES_OP) {
|
||||||
|
std::vector<std::string> sourceSentences;
|
||||||
|
std::vector<std::string> targetSentences;
|
||||||
|
int tmId = d[TM_ID_PARAM].GetInt();
|
||||||
|
// loading data from json
|
||||||
|
const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM];
|
||||||
|
Logger::log("addAlignedSentences");
|
||||||
|
Logger::logInt("sentences to add", sentencesArray.Size());
|
||||||
|
Logger::logInt("tm id", tmId);
|
||||||
|
for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
|
||||||
|
if (sentencesArray[i].Size() != 2) {
|
||||||
|
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 2 elements");
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
sourceSentences.push_back(sentencesArray[i][0].GetString());
|
||||||
|
targetSentences.push_back(sentencesArray[i][1].GetString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_indexController->addAlignedSentences(jsonWriter, sourceSentences, targetSentences, tmId);
|
||||||
} else if (operation == REFRESH_INDEX_OP) {
|
} else if (operation == REFRESH_INDEX_OP) {
|
||||||
int tmId = d[TM_ID_PARAM].GetInt();
|
int tmId = d[TM_ID_PARAM].GetInt();
|
||||||
_indexController->refreshIndexFromRAM(jsonWriter, tmId);
|
_indexController->refreshIndexFromRAM(jsonWriter, tmId);
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
#define ADD_SENTENCE_OP "addSentence"
|
#define ADD_SENTENCE_OP "addSentence"
|
||||||
#define ADD_SENTENCES_OP "addSentences"
|
#define ADD_SENTENCES_OP "addSentences"
|
||||||
|
#define ADD_ALIGNED_SENTENCES_OP "addAlignedSentences"
|
||||||
#define REFRESH_INDEX_OP "refreshIndex"
|
#define REFRESH_INDEX_OP "refreshIndex"
|
||||||
#define SIMPLE_SEARCH_OP "simpleSearch"
|
#define SIMPLE_SEARCH_OP "simpleSearch"
|
||||||
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
||||||
|
@ -76,6 +76,36 @@ void IndexController::addSentences(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Adds aligned sentence pairs to the translation memory identified by tmId.
    The pairs are converted to aligned units, stored through the unit DAO and
    the source sentence of every unit is then added to the Concordia index of
    that translation memory under the id returned by the DAO.
    The outcome is written to jsonWriter: a {"status":"success"} object, or
    an error object when the tm does not exist, when the DAO returns a number
    of ids different from the number of units, or when Concordia throws.
  \param jsonWriter writer receiving the JSON response
  \param sourceSentences source sides of the sentence pairs
  \param targetSentences target sides of the sentence pairs
  \param tmId id of the translation memory to add to
*/
void IndexController::addAlignedSentences(
        rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
        const std::vector<std::string> & sourceSentences,
        const std::vector<std::string> & targetSentences,
        const int tmId) {
    try {
        boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
        if (it == _concordiasMap->end()) {
            JsonGenerator::signalError(jsonWriter, "no such tm!");
            return;
        }

        std::vector<AlignedUnit> alignedUnits = _getAlignedUnits(sourceSentences, targetSentences);
        std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addAlignedUnits(alignedUnits, tmId);

        // Every unit needs exactly one id. Without this check a short id
        // vector would raise std::out_of_range, which the handler below
        // does not catch.
        if (sentenceIds.size() != alignedUnits.size()) {
            JsonGenerator::signalError(jsonWriter,
                "number of stored unit ids does not match the number of aligned units");
            return;
        }

        // The tm is known to exist here, so operator[] only looks it up;
        // the lookup is done once instead of once per unit.
        Concordia & concordia = (*_concordiasMap)[tmId];
        for (std::vector<AlignedUnit>::size_type i = 0; i < alignedUnits.size(); ++i) {
            concordia.addTokenizedExample(*(alignedUnits[i].getSourceSentence()), sentenceIds[i]);
        }

        jsonWriter.StartObject();
        jsonWriter.String("status");
        jsonWriter.String("success");
        jsonWriter.EndObject();
    } catch (ConcordiaException & e) {
        std::stringstream errorstream;
        errorstream << "concordia error: " << e.what();
        JsonGenerator::signalError(jsonWriter, errorstream.str());
    }
}
|
||||||
|
|
||||||
void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||||
const int tmId) {
|
const int tmId) {
|
||||||
try {
|
try {
|
||||||
@ -98,5 +128,11 @@ void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuf
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<AlignedUnit> IndexController::_getAlignedUnits(const std::vector<std::string> & sourceSentences,
|
||||||
|
const std::vector<std::string> & targetSentences) {
|
||||||
|
//TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
|
|
||||||
|
|
||||||
#include "unit_dao.hpp"
|
#include "unit_dao.hpp"
|
||||||
|
#include "aligned_unit.hpp"
|
||||||
|
|
||||||
#include "rapidjson/writer.h"
|
#include "rapidjson/writer.h"
|
||||||
|
|
||||||
@ -32,10 +33,18 @@ public:
|
|||||||
const std::vector<std::string> & targetSentences,
|
const std::vector<std::string> & targetSentences,
|
||||||
const int tmId);
|
const int tmId);
|
||||||
|
|
||||||
|
void addAlignedSentences(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||||
|
const std::vector<std::string> & sourceSentences,
|
||||||
|
const std::vector<std::string> & targetSentences,
|
||||||
|
const int tmId);
|
||||||
|
|
||||||
void refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
void refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||||
const int tmId);
|
const int tmId);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
std::vector<AlignedUnit> _getAlignedUnits(const std::vector<std::string> & sourceSentences,
|
||||||
|
const std::vector<std::string> & targetSentences);
|
||||||
|
|
||||||
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
||||||
|
|
||||||
UnitDAO _unitDAO;
|
UnitDAO _unitDAO;
|
||||||
|
@ -44,6 +44,13 @@ std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
|
|||||||
return newIds;
|
return newIds;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addAlignedUnits(
|
||||||
|
const std::vector<AlignedUnit> & alignedUnits,
|
||||||
|
const int tmId) {
|
||||||
|
//TODO
|
||||||
|
std::vector<SUFFIX_MARKER_TYPE> newIds;
|
||||||
|
return newIds;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & fragments) {
|
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & fragments) {
|
||||||
std::vector<SimpleSearchResult> results;
|
std::vector<SimpleSearchResult> results;
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include <concordia/concordia_search_result.hpp>
|
#include <concordia/concordia_search_result.hpp>
|
||||||
#include <boost/shared_ptr.hpp>
|
#include <boost/shared_ptr.hpp>
|
||||||
|
|
||||||
|
#include "aligned_unit.hpp"
|
||||||
#include "simple_search_result.hpp"
|
#include "simple_search_result.hpp"
|
||||||
#include "complete_concordia_search_result.hpp"
|
#include "complete_concordia_search_result.hpp"
|
||||||
#include "db_connection.hpp"
|
#include "db_connection.hpp"
|
||||||
@ -34,6 +35,10 @@ public:
|
|||||||
const std::vector<std::string> & targetSentences,
|
const std::vector<std::string> & targetSentences,
|
||||||
const int tmId);
|
const int tmId);
|
||||||
|
|
||||||
|
std::vector<SUFFIX_MARKER_TYPE> addAlignedUnits(
|
||||||
|
const std::vector<AlignedUnit> & alignedUnits,
|
||||||
|
const int tmId);
|
||||||
|
|
||||||
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & fragments);
|
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & fragments);
|
||||||
|
|
||||||
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
|
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
|
||||||
|
@ -19,6 +19,8 @@ CREATE TABLE unit (
|
|||||||
tm_id integer,
|
tm_id integer,
|
||||||
source_segment text,
|
source_segment text,
|
||||||
target_segment text,
|
target_segment text,
|
||||||
source_tokens integer[]
|
source_tokens integer[],
|
||||||
|
target_tokens integer[],
|
||||||
|
alignments integer[][]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user