concordia server full search

This commit is contained in:
rjawor 2019-01-10 14:27:51 +01:00
parent aa544051dc
commit 7622369f5c
10 changed files with 136 additions and 0 deletions

View File

@ -239,6 +239,12 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
std::string pattern = _getStringParameter(d, PATTERN_PARAM); std::string pattern = _getStringParameter(d, PATTERN_PARAM);
int tmId = _getIntParameter(d, TM_ID_PARAM); int tmId = _getIntParameter(d, TM_ID_PARAM);
_searcherController->simpleSearch(jsonWriter, pattern, tmId); _searcherController->simpleSearch(jsonWriter, pattern, tmId);
} else if (operation == FULL_SEARCH_OP) {
std::string pattern = _getStringParameter(d, PATTERN_PARAM);
int tmId = _getIntParameter(d, TM_ID_PARAM);
int limit = _getIntParameter(d, LIMIT_PARAM);
int offset = _getIntParameter(d, OFFSET_PARAM);
_searcherController->fullSearch(jsonWriter, pattern, tmId, limit, offset);
} else if (operation == LEXICON_SEARCH_OP) { } else if (operation == LEXICON_SEARCH_OP) {
std::string pattern = _getStringParameter(d, PATTERN_PARAM); std::string pattern = _getStringParameter(d, PATTERN_PARAM);
int tmId = _getIntParameter(d, TM_ID_PARAM); int tmId = _getIntParameter(d, TM_ID_PARAM);

View File

@ -16,6 +16,8 @@
#define SOURCE_SENTENCE_PARAM "sourceSentence" #define SOURCE_SENTENCE_PARAM "sourceSentence"
#define TARGET_SENTENCE_PARAM "targetSentence" #define TARGET_SENTENCE_PARAM "targetSentence"
#define TM_ID_PARAM "tmId" #define TM_ID_PARAM "tmId"
// Request parameter names read as integers by the fullSearch operation
// and forwarded to the searcher as its limit and offset arguments.
#define LIMIT_PARAM "limit"
#define OFFSET_PARAM "offset"
#define TM_LEMMATIZED_PARAM "tmLemmatized" #define TM_LEMMATIZED_PARAM "tmLemmatized"
#define SENTENCES_PARAM "sentences" #define SENTENCES_PARAM "sentences"
#define EXAMPLES_PARAM "examples" #define EXAMPLES_PARAM "examples"
@ -37,6 +39,7 @@
#define GET_LANGUAGES_OP "getLanguages" #define GET_LANGUAGES_OP "getLanguages"
#define REFRESH_INDEX_OP "refreshIndex" #define REFRESH_INDEX_OP "refreshIndex"
#define SIMPLE_SEARCH_OP "simpleSearch" #define SIMPLE_SEARCH_OP "simpleSearch"
// Operation name that routes a request to the full search handler.
#define FULL_SEARCH_OP "fullSearch"
#define LEXICON_SEARCH_OP "lexiconSearch" #define LEXICON_SEARCH_OP "lexiconSearch"
#define CONCORDIA_SEARCH_OP "concordiaSearch" #define CONCORDIA_SEARCH_OP "concordiaSearch"
#define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch" #define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"

View File

@ -0,0 +1,13 @@
#include "full_search_result.hpp"
// Stores the total count; the list of occurences starts out empty.
FullSearchResult::FullSearchResult(const int totalCount)
    : _occurences(),
      _totalCount(totalCount) {}
// Nothing to release explicitly; members clean up after themselves.
FullSearchResult::~FullSearchResult() {}
// Appends a copy of the given occurence at the end of the stored list.
void FullSearchResult::addOccurence(const ExampleOccurence & occurence) {
    _occurences.insert(_occurences.end(), occurence);
}

View File

@ -0,0 +1,35 @@
#ifndef FULL_SEARCH_RESULT_HDR
#define FULL_SEARCH_RESULT_HDR
#include "example_occurence.hpp"
#include <string>
#include <vector>
/*! Result of a full search: a total count supplied at construction
    together with the occurences added afterwards.
*/
class FullSearchResult {
public:
    /*! Constructor.
      \param totalCount value subsequently reported by getTotalCount()
    */
    explicit FullSearchResult(const int totalCount);

    /*! Destructor.
    */
    virtual ~FullSearchResult();

    /*! Getter for the total count.
      \returns the total count given at construction
    */
    int getTotalCount() const { return _totalCount; }

    /*! Getter for the occurences.
      \returns a copy of the stored occurences
    */
    std::vector<ExampleOccurence> getOccurences() const { return _occurences; }

    /*! Stores a copy of the occurence after the ones already held.
      \param occurence occurence to store
    */
    void addOccurence(const ExampleOccurence & occurence);

    // NOTE(review): no definition of this method is visible next to the
    // other member definitions - confirm it is implemented before calling it.
    void offsetPattern(int offset);

private:
    std::vector<ExampleOccurence> _occurences;

    int _totalCount;
};
#endif

View File

@ -60,6 +60,44 @@ void JsonGenerator::writeSimpleSearchResult(rapidjson::Writer<rapidjson::StringB
jsonWriter.EndObject(); //simple search result jsonWriter.EndObject(); //simple search result
} }
/*! Writes a full search result as a single JSON object.
  The object holds "totalCount" (integer) and "occurences" (array). Each
  element of the array is an object with the members "id",
  "matchedExampleStart", "matchedExampleEnd", "sourceSegment",
  "targetSegment" and "targetFragments"; the last one is an array of
  two-element integer arrays (first, second of each fragment pair).
  \param jsonWriter writer that receives the JSON events
  \param result full search result to serialize
*/
void JsonGenerator::writeFullSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                                          const FullSearchResult & result) {
    jsonWriter.StartObject();
    jsonWriter.String("totalCount");
    jsonWriter.Int(result.getTotalCount());
    // Every value inside a JSON object needs a member name. Without this key
    // the array would be written where a name is expected, producing
    // malformed JSON.
    jsonWriter.String("occurences");
    jsonWriter.StartArray();
    BOOST_FOREACH(ExampleOccurence occurence, result.getOccurences()) {
        jsonWriter.StartObject();
        jsonWriter.String("id");
        jsonWriter.Int(occurence.getId());
        jsonWriter.String("matchedExampleStart");
        jsonWriter.Int(occurence.getMatchedExampleStart());
        jsonWriter.String("matchedExampleEnd");
        jsonWriter.Int(occurence.getMatchedExampleEnd());
        jsonWriter.String("sourceSegment");
        jsonWriter.String(occurence.getSourceSegment().c_str());
        jsonWriter.String("targetSegment");
        jsonWriter.String(occurence.getTargetSegment().c_str());
        jsonWriter.String("targetFragments");
        jsonWriter.StartArray(); // all target fragments
        // Fetch the fragments once so that begin() and end() are guaranteed
        // to come from the same container, whether the getter hands back a
        // reference or a temporary copy.
        const std::vector<std::pair<int,int> > & targetFragments = occurence.getTargetFragments();
        for (std::vector<std::pair<int,int> >::const_iterator it = targetFragments.begin();
             it != targetFragments.end(); ++it) {
            jsonWriter.StartArray(); // single target fragment
            jsonWriter.Int(it->first);
            jsonWriter.Int(it->second);
            jsonWriter.EndArray(); // single target fragment
        }
        jsonWriter.EndArray(); // all target fragments
        jsonWriter.EndObject(); // occurence
    }
    jsonWriter.EndArray(); //occurences
    jsonWriter.EndObject(); //full search result
}
void JsonGenerator::writeLexiconSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, void JsonGenerator::writeLexiconSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const SimpleSearchResult & result) { const SimpleSearchResult & result) {
jsonWriter.StartArray(); jsonWriter.StartArray();

View File

@ -6,6 +6,7 @@
#include "rapidjson/writer.h" #include "rapidjson/writer.h"
#include "simple_search_result.hpp" #include "simple_search_result.hpp"
#include "full_search_result.hpp"
class JsonGenerator { class JsonGenerator {
public: public:
@ -22,6 +23,9 @@ public:
static void writeSimpleSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, static void writeSimpleSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const SimpleSearchResult & result); const SimpleSearchResult & result);
/*! Serializes a full search result (its total count and its occurences)
    through the given JSON writer.
  \param jsonWriter writer that receives the JSON events
  \param result full search result to serialize
*/
static void writeFullSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const FullSearchResult & result);
static void writeLexiconSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, static void writeLexiconSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const SimpleSearchResult & result); const SimpleSearchResult & result);

View File

@ -7,6 +7,7 @@
#include "json_generator.hpp" #include "json_generator.hpp"
#include "full_search_result.hpp"
#include "logger.hpp" #include "logger.hpp"
@ -40,6 +41,27 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer>
} }
} }
/*! Runs a full search of the pattern in the translation memory identified
    by tmId and writes the JSON response.
  The pattern is tokenized, passed through the lemmatizer facade and
  overwritten with the outcome before the search is run. On success the
  response is an object with "status" set to "success" and the serialized
  result under "result"; for an unknown tmId an error is signalled instead.
  \param jsonWriter writer that receives the response
  \param pattern search pattern; modified in place
  \param tmId id of the translation memory to search
  \param limit forwarded unchanged to the searcher
  \param offset forwarded unchanged to the searcher
*/
void SearcherController::fullSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                                    std::string & pattern,
                                    const int tmId,
                                    const int limit,
                                    const int offset) {
    boost::ptr_map<int,Concordia>::iterator tmEntry = _concordiasMap->find(tmId);
    if (tmEntry == _concordiasMap->end()) {
        JsonGenerator::signalError(jsonWriter, "no such tm!");
        return;
    }

    // Normalize the pattern before it is handed to the index.
    TokenizedSentence tokens = tmEntry->second->tokenize(pattern, false, false);
    pattern = _lemmatizerFacade->lemmatizeIfNeeded(tokens.getTokenizedSentence(), tmId);

    FullSearchResult searchResult =
        _unitDAO.getFullSearchResult(tmEntry->second->fullSearch(pattern, limit, offset, true));

    jsonWriter.StartObject();
    jsonWriter.String("status");
    jsonWriter.String("success");
    jsonWriter.String("result");
    JsonGenerator::writeFullSearchResult(jsonWriter, searchResult);
    jsonWriter.EndObject();
}
void SearcherController::lexiconSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, void SearcherController::lexiconSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
std::string & pattern, std::string & pattern,
const int tmId) { const int tmId) {

View File

@ -29,6 +29,12 @@ public:
std::string & pattern, std::string & pattern,
const int tmId); const int tmId);
/*! Runs a full search of the pattern in the given translation memory and
    writes the JSON response (result on success, error for an unknown tmId).
  \param jsonWriter writer that receives the response
  \param pattern search pattern; the implementation overwrites it with its
                 tokenized and lemmatized form
  \param tmId id of the translation memory to search
  \param limit forwarded to the underlying full search
  \param offset forwarded to the underlying full search
*/
void fullSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
std::string & pattern,
const int tmId,
const int limit,
const int offset);
void lexiconSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, void lexiconSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
std::string & pattern, std::string & pattern,
const int tmId); const int tmId);

View File

@ -72,6 +72,11 @@ SimpleSearchResult UnitDAO::getSimpleSearchResult(const MatchedPatternFragment &
return _getResultFromFragment(fragment, ts, true); return _getResultFromFragment(fragment, ts, true);
} }
// TODO: placeholder implementation - the occurences list is not consulted yet
// and the result always carries a total count of 5 with no occurences.
FullSearchResult UnitDAO::getFullSearchResult(const OccurencesList & occurencesList) {
    FullSearchResult placeholder(5);
    return placeholder;
}
CompleteConcordiaSearchResult UnitDAO::getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult) { CompleteConcordiaSearchResult UnitDAO::getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult) {
CompleteConcordiaSearchResult result(rawConcordiaResult->getBestOverlayScore()); CompleteConcordiaSearchResult result(rawConcordiaResult->getBestOverlayScore());
BOOST_FOREACH(MatchedPatternFragment fragment, rawConcordiaResult->getBestOverlay()) { BOOST_FOREACH(MatchedPatternFragment fragment, rawConcordiaResult->getBestOverlay()) {

View File

@ -8,11 +8,13 @@
#include <concordia/tokenized_sentence.hpp> #include <concordia/tokenized_sentence.hpp>
#include <concordia/substring_occurence.hpp> #include <concordia/substring_occurence.hpp>
#include <concordia/matched_pattern_fragment.hpp> #include <concordia/matched_pattern_fragment.hpp>
#include <concordia/occurences_list.hpp>
#include <concordia/concordia_search_result.hpp> #include <concordia/concordia_search_result.hpp>
#include <concordia/concordia_exception.hpp> #include <concordia/concordia_exception.hpp>
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include "simple_search_result.hpp" #include "simple_search_result.hpp"
#include "full_search_result.hpp"
#include "complete_concordia_search_result.hpp" #include "complete_concordia_search_result.hpp"
#include "db_connection.hpp" #include "db_connection.hpp"
@ -43,6 +45,8 @@ public:
SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment); SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment);
/*! Builds a FullSearchResult for the given list of occurences.
    NOTE(review): the definition accompanying this declaration returns a
    fixed result and does not read occurencesList - confirm before relying
    on the returned data.
*/
FullSearchResult getFullSearchResult(const OccurencesList & occurencesList);
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult); CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult, TokenizedSentence originalPattern); CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult, TokenizedSentence originalPattern);