Make full search intelligent: search the paired lemmatized memory when the pattern is lemmatized

This commit is contained in:
rjawor 2019-09-01 20:31:03 +02:00
parent 63c44d55ea
commit 9e0684db80
2 changed files with 15 additions and 12 deletions

View File

@ -28,28 +28,20 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer>
// tmId should point to non-lemmatized memory // tmId should point to non-lemmatized memory
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId); boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
if (it != _concordiasMap->end()) { if (it != _concordiasMap->end()) {
Logger::logString("simple search for pattern", pattern);
TokenizedSentence tokenizedPattern = it->second->tokenize(pattern, false, false); TokenizedSentence tokenizedPattern = it->second->tokenize(pattern, false, false);
Logger::logInt("queried tm", tmId);
Tm tm = _tmDAO.getTm(tmId); Tm tm = _tmDAO.getTm(tmId);
Logger::logInt("paired tm", tm.getPairedTmId());
LemmatizerResult lemmatizerResult = _lemmatizerFacade->lemmatizeSentence( LemmatizerResult lemmatizerResult = _lemmatizerFacade->lemmatizeSentence(
tm.getSourceLanguageCode(), tokenizedPattern.getTokenizedSentence()); tm.getSourceLanguageCode(), tokenizedPattern.getTokenizedSentence());
std::string pattern = tokenizedPattern.getTokenizedSentence(); std::string pattern = tokenizedPattern.getTokenizedSentence();
Logger::logString("tokenized pattern", tokenizedPattern.getTokenizedSentence());
Logger::logString("lemmatized pattern", lemmatizerResult.getLemmatizedSentence());
if (lemmatizerResult.isFirstLemmatized()) { if (lemmatizerResult.isFirstLemmatized()) {
Logger::log("searching in paired memory");
// search in lemmatized memory // search in lemmatized memory
it = _concordiasMap->find(tm.getPairedTmId()); it = _concordiasMap->find(tm.getPairedTmId());
pattern = lemmatizerResult.getLemmatizedSentence(); pattern = lemmatizerResult.getLemmatizedSentence();
Logger::logString("Changing pattern to lemmatized", pattern);
if (it == _concordiasMap->end()) { if (it == _concordiasMap->end()) {
JsonGenerator::signalError(jsonWriter, "paired tm not found!"); JsonGenerator::signalError(jsonWriter, "paired tm not found!");
return; return;
} }
} }
Logger::logString("Final pattern", pattern);
SimpleSearchResult result = _unitDAO.getSimpleSearchResult( SimpleSearchResult result = _unitDAO.getSimpleSearchResult(
it->second->simpleSearch(pattern, true)); it->second->simpleSearch(pattern, true));
jsonWriter.StartObject(); jsonWriter.StartObject();
@ -68,11 +60,23 @@ void SearcherController::fullSearch(rapidjson::Writer<rapidjson::StringBuffer> &
const int tmId, const int tmId,
const int limit, const int limit,
const int offset) { const int offset) {
/* // tmId should point to non-lemmatized memory
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId); boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
if (it != _concordiasMap->end()) { if (it != _concordiasMap->end()) {
TokenizedSentence tokenizedPattern = it->second->tokenize(pattern, false, false); TokenizedSentence tokenizedPattern = it->second->tokenize(pattern, false, false);
pattern = _lemmatizerFacade->lemmatizeIfNeeded(tokenizedPattern.getTokenizedSentence(), tmId); Tm tm = _tmDAO.getTm(tmId);
LemmatizerResult lemmatizerResult = _lemmatizerFacade->lemmatizeSentence(
tm.getSourceLanguageCode(), tokenizedPattern.getTokenizedSentence());
std::string pattern = tokenizedPattern.getTokenizedSentence();
if (lemmatizerResult.isFirstLemmatized()) {
// search in lemmatized memory
it = _concordiasMap->find(tm.getPairedTmId());
pattern = lemmatizerResult.getLemmatizedSentence();
if (it == _concordiasMap->end()) {
JsonGenerator::signalError(jsonWriter, "paired tm not found!");
return;
}
}
FullSearchResult result = _unitDAO.getFullSearchResult(it->second->fullSearch(pattern, limit, offset, true), tokenizedPattern.getTokens().size()); FullSearchResult result = _unitDAO.getFullSearchResult(it->second->fullSearch(pattern, limit, offset, true), tokenizedPattern.getTokens().size());
jsonWriter.StartObject(); jsonWriter.StartObject();
jsonWriter.String("status"); jsonWriter.String("status");
@ -83,7 +87,6 @@ void SearcherController::fullSearch(rapidjson::Writer<rapidjson::StringBuffer> &
} else { } else {
JsonGenerator::signalError(jsonWriter, "no such tm!"); JsonGenerator::signalError(jsonWriter, "no such tm!");
} }
*/
} }
void SearcherController::lexiconSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, void SearcherController::lexiconSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,

View File

@ -2,6 +2,7 @@
#define SEARCHER_CONTROLLER_HDR #define SEARCHER_CONTROLLER_HDR
#include <string> #include <string>
#include <utility>
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/ptr_container/ptr_map.hpp> #include <boost/ptr_container/ptr_map.hpp>
#include <concordia/concordia.hpp> #include <concordia/concordia.hpp>
@ -48,7 +49,6 @@ public:
const std::vector<Interval> & intervals, const std::vector<Interval> & intervals,
const int tmId); const int tmId);
private: private:
std::string _substrUTF8(std::string source, int start, int length); std::string _substrUTF8(std::string source, int start, int length);
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap; boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;