fixed utf8 bug in phrase search

This commit is contained in:
rjawor 2016-02-04 13:02:53 +01:00
parent 25380a81e4
commit 7d0f794db6
2 changed files with 24 additions and 2 deletions

View File

@ -2,8 +2,11 @@
#include <boost/foreach.hpp> #include <boost/foreach.hpp>
#include <vector> #include <vector>
#include <climits>
#include "json_generator.hpp" #include "json_generator.hpp"
#include "logger.hpp"
SearcherController::SearcherController(boost::shared_ptr<boost::ptr_map<int,Concordia> >concordiasMap) SearcherController::SearcherController(boost::shared_ptr<boost::ptr_map<int,Concordia> >concordiasMap)
throw(ConcordiaException): throw(ConcordiaException):
@ -43,8 +46,11 @@ void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::Stri
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId); boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
if (it != _concordiasMap->end()) { if (it != _concordiasMap->end()) {
if (intervals.size() > 0) { if (intervals.size() > 0) {
std::string shortPattern = pattern.substr(intervals[0].getStart(), intervals[0].getEnd() - intervals[0].getStart()); // std::string shortPattern = pattern.substr(intervals[0].getStart(), intervals[0].getEnd() - intervals[0].getStart());
std::string shortPattern = _substrUTF8(pattern, intervals[0].getStart(), intervals[0].getEnd() - intervals[0].getStart());
Logger::log("concordiaPhraseSearch");
Logger::logString("short pattern", shortPattern);
std::vector<SimpleSearchResult> shortPatternResults = _unitDAO.getSearchResults(it->second->simpleSearch(shortPattern)); std::vector<SimpleSearchResult> shortPatternResults = _unitDAO.getSearchResults(it->second->simpleSearch(shortPattern));
@ -73,7 +79,7 @@ void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::Stri
currStart = interval.getEnd(); currStart = interval.getEnd();
} }
CompleteConcordiaSearchResult lastRestResult = _unitDAO.getConcordiaResult( CompleteConcordiaSearchResult lastRestResult = _unitDAO.getConcordiaResult(
it->second->concordiaSearch(pattern.substr(currStart))); it->second->concordiaSearch(_substrUTF8(pattern,currStart,INT_MAX)));
lastRestResult.offsetPattern(currStart); lastRestResult.offsetPattern(currStart);
bestOverlay.insert(bestOverlay.end(), lastRestResult.getBestOverlay().begin(), lastRestResult.getBestOverlay().end()); bestOverlay.insert(bestOverlay.end(), lastRestResult.getBestOverlay().begin(), lastRestResult.getBestOverlay().end());
@ -129,3 +135,17 @@ void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuff
} }
} }
std::string SearcherController::_substrUTF8(std::string source, int start, int length) {
UnicodeString s(source.c_str());
UnicodeString unicodeValue;
s.extract(start, length, unicodeValue);
std::string result;
unicodeValue.toUTF8String(result);
return result;
}

View File

@ -37,6 +37,8 @@ public:
const int tmId); const int tmId);
private: private:
// Returns the part of `source` selected by `start` and `length`, where both
// values are applied to the text after it has been loaded into an ICU
// UnicodeString (not to raw byte positions); the result is returned as a
// UTF-8 encoded std::string.
std::string _substrUTF8(std::string source, int start, int length);
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap; boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
UnitDAO _unitDAO; UnitDAO _unitDAO;