Fixed UTF-8 bug in phrase search
This commit is contained in:
parent
25380a81e4
commit
7d0f794db6
@ -2,8 +2,11 @@
|
|||||||
|
|
||||||
#include <boost/foreach.hpp>
|
#include <boost/foreach.hpp>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <climits>
|
||||||
|
|
||||||
#include "json_generator.hpp"
|
#include "json_generator.hpp"
|
||||||
|
#include "logger.hpp"
|
||||||
|
|
||||||
|
|
||||||
SearcherController::SearcherController(boost::shared_ptr<boost::ptr_map<int,Concordia> >concordiasMap)
|
SearcherController::SearcherController(boost::shared_ptr<boost::ptr_map<int,Concordia> >concordiasMap)
|
||||||
throw(ConcordiaException):
|
throw(ConcordiaException):
|
||||||
@ -43,8 +46,11 @@ void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::Stri
|
|||||||
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
|
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
|
||||||
if (it != _concordiasMap->end()) {
|
if (it != _concordiasMap->end()) {
|
||||||
if (intervals.size() > 0) {
|
if (intervals.size() > 0) {
|
||||||
std::string shortPattern = pattern.substr(intervals[0].getStart(), intervals[0].getEnd() - intervals[0].getStart());
|
// std::string shortPattern = pattern.substr(intervals[0].getStart(), intervals[0].getEnd() - intervals[0].getStart());
|
||||||
|
std::string shortPattern = _substrUTF8(pattern, intervals[0].getStart(), intervals[0].getEnd() - intervals[0].getStart());
|
||||||
|
|
||||||
|
Logger::log("concordiaPhraseSearch");
|
||||||
|
Logger::logString("short pattern", shortPattern);
|
||||||
std::vector<SimpleSearchResult> shortPatternResults = _unitDAO.getSearchResults(it->second->simpleSearch(shortPattern));
|
std::vector<SimpleSearchResult> shortPatternResults = _unitDAO.getSearchResults(it->second->simpleSearch(shortPattern));
|
||||||
|
|
||||||
|
|
||||||
@ -73,7 +79,7 @@ void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::Stri
|
|||||||
currStart = interval.getEnd();
|
currStart = interval.getEnd();
|
||||||
}
|
}
|
||||||
CompleteConcordiaSearchResult lastRestResult = _unitDAO.getConcordiaResult(
|
CompleteConcordiaSearchResult lastRestResult = _unitDAO.getConcordiaResult(
|
||||||
it->second->concordiaSearch(pattern.substr(currStart)));
|
it->second->concordiaSearch(_substrUTF8(pattern,currStart,INT_MAX)));
|
||||||
lastRestResult.offsetPattern(currStart);
|
lastRestResult.offsetPattern(currStart);
|
||||||
bestOverlay.insert(bestOverlay.end(), lastRestResult.getBestOverlay().begin(), lastRestResult.getBestOverlay().end());
|
bestOverlay.insert(bestOverlay.end(), lastRestResult.getBestOverlay().begin(), lastRestResult.getBestOverlay().end());
|
||||||
|
|
||||||
@ -129,3 +135,17 @@ void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuff
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string SearcherController::_substrUTF8(std::string source, int start, int length) {
|
||||||
|
UnicodeString s(source.c_str());
|
||||||
|
|
||||||
|
UnicodeString unicodeValue;
|
||||||
|
s.extract(start, length, unicodeValue);
|
||||||
|
|
||||||
|
std::string result;
|
||||||
|
unicodeValue.toUTF8String(result);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -37,6 +37,8 @@ public:
|
|||||||
const int tmId);
|
const int tmId);
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
std::string _substrUTF8(std::string source, int start, int length);
|
||||||
|
|
||||||
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
||||||
|
|
||||||
UnitDAO _unitDAO;
|
UnitDAO _unitDAO;
|
||||||
|
Loading…
Reference in New Issue
Block a user