option of white space tokenization while searching
This commit is contained in:
parent
31e4f091ad
commit
970dda5dc2
@ -211,11 +211,12 @@ SUFFIX_MARKER_TYPE Concordia::countOccurences(const std::string & pattern)
|
||||
|
||||
|
||||
MatchedPatternFragment Concordia::simpleSearch(
|
||||
const std::string & pattern)
|
||||
const std::string & pattern,
|
||||
bool byWhitespace)
|
||||
throw(ConcordiaException) {
|
||||
if (_T->size() > 0) {
|
||||
return _searcher->simpleSearch(_hashGenerator, _T,
|
||||
_markers, _SA, pattern);
|
||||
_markers, _SA, pattern, byWhitespace);
|
||||
} else {
|
||||
MatchedPatternFragment result(0, 0);
|
||||
return result;
|
||||
@ -235,11 +236,12 @@ std::vector<AnubisSearchResult> Concordia::anubisSearch(
|
||||
}
|
||||
|
||||
boost::shared_ptr<ConcordiaSearchResult> Concordia::concordiaSearch(
|
||||
const std::string & pattern)
|
||||
const std::string & pattern,
|
||||
bool byWhitespace)
|
||||
throw(ConcordiaException) {
|
||||
if (_T->size() > 0) {
|
||||
return _searcher->concordiaSearch(_hashGenerator, _T,
|
||||
_markers, _SA, pattern);
|
||||
_markers, _SA, pattern, byWhitespace);
|
||||
} else {
|
||||
std::string empty;
|
||||
return boost::shared_ptr<ConcordiaSearchResult>(
|
||||
|
@ -126,10 +126,12 @@ public:
|
||||
/*! Performs a simple substring lookup on the index.
|
||||
For more info see \ref tutorial1_2.
|
||||
\param pattern pattern to be searched in the index
|
||||
\param byWhitespace whether to tokenize the pattern by white space
|
||||
\returns matched pattern fragment containing vector of occurrences
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
MatchedPatternFragment simpleSearch(const std::string & pattern)
|
||||
MatchedPatternFragment simpleSearch(const std::string & pattern,
|
||||
bool byWhitespace = false)
|
||||
throw(ConcordiaException);
|
||||
|
||||
SUFFIX_MARKER_TYPE countOccurences(const std::string & pattern)
|
||||
@ -154,7 +156,8 @@ public:
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
boost::shared_ptr<ConcordiaSearchResult> concordiaSearch(
|
||||
const std::string & pattern)
|
||||
const std::string & pattern,
|
||||
bool byWhitespace = false)
|
||||
throw(ConcordiaException);
|
||||
|
||||
/*! Loads HDD stored index files to RAM and generates
|
||||
|
@ -18,10 +18,11 @@ MatchedPatternFragment IndexSearcher::simpleSearch(
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
const std::string & pattern) throw(ConcordiaException) {
|
||||
const std::string & pattern,
|
||||
bool byWhitespace) throw(ConcordiaException) {
|
||||
int left;
|
||||
std::vector<INDEX_CHARACTER_TYPE> hash =
|
||||
hashGenerator->generateHash(pattern).getCodes();
|
||||
hashGenerator->generateHash(pattern, byWhitespace).getCodes();
|
||||
saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE);
|
||||
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
|
||||
|
||||
@ -110,8 +111,9 @@ boost::shared_ptr<ConcordiaSearchResult> IndexSearcher::concordiaSearch(
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
const std::string & pattern) throw(ConcordiaException) {
|
||||
TokenizedSentence hashedPattern = hashGenerator->generateHash(pattern);
|
||||
const std::string & pattern,
|
||||
bool byWhitespace) throw(ConcordiaException) {
|
||||
TokenizedSentence hashedPattern = hashGenerator->generateHash(pattern, byWhitespace);
|
||||
boost::shared_ptr<ConcordiaSearchResult> result =
|
||||
boost::shared_ptr<ConcordiaSearchResult>(
|
||||
new ConcordiaSearchResult(hashedPattern));
|
||||
|
@ -50,7 +50,8 @@ public:
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
const std::string & pattern) throw(ConcordiaException);
|
||||
const std::string & pattern,
|
||||
bool byWhitespace = false) throw(ConcordiaException);
|
||||
|
||||
SUFFIX_MARKER_TYPE countOccurences(
|
||||
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||
@ -71,6 +72,7 @@ public:
|
||||
\param markers markers array for the needs of searching
|
||||
\param SA suffix array for the needs of searching
|
||||
\param pattern string pattern to be searched in the index.
|
||||
\param byWhitespace whether to tokenize the pattern by white space
|
||||
\returns vector of results
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
@ -92,6 +94,7 @@ public:
|
||||
\param markers markers array for the needs of searching
|
||||
\param SA suffix array for the needs of searching
|
||||
\param pattern pattern to be searched in the index.
|
||||
\param byWhitespace whether to tokenize the pattern by white space
|
||||
\returns result of the search
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
@ -100,7 +103,8 @@ public:
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
const std::string & pattern) throw(ConcordiaException);
|
||||
const std::string & pattern,
|
||||
bool byWhitespace = false) throw(ConcordiaException);
|
||||
|
||||
private:
|
||||
boost::shared_ptr<ConcordiaSearcher> _concordiaSearcher;
|
||||
|
Loading…
Reference in New Issue
Block a user