option of white space tokenization while searching
This commit is contained in:
parent
31e4f091ad
commit
970dda5dc2
@ -211,11 +211,12 @@ SUFFIX_MARKER_TYPE Concordia::countOccurences(const std::string & pattern)
|
|||||||
|
|
||||||
|
|
||||||
MatchedPatternFragment Concordia::simpleSearch(
|
MatchedPatternFragment Concordia::simpleSearch(
|
||||||
const std::string & pattern)
|
const std::string & pattern,
|
||||||
|
bool byWhitespace)
|
||||||
throw(ConcordiaException) {
|
throw(ConcordiaException) {
|
||||||
if (_T->size() > 0) {
|
if (_T->size() > 0) {
|
||||||
return _searcher->simpleSearch(_hashGenerator, _T,
|
return _searcher->simpleSearch(_hashGenerator, _T,
|
||||||
_markers, _SA, pattern);
|
_markers, _SA, pattern, byWhitespace);
|
||||||
} else {
|
} else {
|
||||||
MatchedPatternFragment result(0, 0);
|
MatchedPatternFragment result(0, 0);
|
||||||
return result;
|
return result;
|
||||||
@ -235,11 +236,12 @@ std::vector<AnubisSearchResult> Concordia::anubisSearch(
|
|||||||
}
|
}
|
||||||
|
|
||||||
boost::shared_ptr<ConcordiaSearchResult> Concordia::concordiaSearch(
|
boost::shared_ptr<ConcordiaSearchResult> Concordia::concordiaSearch(
|
||||||
const std::string & pattern)
|
const std::string & pattern,
|
||||||
|
bool byWhitespace)
|
||||||
throw(ConcordiaException) {
|
throw(ConcordiaException) {
|
||||||
if (_T->size() > 0) {
|
if (_T->size() > 0) {
|
||||||
return _searcher->concordiaSearch(_hashGenerator, _T,
|
return _searcher->concordiaSearch(_hashGenerator, _T,
|
||||||
_markers, _SA, pattern);
|
_markers, _SA, pattern, byWhitespace);
|
||||||
} else {
|
} else {
|
||||||
std::string empty;
|
std::string empty;
|
||||||
return boost::shared_ptr<ConcordiaSearchResult>(
|
return boost::shared_ptr<ConcordiaSearchResult>(
|
||||||
|
@ -126,10 +126,12 @@ public:
|
|||||||
/*! Performs a simple substring lookup on the index.
|
/*! Performs a simple substring lookup on the index.
|
||||||
For more info see \ref tutorial1_2.
|
For more info see \ref tutorial1_2.
|
||||||
\param pattern pattern to be searched in the index
|
\param pattern pattern to be searched in the index
|
||||||
|
\param byWhitespace whether to tokenize the pattern by white space
|
||||||
\returns matched pattern fragment containing vector of occurrences
|
\returns matched pattern fragment containing vector of occurrences
|
||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
MatchedPatternFragment simpleSearch(const std::string & pattern)
|
MatchedPatternFragment simpleSearch(const std::string & pattern,
|
||||||
|
bool byWhitespace = false)
|
||||||
throw(ConcordiaException);
|
throw(ConcordiaException);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE countOccurences(const std::string & pattern)
|
SUFFIX_MARKER_TYPE countOccurences(const std::string & pattern)
|
||||||
@ -154,7 +156,8 @@ public:
|
|||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
boost::shared_ptr<ConcordiaSearchResult> concordiaSearch(
|
boost::shared_ptr<ConcordiaSearchResult> concordiaSearch(
|
||||||
const std::string & pattern)
|
const std::string & pattern,
|
||||||
|
bool byWhitespace = false)
|
||||||
throw(ConcordiaException);
|
throw(ConcordiaException);
|
||||||
|
|
||||||
/*! Loads HDD stored index files to RAM and generates
|
/*! Loads HDD stored index files to RAM and generates
|
||||||
|
@ -18,10 +18,11 @@ MatchedPatternFragment IndexSearcher::simpleSearch(
|
|||||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
const std::string & pattern) throw(ConcordiaException) {
|
const std::string & pattern,
|
||||||
|
bool byWhitespace) throw(ConcordiaException) {
|
||||||
int left;
|
int left;
|
||||||
std::vector<INDEX_CHARACTER_TYPE> hash =
|
std::vector<INDEX_CHARACTER_TYPE> hash =
|
||||||
hashGenerator->generateHash(pattern).getCodes();
|
hashGenerator->generateHash(pattern, byWhitespace).getCodes();
|
||||||
saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE);
|
saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE);
|
||||||
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
|
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
|
||||||
|
|
||||||
@ -110,8 +111,9 @@ boost::shared_ptr<ConcordiaSearchResult> IndexSearcher::concordiaSearch(
|
|||||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
const std::string & pattern) throw(ConcordiaException) {
|
const std::string & pattern,
|
||||||
TokenizedSentence hashedPattern = hashGenerator->generateHash(pattern);
|
bool byWhitespace) throw(ConcordiaException) {
|
||||||
|
TokenizedSentence hashedPattern = hashGenerator->generateHash(pattern, byWhitespace);
|
||||||
boost::shared_ptr<ConcordiaSearchResult> result =
|
boost::shared_ptr<ConcordiaSearchResult> result =
|
||||||
boost::shared_ptr<ConcordiaSearchResult>(
|
boost::shared_ptr<ConcordiaSearchResult>(
|
||||||
new ConcordiaSearchResult(hashedPattern));
|
new ConcordiaSearchResult(hashedPattern));
|
||||||
|
@ -50,7 +50,8 @@ public:
|
|||||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
const std::string & pattern) throw(ConcordiaException);
|
const std::string & pattern,
|
||||||
|
bool byWhitespace = false) throw(ConcordiaException);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE countOccurences(
|
SUFFIX_MARKER_TYPE countOccurences(
|
||||||
boost::shared_ptr<HashGenerator> hashGenerator,
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
@ -71,6 +72,7 @@ public:
|
|||||||
\param markers markers array for the needs of searching
|
\param markers markers array for the needs of searching
|
||||||
\param SA suffix array for the needs of searching
|
\param SA suffix array for the needs of searching
|
||||||
\param pattern string pattern to be searched in the index.
|
\param pattern string pattern to be searched in the index.
|
||||||
|
\param byWhitespace whether to tokenize the pattern by white space
|
||||||
\returns vector of results
|
\returns vector of results
|
||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
@ -92,6 +94,7 @@ public:
|
|||||||
\param markers markers array for the needs of searching
|
\param markers markers array for the needs of searching
|
||||||
\param SA suffix array for the needs of searching
|
\param SA suffix array for the needs of searching
|
||||||
\param pattern pattern to be searched in the index.
|
\param pattern pattern to be searched in the index.
|
||||||
|
\param byWhitespace whether to tokenize the pattern by white space
|
||||||
\returns result of the search
|
\returns result of the search
|
||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
@ -100,7 +103,8 @@ public:
|
|||||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
const std::string & pattern) throw(ConcordiaException);
|
const std::string & pattern,
|
||||||
|
bool byWhitespace = false) throw(ConcordiaException);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
boost::shared_ptr<ConcordiaSearcher> _concordiaSearcher;
|
boost::shared_ptr<ConcordiaSearcher> _concordiaSearcher;
|
||||||
|
Loading…
Reference in New Issue
Block a user