occurrence refactoring
This commit is contained in:
parent
73b3d22d97
commit
d39c0400c9
@ -178,14 +178,14 @@ int main(int argc, char** argv) {
|
|||||||
concordia.simpleSearch(pattern);
|
concordia.simpleSearch(pattern);
|
||||||
time_end = boost::posix_time::microsec_clock::local_time();
|
time_end = boost::posix_time::microsec_clock::local_time();
|
||||||
msdiff = time_end - time_start;
|
msdiff = time_end - time_start;
|
||||||
std::cout << "\tFound: " << result.getOccurences().size()
|
std::cout << "\tFound: " << result.getOccurrences().size()
|
||||||
<< " matches. " << "Search took: "
|
<< " matches. " << "Search took: "
|
||||||
<< msdiff.total_milliseconds() << "ms." << std::endl;
|
<< msdiff.total_milliseconds() << "ms." << std::endl;
|
||||||
if (!cli.count("silent")) {
|
if (!cli.count("silent")) {
|
||||||
BOOST_FOREACH(SubstringOccurence occurence,
|
BOOST_FOREACH(SubstringOccurrence occurrence,
|
||||||
result.getOccurences()) {
|
result.getOccurrences()) {
|
||||||
std::cout << "\t\tfound match in sentence number: "
|
std::cout << "\t\tfound match in sentence number: "
|
||||||
<< occurence.getId() << std::endl;
|
<< occurrence.getId() << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (cli.count("anubis-search")) {
|
} else if (cli.count("anubis-search")) {
|
||||||
@ -237,7 +237,7 @@ int main(int argc, char** argv) {
|
|||||||
<< "," << fragment.getEnd()
|
<< "," << fragment.getEnd()
|
||||||
<< "] (exampleCount,"
|
<< "] (exampleCount,"
|
||||||
<< " patternOffset, length): "
|
<< " patternOffset, length): "
|
||||||
<< fragment.getOccurences().size() << ","
|
<< fragment.getOccurrences().size() << ","
|
||||||
<< fragment.getPatternOffset() << ","
|
<< fragment.getPatternOffset() << ","
|
||||||
<< fragment.getMatchedLength()
|
<< fragment.getMatchedLength()
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
@ -250,7 +250,7 @@ int main(int argc, char** argv) {
|
|||||||
<< "," << fragment.getEnd()
|
<< "," << fragment.getEnd()
|
||||||
<< "] (exampleCount,"
|
<< "] (exampleCount,"
|
||||||
<< " patternOffset, length): "
|
<< " patternOffset, length): "
|
||||||
<< fragment.getOccurences().size() << ","
|
<< fragment.getOccurrences().size() << ","
|
||||||
<< fragment.getPatternOffset() << ","
|
<< fragment.getPatternOffset() << ","
|
||||||
<< fragment.getMatchedLength()
|
<< fragment.getMatchedLength()
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
|
@ -192,9 +192,9 @@ void Concordia::_initializeIndex() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE Concordia::countOccurences(const std::string & pattern) {
|
SUFFIX_MARKER_TYPE Concordia::countOccurrences(const std::string & pattern) {
|
||||||
if (_T->size() > 0) {
|
if (_T->size() > 0) {
|
||||||
return _searcher->countOccurences(_hashGenerator, _T,
|
return _searcher->countOccurrences(_hashGenerator, _T,
|
||||||
_markers, _SA, pattern);
|
_markers, _SA, pattern);
|
||||||
} else {
|
} else {
|
||||||
return 0;
|
return 0;
|
||||||
@ -215,7 +215,7 @@ MatchedPatternFragment Concordia::simpleSearch(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
OccurencesList Concordia::fullSearch(
|
OccurrencesList Concordia::fullSearch(
|
||||||
const std::string & pattern,
|
const std::string & pattern,
|
||||||
int limit,
|
int limit,
|
||||||
int offset,
|
int offset,
|
||||||
@ -225,7 +225,7 @@ OccurencesList Concordia::fullSearch(
|
|||||||
_markers, _SA, pattern, limit, offset, byWhitespace);
|
_markers, _SA, pattern, limit, offset, byWhitespace);
|
||||||
} else {
|
} else {
|
||||||
// If the index or search pattern are empty, return an empty result.
|
// If the index or search pattern are empty, return an empty result.
|
||||||
OccurencesList result(0);
|
OccurrencesList result(0);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
#include "concordia/common/config.hpp"
|
#include "concordia/common/config.hpp"
|
||||||
#include "concordia/example.hpp"
|
#include "concordia/example.hpp"
|
||||||
#include "concordia/matched_pattern_fragment.hpp"
|
#include "concordia/matched_pattern_fragment.hpp"
|
||||||
#include "concordia/occurences_list.hpp"
|
#include "concordia/occurrences_list.hpp"
|
||||||
#include "concordia/concordia_config.hpp"
|
#include "concordia/concordia_config.hpp"
|
||||||
#include "concordia/concordia_index.hpp"
|
#include "concordia/concordia_index.hpp"
|
||||||
#include "concordia/index_searcher.hpp"
|
#include "concordia/index_searcher.hpp"
|
||||||
@ -121,24 +121,24 @@ public:
|
|||||||
For more info see \ref tutorial1_2.
|
For more info see \ref tutorial1_2.
|
||||||
\param pattern pattern to be searched in the index
|
\param pattern pattern to be searched in the index
|
||||||
\param byWhitespace whether to tokenize the pattern by white space
|
\param byWhitespace whether to tokenize the pattern by white space
|
||||||
\returns matched pattern fragment containing vector of occurences
|
\returns matched pattern fragment containing vector of occurrences
|
||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
MatchedPatternFragment simpleSearch(const std::string & pattern,
|
MatchedPatternFragment simpleSearch(const std::string & pattern,
|
||||||
bool byWhitespace = false);
|
bool byWhitespace = false);
|
||||||
|
|
||||||
/*! Performs a substring lookup in RAM-based index, returning all occurences.
|
/*! Performs a substring lookup in RAM-based index, returning all occurrences.
|
||||||
The result contains no more than "limit" occurences, starting at "offset".
|
The result contains no more than "limit" occurrences, starting at "offset".
|
||||||
\param hashGenerator hash generator to be used to convert
|
\param hashGenerator hash generator to be used to convert
|
||||||
input sentence to a hash
|
input sentence to a hash
|
||||||
\param pattern string pattern to be searched in the index.
|
\param pattern string pattern to be searched in the index.
|
||||||
\param limit maximum number of occurences to return
|
\param limit maximum number of occurrences to return
|
||||||
\param offset starting occurence
|
\param offset starting occurrence
|
||||||
\param byWhitespace should the pattern by tokenized by white space
|
\param byWhitespace should the pattern by tokenized by white space
|
||||||
\returns list of occurences of the pattern in the index
|
\returns list of occurrences of the pattern in the index
|
||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
OccurencesList fullSearch(
|
OccurrencesList fullSearch(
|
||||||
const std::string & pattern,
|
const std::string & pattern,
|
||||||
int limit,
|
int limit,
|
||||||
int offset,
|
int offset,
|
||||||
@ -151,13 +151,13 @@ public:
|
|||||||
the lexicon search requires that the match is the whole example source.
|
the lexicon search requires that the match is the whole example source.
|
||||||
\param pattern pattern to be searched in the index
|
\param pattern pattern to be searched in the index
|
||||||
\param byWhitespace whether to tokenize the pattern by white space
|
\param byWhitespace whether to tokenize the pattern by white space
|
||||||
\returns matched pattern fragment containing vector of occurences
|
\returns matched pattern fragment containing vector of occurrences
|
||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
MatchedPatternFragment lexiconSearch(const std::string & pattern,
|
MatchedPatternFragment lexiconSearch(const std::string & pattern,
|
||||||
bool byWhitespace = false);
|
bool byWhitespace = false);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE countOccurences(const std::string & pattern);
|
SUFFIX_MARKER_TYPE countOccurrences(const std::string & pattern);
|
||||||
|
|
||||||
/*! \deprecated
|
/*! \deprecated
|
||||||
Finds the examples from the index, whose resemblance to the
|
Finds the examples from the index, whose resemblance to the
|
||||||
|
@ -32,15 +32,15 @@ void ConcordiaSearcher::concordiaSearch(
|
|||||||
std::vector<sauchar_t> currentPattern(
|
std::vector<sauchar_t> currentPattern(
|
||||||
patternVector.begin()+highResOffset, patternVector.end());
|
patternVector.begin()+highResOffset, patternVector.end());
|
||||||
SUFFIX_MARKER_TYPE lcpLength;
|
SUFFIX_MARKER_TYPE lcpLength;
|
||||||
std::vector<SubstringOccurence> occurences =
|
std::vector<SubstringOccurrence> occurrences =
|
||||||
lcpSearch(T, markers, SA, currentPattern, lcpLength);
|
lcpSearch(T, markers, SA, currentPattern, lcpLength);
|
||||||
|
|
||||||
if (occurences.size() > 0) {
|
if (occurrences.size() > 0) {
|
||||||
MatchedPatternFragment fragment(offset,
|
MatchedPatternFragment fragment(offset,
|
||||||
lcpLength / sizeof(INDEX_CHARACTER_TYPE));
|
lcpLength / sizeof(INDEX_CHARACTER_TYPE));
|
||||||
|
|
||||||
BOOST_FOREACH(SubstringOccurence occurence, occurences) {
|
BOOST_FOREACH(SubstringOccurrence occurrence, occurrences) {
|
||||||
fragment.addOccurence(occurence);
|
fragment.addOccurrence(occurrence);
|
||||||
}
|
}
|
||||||
result->addFragment(fragment);
|
result->addFragment(fragment);
|
||||||
}
|
}
|
||||||
@ -155,7 +155,7 @@ boost::shared_ptr<TmMatchesMap> ConcordiaSearcher::getTmMatches(
|
|||||||
return tmMatchesMap;
|
return tmMatchesMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
|
std::vector<SubstringOccurrence> ConcordiaSearcher::lcpSearch(
|
||||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
@ -185,7 +185,7 @@ std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
|
|||||||
SAleft += localLeft;
|
SAleft += localLeft;
|
||||||
} while (patternLength < pattern.size() && size > 0);
|
} while (patternLength < pattern.size() && size > 0);
|
||||||
|
|
||||||
std::vector<SubstringOccurence> result;
|
std::vector<SubstringOccurrence> result;
|
||||||
|
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
// The search managed to find exactly the longest common prefixes.
|
// The search managed to find exactly the longest common prefixes.
|
||||||
@ -208,7 +208,7 @@ std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ConcordiaSearcher::_collectResults(
|
void ConcordiaSearcher::_collectResults(
|
||||||
std::vector<SubstringOccurence> & result,
|
std::vector<SubstringOccurrence> & result,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
saidx_t left, saidx_t size) {
|
saidx_t left, saidx_t size) {
|
||||||
@ -219,7 +219,7 @@ void ConcordiaSearcher::_collectResults(
|
|||||||
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
||||||
SUFFIX_MARKER_TYPE marker =
|
SUFFIX_MARKER_TYPE marker =
|
||||||
markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
|
markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
|
||||||
result.push_back(SubstringOccurence(marker));
|
result.push_back(SubstringOccurrence(marker));
|
||||||
|
|
||||||
// truncate results,
|
// truncate results,
|
||||||
// we don't need too many identical pattern overlays
|
// we don't need too many identical pattern overlays
|
||||||
@ -237,54 +237,54 @@ void ConcordiaSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
|
|||||||
SUFFIX_MARKER_TYPE totalPatternLength,
|
SUFFIX_MARKER_TYPE totalPatternLength,
|
||||||
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
||||||
SUFFIX_MARKER_TYPE patternOffset) {
|
SUFFIX_MARKER_TYPE patternOffset) {
|
||||||
SubstringOccurence occurence;
|
SubstringOccurrence occurrence;
|
||||||
if (_getOccurenceFromSA(SA, markers, sa_pos, occurence)) {
|
if (_getOccurrenceFromSA(SA, markers, sa_pos, occurrence)) {
|
||||||
_addOccurenceToMap(tmMatchesMap,
|
_addOccurrenceToMap(tmMatchesMap,
|
||||||
occurence,
|
occurrence,
|
||||||
totalPatternLength,
|
totalPatternLength,
|
||||||
matchedFragmentLength,
|
matchedFragmentLength,
|
||||||
patternOffset);
|
patternOffset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ConcordiaSearcher::_getOccurenceFromSA(
|
bool ConcordiaSearcher::_getOccurrenceFromSA(
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
saidx_t sa_pos,
|
saidx_t sa_pos,
|
||||||
SubstringOccurence & occurence) {
|
SubstringOccurrence & occurrence) {
|
||||||
saidx_t resultPos = SA->at(sa_pos);
|
saidx_t resultPos = SA->at(sa_pos);
|
||||||
|
|
||||||
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
||||||
SUFFIX_MARKER_TYPE marker =
|
SUFFIX_MARKER_TYPE marker =
|
||||||
markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
|
markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
|
||||||
occurence.enterDataFromMarker(marker);
|
occurrence.enterDataFromMarker(marker);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConcordiaSearcher::_addOccurenceToMap(
|
void ConcordiaSearcher::_addOccurrenceToMap(
|
||||||
boost::shared_ptr<TmMatchesMap> tmMatchesMap,
|
boost::shared_ptr<TmMatchesMap> tmMatchesMap,
|
||||||
SubstringOccurence & occurence,
|
SubstringOccurrence & occurrence,
|
||||||
SUFFIX_MARKER_TYPE totalPatternLength,
|
SUFFIX_MARKER_TYPE totalPatternLength,
|
||||||
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
||||||
SUFFIX_MARKER_TYPE patternOffset) {
|
SUFFIX_MARKER_TYPE patternOffset) {
|
||||||
TmMatches * tmMatches;
|
TmMatches * tmMatches;
|
||||||
|
|
||||||
TmMatchesMapIterator mapIterator = tmMatchesMap->find(
|
TmMatchesMapIterator mapIterator = tmMatchesMap->find(
|
||||||
occurence.getId());
|
occurrence.getId());
|
||||||
if (mapIterator != tmMatchesMap->end()) {
|
if (mapIterator != tmMatchesMap->end()) {
|
||||||
tmMatches = mapIterator->second;
|
tmMatches = mapIterator->second;
|
||||||
} else {
|
} else {
|
||||||
tmMatches = new TmMatches(occurence.getId(),
|
tmMatches = new TmMatches(occurrence.getId(),
|
||||||
occurence.getExampleLength(),
|
occurrence.getExampleLength(),
|
||||||
totalPatternLength);
|
totalPatternLength);
|
||||||
SUFFIX_MARKER_TYPE key = occurence.getId();
|
SUFFIX_MARKER_TYPE key = occurrence.getId();
|
||||||
tmMatchesMap->insert(key, tmMatches);
|
tmMatchesMap->insert(key, tmMatches);
|
||||||
}
|
}
|
||||||
|
|
||||||
// add intervals to tmMatches
|
// add intervals to tmMatches
|
||||||
tmMatches->addExampleInterval(
|
tmMatches->addExampleInterval(
|
||||||
occurence.getOffset(),
|
occurrence.getOffset(),
|
||||||
occurence.getOffset() + matchedFragmentLength);
|
occurrence.getOffset() + matchedFragmentLength);
|
||||||
tmMatches->addPatternInterval(
|
tmMatches->addPatternInterval(
|
||||||
patternOffset,
|
patternOffset,
|
||||||
patternOffset + matchedFragmentLength);
|
patternOffset + matchedFragmentLength);
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
#include "concordia/common/config.hpp"
|
#include "concordia/common/config.hpp"
|
||||||
#include "concordia/common/utils.hpp"
|
#include "concordia/common/utils.hpp"
|
||||||
#include "concordia/substring_occurence.hpp"
|
#include "concordia/substring_occurrence.hpp"
|
||||||
#include "concordia/concordia_exception.hpp"
|
#include "concordia/concordia_exception.hpp"
|
||||||
#include "concordia/concordia_config.hpp"
|
#include "concordia/concordia_config.hpp"
|
||||||
#include "concordia/concordia_search_result.hpp"
|
#include "concordia/concordia_search_result.hpp"
|
||||||
@ -100,7 +100,7 @@ public:
|
|||||||
\returns list of locations of the longest fragments
|
\returns list of locations of the longest fragments
|
||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
std::vector<SubstringOccurence> lcpSearch(
|
std::vector<SubstringOccurrence> lcpSearch(
|
||||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
@ -108,7 +108,7 @@ public:
|
|||||||
SUFFIX_MARKER_TYPE & length);
|
SUFFIX_MARKER_TYPE & length);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void _collectResults(std::vector<SubstringOccurence> & result,
|
void _collectResults(std::vector<SubstringOccurrence> & result,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
saidx_t left, saidx_t size);
|
saidx_t left, saidx_t size);
|
||||||
@ -121,13 +121,13 @@ private:
|
|||||||
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
||||||
SUFFIX_MARKER_TYPE patternOffset);
|
SUFFIX_MARKER_TYPE patternOffset);
|
||||||
|
|
||||||
bool _getOccurenceFromSA(boost::shared_ptr<std::vector<saidx_t> > SA,
|
bool _getOccurrenceFromSA(boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
saidx_t sa_pos,
|
saidx_t sa_pos,
|
||||||
SubstringOccurence & occurence);
|
SubstringOccurrence & occurrence);
|
||||||
|
|
||||||
void _addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
|
void _addOccurrenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
|
||||||
SubstringOccurence & occurence,
|
SubstringOccurrence & occurrence,
|
||||||
SUFFIX_MARKER_TYPE totalPatternLength,
|
SUFFIX_MARKER_TYPE totalPatternLength,
|
||||||
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
||||||
SUFFIX_MARKER_TYPE patternOffset);
|
SUFFIX_MARKER_TYPE patternOffset);
|
||||||
|
@ -42,10 +42,10 @@ MatchedPatternFragment IndexSearcher::simpleSearch(
|
|||||||
saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE);
|
saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE);
|
||||||
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos);
|
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos);
|
||||||
|
|
||||||
SubstringOccurence occurence;
|
SubstringOccurrence occurrence;
|
||||||
occurence.enterDataFromMarker(marker);
|
occurrence.enterDataFromMarker(marker);
|
||||||
result.addOccurence(occurence);
|
result.addOccurrence(occurrence);
|
||||||
if (result.getOccurences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
|
if (result.getOccurrences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -55,7 +55,7 @@ MatchedPatternFragment IndexSearcher::simpleSearch(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
OccurencesList IndexSearcher::fullSearch(
|
OccurrencesList IndexSearcher::fullSearch(
|
||||||
boost::shared_ptr<HashGenerator> hashGenerator,
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
@ -74,7 +74,7 @@ OccurencesList IndexSearcher::fullSearch(
|
|||||||
(const sauchar_t *) patternArray, patternLength,
|
(const sauchar_t *) patternArray, patternLength,
|
||||||
SA->data(), (saidx_t) SA->size(), &left);
|
SA->data(), (saidx_t) SA->size(), &left);
|
||||||
|
|
||||||
OccurencesList result(size);
|
OccurrencesList result(size);
|
||||||
|
|
||||||
int returnedResults = limit;
|
int returnedResults = limit;
|
||||||
if ((size - offset) < limit) {
|
if ((size - offset) < limit) {
|
||||||
@ -91,9 +91,9 @@ OccurencesList IndexSearcher::fullSearch(
|
|||||||
saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE);
|
saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE);
|
||||||
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos);
|
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos);
|
||||||
|
|
||||||
SubstringOccurence occurence;
|
SubstringOccurrence occurrence;
|
||||||
occurence.enterDataFromMarker(marker);
|
occurrence.enterDataFromMarker(marker);
|
||||||
result.addOccurence(occurence);
|
result.addOccurrence(occurrence);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -148,10 +148,10 @@ MatchedPatternFragment IndexSearcher::lexiconSearch(
|
|||||||
// so we should look at the marker of the next character
|
// so we should look at the marker of the next character
|
||||||
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos + 1);
|
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos + 1);
|
||||||
|
|
||||||
SubstringOccurence occurence;
|
SubstringOccurrence occurrence;
|
||||||
occurence.enterDataFromMarker(marker);
|
occurrence.enterDataFromMarker(marker);
|
||||||
result.addOccurence(occurence);
|
result.addOccurrence(occurrence);
|
||||||
if (result.getOccurences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
|
if (result.getOccurrences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -161,7 +161,7 @@ MatchedPatternFragment IndexSearcher::lexiconSearch(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
|
SUFFIX_MARKER_TYPE IndexSearcher::countOccurrences(
|
||||||
boost::shared_ptr<HashGenerator> hashGenerator,
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
@ -182,7 +182,7 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
|
|||||||
(const sauchar_t *) patternArray, patternLength,
|
(const sauchar_t *) patternArray, patternLength,
|
||||||
SA->data(), (saidx_t) SA->size(), &left);
|
SA->data(), (saidx_t) SA->size(), &left);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE occurencesCount = 0;
|
SUFFIX_MARKER_TYPE occurrencesCount = 0;
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
saidx_t resultPos = SA->at(left + i);
|
saidx_t resultPos = SA->at(left + i);
|
||||||
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
||||||
@ -191,13 +191,13 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
|
|||||||
// obtain accidental results exceeding the boundaries
|
// obtain accidental results exceeding the boundaries
|
||||||
// of characters in hashed index. The above check
|
// of characters in hashed index. The above check
|
||||||
// removes these accidental results.
|
// removes these accidental results.
|
||||||
occurencesCount++;
|
occurrencesCount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
delete[] patternArray;
|
delete[] patternArray;
|
||||||
|
|
||||||
return occurencesCount;
|
return occurrencesCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
#include "concordia/common/config.hpp"
|
#include "concordia/common/config.hpp"
|
||||||
#include "concordia/matched_pattern_fragment.hpp"
|
#include "concordia/matched_pattern_fragment.hpp"
|
||||||
#include "concordia/occurences_list.hpp"
|
#include "concordia/occurrences_list.hpp"
|
||||||
#include "concordia/hash_generator.hpp"
|
#include "concordia/hash_generator.hpp"
|
||||||
#include "concordia/concordia_exception.hpp"
|
#include "concordia/concordia_exception.hpp"
|
||||||
#include "concordia/concordia_searcher.hpp"
|
#include "concordia/concordia_searcher.hpp"
|
||||||
@ -43,7 +43,7 @@ public:
|
|||||||
\param markers markers array for the needs of searching
|
\param markers markers array for the needs of searching
|
||||||
\param SA suffix array for the needs of searching
|
\param SA suffix array for the needs of searching
|
||||||
\param pattern string pattern to be searched in the index.
|
\param pattern string pattern to be searched in the index.
|
||||||
\returns matched pattern fragment, containing occurences of the pattern in the index
|
\returns matched pattern fragment, containing occurrences of the pattern in the index
|
||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
MatchedPatternFragment simpleSearch(
|
MatchedPatternFragment simpleSearch(
|
||||||
@ -54,21 +54,21 @@ public:
|
|||||||
const std::string & pattern,
|
const std::string & pattern,
|
||||||
bool byWhitespace = false);
|
bool byWhitespace = false);
|
||||||
|
|
||||||
/*! Performs a substring lookup in RAM-based index, returning all occurences.
|
/*! Performs a substring lookup in RAM-based index, returning all occurrences.
|
||||||
The result contains no more than "limit" occurences, starting at "offset".
|
The result contains no more than "limit" occurrences, starting at "offset".
|
||||||
\param hashGenerator hash generator to be used to convert
|
\param hashGenerator hash generator to be used to convert
|
||||||
input sentence to a hash
|
input sentence to a hash
|
||||||
\param T hashed index to search in
|
\param T hashed index to search in
|
||||||
\param markers markers array for the needs of searching
|
\param markers markers array for the needs of searching
|
||||||
\param SA suffix array for the needs of searching
|
\param SA suffix array for the needs of searching
|
||||||
\param pattern string pattern to be searched in the index.
|
\param pattern string pattern to be searched in the index.
|
||||||
\param limit maximum number of occurences to return
|
\param limit maximum number of occurrences to return
|
||||||
\param offset starting occurence
|
\param offset starting occurrence
|
||||||
\param byWhitespace should the pattern by tokenized by white space
|
\param byWhitespace should the pattern by tokenized by white space
|
||||||
\returns list of occurences of the pattern in the index
|
\returns list of occurrences of the pattern in the index
|
||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
OccurencesList fullSearch(
|
OccurrencesList fullSearch(
|
||||||
boost::shared_ptr<HashGenerator> hashGenerator,
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
@ -89,7 +89,7 @@ public:
|
|||||||
\param markers markers array for the needs of searching
|
\param markers markers array for the needs of searching
|
||||||
\param SA suffix array for the needs of searching
|
\param SA suffix array for the needs of searching
|
||||||
\param pattern string pattern to be searched in the index.
|
\param pattern string pattern to be searched in the index.
|
||||||
\returns matched pattern fragment, containing occurences of the pattern in the index
|
\returns matched pattern fragment, containing occurrences of the pattern in the index
|
||||||
\throws ConcordiaException
|
\throws ConcordiaException
|
||||||
*/
|
*/
|
||||||
MatchedPatternFragment lexiconSearch(
|
MatchedPatternFragment lexiconSearch(
|
||||||
@ -100,7 +100,7 @@ public:
|
|||||||
const std::string & pattern,
|
const std::string & pattern,
|
||||||
bool byWhitespace = false);
|
bool byWhitespace = false);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE countOccurences(
|
SUFFIX_MARKER_TYPE countOccurrences(
|
||||||
boost::shared_ptr<HashGenerator> hashGenerator,
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
|
@ -12,7 +12,7 @@ MatchedPatternFragment::MatchedPatternFragment(
|
|||||||
MatchedPatternFragment::~MatchedPatternFragment() {
|
MatchedPatternFragment::~MatchedPatternFragment() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void MatchedPatternFragment::addOccurence(
|
void MatchedPatternFragment::addOccurrence(
|
||||||
const SubstringOccurence & occurence) {
|
const SubstringOccurrence & occurrence) {
|
||||||
_occurences.push_back(occurence);
|
_occurrences.push_back(occurrence);
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#include "concordia/common/config.hpp"
|
#include "concordia/common/config.hpp"
|
||||||
#include "concordia/interval.hpp"
|
#include "concordia/interval.hpp"
|
||||||
#include "concordia/substring_occurence.hpp"
|
#include "concordia/substring_occurrence.hpp"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <boost/foreach.hpp>
|
#include <boost/foreach.hpp>
|
||||||
@ -30,17 +30,17 @@ public:
|
|||||||
*/
|
*/
|
||||||
virtual ~MatchedPatternFragment();
|
virtual ~MatchedPatternFragment();
|
||||||
|
|
||||||
/*! Getter for occurences.
|
/*! Getter for occurrences.
|
||||||
\returns occurences
|
\returns occurrences
|
||||||
*/
|
*/
|
||||||
std::vector<SubstringOccurence> getOccurences() const {
|
std::vector<SubstringOccurrence> getOccurrences() const {
|
||||||
return _occurences;
|
return _occurrences;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! Adds an occurence to the list.
|
/*! Adds an occurrence to the list.
|
||||||
\param fragment occurence to be added
|
\param fragment occurrence to be added
|
||||||
*/
|
*/
|
||||||
void addOccurence(const SubstringOccurence & occurence);
|
void addOccurrence(const SubstringOccurrence & occurrence);
|
||||||
|
|
||||||
/*! Getter for pattern offset.
|
/*! Getter for pattern offset.
|
||||||
\returns pattern offset
|
\returns pattern offset
|
||||||
@ -68,8 +68,8 @@ public:
|
|||||||
o << "fragment(patternOffset=" << fragment.getPatternOffset()
|
o << "fragment(patternOffset=" << fragment.getPatternOffset()
|
||||||
<< ", matchedLength=" << fragment.getMatchedLength() << ") {"
|
<< ", matchedLength=" << fragment.getMatchedLength() << ") {"
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
BOOST_FOREACH(SubstringOccurence occurence, fragment.getOccurences()) {
|
BOOST_FOREACH(SubstringOccurrence occurrence, fragment.getOccurrences()) {
|
||||||
o << "\t" << occurence << std::endl;
|
o << "\t" << occurrence << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
o << "}";
|
o << "}";
|
||||||
@ -78,7 +78,7 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<SubstringOccurence> _occurences;
|
std::vector<SubstringOccurrence> _occurrences;
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE _patternOffset;
|
SUFFIX_MARKER_TYPE _patternOffset;
|
||||||
|
|
||||||
|
@ -1,13 +0,0 @@
|
|||||||
#include "concordia/occurences_list.hpp"
|
|
||||||
|
|
||||||
OccurencesList::OccurencesList(const SUFFIX_MARKER_TYPE & totalCount):
|
|
||||||
_totalCount(totalCount) {
|
|
||||||
}
|
|
||||||
|
|
||||||
OccurencesList::~OccurencesList() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void OccurencesList::addOccurence(
|
|
||||||
const SubstringOccurence & occurence) {
|
|
||||||
_occurences.push_back(occurence);
|
|
||||||
}
|
|
@ -1,50 +0,0 @@
|
|||||||
#ifndef OCCURENCES_LIST_HDR
|
|
||||||
#define OCCURENCES_LIST_HDR
|
|
||||||
|
|
||||||
#include "concordia/common/config.hpp"
|
|
||||||
#include "concordia/substring_occurence.hpp"
|
|
||||||
#include <vector>
|
|
||||||
#include <iostream>
|
|
||||||
#include <boost/foreach.hpp>
|
|
||||||
|
|
||||||
/*!
|
|
||||||
Class representing the occurences list in full search. The list only
|
|
||||||
contains as many occurences as specified in the "limit" parameter for full search.
|
|
||||||
The "totalCount" field stores the total number of occurences available.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
class OccurencesList {
|
|
||||||
public:
|
|
||||||
/*! Constructor.
|
|
||||||
*/
|
|
||||||
explicit OccurencesList(const SUFFIX_MARKER_TYPE & totalCount);
|
|
||||||
|
|
||||||
/*! Destructor.
|
|
||||||
*/
|
|
||||||
virtual ~OccurencesList();
|
|
||||||
|
|
||||||
/*! Getter for occurences.
|
|
||||||
\returns occurences
|
|
||||||
*/
|
|
||||||
std::vector<SubstringOccurence> getOccurences() const {
|
|
||||||
return _occurences;
|
|
||||||
}
|
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE getTotalCount() const {
|
|
||||||
return _totalCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*! Adds an occurence to the list.
|
|
||||||
\param fragment occurence to be added
|
|
||||||
*/
|
|
||||||
void addOccurence(const SubstringOccurence & occurence);
|
|
||||||
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::vector<SubstringOccurence> _occurences;
|
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE _totalCount;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
13
concordia/occurrences_list.cpp
Normal file
13
concordia/occurrences_list.cpp
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#include "concordia/occurrences_list.hpp"
|
||||||
|
|
||||||
|
OccurrencesList::OccurrencesList(const SUFFIX_MARKER_TYPE & totalCount):
|
||||||
|
_totalCount(totalCount) {
|
||||||
|
}
|
||||||
|
|
||||||
|
OccurrencesList::~OccurrencesList() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void OccurrencesList::addOccurrence(
|
||||||
|
const SubstringOccurrence & occurrence) {
|
||||||
|
_occurrences.push_back(occurrence);
|
||||||
|
}
|
50
concordia/occurrences_list.hpp
Normal file
50
concordia/occurrences_list.hpp
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#ifndef OCCURRENCES_LIST_HDR
|
||||||
|
#define OCCURRENCES_LIST_HDR
|
||||||
|
|
||||||
|
#include "concordia/common/config.hpp"
|
||||||
|
#include "concordia/substring_occurrence.hpp"
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
#include <boost/foreach.hpp>
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Class representing the occurrences list in full search. The list only
|
||||||
|
contains as many occurrences as specified in the "limit" parameter for full search.
|
||||||
|
The "totalCount" field stores the total number of occurrences available.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
class OccurrencesList {
|
||||||
|
public:
|
||||||
|
/*! Constructor.
|
||||||
|
*/
|
||||||
|
explicit OccurrencesList(const SUFFIX_MARKER_TYPE & totalCount);
|
||||||
|
|
||||||
|
/*! Destructor.
|
||||||
|
*/
|
||||||
|
virtual ~OccurrencesList();
|
||||||
|
|
||||||
|
/*! Getter for occurrences.
|
||||||
|
\returns occurrences
|
||||||
|
*/
|
||||||
|
std::vector<SubstringOccurrence> getOccurrences() const {
|
||||||
|
return _occurrences;
|
||||||
|
}
|
||||||
|
|
||||||
|
SUFFIX_MARKER_TYPE getTotalCount() const {
|
||||||
|
return _totalCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! Adds an occurrence to the list.
|
||||||
|
\param fragment occurrence to be added
|
||||||
|
*/
|
||||||
|
void addOccurrence(const SubstringOccurrence & occurrence);
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<SubstringOccurrence> _occurrences;
|
||||||
|
|
||||||
|
SUFFIX_MARKER_TYPE _totalCount;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -1,16 +1,16 @@
|
|||||||
#include "concordia/substring_occurence.hpp"
|
#include "concordia/substring_occurrence.hpp"
|
||||||
#include "concordia/common/utils.hpp"
|
#include "concordia/common/utils.hpp"
|
||||||
|
|
||||||
SubstringOccurence::SubstringOccurence() {
|
SubstringOccurrence::SubstringOccurrence() {
|
||||||
}
|
}
|
||||||
|
|
||||||
SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) {
|
SubstringOccurrence::SubstringOccurrence(const SUFFIX_MARKER_TYPE & marker) {
|
||||||
_id = Utils::getIdFromMarker(marker);
|
_id = Utils::getIdFromMarker(marker);
|
||||||
_offset = Utils::getOffsetFromMarker(marker);
|
_offset = Utils::getOffsetFromMarker(marker);
|
||||||
_exampleLength = Utils::getLengthFromMarker(marker);
|
_exampleLength = Utils::getLengthFromMarker(marker);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SubstringOccurence::enterDataFromMarker(
|
void SubstringOccurrence::enterDataFromMarker(
|
||||||
const SUFFIX_MARKER_TYPE & marker) {
|
const SUFFIX_MARKER_TYPE & marker) {
|
||||||
_id = Utils::getIdFromMarker(marker);
|
_id = Utils::getIdFromMarker(marker);
|
||||||
_offset = Utils::getOffsetFromMarker(marker);
|
_offset = Utils::getOffsetFromMarker(marker);
|
||||||
@ -18,7 +18,7 @@ void SubstringOccurence::enterDataFromMarker(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
SubstringOccurence::SubstringOccurence(
|
SubstringOccurrence::SubstringOccurrence(
|
||||||
const SUFFIX_MARKER_TYPE & id,
|
const SUFFIX_MARKER_TYPE & id,
|
||||||
const SUFFIX_MARKER_TYPE & offset,
|
const SUFFIX_MARKER_TYPE & offset,
|
||||||
const SUFFIX_MARKER_TYPE & exampleLength):
|
const SUFFIX_MARKER_TYPE & exampleLength):
|
||||||
@ -27,6 +27,6 @@ SubstringOccurence::SubstringOccurence(
|
|||||||
_exampleLength(exampleLength) {
|
_exampleLength(exampleLength) {
|
||||||
}
|
}
|
||||||
|
|
||||||
SubstringOccurence::~SubstringOccurence() {
|
SubstringOccurrence::~SubstringOccurrence() {
|
||||||
}
|
}
|
||||||
|
|
@ -1,31 +1,31 @@
|
|||||||
#ifndef SUBSTRING_OCCURENCE_HDR
|
#ifndef SUBSTRING_OCCURRENCE_HDR
|
||||||
#define SUBSTRING_OCCURENCE_HDR
|
#define SUBSTRING_OCCURRENCE_HDR
|
||||||
|
|
||||||
#include "concordia/common/config.hpp"
|
#include "concordia/common/config.hpp"
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Class representing occurence of a searched substring.
|
Class representing occurrence of a searched substring.
|
||||||
It holds the following information:
|
It holds the following information:
|
||||||
- id of the example where the substring was found
|
- id of the example where the substring was found
|
||||||
- offset of the matched substring in this example
|
- offset of the matched substring in this example
|
||||||
- length of the example
|
- length of the example
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class SubstringOccurence {
|
class SubstringOccurrence {
|
||||||
public:
|
public:
|
||||||
/*!
|
/*!
|
||||||
Constructor.
|
Constructor.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
SubstringOccurence();
|
SubstringOccurrence();
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Constructor taking data from a marker.
|
Constructor taking data from a marker.
|
||||||
\param marker
|
\param marker
|
||||||
*/
|
*/
|
||||||
explicit SubstringOccurence(const SUFFIX_MARKER_TYPE & marker);
|
explicit SubstringOccurrence(const SUFFIX_MARKER_TYPE & marker);
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Constructor with three arguments.
|
Constructor with three arguments.
|
||||||
@ -33,12 +33,12 @@ public:
|
|||||||
\param offset offset of the substring in the example
|
\param offset offset of the substring in the example
|
||||||
\param exampleLength length of the example
|
\param exampleLength length of the example
|
||||||
*/
|
*/
|
||||||
SubstringOccurence(const SUFFIX_MARKER_TYPE & id,
|
SubstringOccurrence(const SUFFIX_MARKER_TYPE & id,
|
||||||
const SUFFIX_MARKER_TYPE & offset,
|
const SUFFIX_MARKER_TYPE & offset,
|
||||||
const SUFFIX_MARKER_TYPE & exampleLength);
|
const SUFFIX_MARKER_TYPE & exampleLength);
|
||||||
/*! Destructor.
|
/*! Destructor.
|
||||||
*/
|
*/
|
||||||
virtual ~SubstringOccurence();
|
virtual ~SubstringOccurrence();
|
||||||
|
|
||||||
/*! Getter for example id.
|
/*! Getter for example id.
|
||||||
\returns example id
|
\returns example id
|
||||||
@ -67,9 +67,9 @@ public:
|
|||||||
void enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker);
|
void enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker);
|
||||||
|
|
||||||
friend std::ostream & operator << (std::ostream & o,
|
friend std::ostream & operator << (std::ostream & o,
|
||||||
const SubstringOccurence & occurence) {
|
const SubstringOccurrence & occurrence) {
|
||||||
return o << "occurence(exampleId=" << occurence.getId()
|
return o << "occurrence(exampleId=" << occurrence.getId()
|
||||||
<< ", offset=" << occurence.getOffset() << ")";
|
<< ", offset=" << occurrence.getOffset() << ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -73,14 +73,14 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
|
|||||||
|
|
||||||
concordia.clearIndex();
|
concordia.clearIndex();
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 2);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 123);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 123);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 1);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 1);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 51);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 51);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getOffset(), 1);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getOffset(), 1);
|
||||||
|
|
||||||
// Checking pattern spanning over 2 segments
|
// Checking pattern spanning over 2 segments
|
||||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 0);
|
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
||||||
@ -137,19 +137,19 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
|||||||
|
|
||||||
concordia2.clearIndex();
|
concordia2.clearIndex();
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 3);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 3);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 312);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 312);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 0);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 0);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 45);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 45);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getOffset(), 1);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getOffset(), 1);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(2).getId(), 29);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(2).getId(), 29);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(2).getOffset(), 0);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(2).getOffset(), 0);
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 2);
|
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 2);
|
||||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 202);
|
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getId(), 202);
|
||||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getOffset(), 1);
|
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getOffset(), 1);
|
||||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 312);
|
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getId(), 312);
|
||||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getOffset(), 1);
|
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getOffset(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
|
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
|
||||||
@ -167,9 +167,9 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
|
|||||||
|
|
||||||
concordia2.clearIndex();
|
concordia2.clearIndex();
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 1);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 1);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 312);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 312);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 2);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
|
BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
|
||||||
@ -185,35 +185,35 @@ BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
|
|||||||
|
|
||||||
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
|
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
|
||||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||||
OccurencesList searchResult0 = concordia2.fullSearch("okno", 10, 0);
|
OccurrencesList searchResult0 = concordia2.fullSearch("okno", 10, 0);
|
||||||
/*
|
/*
|
||||||
search0
|
search0
|
||||||
occurence(exampleId=4, offset=1)
|
occurrence(exampleId=4, offset=1)
|
||||||
occurence(exampleId=3, offset=2)
|
occurrence(exampleId=3, offset=2)
|
||||||
occurence(exampleId=2, offset=2)
|
occurrence(exampleId=2, offset=2)
|
||||||
occurence(exampleId=4, offset=3)
|
occurrence(exampleId=4, offset=3)
|
||||||
occurence(exampleId=1, offset=2)
|
occurrence(exampleId=1, offset=2)
|
||||||
*/
|
*/
|
||||||
OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
|
OccurrencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
|
||||||
OccurencesList searchResult2 = concordia2.fullSearch("okno", 10, 3);
|
OccurrencesList searchResult2 = concordia2.fullSearch("okno", 10, 3);
|
||||||
OccurencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3);
|
OccurrencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3);
|
||||||
OccurencesList searchResult4 = concordia2.fullSearch("okno", 10, 6);
|
OccurrencesList searchResult4 = concordia2.fullSearch("okno", 10, 6);
|
||||||
|
|
||||||
concordia2.clearIndex();
|
concordia2.clearIndex();
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 5);
|
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 5);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 2);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 3);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 3);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 2);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 2);
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult2.getTotalCount(), 5);
|
BOOST_CHECK_EQUAL(searchResult2.getTotalCount(), 5);
|
||||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 4);
|
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getId(), 4);
|
||||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 1);
|
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getId(), 1);
|
||||||
|
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult3.getTotalCount(), 0);
|
BOOST_CHECK_EQUAL(searchResult3.getTotalCount(), 0);
|
||||||
BOOST_CHECK_EQUAL(searchResult4.getTotalCount(), 5);
|
BOOST_CHECK_EQUAL(searchResult4.getTotalCount(), 5);
|
||||||
BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 0);
|
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().size(), 0);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -267,16 +267,16 @@ BOOST_AUTO_TEST_CASE( ConcordiaLexiconSearch1 )
|
|||||||
concordia.clearIndex();
|
concordia.clearIndex();
|
||||||
|
|
||||||
// first two patterns do not cover the whole example source
|
// first two patterns do not cover the whole example source
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 0);
|
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 0);
|
||||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 0);
|
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 0);
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult3.getOccurences().size(), 1);
|
BOOST_CHECK_EQUAL(searchResult3.getOccurrences().size(), 1);
|
||||||
BOOST_CHECK_EQUAL(searchResult3.getOccurences().at(0).getId(), 123);
|
BOOST_CHECK_EQUAL(searchResult3.getOccurrences().at(0).getId(), 123);
|
||||||
BOOST_CHECK_EQUAL(searchResult3.getOccurences().at(0).getOffset(), 0);
|
BOOST_CHECK_EQUAL(searchResult3.getOccurrences().at(0).getOffset(), 0);
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 1);
|
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().size(), 1);
|
||||||
BOOST_CHECK_EQUAL(searchResult4.getOccurences().at(0).getId(), 14);
|
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().at(0).getId(), 14);
|
||||||
BOOST_CHECK_EQUAL(searchResult4.getOccurences().at(0).getOffset(), 0);
|
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().at(0).getOffset(), 0);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -351,43 +351,43 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
adding fragment: offset=0, length=2
|
adding fragment: offset=0, length=2
|
||||||
adding occurence: example id=167, offset=2
|
adding occurrence: example id=167, offset=2
|
||||||
adding occurence: example id=45, offset=3
|
adding occurrence: example id=45, offset=3
|
||||||
adding occurence: example id=51, offset=1
|
adding occurrence: example id=51, offset=1
|
||||||
adding occurence: example id=123, offset=1
|
adding occurrence: example id=123, offset=1
|
||||||
adding fragment: offset=1, length=1
|
adding fragment: offset=1, length=1
|
||||||
adding occurence: example id=167, offset=3
|
adding occurrence: example id=167, offset=3
|
||||||
adding occurence: example id=45, offset=4
|
adding occurrence: example id=45, offset=4
|
||||||
adding occurence: example id=51, offset=2
|
adding occurrence: example id=51, offset=2
|
||||||
adding occurence: example id=123, offset=2
|
adding occurrence: example id=123, offset=2
|
||||||
adding fragment: offset=2, length=1
|
adding fragment: offset=2, length=1
|
||||||
adding occurence: example id=167, offset=1
|
adding occurrence: example id=167, offset=1
|
||||||
*/
|
*/
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().size(), 3);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().size(), 3);
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getPatternOffset(), 0);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getPatternOffset(), 0);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getMatchedLength(), 2);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getMatchedLength(), 2);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getId(), 167);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getId(), 167);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getOffset(), 2);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getOffset(), 2);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(1).getId(), 45);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(1).getId(), 45);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(1).getOffset(), 3);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(1).getOffset(), 3);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(2).getId(), 51);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(2).getId(), 51);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(2).getOffset(), 1);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(2).getOffset(), 1);
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getPatternOffset(), 1);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getPatternOffset(), 1);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getMatchedLength(), 1);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getMatchedLength(), 1);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(0).getId(), 167);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(0).getId(), 167);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(0).getOffset(), 3);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(0).getOffset(), 3);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(1).getId(), 45);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(1).getId(), 45);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(1).getOffset(), 4);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(1).getOffset(), 4);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(2).getId(), 51);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(2).getId(), 51);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(2).getOffset(), 2);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(2).getOffset(), 2);
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getPatternOffset(), 2);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getPatternOffset(), 2);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getMatchedLength(), 1);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getMatchedLength(), 1);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurences().at(0).getId(), 167);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurrences().at(0).getId(), 167);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurences().at(0).getOffset(), 1);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurrences().at(0).getOffset(), 1);
|
||||||
|
|
||||||
concordia.clearIndex();
|
concordia.clearIndex();
|
||||||
}
|
}
|
||||||
@ -443,43 +443,43 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
|
|||||||
|
|
||||||
Best overlay {
|
Best overlay {
|
||||||
fragment(patternOffset=1, matchedLength=4) {
|
fragment(patternOffset=1, matchedLength=4) {
|
||||||
occurence(exampleId=321, offset=0)
|
occurrence(exampleId=321, offset=0)
|
||||||
}
|
}
|
||||||
fragment(patternOffset=5, matchedLength=4) {
|
fragment(patternOffset=5, matchedLength=4) {
|
||||||
occurence(exampleId=14, offset=7)
|
occurrence(exampleId=14, offset=7)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
All fragments {
|
All fragments {
|
||||||
fragment(patternOffset=4, matchedLength=5) {
|
fragment(patternOffset=4, matchedLength=5) {
|
||||||
occurence(exampleId=14, offset=6)
|
occurrence(exampleId=14, offset=6)
|
||||||
}
|
}
|
||||||
fragment(patternOffset=1, matchedLength=4) {
|
fragment(patternOffset=1, matchedLength=4) {
|
||||||
occurence(exampleId=321, offset=0)
|
occurrence(exampleId=321, offset=0)
|
||||||
}
|
}
|
||||||
fragment(patternOffset=5, matchedLength=4) {
|
fragment(patternOffset=5, matchedLength=4) {
|
||||||
occurence(exampleId=14, offset=7)
|
occurrence(exampleId=14, offset=7)
|
||||||
}
|
}
|
||||||
fragment(patternOffset=2, matchedLength=3) {
|
fragment(patternOffset=2, matchedLength=3) {
|
||||||
occurence(exampleId=321, offset=1)
|
occurrence(exampleId=321, offset=1)
|
||||||
}
|
}
|
||||||
fragment(patternOffset=6, matchedLength=3) {
|
fragment(patternOffset=6, matchedLength=3) {
|
||||||
occurence(exampleId=14, offset=8)
|
occurrence(exampleId=14, offset=8)
|
||||||
}
|
}
|
||||||
fragment(patternOffset=3, matchedLength=2) {
|
fragment(patternOffset=3, matchedLength=2) {
|
||||||
occurence(exampleId=321, offset=2)
|
occurrence(exampleId=321, offset=2)
|
||||||
}
|
}
|
||||||
fragment(patternOffset=7, matchedLength=2) {
|
fragment(patternOffset=7, matchedLength=2) {
|
||||||
occurence(exampleId=14, offset=9)
|
occurrence(exampleId=14, offset=9)
|
||||||
}
|
}
|
||||||
fragment(patternOffset=8, matchedLength=1) {
|
fragment(patternOffset=8, matchedLength=1) {
|
||||||
occurence(exampleId=14, offset=10)
|
occurrence(exampleId=14, offset=10)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getId(), 14);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getId(), 14);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getOffset(), 6);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getOffset(), 6);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getPatternOffset(), 4);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getPatternOffset(), 4);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getMatchedLength(), 5);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getMatchedLength(), 5);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getStart(), 4);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getStart(), 4);
|
||||||
@ -522,7 +522,7 @@ BOOST_AUTO_TEST_CASE( Tokenize )
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences )
|
BOOST_AUTO_TEST_CASE( ConcordiaCountOccurrences )
|
||||||
{
|
{
|
||||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||||
@ -556,12 +556,12 @@ BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences )
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada"), 0);
|
BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada"), 0);
|
||||||
BOOST_CHECK_EQUAL(concordia.countOccurences("Marysia posiada"), 0);
|
BOOST_CHECK_EQUAL(concordia.countOccurrences("Marysia posiada"), 0);
|
||||||
BOOST_CHECK_EQUAL(concordia.countOccurences("Marysia posiada rysia"), 1);
|
BOOST_CHECK_EQUAL(concordia.countOccurrences("Marysia posiada rysia"), 1);
|
||||||
BOOST_CHECK_EQUAL(concordia.countOccurences("kota Ala posiada"), 0);
|
BOOST_CHECK_EQUAL(concordia.countOccurrences("kota Ala posiada"), 0);
|
||||||
BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada kota"), 2);
|
BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada kota"), 2);
|
||||||
BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada kota i psa"), 1);
|
BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada kota i psa"), 1);
|
||||||
|
|
||||||
concordia.clearIndex();
|
concordia.clearIndex();
|
||||||
|
|
||||||
|
@ -135,13 +135,13 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
|||||||
SA->push_back(11);
|
SA->push_back(11);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE highResLength;
|
SUFFIX_MARKER_TYPE highResLength;
|
||||||
std::vector<SubstringOccurence> result = searcher.lcpSearch(T, markers, SA, pattern, highResLength);
|
std::vector<SubstringOccurrence> result = searcher.lcpSearch(T, markers, SA, pattern, highResLength);
|
||||||
SUFFIX_MARKER_TYPE length = highResLength / sizeof(INDEX_CHARACTER_TYPE);
|
SUFFIX_MARKER_TYPE length = highResLength / sizeof(INDEX_CHARACTER_TYPE);
|
||||||
|
|
||||||
/* Expecting to get the following results from SA:
|
/* Expecting to get the following results from SA:
|
||||||
3: ana
|
3: ana
|
||||||
1: anana
|
1: anana
|
||||||
Which are 2 substring occurences (34,3) and (34,1) with the lcp length = 2;
|
Which are 2 substring occurrences (34,3) and (34,1) with the lcp length = 2;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(result.size(),2);
|
BOOST_CHECK_EQUAL(result.size(),2);
|
||||||
@ -185,12 +185,12 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
|||||||
pattern2.push_back(2);
|
pattern2.push_back(2);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE highResLength2;
|
SUFFIX_MARKER_TYPE highResLength2;
|
||||||
std::vector<SubstringOccurence> result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2);
|
std::vector<SubstringOccurrence> result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2);
|
||||||
SUFFIX_MARKER_TYPE length2 = highResLength2 / sizeof(INDEX_CHARACTER_TYPE);
|
SUFFIX_MARKER_TYPE length2 = highResLength2 / sizeof(INDEX_CHARACTER_TYPE);
|
||||||
|
|
||||||
/* Expecting to get one result from SA:
|
/* Expecting to get one result from SA:
|
||||||
0: banana
|
0: banana
|
||||||
Which is one substring occurence (34,0) with the lcp length = 6;
|
Which is one substring occurrence (34,0) with the lcp length = 6;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
@ -228,12 +228,12 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
|||||||
pattern3.push_back(3);
|
pattern3.push_back(3);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE highResLength3;
|
SUFFIX_MARKER_TYPE highResLength3;
|
||||||
std::vector<SubstringOccurence> result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3);
|
std::vector<SubstringOccurrence> result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3);
|
||||||
SUFFIX_MARKER_TYPE length3 = highResLength3 / sizeof(INDEX_CHARACTER_TYPE);
|
SUFFIX_MARKER_TYPE length3 = highResLength3 / sizeof(INDEX_CHARACTER_TYPE);
|
||||||
|
|
||||||
/* Expecting to get one result from SA:
|
/* Expecting to get one result from SA:
|
||||||
0: banana
|
0: banana
|
||||||
Which is one substring occurence (34,0) with the lcp length = 5;
|
Which is one substring occurrence (34,0) with the lcp length = 5;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(result3.size(),1);
|
BOOST_CHECK_EQUAL(result3.size(),1);
|
||||||
@ -265,13 +265,13 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
|||||||
pattern4.push_back(4);
|
pattern4.push_back(4);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE highResLength4;
|
SUFFIX_MARKER_TYPE highResLength4;
|
||||||
std::vector<SubstringOccurence> result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4);
|
std::vector<SubstringOccurrence> result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4);
|
||||||
SUFFIX_MARKER_TYPE length4 = highResLength4 / sizeof(INDEX_CHARACTER_TYPE);
|
SUFFIX_MARKER_TYPE length4 = highResLength4 / sizeof(INDEX_CHARACTER_TYPE);
|
||||||
|
|
||||||
/* Expecting to get 2 results from SA:
|
/* Expecting to get 2 results from SA:
|
||||||
4: na
|
4: na
|
||||||
2: nana
|
2: nana
|
||||||
Which are 2 substring occurences (34,4) and (34,2) with the lcp length = 2;
|
Which are 2 substring occurrences (34,4) and (34,2) with the lcp length = 2;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(result4.size(),2);
|
BOOST_CHECK_EQUAL(result4.size(),2);
|
||||||
@ -296,7 +296,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
|||||||
pattern5.push_back(4);
|
pattern5.push_back(4);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE highResLength5;
|
SUFFIX_MARKER_TYPE highResLength5;
|
||||||
std::vector<SubstringOccurence> result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5);
|
std::vector<SubstringOccurrence> result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5);
|
||||||
SUFFIX_MARKER_TYPE length5 = highResLength5 / sizeof(INDEX_CHARACTER_TYPE);
|
SUFFIX_MARKER_TYPE length5 = highResLength5 / sizeof(INDEX_CHARACTER_TYPE);
|
||||||
|
|
||||||
/* Expecting to get 0 results from SA, lcp length = 0;
|
/* Expecting to get 0 results from SA, lcp length = 0;
|
||||||
@ -320,7 +320,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
|||||||
pattern6.push_back(0);
|
pattern6.push_back(0);
|
||||||
|
|
||||||
SUFFIX_MARKER_TYPE highResLength6;
|
SUFFIX_MARKER_TYPE highResLength6;
|
||||||
std::vector<SubstringOccurence> result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6);
|
std::vector<SubstringOccurrence> result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6);
|
||||||
SUFFIX_MARKER_TYPE length6 = highResLength6 / sizeof(INDEX_CHARACTER_TYPE);
|
SUFFIX_MARKER_TYPE length6 = highResLength6 / sizeof(INDEX_CHARACTER_TYPE);
|
||||||
|
|
||||||
/* Expecting to get 0 results from SA, lcp length = 0;
|
/* Expecting to get 0 results from SA, lcp length = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user