occurrence refactoring

This commit is contained in:
rjawor 2019-01-22 14:07:28 +01:00
parent 73b3d22d97
commit d39c0400c9
17 changed files with 268 additions and 268 deletions

View File

@ -178,14 +178,14 @@ int main(int argc, char** argv) {
concordia.simpleSearch(pattern);
time_end = boost::posix_time::microsec_clock::local_time();
msdiff = time_end - time_start;
std::cout << "\tFound: " << result.getOccurences().size()
std::cout << "\tFound: " << result.getOccurrences().size()
<< " matches. " << "Search took: "
<< msdiff.total_milliseconds() << "ms." << std::endl;
if (!cli.count("silent")) {
BOOST_FOREACH(SubstringOccurence occurence,
result.getOccurences()) {
BOOST_FOREACH(SubstringOccurrence occurrence,
result.getOccurrences()) {
std::cout << "\t\tfound match in sentence number: "
<< occurence.getId() << std::endl;
<< occurrence.getId() << std::endl;
}
}
} else if (cli.count("anubis-search")) {
@ -237,7 +237,7 @@ int main(int argc, char** argv) {
<< "," << fragment.getEnd()
<< "] (exampleCount,"
<< " patternOffset, length): "
<< fragment.getOccurences().size() << ","
<< fragment.getOccurrences().size() << ","
<< fragment.getPatternOffset() << ","
<< fragment.getMatchedLength()
<< std::endl;
@ -250,7 +250,7 @@ int main(int argc, char** argv) {
<< "," << fragment.getEnd()
<< "] (exampleCount,"
<< " patternOffset, length): "
<< fragment.getOccurences().size() << ","
<< fragment.getOccurrences().size() << ","
<< fragment.getPatternOffset() << ","
<< fragment.getMatchedLength()
<< std::endl;

View File

@ -192,9 +192,9 @@ void Concordia::_initializeIndex() {
}
}
SUFFIX_MARKER_TYPE Concordia::countOccurences(const std::string & pattern) {
SUFFIX_MARKER_TYPE Concordia::countOccurrences(const std::string & pattern) {
if (_T->size() > 0) {
return _searcher->countOccurences(_hashGenerator, _T,
return _searcher->countOccurrences(_hashGenerator, _T,
_markers, _SA, pattern);
} else {
return 0;
@ -215,7 +215,7 @@ MatchedPatternFragment Concordia::simpleSearch(
}
}
OccurencesList Concordia::fullSearch(
OccurrencesList Concordia::fullSearch(
const std::string & pattern,
int limit,
int offset,
@ -225,7 +225,7 @@ OccurencesList Concordia::fullSearch(
_markers, _SA, pattern, limit, offset, byWhitespace);
} else {
// If the index or search pattern are empty, return an empty result.
OccurencesList result(0);
OccurrencesList result(0);
return result;
}
}

View File

@ -9,7 +9,7 @@
#include "concordia/common/config.hpp"
#include "concordia/example.hpp"
#include "concordia/matched_pattern_fragment.hpp"
#include "concordia/occurences_list.hpp"
#include "concordia/occurrences_list.hpp"
#include "concordia/concordia_config.hpp"
#include "concordia/concordia_index.hpp"
#include "concordia/index_searcher.hpp"
@ -121,24 +121,24 @@ public:
For more info see \ref tutorial1_2.
\param pattern pattern to be searched in the index
\param byWhitespace whether to tokenize the pattern by white space
\returns matched pattern fragment containing vector of occurences
\returns matched pattern fragment containing vector of occurrences
\throws ConcordiaException
*/
MatchedPatternFragment simpleSearch(const std::string & pattern,
bool byWhitespace = false);
/*! Performs a substring lookup in RAM-based index, returning all occurences.
The result contains no more than "limit" occurences, starting at "offset".
/*! Performs a substring lookup in RAM-based index, returning all occurrences.
The result contains no more than "limit" occurrences, starting at "offset".
\param hashGenerator hash generator to be used to convert
input sentence to a hash
\param pattern string pattern to be searched in the index.
\param limit maximum number of occurences to return
\param offset starting occurence
\param limit maximum number of occurrences to return
\param offset starting occurrence
\param byWhitespace whether the pattern should be tokenized by white space
\returns list of occurences of the pattern in the index
\returns list of occurrences of the pattern in the index
\throws ConcordiaException
*/
OccurencesList fullSearch(
OccurrencesList fullSearch(
const std::string & pattern,
int limit,
int offset,
@ -151,13 +151,13 @@ public:
the lexicon search requires that the match is the whole example source.
\param pattern pattern to be searched in the index
\param byWhitespace whether to tokenize the pattern by white space
\returns matched pattern fragment containing vector of occurences
\returns matched pattern fragment containing vector of occurrences
\throws ConcordiaException
*/
MatchedPatternFragment lexiconSearch(const std::string & pattern,
bool byWhitespace = false);
SUFFIX_MARKER_TYPE countOccurences(const std::string & pattern);
SUFFIX_MARKER_TYPE countOccurrences(const std::string & pattern);
/*! \deprecated
Finds the examples from the index, whose resemblance to the

View File

@ -32,15 +32,15 @@ void ConcordiaSearcher::concordiaSearch(
std::vector<sauchar_t> currentPattern(
patternVector.begin()+highResOffset, patternVector.end());
SUFFIX_MARKER_TYPE lcpLength;
std::vector<SubstringOccurence> occurences =
std::vector<SubstringOccurrence> occurrences =
lcpSearch(T, markers, SA, currentPattern, lcpLength);
if (occurences.size() > 0) {
if (occurrences.size() > 0) {
MatchedPatternFragment fragment(offset,
lcpLength / sizeof(INDEX_CHARACTER_TYPE));
BOOST_FOREACH(SubstringOccurence occurence, occurences) {
fragment.addOccurence(occurence);
BOOST_FOREACH(SubstringOccurrence occurrence, occurrences) {
fragment.addOccurrence(occurrence);
}
result->addFragment(fragment);
}
@ -155,7 +155,7 @@ boost::shared_ptr<TmMatchesMap> ConcordiaSearcher::getTmMatches(
return tmMatchesMap;
}
std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
std::vector<SubstringOccurrence> ConcordiaSearcher::lcpSearch(
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
@ -185,7 +185,7 @@ std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
SAleft += localLeft;
} while (patternLength < pattern.size() && size > 0);
std::vector<SubstringOccurence> result;
std::vector<SubstringOccurrence> result;
if (size == 0) {
// The search managed to find exactly the longest common prefixes.
@ -208,7 +208,7 @@ std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
}
void ConcordiaSearcher::_collectResults(
std::vector<SubstringOccurence> & result,
std::vector<SubstringOccurrence> & result,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
saidx_t left, saidx_t size) {
@ -219,7 +219,7 @@ void ConcordiaSearcher::_collectResults(
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
SUFFIX_MARKER_TYPE marker =
markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
result.push_back(SubstringOccurence(marker));
result.push_back(SubstringOccurrence(marker));
// truncate results,
// we don't need too many identical pattern overlays
@ -237,54 +237,54 @@ void ConcordiaSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
SUFFIX_MARKER_TYPE totalPatternLength,
SUFFIX_MARKER_TYPE matchedFragmentLength,
SUFFIX_MARKER_TYPE patternOffset) {
SubstringOccurence occurence;
if (_getOccurenceFromSA(SA, markers, sa_pos, occurence)) {
_addOccurenceToMap(tmMatchesMap,
occurence,
SubstringOccurrence occurrence;
if (_getOccurrenceFromSA(SA, markers, sa_pos, occurrence)) {
_addOccurrenceToMap(tmMatchesMap,
occurrence,
totalPatternLength,
matchedFragmentLength,
patternOffset);
}
}
bool ConcordiaSearcher::_getOccurenceFromSA(
bool ConcordiaSearcher::_getOccurrenceFromSA(
boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
saidx_t sa_pos,
SubstringOccurence & occurence) {
SubstringOccurrence & occurrence) {
saidx_t resultPos = SA->at(sa_pos);
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
SUFFIX_MARKER_TYPE marker =
markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
occurence.enterDataFromMarker(marker);
occurrence.enterDataFromMarker(marker);
}
}
void ConcordiaSearcher::_addOccurenceToMap(
void ConcordiaSearcher::_addOccurrenceToMap(
boost::shared_ptr<TmMatchesMap> tmMatchesMap,
SubstringOccurence & occurence,
SubstringOccurrence & occurrence,
SUFFIX_MARKER_TYPE totalPatternLength,
SUFFIX_MARKER_TYPE matchedFragmentLength,
SUFFIX_MARKER_TYPE patternOffset) {
TmMatches * tmMatches;
TmMatchesMapIterator mapIterator = tmMatchesMap->find(
occurence.getId());
occurrence.getId());
if (mapIterator != tmMatchesMap->end()) {
tmMatches = mapIterator->second;
} else {
tmMatches = new TmMatches(occurence.getId(),
occurence.getExampleLength(),
tmMatches = new TmMatches(occurrence.getId(),
occurrence.getExampleLength(),
totalPatternLength);
SUFFIX_MARKER_TYPE key = occurence.getId();
SUFFIX_MARKER_TYPE key = occurrence.getId();
tmMatchesMap->insert(key, tmMatches);
}
// add intervals to tmMatches
tmMatches->addExampleInterval(
occurence.getOffset(),
occurence.getOffset() + matchedFragmentLength);
occurrence.getOffset(),
occurrence.getOffset() + matchedFragmentLength);
tmMatches->addPatternInterval(
patternOffset,
patternOffset + matchedFragmentLength);

View File

@ -5,7 +5,7 @@
#include "concordia/common/config.hpp"
#include "concordia/common/utils.hpp"
#include "concordia/substring_occurence.hpp"
#include "concordia/substring_occurrence.hpp"
#include "concordia/concordia_exception.hpp"
#include "concordia/concordia_config.hpp"
#include "concordia/concordia_search_result.hpp"
@ -100,7 +100,7 @@ public:
\returns list of locations of the longest fragments
\throws ConcordiaException
*/
std::vector<SubstringOccurence> lcpSearch(
std::vector<SubstringOccurrence> lcpSearch(
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
@ -108,7 +108,7 @@ public:
SUFFIX_MARKER_TYPE & length);
private:
void _collectResults(std::vector<SubstringOccurence> & result,
void _collectResults(std::vector<SubstringOccurrence> & result,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
saidx_t left, saidx_t size);
@ -121,13 +121,13 @@ private:
SUFFIX_MARKER_TYPE matchedFragmentLength,
SUFFIX_MARKER_TYPE patternOffset);
bool _getOccurenceFromSA(boost::shared_ptr<std::vector<saidx_t> > SA,
bool _getOccurrenceFromSA(boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
saidx_t sa_pos,
SubstringOccurence & occurence);
SubstringOccurrence & occurrence);
void _addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
SubstringOccurence & occurence,
void _addOccurrenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
SubstringOccurrence & occurrence,
SUFFIX_MARKER_TYPE totalPatternLength,
SUFFIX_MARKER_TYPE matchedFragmentLength,
SUFFIX_MARKER_TYPE patternOffset);

View File

@ -42,10 +42,10 @@ MatchedPatternFragment IndexSearcher::simpleSearch(
saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE);
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos);
SubstringOccurence occurence;
occurence.enterDataFromMarker(marker);
result.addOccurence(occurence);
if (result.getOccurences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
SubstringOccurrence occurrence;
occurrence.enterDataFromMarker(marker);
result.addOccurrence(occurrence);
if (result.getOccurrences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
break;
}
}
@ -55,7 +55,7 @@ MatchedPatternFragment IndexSearcher::simpleSearch(
return result;
}
OccurencesList IndexSearcher::fullSearch(
OccurrencesList IndexSearcher::fullSearch(
boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
@ -74,7 +74,7 @@ OccurencesList IndexSearcher::fullSearch(
(const sauchar_t *) patternArray, patternLength,
SA->data(), (saidx_t) SA->size(), &left);
OccurencesList result(size);
OccurrencesList result(size);
int returnedResults = limit;
if ((size - offset) < limit) {
@ -91,9 +91,9 @@ OccurencesList IndexSearcher::fullSearch(
saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE);
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos);
SubstringOccurence occurence;
occurence.enterDataFromMarker(marker);
result.addOccurence(occurence);
SubstringOccurrence occurrence;
occurrence.enterDataFromMarker(marker);
result.addOccurrence(occurrence);
}
}
@ -148,10 +148,10 @@ MatchedPatternFragment IndexSearcher::lexiconSearch(
// so we should look at the marker of the next character
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos + 1);
SubstringOccurence occurence;
occurence.enterDataFromMarker(marker);
result.addOccurence(occurence);
if (result.getOccurences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
SubstringOccurrence occurrence;
occurrence.enterDataFromMarker(marker);
result.addOccurrence(occurrence);
if (result.getOccurrences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
break;
}
}
@ -161,7 +161,7 @@ MatchedPatternFragment IndexSearcher::lexiconSearch(
return result;
}
SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
SUFFIX_MARKER_TYPE IndexSearcher::countOccurrences(
boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
@ -182,7 +182,7 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
(const sauchar_t *) patternArray, patternLength,
SA->data(), (saidx_t) SA->size(), &left);
SUFFIX_MARKER_TYPE occurencesCount = 0;
SUFFIX_MARKER_TYPE occurrencesCount = 0;
for (int i = 0; i < size; ++i) {
saidx_t resultPos = SA->at(left + i);
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
@ -191,13 +191,13 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
// obtain accidental results exceeding the boundaries
// of characters in hashed index. The above check
// removes these accidental results.
occurencesCount++;
occurrencesCount++;
}
}
delete[] patternArray;
return occurencesCount;
return occurrencesCount;
}

View File

@ -8,7 +8,7 @@
#include "concordia/common/config.hpp"
#include "concordia/matched_pattern_fragment.hpp"
#include "concordia/occurences_list.hpp"
#include "concordia/occurrences_list.hpp"
#include "concordia/hash_generator.hpp"
#include "concordia/concordia_exception.hpp"
#include "concordia/concordia_searcher.hpp"
@ -43,7 +43,7 @@ public:
\param markers markers array for the needs of searching
\param SA suffix array for the needs of searching
\param pattern string pattern to be searched in the index.
\returns matched pattern fragment, containing occurences of the pattern in the index
\returns matched pattern fragment, containing occurrences of the pattern in the index
\throws ConcordiaException
*/
MatchedPatternFragment simpleSearch(
@ -54,21 +54,21 @@ public:
const std::string & pattern,
bool byWhitespace = false);
/*! Performs a substring lookup in RAM-based index, returning all occurences.
The result contains no more than "limit" occurences, starting at "offset".
/*! Performs a substring lookup in RAM-based index, returning all occurrences.
The result contains no more than "limit" occurrences, starting at "offset".
\param hashGenerator hash generator to be used to convert
input sentence to a hash
\param T hashed index to search in
\param markers markers array for the needs of searching
\param SA suffix array for the needs of searching
\param pattern string pattern to be searched in the index.
\param limit maximum number of occurences to return
\param offset starting occurence
\param limit maximum number of occurrences to return
\param offset starting occurrence
\param byWhitespace whether the pattern should be tokenized by white space
\returns list of occurences of the pattern in the index
\returns list of occurrences of the pattern in the index
\throws ConcordiaException
*/
OccurencesList fullSearch(
OccurrencesList fullSearch(
boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
@ -89,7 +89,7 @@ public:
\param markers markers array for the needs of searching
\param SA suffix array for the needs of searching
\param pattern string pattern to be searched in the index.
\returns matched pattern fragment, containing occurences of the pattern in the index
\returns matched pattern fragment, containing occurrences of the pattern in the index
\throws ConcordiaException
*/
MatchedPatternFragment lexiconSearch(
@ -100,7 +100,7 @@ public:
const std::string & pattern,
bool byWhitespace = false);
SUFFIX_MARKER_TYPE countOccurences(
SUFFIX_MARKER_TYPE countOccurrences(
boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,

View File

@ -12,7 +12,7 @@ MatchedPatternFragment::MatchedPatternFragment(
MatchedPatternFragment::~MatchedPatternFragment() {
}
void MatchedPatternFragment::addOccurence(
const SubstringOccurence & occurence) {
_occurences.push_back(occurence);
void MatchedPatternFragment::addOccurrence(
const SubstringOccurrence & occurrence) {
_occurrences.push_back(occurrence);
}

View File

@ -3,7 +3,7 @@
#include "concordia/common/config.hpp"
#include "concordia/interval.hpp"
#include "concordia/substring_occurence.hpp"
#include "concordia/substring_occurrence.hpp"
#include <vector>
#include <iostream>
#include <boost/foreach.hpp>
@ -30,17 +30,17 @@ public:
*/
virtual ~MatchedPatternFragment();
/*! Getter for occurences.
\returns occurences
/*! Getter for occurrences.
\returns occurrences
*/
std::vector<SubstringOccurence> getOccurences() const {
return _occurences;
std::vector<SubstringOccurrence> getOccurrences() const {
return _occurrences;
}
/*! Adds an occurence to the list.
\param fragment occurence to be added
/*! Adds an occurrence to the list.
\param occurrence occurrence to be added
*/
void addOccurence(const SubstringOccurence & occurence);
void addOccurrence(const SubstringOccurrence & occurrence);
/*! Getter for pattern offset.
\returns pattern offset
@ -68,8 +68,8 @@ public:
o << "fragment(patternOffset=" << fragment.getPatternOffset()
<< ", matchedLength=" << fragment.getMatchedLength() << ") {"
<< std::endl;
BOOST_FOREACH(SubstringOccurence occurence, fragment.getOccurences()) {
o << "\t" << occurence << std::endl;
BOOST_FOREACH(SubstringOccurrence occurrence, fragment.getOccurrences()) {
o << "\t" << occurrence << std::endl;
}
o << "}";
@ -78,7 +78,7 @@ public:
private:
std::vector<SubstringOccurence> _occurences;
std::vector<SubstringOccurrence> _occurrences;
SUFFIX_MARKER_TYPE _patternOffset;

View File

@ -1,13 +0,0 @@
#include "concordia/occurences_list.hpp"
OccurencesList::OccurencesList(const SUFFIX_MARKER_TYPE & totalCount):
_totalCount(totalCount) {
}
OccurencesList::~OccurencesList() {
}
void OccurencesList::addOccurence(
const SubstringOccurence & occurence) {
_occurences.push_back(occurence);
}

View File

@ -1,50 +0,0 @@
#ifndef OCCURENCES_LIST_HDR
#define OCCURENCES_LIST_HDR
#include "concordia/common/config.hpp"
#include "concordia/substring_occurence.hpp"
#include <vector>
#include <iostream>
#include <boost/foreach.hpp>
/*!
Class representing the occurences list in full search. The list only
contains as many occurences as specified in the "limit" parameter for full search.
The "totalCount" field stores the total number of occurences available.
*/
class OccurencesList {
public:
/*! Constructor.
*/
explicit OccurencesList(const SUFFIX_MARKER_TYPE & totalCount);
/*! Destructor.
*/
virtual ~OccurencesList();
/*! Getter for occurences.
\returns occurences
*/
std::vector<SubstringOccurence> getOccurences() const {
return _occurences;
}
SUFFIX_MARKER_TYPE getTotalCount() const {
return _totalCount;
}
/*! Adds an occurence to the list.
\param fragment occurence to be added
*/
void addOccurence(const SubstringOccurence & occurence);
private:
std::vector<SubstringOccurence> _occurences;
SUFFIX_MARKER_TYPE _totalCount;
};
#endif

View File

@ -0,0 +1,13 @@
#include "concordia/occurrences_list.hpp"
// Creates a list with no stored occurrences yet; only the total count
// supplied by the caller is recorded.
OccurrencesList::OccurrencesList(const SUFFIX_MARKER_TYPE & totalCount)
    : _totalCount(totalCount) {}
// Nothing is released by hand here; the body is intentionally empty.
OccurrencesList::~OccurrencesList() {}
// Stores a copy of the given occurrence after all previously added ones.
void OccurrencesList::addOccurrence(const SubstringOccurrence & occurrence) {
    _occurrences.insert(_occurrences.end(), occurrence);
}

View File

@ -0,0 +1,50 @@
#ifndef OCCURRENCES_LIST_HDR
#define OCCURRENCES_LIST_HDR
#include "concordia/common/config.hpp"
#include "concordia/substring_occurrence.hpp"
#include <vector>
#include <iostream>
#include <boost/foreach.hpp>
/*!
Class representing the occurrences list in full search. The list only
contains as many occurrences as specified in the "limit" parameter for full search.
The "totalCount" field stores the total number of occurrences available.
*/
class OccurrencesList {
public:
/*! Constructor. Creates a list with no stored occurrences.
\param totalCount total number of occurrences available for the search;
stored unchanged and returned by getTotalCount()
*/
explicit OccurrencesList(const SUFFIX_MARKER_TYPE & totalCount);
/*! Destructor.
*/
virtual ~OccurrencesList();
/*! Getter for occurrences.
\returns copy of the vector of occurrences added so far
*/
std::vector<SubstringOccurrence> getOccurrences() const {
return _occurrences;
}
/*! Getter for the total count.
\returns the total count given to the constructor; it is independent
of how many occurrences were actually added to this list
*/
SUFFIX_MARKER_TYPE getTotalCount() const {
return _totalCount;
}
/*! Adds an occurrence to the list.
\param occurrence occurrence to be added
*/
void addOccurrence(const SubstringOccurrence & occurrence);
private:
// occurrences appended through addOccurrence(), in insertion order
std::vector<SubstringOccurrence> _occurrences;
// total number of occurrences available, as passed to the constructor
SUFFIX_MARKER_TYPE _totalCount;
};
#endif

View File

@ -1,16 +1,16 @@
#include "concordia/substring_occurence.hpp"
#include "concordia/substring_occurrence.hpp"
#include "concordia/common/utils.hpp"
SubstringOccurence::SubstringOccurence() {
SubstringOccurrence::SubstringOccurrence() {
}
SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) {
SubstringOccurrence::SubstringOccurrence(const SUFFIX_MARKER_TYPE & marker) {
_id = Utils::getIdFromMarker(marker);
_offset = Utils::getOffsetFromMarker(marker);
_exampleLength = Utils::getLengthFromMarker(marker);
}
void SubstringOccurence::enterDataFromMarker(
void SubstringOccurrence::enterDataFromMarker(
const SUFFIX_MARKER_TYPE & marker) {
_id = Utils::getIdFromMarker(marker);
_offset = Utils::getOffsetFromMarker(marker);
@ -18,7 +18,7 @@ void SubstringOccurence::enterDataFromMarker(
}
SubstringOccurence::SubstringOccurence(
SubstringOccurrence::SubstringOccurrence(
const SUFFIX_MARKER_TYPE & id,
const SUFFIX_MARKER_TYPE & offset,
const SUFFIX_MARKER_TYPE & exampleLength):
@ -27,6 +27,6 @@ SubstringOccurence::SubstringOccurence(
_exampleLength(exampleLength) {
}
SubstringOccurence::~SubstringOccurence() {
SubstringOccurrence::~SubstringOccurrence() {
}

View File

@ -1,31 +1,31 @@
#ifndef SUBSTRING_OCCURENCE_HDR
#define SUBSTRING_OCCURENCE_HDR
#ifndef SUBSTRING_OCCURRENCE_HDR
#define SUBSTRING_OCCURRENCE_HDR
#include "concordia/common/config.hpp"
#include <string>
#include <iostream>
/*!
Class representing occurence of a searched substring.
Class representing occurrence of a searched substring.
It holds the following information:
- id of the example where the substring was found
- offset of the matched substring in this example
- length of the example
*/
class SubstringOccurence {
class SubstringOccurrence {
public:
/*!
Constructor.
*/
SubstringOccurence();
SubstringOccurrence();
/*!
Constructor taking data from a marker.
\param marker
*/
explicit SubstringOccurence(const SUFFIX_MARKER_TYPE & marker);
explicit SubstringOccurrence(const SUFFIX_MARKER_TYPE & marker);
/*!
Constructor with three arguments.
@ -33,12 +33,12 @@ public:
\param offset offset of the substring in the example
\param exampleLength length of the example
*/
SubstringOccurence(const SUFFIX_MARKER_TYPE & id,
SubstringOccurrence(const SUFFIX_MARKER_TYPE & id,
const SUFFIX_MARKER_TYPE & offset,
const SUFFIX_MARKER_TYPE & exampleLength);
/*! Destructor.
*/
virtual ~SubstringOccurence();
virtual ~SubstringOccurrence();
/*! Getter for example id.
\returns example id
@ -67,9 +67,9 @@ public:
void enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker);
friend std::ostream & operator << (std::ostream & o,
const SubstringOccurence & occurence) {
return o << "occurence(exampleId=" << occurence.getId()
<< ", offset=" << occurence.getOffset() << ")";
const SubstringOccurrence & occurrence) {
return o << "occurrence(exampleId=" << occurrence.getId()
<< ", offset=" << occurrence.getOffset() << ")";
}

View File

@ -73,14 +73,14 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
concordia.clearIndex();
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 123);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 51);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 2);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 123);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 51);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getOffset(), 1);
// Checking pattern spanning over 2 segments
BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 0);
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 0);
}
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
@ -137,19 +137,19 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
concordia2.clearIndex();
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 3);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 312);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 0);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 45);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(2).getId(), 29);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(2).getOffset(), 0);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 3);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 312);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 0);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 45);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(2).getId(), 29);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(2).getOffset(), 0);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 2);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 202);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 312);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 2);
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getId(), 202);
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getId(), 312);
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getOffset(), 1);
}
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
@ -167,9 +167,9 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
concordia2.clearIndex();
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 1);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 312);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 2);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 1);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 312);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 2);
}
BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
@ -185,35 +185,35 @@ BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
OccurencesList searchResult0 = concordia2.fullSearch("okno", 10, 0);
OccurrencesList searchResult0 = concordia2.fullSearch("okno", 10, 0);
/*
search0
occurence(exampleId=4, offset=1)
occurence(exampleId=3, offset=2)
occurence(exampleId=2, offset=2)
occurence(exampleId=4, offset=3)
occurence(exampleId=1, offset=2)
occurrence(exampleId=4, offset=1)
occurrence(exampleId=3, offset=2)
occurrence(exampleId=2, offset=2)
occurrence(exampleId=4, offset=3)
occurrence(exampleId=1, offset=2)
*/
OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
OccurencesList searchResult2 = concordia2.fullSearch("okno", 10, 3);
OccurencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3);
OccurencesList searchResult4 = concordia2.fullSearch("okno", 10, 6);
OccurrencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
OccurrencesList searchResult2 = concordia2.fullSearch("okno", 10, 3);
OccurrencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3);
OccurrencesList searchResult4 = concordia2.fullSearch("okno", 10, 6);
concordia2.clearIndex();
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 5);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 3);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 2);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 2);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 3);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 2);
BOOST_CHECK_EQUAL(searchResult2.getTotalCount(), 5);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 4);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 1);
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getId(), 4);
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getId(), 1);
BOOST_CHECK_EQUAL(searchResult3.getTotalCount(), 0);
BOOST_CHECK_EQUAL(searchResult4.getTotalCount(), 5);
BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 0);
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().size(), 0);
}
@ -267,16 +267,16 @@ BOOST_AUTO_TEST_CASE( ConcordiaLexiconSearch1 )
concordia.clearIndex();
// first two patterns do not cover the whole example source
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 0);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 0);
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 0);
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 0);
BOOST_CHECK_EQUAL(searchResult3.getOccurences().size(), 1);
BOOST_CHECK_EQUAL(searchResult3.getOccurences().at(0).getId(), 123);
BOOST_CHECK_EQUAL(searchResult3.getOccurences().at(0).getOffset(), 0);
BOOST_CHECK_EQUAL(searchResult3.getOccurrences().size(), 1);
BOOST_CHECK_EQUAL(searchResult3.getOccurrences().at(0).getId(), 123);
BOOST_CHECK_EQUAL(searchResult3.getOccurrences().at(0).getOffset(), 0);
BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 1);
BOOST_CHECK_EQUAL(searchResult4.getOccurences().at(0).getId(), 14);
BOOST_CHECK_EQUAL(searchResult4.getOccurences().at(0).getOffset(), 0);
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().size(), 1);
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().at(0).getId(), 14);
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().at(0).getOffset(), 0);
}
@ -351,43 +351,43 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
/*
adding fragment: offset=0, length=2
adding occurence: example id=167, offset=2
adding occurence: example id=45, offset=3
adding occurence: example id=51, offset=1
adding occurence: example id=123, offset=1
adding occurrence: example id=167, offset=2
adding occurrence: example id=45, offset=3
adding occurrence: example id=51, offset=1
adding occurrence: example id=123, offset=1
adding fragment: offset=1, length=1
adding occurence: example id=167, offset=3
adding occurence: example id=45, offset=4
adding occurence: example id=51, offset=2
adding occurence: example id=123, offset=2
adding occurrence: example id=167, offset=3
adding occurrence: example id=45, offset=4
adding occurrence: example id=51, offset=2
adding occurrence: example id=123, offset=2
adding fragment: offset=2, length=1
adding occurence: example id=167, offset=1
adding occurrence: example id=167, offset=1
*/
BOOST_CHECK_EQUAL(searchResult1->getFragments().size(), 3);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getPatternOffset(), 0);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getMatchedLength(), 2);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getId(), 167);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getOffset(), 2);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(1).getId(), 45);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(1).getOffset(), 3);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(2).getId(), 51);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(2).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getId(), 167);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getOffset(), 2);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(1).getId(), 45);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(1).getOffset(), 3);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(2).getId(), 51);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(2).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getPatternOffset(), 1);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getMatchedLength(), 1);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(0).getId(), 167);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(0).getOffset(), 3);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(1).getId(), 45);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(1).getOffset(), 4);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(2).getId(), 51);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(2).getOffset(), 2);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(0).getId(), 167);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(0).getOffset(), 3);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(1).getId(), 45);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(1).getOffset(), 4);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(2).getId(), 51);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(2).getOffset(), 2);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getPatternOffset(), 2);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getMatchedLength(), 1);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurences().at(0).getId(), 167);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurences().at(0).getOffset(), 1);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurrences().at(0).getId(), 167);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurrences().at(0).getOffset(), 1);
concordia.clearIndex();
}
@ -443,43 +443,43 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
Best overlay {
fragment(patternOffset=1, matchedLength=4) {
occurence(exampleId=321, offset=0)
occurrence(exampleId=321, offset=0)
}
fragment(patternOffset=5, matchedLength=4) {
occurence(exampleId=14, offset=7)
occurrence(exampleId=14, offset=7)
}
}
All fragments {
fragment(patternOffset=4, matchedLength=5) {
occurence(exampleId=14, offset=6)
occurrence(exampleId=14, offset=6)
}
fragment(patternOffset=1, matchedLength=4) {
occurence(exampleId=321, offset=0)
occurrence(exampleId=321, offset=0)
}
fragment(patternOffset=5, matchedLength=4) {
occurence(exampleId=14, offset=7)
occurrence(exampleId=14, offset=7)
}
fragment(patternOffset=2, matchedLength=3) {
occurence(exampleId=321, offset=1)
occurrence(exampleId=321, offset=1)
}
fragment(patternOffset=6, matchedLength=3) {
occurence(exampleId=14, offset=8)
occurrence(exampleId=14, offset=8)
}
fragment(patternOffset=3, matchedLength=2) {
occurence(exampleId=321, offset=2)
occurrence(exampleId=321, offset=2)
}
fragment(patternOffset=7, matchedLength=2) {
occurence(exampleId=14, offset=9)
occurrence(exampleId=14, offset=9)
}
fragment(patternOffset=8, matchedLength=1) {
occurence(exampleId=14, offset=10)
occurrence(exampleId=14, offset=10)
}
}
*/
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getId(), 14);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getOffset(), 6);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getId(), 14);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getOffset(), 6);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getPatternOffset(), 4);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getMatchedLength(), 5);
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getStart(), 4);
@ -522,7 +522,7 @@ BOOST_AUTO_TEST_CASE( Tokenize )
}
BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences )
BOOST_AUTO_TEST_CASE( ConcordiaCountOccurrences )
{
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
@ -556,12 +556,12 @@ BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences )
*/
BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada"), 0);
BOOST_CHECK_EQUAL(concordia.countOccurences("Marysia posiada"), 0);
BOOST_CHECK_EQUAL(concordia.countOccurences("Marysia posiada rysia"), 1);
BOOST_CHECK_EQUAL(concordia.countOccurences("kota Ala posiada"), 0);
BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada kota"), 2);
BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada kota i psa"), 1);
BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada"), 0);
BOOST_CHECK_EQUAL(concordia.countOccurrences("Marysia posiada"), 0);
BOOST_CHECK_EQUAL(concordia.countOccurrences("Marysia posiada rysia"), 1);
BOOST_CHECK_EQUAL(concordia.countOccurrences("kota Ala posiada"), 0);
BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada kota"), 2);
BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada kota i psa"), 1);
concordia.clearIndex();

View File

@ -135,13 +135,13 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
SA->push_back(11);
SUFFIX_MARKER_TYPE highResLength;
std::vector<SubstringOccurence> result = searcher.lcpSearch(T, markers, SA, pattern, highResLength);
std::vector<SubstringOccurrence> result = searcher.lcpSearch(T, markers, SA, pattern, highResLength);
SUFFIX_MARKER_TYPE length = highResLength / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get the following results from SA:
3: ana
1: anana
Which are 2 substring occurences (34,3) and (34,1) with the lcp length = 2;
Which are 2 substring occurrences (34,3) and (34,1) with the lcp length = 2;
*/
BOOST_CHECK_EQUAL(result.size(),2);
@ -185,12 +185,12 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
pattern2.push_back(2);
SUFFIX_MARKER_TYPE highResLength2;
std::vector<SubstringOccurence> result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2);
std::vector<SubstringOccurrence> result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2);
SUFFIX_MARKER_TYPE length2 = highResLength2 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get one result from SA:
0: banana
Which is one substring occurence (34,0) with the lcp length = 6;
Which is one substring occurrence (34,0) with the lcp length = 6;
*/
@ -228,12 +228,12 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
pattern3.push_back(3);
SUFFIX_MARKER_TYPE highResLength3;
std::vector<SubstringOccurence> result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3);
std::vector<SubstringOccurrence> result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3);
SUFFIX_MARKER_TYPE length3 = highResLength3 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get one result from SA:
0: banana
Which is one substring occurence (34,0) with the lcp length = 5;
Which is one substring occurrence (34,0) with the lcp length = 5;
*/
BOOST_CHECK_EQUAL(result3.size(),1);
@ -265,13 +265,13 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
pattern4.push_back(4);
SUFFIX_MARKER_TYPE highResLength4;
std::vector<SubstringOccurence> result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4);
std::vector<SubstringOccurrence> result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4);
SUFFIX_MARKER_TYPE length4 = highResLength4 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get 2 results from SA:
4: na
2: nana
Which are 2 substring occurences (34,4) and (34,2) with the lcp length = 2;
Which are 2 substring occurrences (34,4) and (34,2) with the lcp length = 2;
*/
BOOST_CHECK_EQUAL(result4.size(),2);
@ -296,7 +296,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
pattern5.push_back(4);
SUFFIX_MARKER_TYPE highResLength5;
std::vector<SubstringOccurence> result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5);
std::vector<SubstringOccurrence> result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5);
SUFFIX_MARKER_TYPE length5 = highResLength5 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get 0 results from SA, lcp length = 0;
@ -320,7 +320,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
pattern6.push_back(0);
SUFFIX_MARKER_TYPE highResLength6;
std::vector<SubstringOccurence> result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6);
std::vector<SubstringOccurrence> result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6);
SUFFIX_MARKER_TYPE length6 = highResLength6 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get 0 results from SA, lcp length = 0;