From d39c0400c96b4427e9fac02532546f8be7300e60 Mon Sep 17 00:00:00 2001 From: rjawor Date: Tue, 22 Jan 2019 14:07:28 +0100 Subject: [PATCH] occurrence refactoring --- concordia-console/concordia-console.cpp | 12 +- concordia/concordia.cpp | 8 +- concordia/concordia.hpp | 20 +- concordia/concordia_searcher.cpp | 46 ++--- concordia/concordia_searcher.hpp | 14 +- concordia/index_searcher.cpp | 34 ++-- concordia/index_searcher.hpp | 20 +- concordia/matched_pattern_fragment.cpp | 6 +- concordia/matched_pattern_fragment.hpp | 22 +-- concordia/occurences_list.cpp | 13 -- concordia/occurences_list.hpp | 50 ----- concordia/occurrences_list.cpp | 13 ++ concordia/occurrences_list.hpp | 50 +++++ ...occurence.cpp => substring_occurrence.cpp} | 12 +- ...occurence.hpp => substring_occurrence.hpp} | 22 +-- concordia/t/test_concordia.cpp | 174 +++++++++--------- concordia/t/test_concordia_searcher.cpp | 20 +- 17 files changed, 268 insertions(+), 268 deletions(-) delete mode 100644 concordia/occurences_list.cpp delete mode 100644 concordia/occurences_list.hpp create mode 100644 concordia/occurrences_list.cpp create mode 100644 concordia/occurrences_list.hpp rename concordia/{substring_occurence.cpp => substring_occurrence.cpp} (72%) rename concordia/{substring_occurence.hpp => substring_occurrence.hpp} (73%) diff --git a/concordia-console/concordia-console.cpp b/concordia-console/concordia-console.cpp index 95c8456..0d8d922 100644 --- a/concordia-console/concordia-console.cpp +++ b/concordia-console/concordia-console.cpp @@ -178,14 +178,14 @@ int main(int argc, char** argv) { concordia.simpleSearch(pattern); time_end = boost::posix_time::microsec_clock::local_time(); msdiff = time_end - time_start; - std::cout << "\tFound: " << result.getOccurences().size() + std::cout << "\tFound: " << result.getOccurrences().size() << " matches. " << "Search took: " << msdiff.total_milliseconds() << "ms." << std::endl; if (!cli.count("silent")) { - BOOST_FOREACH(SubstringOccurence occurence, - result.getOccurences()) { + BOOST_FOREACH(SubstringOccurrence occurrence, + result.getOccurrences()) { std::cout << "\t\tfound match in sentence number: " - << occurence.getId() << std::endl; + << occurrence.getId() << std::endl; } } } else if (cli.count("anubis-search")) { @@ -237,7 +237,7 @@ int main(int argc, char** argv) { << "," << fragment.getEnd() << "] (exampleCount," << " patternOffset, length): " - << fragment.getOccurences().size() << "," + << fragment.getOccurrences().size() << "," << fragment.getPatternOffset() << "," << fragment.getMatchedLength() << std::endl; @@ -250,7 +250,7 @@ int main(int argc, char** argv) { << "," << fragment.getEnd() << "] (exampleCount," << " patternOffset, length): " - << fragment.getOccurences().size() << "," + << fragment.getOccurrences().size() << "," << fragment.getPatternOffset() << "," << fragment.getMatchedLength() << std::endl; diff --git a/concordia/concordia.cpp b/concordia/concordia.cpp index 587cb8f..1800d46 100644 --- a/concordia/concordia.cpp +++ b/concordia/concordia.cpp @@ -192,9 +192,9 @@ void Concordia::_initializeIndex() { } } -SUFFIX_MARKER_TYPE Concordia::countOccurences(const std::string & pattern) { +SUFFIX_MARKER_TYPE Concordia::countOccurrences(const std::string & pattern) { if (_T->size() > 0) { - return _searcher->countOccurences(_hashGenerator, _T, + return _searcher->countOccurrences(_hashGenerator, _T, _markers, _SA, pattern); } else { return 0; @@ -215,7 +215,7 @@ MatchedPatternFragment Concordia::simpleSearch( } } -OccurencesList Concordia::fullSearch( +OccurrencesList Concordia::fullSearch( const std::string & pattern, int limit, int offset, @@ -225,7 +225,7 @@ OccurencesList Concordia::fullSearch( _markers, _SA, pattern, limit, offset, byWhitespace); } else { // If the index or search pattern are empty, return an empty result. - OccurencesList result(0); + OccurrencesList result(0); return result; } } diff --git a/concordia/concordia.hpp b/concordia/concordia.hpp index 564bb1b..7f6570b 100644 --- a/concordia/concordia.hpp +++ b/concordia/concordia.hpp @@ -9,7 +9,7 @@ #include "concordia/common/config.hpp" #include "concordia/example.hpp" #include "concordia/matched_pattern_fragment.hpp" -#include "concordia/occurences_list.hpp" +#include "concordia/occurrences_list.hpp" #include "concordia/concordia_config.hpp" #include "concordia/concordia_index.hpp" #include "concordia/index_searcher.hpp" @@ -121,24 +121,24 @@ public: For more info see \ref tutorial1_2. \param pattern pattern to be searched in the index \param byWhitespace whether to tokenize the pattern by white space - \returns matched pattern fragment containing vector of occurences + \returns matched pattern fragment containing vector of occurrences \throws ConcordiaException */ MatchedPatternFragment simpleSearch(const std::string & pattern, bool byWhitespace = false); - /*! Performs a substring lookup in RAM-based index, returning all occurences. - The result contains no more than "limit" occurences, starting at "offset". + /*! Performs a substring lookup in RAM-based index, returning all occurrences. + The result contains no more than "limit" occurrences, starting at "offset". \param hashGenerator hash generator to be used to convert input sentence to a hash \param pattern string pattern to be searched in the index. - \param limit maximum number of occurences to return - \param offset starting occurence + \param limit maximum number of occurrences to return + \param offset starting occurrence \param byWhitespace should the pattern by tokenized by white space - \returns list of occurences of the pattern in the index + \returns list of occurrences of the pattern in the index \throws ConcordiaException */ - OccurencesList fullSearch( + OccurrencesList fullSearch( const std::string & pattern, int limit, int offset, @@ -151,13 +151,13 @@ public: the lexicon search requires that the match is the whole example source. \param pattern pattern to be searched in the index \param byWhitespace whether to tokenize the pattern by white space - \returns matched pattern fragment containing vector of occurences + \returns matched pattern fragment containing vector of occurrences \throws ConcordiaException */ MatchedPatternFragment lexiconSearch(const std::string & pattern, bool byWhitespace = false); - SUFFIX_MARKER_TYPE countOccurences(const std::string & pattern); + SUFFIX_MARKER_TYPE countOccurrences(const std::string & pattern); /*! \deprecated Finds the examples from the index, whose resemblance to the diff --git a/concordia/concordia_searcher.cpp b/concordia/concordia_searcher.cpp index ebeffba..09fa708 100644 --- a/concordia/concordia_searcher.cpp +++ b/concordia/concordia_searcher.cpp @@ -32,15 +32,15 @@ void ConcordiaSearcher::concordiaSearch( std::vector currentPattern( patternVector.begin()+highResOffset, patternVector.end()); SUFFIX_MARKER_TYPE lcpLength; - std::vector occurences = + std::vector occurrences = lcpSearch(T, markers, SA, currentPattern, lcpLength); - if (occurences.size() > 0) { + if (occurrences.size() > 0) { MatchedPatternFragment fragment(offset, lcpLength / sizeof(INDEX_CHARACTER_TYPE)); - BOOST_FOREACH(SubstringOccurence occurence, occurences) { - fragment.addOccurence(occurence); + BOOST_FOREACH(SubstringOccurrence occurrence, occurrences) { + fragment.addOccurrence(occurrence); } result->addFragment(fragment); } @@ -155,7 +155,7 @@ boost::shared_ptr ConcordiaSearcher::getTmMatches( return tmMatchesMap; } -std::vector ConcordiaSearcher::lcpSearch( +std::vector ConcordiaSearcher::lcpSearch( boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, @@ -185,7 +185,7 @@ std::vector ConcordiaSearcher::lcpSearch( SAleft += localLeft; } while (patternLength < pattern.size() && size > 0); - std::vector result; + std::vector result; if (size == 0) { // The search managed to find exactly the longest common prefixes. @@ -208,7 +208,7 @@ std::vector ConcordiaSearcher::lcpSearch( } void ConcordiaSearcher::_collectResults( - std::vector & result, + std::vector & result, boost::shared_ptr > markers, boost::shared_ptr > SA, saidx_t left, saidx_t size) { @@ -219,7 +219,7 @@ void ConcordiaSearcher::_collectResults( if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) { SUFFIX_MARKER_TYPE marker = markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE)); - result.push_back(SubstringOccurence(marker)); + result.push_back(SubstringOccurrence(marker)); // truncate results, // we don't need too many identical pattern overlays @@ -237,54 +237,54 @@ void ConcordiaSearcher::_addToMap(boost::shared_ptr > SA, SUFFIX_MARKER_TYPE totalPatternLength, SUFFIX_MARKER_TYPE matchedFragmentLength, SUFFIX_MARKER_TYPE patternOffset) { - SubstringOccurence occurence; - if (_getOccurenceFromSA(SA, markers, sa_pos, occurence)) { - _addOccurenceToMap(tmMatchesMap, - occurence, + SubstringOccurrence occurrence; + if (_getOccurrenceFromSA(SA, markers, sa_pos, occurrence)) { + _addOccurrenceToMap(tmMatchesMap, + occurrence, totalPatternLength, matchedFragmentLength, patternOffset); } } -bool ConcordiaSearcher::_getOccurenceFromSA( +bool ConcordiaSearcher::_getOccurrenceFromSA( boost::shared_ptr > SA, boost::shared_ptr > markers, saidx_t sa_pos, - SubstringOccurence & occurence) { + SubstringOccurrence & occurrence) { saidx_t resultPos = SA->at(sa_pos); if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) { SUFFIX_MARKER_TYPE marker = markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE)); - occurence.enterDataFromMarker(marker); + occurrence.enterDataFromMarker(marker); } } -void ConcordiaSearcher::_addOccurenceToMap( +void ConcordiaSearcher::_addOccurrenceToMap( boost::shared_ptr tmMatchesMap, - SubstringOccurence & occurence, + SubstringOccurrence & occurrence, SUFFIX_MARKER_TYPE totalPatternLength, SUFFIX_MARKER_TYPE matchedFragmentLength, SUFFIX_MARKER_TYPE patternOffset) { TmMatches * tmMatches; TmMatchesMapIterator mapIterator = tmMatchesMap->find( - occurence.getId()); + occurrence.getId()); if (mapIterator != tmMatchesMap->end()) { tmMatches = mapIterator->second; } else { - tmMatches = new TmMatches(occurence.getId(), - occurence.getExampleLength(), + tmMatches = new TmMatches(occurrence.getId(), + occurrence.getExampleLength(), totalPatternLength); - SUFFIX_MARKER_TYPE key = occurence.getId(); + SUFFIX_MARKER_TYPE key = occurrence.getId(); tmMatchesMap->insert(key, tmMatches); } // add intervals to tmMatches tmMatches->addExampleInterval( - occurence.getOffset(), - occurence.getOffset() + matchedFragmentLength); + occurrence.getOffset(), + occurrence.getOffset() + matchedFragmentLength); tmMatches->addPatternInterval( patternOffset, patternOffset + matchedFragmentLength); diff --git a/concordia/concordia_searcher.hpp b/concordia/concordia_searcher.hpp index 3e877c1..7d9011f 100644 --- a/concordia/concordia_searcher.hpp +++ b/concordia/concordia_searcher.hpp @@ -5,7 +5,7 @@ #include "concordia/common/config.hpp" #include "concordia/common/utils.hpp" -#include "concordia/substring_occurence.hpp" +#include "concordia/substring_occurrence.hpp" #include "concordia/concordia_exception.hpp" #include "concordia/concordia_config.hpp" #include "concordia/concordia_search_result.hpp" @@ -100,7 +100,7 @@ public: \returns list of locations of the longest fragments \throws ConcordiaException */ - std::vector lcpSearch( + std::vector lcpSearch( boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, @@ -108,7 +108,7 @@ public: SUFFIX_MARKER_TYPE & length); private: - void _collectResults(std::vector & result, + void _collectResults(std::vector & result, boost::shared_ptr > markers, boost::shared_ptr > SA, saidx_t left, saidx_t size); @@ -121,13 +121,13 @@ private: SUFFIX_MARKER_TYPE matchedFragmentLength, SUFFIX_MARKER_TYPE patternOffset); - bool _getOccurenceFromSA(boost::shared_ptr > SA, + bool _getOccurrenceFromSA(boost::shared_ptr > SA, boost::shared_ptr > markers, saidx_t sa_pos, - SubstringOccurence & occurence); + SubstringOccurrence & occurrence); - void _addOccurenceToMap(boost::shared_ptr tmMatchesMap, - SubstringOccurence & occurence, + void _addOccurrenceToMap(boost::shared_ptr tmMatchesMap, + SubstringOccurrence & occurrence, SUFFIX_MARKER_TYPE totalPatternLength, SUFFIX_MARKER_TYPE matchedFragmentLength, SUFFIX_MARKER_TYPE patternOffset); diff --git a/concordia/index_searcher.cpp b/concordia/index_searcher.cpp index fe4efc2..b937d56 100644 --- a/concordia/index_searcher.cpp +++ b/concordia/index_searcher.cpp @@ -42,10 +42,10 @@ MatchedPatternFragment IndexSearcher::simpleSearch( saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos); - SubstringOccurence occurence; - occurence.enterDataFromMarker(marker); - result.addOccurence(occurence); - if (result.getOccurences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) { + SubstringOccurrence occurrence; + occurrence.enterDataFromMarker(marker); + result.addOccurrence(occurrence); + if (result.getOccurrences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) { break; } } @@ -55,7 +55,7 @@ MatchedPatternFragment IndexSearcher::simpleSearch( return result; } -OccurencesList IndexSearcher::fullSearch( +OccurrencesList IndexSearcher::fullSearch( boost::shared_ptr hashGenerator, boost::shared_ptr > T, boost::shared_ptr > markers, @@ -74,7 +74,7 @@ OccurencesList IndexSearcher::fullSearch( (const sauchar_t *) patternArray, patternLength, SA->data(), (saidx_t) SA->size(), &left); - OccurencesList result(size); + OccurrencesList result(size); int returnedResults = limit; if ((size - offset) < limit) { @@ -91,9 +91,9 @@ OccurencesList IndexSearcher::fullSearch( saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos); - SubstringOccurence occurence; - occurence.enterDataFromMarker(marker); - result.addOccurence(occurence); + SubstringOccurrence occurrence; + occurrence.enterDataFromMarker(marker); + result.addOccurrence(occurrence); } } @@ -148,10 +148,10 @@ MatchedPatternFragment IndexSearcher::lexiconSearch( // so we should look at the marker of the next character SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos + 1); - SubstringOccurence occurence; - occurence.enterDataFromMarker(marker); - result.addOccurence(occurence); - if (result.getOccurences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) { + SubstringOccurrence occurrence; + occurrence.enterDataFromMarker(marker); + result.addOccurrence(occurrence); + if (result.getOccurrences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) { break; } } @@ -161,7 +161,7 @@ MatchedPatternFragment IndexSearcher::lexiconSearch( return result; } -SUFFIX_MARKER_TYPE IndexSearcher::countOccurences( +SUFFIX_MARKER_TYPE IndexSearcher::countOccurrences( boost::shared_ptr hashGenerator, boost::shared_ptr > T, boost::shared_ptr > markers, @@ -182,7 +182,7 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences( (const sauchar_t *) patternArray, patternLength, SA->data(), (saidx_t) SA->size(), &left); - SUFFIX_MARKER_TYPE occurencesCount = 0; + SUFFIX_MARKER_TYPE occurrencesCount = 0; for (int i = 0; i < size; ++i) { saidx_t resultPos = SA->at(left + i); if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) { @@ -191,13 +191,13 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences( // obtain accidental results exceeding the boundaries // of characters in hashed index. The above check // removes these accidental results. - occurencesCount++; + occurrencesCount++; } } delete[] patternArray; - return occurencesCount; + return occurrencesCount; } diff --git a/concordia/index_searcher.hpp b/concordia/index_searcher.hpp index 7916ac1..eb4a6a8 100644 --- a/concordia/index_searcher.hpp +++ b/concordia/index_searcher.hpp @@ -8,7 +8,7 @@ #include "concordia/common/config.hpp" #include "concordia/matched_pattern_fragment.hpp" -#include "concordia/occurences_list.hpp" +#include "concordia/occurrences_list.hpp" #include "concordia/hash_generator.hpp" #include "concordia/concordia_exception.hpp" #include "concordia/concordia_searcher.hpp" @@ -43,7 +43,7 @@ public: \param markers markers array for the needs of searching \param SA suffix array for the needs of searching \param pattern string pattern to be searched in the index. - \returns matched pattern fragment, containing occurences of the pattern in the index + \returns matched pattern fragment, containing occurrences of the pattern in the index \throws ConcordiaException */ MatchedPatternFragment simpleSearch( @@ -54,21 +54,21 @@ public: const std::string & pattern, bool byWhitespace = false); - /*! Performs a substring lookup in RAM-based index, returning all occurences. - The result contains no more than "limit" occurences, starting at "offset". + /*! Performs a substring lookup in RAM-based index, returning all occurrences. + The result contains no more than "limit" occurrences, starting at "offset". \param hashGenerator hash generator to be used to convert input sentence to a hash \param T hashed index to search in \param markers markers array for the needs of searching \param SA suffix array for the needs of searching \param pattern string pattern to be searched in the index. - \param limit maximum number of occurences to return - \param offset starting occurence + \param limit maximum number of occurrences to return + \param offset starting occurrence \param byWhitespace should the pattern by tokenized by white space - \returns list of occurences of the pattern in the index + \returns list of occurrences of the pattern in the index \throws ConcordiaException */ - OccurencesList fullSearch( + OccurrencesList fullSearch( boost::shared_ptr hashGenerator, boost::shared_ptr > T, boost::shared_ptr > markers, @@ -89,7 +89,7 @@ public: \param markers markers array for the needs of searching \param SA suffix array for the needs of searching \param pattern string pattern to be searched in the index. - \returns matched pattern fragment, containing occurences of the pattern in the index + \returns matched pattern fragment, containing occurrences of the pattern in the index \throws ConcordiaException */ MatchedPatternFragment lexiconSearch( @@ -100,7 +100,7 @@ public: const std::string & pattern, bool byWhitespace = false); - SUFFIX_MARKER_TYPE countOccurences( + SUFFIX_MARKER_TYPE countOccurrences( boost::shared_ptr hashGenerator, boost::shared_ptr > T, boost::shared_ptr > markers, diff --git a/concordia/matched_pattern_fragment.cpp b/concordia/matched_pattern_fragment.cpp index a120a80..95d5845 100644 --- a/concordia/matched_pattern_fragment.cpp +++ b/concordia/matched_pattern_fragment.cpp @@ -12,7 +12,7 @@ MatchedPatternFragment::MatchedPatternFragment( MatchedPatternFragment::~MatchedPatternFragment() { } -void MatchedPatternFragment::addOccurence( - const SubstringOccurence & occurence) { - _occurences.push_back(occurence); +void MatchedPatternFragment::addOccurrence( + const SubstringOccurrence & occurrence) { + _occurrences.push_back(occurrence); } diff --git a/concordia/matched_pattern_fragment.hpp b/concordia/matched_pattern_fragment.hpp index bb90bfa..4e8df8f 100644 --- a/concordia/matched_pattern_fragment.hpp +++ b/concordia/matched_pattern_fragment.hpp @@ -3,7 +3,7 @@ #include "concordia/common/config.hpp" #include "concordia/interval.hpp" -#include "concordia/substring_occurence.hpp" +#include "concordia/substring_occurrence.hpp" #include #include #include @@ -30,17 +30,17 @@ public: */ virtual ~MatchedPatternFragment(); - /*! Getter for occurences. - \returns occurences + /*! Getter for occurrences. + \returns occurrences */ - std::vector getOccurences() const { - return _occurences; + std::vector getOccurrences() const { + return _occurrences; } - /*! Adds an occurence to the list. - \param fragment occurence to be added + /*! Adds an occurrence to the list. + \param fragment occurrence to be added */ - void addOccurence(const SubstringOccurence & occurence); + void addOccurrence(const SubstringOccurrence & occurrence); /*! Getter for pattern offset. \returns pattern offset @@ -68,8 +68,8 @@ public: o << "fragment(patternOffset=" << fragment.getPatternOffset() << ", matchedLength=" << fragment.getMatchedLength() << ") {" << std::endl; - BOOST_FOREACH(SubstringOccurence occurence, fragment.getOccurences()) { - o << "\t" << occurence << std::endl; + BOOST_FOREACH(SubstringOccurrence occurrence, fragment.getOccurrences()) { + o << "\t" << occurrence << std::endl; } o << "}"; @@ -78,7 +78,7 @@ public: private: - std::vector _occurences; + std::vector _occurrences; SUFFIX_MARKER_TYPE _patternOffset; diff --git a/concordia/occurences_list.cpp b/concordia/occurences_list.cpp deleted file mode 100644 index 83cde5d..0000000 --- a/concordia/occurences_list.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "concordia/occurences_list.hpp" - -OccurencesList::OccurencesList(const SUFFIX_MARKER_TYPE & totalCount): - _totalCount(totalCount) { -} - -OccurencesList::~OccurencesList() { -} - -void OccurencesList::addOccurence( - const SubstringOccurence & occurence) { - _occurences.push_back(occurence); -} diff --git a/concordia/occurences_list.hpp b/concordia/occurences_list.hpp deleted file mode 100644 index 7380eea..0000000 --- a/concordia/occurences_list.hpp +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef OCCURENCES_LIST_HDR -#define OCCURENCES_LIST_HDR - -#include "concordia/common/config.hpp" -#include "concordia/substring_occurence.hpp" -#include -#include -#include - -/*! - Class representing the occurences list in full search. The list only - contains as many occurences as specified in the "limit" parameter for full search. - The "totalCount" field stores the total number of occurences available. - -*/ - -class OccurencesList { -public: - /*! Constructor. - */ - explicit OccurencesList(const SUFFIX_MARKER_TYPE & totalCount); - - /*! Destructor. - */ - virtual ~OccurencesList(); - - /*! Getter for occurences. - \returns occurences - */ - std::vector getOccurences() const { - return _occurences; - } - - SUFFIX_MARKER_TYPE getTotalCount() const { - return _totalCount; - } - - /*! Adds an occurence to the list. - \param fragment occurence to be added - */ - void addOccurence(const SubstringOccurence & occurence); - - -private: - std::vector _occurences; - - SUFFIX_MARKER_TYPE _totalCount; -}; - -#endif diff --git a/concordia/occurrences_list.cpp b/concordia/occurrences_list.cpp new file mode 100644 index 0000000..43c16c0 --- /dev/null +++ b/concordia/occurrences_list.cpp @@ -0,0 +1,13 @@ +#include "concordia/occurrences_list.hpp" + +OccurrencesList::OccurrencesList(const SUFFIX_MARKER_TYPE & totalCount): + _totalCount(totalCount) { +} + +OccurrencesList::~OccurrencesList() { +} + +void OccurrencesList::addOccurrence( + const SubstringOccurrence & occurrence) { + _occurrences.push_back(occurrence); +} diff --git a/concordia/occurrences_list.hpp b/concordia/occurrences_list.hpp new file mode 100644 index 0000000..17b1418 --- /dev/null +++ b/concordia/occurrences_list.hpp @@ -0,0 +1,50 @@ +#ifndef OCCURRENCES_LIST_HDR +#define OCCURRENCES_LIST_HDR + +#include "concordia/common/config.hpp" +#include "concordia/substring_occurrence.hpp" +#include +#include +#include + +/*! + Class representing the occurrences list in full search. The list only + contains as many occurrences as specified in the "limit" parameter for full search. + The "totalCount" field stores the total number of occurrences available. + +*/ + +class OccurrencesList { +public: + /*! Constructor. + */ + explicit OccurrencesList(const SUFFIX_MARKER_TYPE & totalCount); + + /*! Destructor. + */ + virtual ~OccurrencesList(); + + /*! Getter for occurrences. + \returns occurrences + */ + std::vector getOccurrences() const { + return _occurrences; + } + + SUFFIX_MARKER_TYPE getTotalCount() const { + return _totalCount; + } + + /*! Adds an occurrence to the list. + \param fragment occurrence to be added + */ + void addOccurrence(const SubstringOccurrence & occurrence); + + +private: + std::vector _occurrences; + + SUFFIX_MARKER_TYPE _totalCount; +}; + +#endif diff --git a/concordia/substring_occurence.cpp b/concordia/substring_occurrence.cpp similarity index 72% rename from concordia/substring_occurence.cpp rename to concordia/substring_occurrence.cpp index 49ea211..a6789f3 100644 --- a/concordia/substring_occurence.cpp +++ b/concordia/substring_occurrence.cpp @@ -1,16 +1,16 @@ -#include "concordia/substring_occurence.hpp" +#include "concordia/substring_occurrence.hpp" #include "concordia/common/utils.hpp" -SubstringOccurence::SubstringOccurence() { +SubstringOccurrence::SubstringOccurrence() { } -SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) { +SubstringOccurrence::SubstringOccurrence(const SUFFIX_MARKER_TYPE & marker) { _id = Utils::getIdFromMarker(marker); _offset = Utils::getOffsetFromMarker(marker); _exampleLength = Utils::getLengthFromMarker(marker); } -void SubstringOccurence::enterDataFromMarker( +void SubstringOccurrence::enterDataFromMarker( const SUFFIX_MARKER_TYPE & marker) { _id = Utils::getIdFromMarker(marker); _offset = Utils::getOffsetFromMarker(marker); @@ -18,7 +18,7 @@ void SubstringOccurence::enterDataFromMarker( } -SubstringOccurence::SubstringOccurence( +SubstringOccurrence::SubstringOccurrence( const SUFFIX_MARKER_TYPE & id, const SUFFIX_MARKER_TYPE & offset, const SUFFIX_MARKER_TYPE & exampleLength): @@ -27,6 +27,6 @@ SubstringOccurence::SubstringOccurence( _exampleLength(exampleLength) { } -SubstringOccurence::~SubstringOccurence() { +SubstringOccurrence::~SubstringOccurrence() { } diff --git a/concordia/substring_occurence.hpp b/concordia/substring_occurrence.hpp similarity index 73% rename from concordia/substring_occurence.hpp rename to concordia/substring_occurrence.hpp index b4de964..669fb8f 100644 --- a/concordia/substring_occurence.hpp +++ b/concordia/substring_occurrence.hpp @@ -1,31 +1,31 @@ -#ifndef SUBSTRING_OCCURENCE_HDR -#define SUBSTRING_OCCURENCE_HDR +#ifndef SUBSTRING_OCCURRENCE_HDR +#define SUBSTRING_OCCURRENCE_HDR #include "concordia/common/config.hpp" #include #include /*! - Class representing occurence of a searched substring. + Class representing occurrence of a searched substring. It holds the following information: - id of the example where the substring was found - offset of the matched substring in this example - length of the example */ -class SubstringOccurence { +class SubstringOccurrence { public: /*! Constructor. */ - SubstringOccurence(); + SubstringOccurrence(); /*! Constructor taking data from a marker. \param marker */ - explicit SubstringOccurence(const SUFFIX_MARKER_TYPE & marker); + explicit SubstringOccurrence(const SUFFIX_MARKER_TYPE & marker); /*! Constructor with three arguments. @@ -33,12 +33,12 @@ public: \param offset offset of the substring in the example \param exampleLength length of the example */ - SubstringOccurence(const SUFFIX_MARKER_TYPE & id, + SubstringOccurrence(const SUFFIX_MARKER_TYPE & id, const SUFFIX_MARKER_TYPE & offset, const SUFFIX_MARKER_TYPE & exampleLength); /*! Destructor. */ - virtual ~SubstringOccurence(); + virtual ~SubstringOccurrence(); /*! Getter for example id. \returns example id @@ -67,9 +67,9 @@ public: void enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker); friend std::ostream & operator << (std::ostream & o, - const SubstringOccurence & occurence) { - return o << "occurence(exampleId=" << occurence.getId() - << ", offset=" << occurence.getOffset() << ")"; + const SubstringOccurrence & occurrence) { + return o << "occurrence(exampleId=" << occurrence.getId() + << ", offset=" << occurrence.getOffset() << ")"; } diff --git a/concordia/t/test_concordia.cpp b/concordia/t/test_concordia.cpp index b17f64f..3ae8864 100644 --- a/concordia/t/test_concordia.cpp +++ b/concordia/t/test_concordia.cpp @@ -73,14 +73,14 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 ) concordia.clearIndex(); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 123); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 1); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 51); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getOffset(), 1); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 2); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 123); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 1); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 51); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getOffset(), 1); // Checking pattern spanning over 2 segments - BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 0); + BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 0); } BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 ) @@ -137,19 +137,19 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 ) concordia2.clearIndex(); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 3); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 312); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 0); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 45); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getOffset(), 1); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(2).getId(), 29); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(2).getOffset(), 0); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 3); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 312); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 0); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 45); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getOffset(), 1); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(2).getId(), 29); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(2).getOffset(), 0); - BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 2); - BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 202); - BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getOffset(), 1); - BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 312); - BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getOffset(), 1); + BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 2); + BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getId(), 202); + BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getOffset(), 1); + BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getId(), 312); + BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getOffset(), 1); } BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 ) @@ -167,9 +167,9 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 ) concordia2.clearIndex(); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 1); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 312); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 2); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 1); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 312); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 2); } BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 ) @@ -185,35 +185,35 @@ BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 ) Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(), TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); - OccurencesList searchResult0 = concordia2.fullSearch("okno", 10, 0); + OccurrencesList searchResult0 = concordia2.fullSearch("okno", 10, 0); /* search0 - occurence(exampleId=4, offset=1) - occurence(exampleId=3, offset=2) - occurence(exampleId=2, offset=2) - occurence(exampleId=4, offset=3) - occurence(exampleId=1, offset=2) + occurrence(exampleId=4, offset=1) + occurrence(exampleId=3, offset=2) + occurrence(exampleId=2, offset=2) + occurrence(exampleId=4, offset=3) + occurrence(exampleId=1, offset=2) */ - OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1); - OccurencesList searchResult2 = concordia2.fullSearch("okno", 10, 3); - OccurencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3); - OccurencesList searchResult4 = concordia2.fullSearch("okno", 10, 6); + OccurrencesList searchResult1 = concordia2.fullSearch("okno", 2, 1); + OccurrencesList searchResult2 = concordia2.fullSearch("okno", 10, 3); + OccurrencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3); + OccurrencesList searchResult4 = concordia2.fullSearch("okno", 10, 6); concordia2.clearIndex(); BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 5); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 3); - BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 2); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 2); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 3); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 2); BOOST_CHECK_EQUAL(searchResult2.getTotalCount(), 5); - BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 4); - BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 1); + BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getId(), 4); + BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getId(), 1); BOOST_CHECK_EQUAL(searchResult3.getTotalCount(), 0); BOOST_CHECK_EQUAL(searchResult4.getTotalCount(), 5); - BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 0); + BOOST_CHECK_EQUAL(searchResult4.getOccurrences().size(), 0); } @@ -267,16 +267,16 @@ BOOST_AUTO_TEST_CASE( ConcordiaLexiconSearch1 ) concordia.clearIndex(); // first two patterns do not cover the whole example source - BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 0); - BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 0); + BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 0); + BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 0); - BOOST_CHECK_EQUAL(searchResult3.getOccurences().size(), 1); - BOOST_CHECK_EQUAL(searchResult3.getOccurences().at(0).getId(), 123); - BOOST_CHECK_EQUAL(searchResult3.getOccurences().at(0).getOffset(), 0); + BOOST_CHECK_EQUAL(searchResult3.getOccurrences().size(), 1); + BOOST_CHECK_EQUAL(searchResult3.getOccurrences().at(0).getId(), 123); + BOOST_CHECK_EQUAL(searchResult3.getOccurrences().at(0).getOffset(), 0); - BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 1); - BOOST_CHECK_EQUAL(searchResult4.getOccurences().at(0).getId(), 14); - BOOST_CHECK_EQUAL(searchResult4.getOccurences().at(0).getOffset(), 0); + BOOST_CHECK_EQUAL(searchResult4.getOccurrences().size(), 1); + BOOST_CHECK_EQUAL(searchResult4.getOccurrences().at(0).getId(), 14); + BOOST_CHECK_EQUAL(searchResult4.getOccurrences().at(0).getOffset(), 0); } @@ -351,43 +351,43 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch1 ) /* adding fragment: offset=0, length=2 - adding occurence: example id=167, offset=2 - adding occurence: example id=45, offset=3 - adding occurence: example id=51, offset=1 - adding occurence: example id=123, offset=1 + adding occurrence: example id=167, offset=2 + adding occurrence: example id=45, offset=3 + adding occurrence: example id=51, offset=1 + adding occurrence: example id=123, offset=1 adding fragment: offset=1, length=1 - adding occurence: example id=167, offset=3 - adding occurence: example id=45, offset=4 - adding occurence: example id=51, offset=2 - adding occurence: example id=123, offset=2 + adding occurrence: example id=167, offset=3 + adding occurrence: example id=45, offset=4 + adding occurrence: example id=51, offset=2 + adding occurrence: example id=123, offset=2 adding fragment: offset=2, length=1 - adding occurence: example id=167, offset=1 + adding occurrence: example id=167, offset=1 */ BOOST_CHECK_EQUAL(searchResult1->getFragments().size(), 3); BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getPatternOffset(), 0); BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getMatchedLength(), 2); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getId(), 167); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getOffset(), 2); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(1).getId(), 45); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(1).getOffset(), 3); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(2).getId(), 51); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(2).getOffset(), 1); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getId(), 167); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getOffset(), 2); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(1).getId(), 45); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(1).getOffset(), 3); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(2).getId(), 51); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(2).getOffset(), 1); BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getPatternOffset(), 1); BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getMatchedLength(), 1); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(0).getId(), 167); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(0).getOffset(), 3); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(1).getId(), 45); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(1).getOffset(), 4); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(2).getId(), 51); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(2).getOffset(), 2); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(0).getId(), 167); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(0).getOffset(), 3); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(1).getId(), 45); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(1).getOffset(), 4); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(2).getId(), 51); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(2).getOffset(), 2); BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getPatternOffset(), 2); BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getMatchedLength(), 1); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurences().at(0).getId(), 167); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurences().at(0).getOffset(), 1); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurrences().at(0).getId(), 167); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurrences().at(0).getOffset(), 1); concordia.clearIndex(); } @@ -443,43 +443,43 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch2 ) Best overlay { fragment(patternOffset=1, matchedLength=4) { - occurence(exampleId=321, offset=0) + occurrence(exampleId=321, offset=0) } fragment(patternOffset=5, matchedLength=4) { - occurence(exampleId=14, offset=7) + occurrence(exampleId=14, offset=7) } } All fragments { fragment(patternOffset=4, matchedLength=5) { - occurence(exampleId=14, offset=6) + occurrence(exampleId=14, offset=6) } fragment(patternOffset=1, matchedLength=4) { - occurence(exampleId=321, offset=0) + occurrence(exampleId=321, offset=0) } fragment(patternOffset=5, matchedLength=4) { - occurence(exampleId=14, offset=7) + occurrence(exampleId=14, offset=7) } fragment(patternOffset=2, matchedLength=3) { - occurence(exampleId=321, offset=1) + occurrence(exampleId=321, offset=1) } fragment(patternOffset=6, matchedLength=3) { - occurence(exampleId=14, offset=8) + occurrence(exampleId=14, offset=8) } fragment(patternOffset=3, matchedLength=2) { - occurence(exampleId=321, offset=2) + occurrence(exampleId=321, offset=2) } fragment(patternOffset=7, matchedLength=2) { - occurence(exampleId=14, offset=9) + occurrence(exampleId=14, offset=9) } fragment(patternOffset=8, matchedLength=1) { - occurence(exampleId=14, offset=10) + occurrence(exampleId=14, offset=10) } } */ - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getId(), 14); - BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getOffset(), 6); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getId(), 14); + BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getOffset(), 6); BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getPatternOffset(), 4); BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getMatchedLength(), 5); BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getStart(), 4); @@ -522,7 +522,7 @@ BOOST_AUTO_TEST_CASE( Tokenize ) } -BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences ) +BOOST_AUTO_TEST_CASE( ConcordiaCountOccurrences ) { Concordia concordia = Concordia(TestResourcesManager::getTempPath(), TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); @@ -556,12 +556,12 @@ BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences ) */ - BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada"), 0); - BOOST_CHECK_EQUAL(concordia.countOccurences("Marysia posiada"), 0); - BOOST_CHECK_EQUAL(concordia.countOccurences("Marysia posiada rysia"), 1); - BOOST_CHECK_EQUAL(concordia.countOccurences("kota Ala posiada"), 0); - BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada kota"), 2); - BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada kota i psa"), 1); + BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada"), 0); + BOOST_CHECK_EQUAL(concordia.countOccurrences("Marysia posiada"), 0); + BOOST_CHECK_EQUAL(concordia.countOccurrences("Marysia posiada rysia"), 1); + BOOST_CHECK_EQUAL(concordia.countOccurrences("kota Ala posiada"), 0); + BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada kota"), 2); + BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada kota i psa"), 1); concordia.clearIndex(); diff --git a/concordia/t/test_concordia_searcher.cpp b/concordia/t/test_concordia_searcher.cpp index ff4f911..16dc68d 100644 --- a/concordia/t/test_concordia_searcher.cpp +++ b/concordia/t/test_concordia_searcher.cpp @@ -135,13 +135,13 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) SA->push_back(11); SUFFIX_MARKER_TYPE highResLength; - std::vector result = searcher.lcpSearch(T, markers, SA, pattern, highResLength); + std::vector result = searcher.lcpSearch(T, markers, SA, pattern, highResLength); SUFFIX_MARKER_TYPE length = highResLength / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get the following results from SA: 3: ana 1: anana - Which are 2 substring occurences (34,3) and (34,1) with the lcp length = 2; + Which are 2 substring occurrences (34,3) and (34,1) with the lcp length = 2; */ BOOST_CHECK_EQUAL(result.size(),2); @@ -185,12 +185,12 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) pattern2.push_back(2); SUFFIX_MARKER_TYPE highResLength2; - std::vector result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2); + std::vector result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2); SUFFIX_MARKER_TYPE length2 = highResLength2 / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get one result from SA: 0: banana - Which is one substring occurence (34,0) with the lcp length = 6; + Which is one substring occurrence (34,0) with the lcp length = 6; */ @@ -228,12 +228,12 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) pattern3.push_back(3); SUFFIX_MARKER_TYPE highResLength3; - std::vector result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3); + std::vector result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3); SUFFIX_MARKER_TYPE length3 = highResLength3 / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get one result from SA: 0: banana - Which is one substring occurence (34,0) with the lcp length = 5; + Which is one substring occurrence (34,0) with the lcp length = 5; */ BOOST_CHECK_EQUAL(result3.size(),1); @@ -265,13 +265,13 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) pattern4.push_back(4); SUFFIX_MARKER_TYPE highResLength4; - std::vector result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4); + std::vector result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4); SUFFIX_MARKER_TYPE length4 = highResLength4 / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get 2 results from SA: 4: na 2: nana - Which are 2 substring occurences (34,4) and (34,2) with the lcp length = 2; + Which are 2 substring occurrences (34,4) and (34,2) with the lcp length = 2; */ BOOST_CHECK_EQUAL(result4.size(),2); @@ -296,7 +296,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) pattern5.push_back(4); SUFFIX_MARKER_TYPE highResLength5; - std::vector result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5); + std::vector result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5); SUFFIX_MARKER_TYPE length5 = highResLength5 / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get 0 results from SA, lcp length = 0; @@ -320,7 +320,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) pattern6.push_back(0); SUFFIX_MARKER_TYPE highResLength6; - std::vector result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6); + std::vector result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6); SUFFIX_MARKER_TYPE length6 = highResLength6 / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get 0 results from SA, lcp length = 0;