occurrence refactoring
This commit is contained in:
parent
73b3d22d97
commit
d39c0400c9
@ -178,14 +178,14 @@ int main(int argc, char** argv) {
|
||||
concordia.simpleSearch(pattern);
|
||||
time_end = boost::posix_time::microsec_clock::local_time();
|
||||
msdiff = time_end - time_start;
|
||||
std::cout << "\tFound: " << result.getOccurences().size()
|
||||
std::cout << "\tFound: " << result.getOccurrences().size()
|
||||
<< " matches. " << "Search took: "
|
||||
<< msdiff.total_milliseconds() << "ms." << std::endl;
|
||||
if (!cli.count("silent")) {
|
||||
BOOST_FOREACH(SubstringOccurence occurence,
|
||||
result.getOccurences()) {
|
||||
BOOST_FOREACH(SubstringOccurrence occurrence,
|
||||
result.getOccurrences()) {
|
||||
std::cout << "\t\tfound match in sentence number: "
|
||||
<< occurence.getId() << std::endl;
|
||||
<< occurrence.getId() << std::endl;
|
||||
}
|
||||
}
|
||||
} else if (cli.count("anubis-search")) {
|
||||
@ -237,7 +237,7 @@ int main(int argc, char** argv) {
|
||||
<< "," << fragment.getEnd()
|
||||
<< "] (exampleCount,"
|
||||
<< " patternOffset, length): "
|
||||
<< fragment.getOccurences().size() << ","
|
||||
<< fragment.getOccurrences().size() << ","
|
||||
<< fragment.getPatternOffset() << ","
|
||||
<< fragment.getMatchedLength()
|
||||
<< std::endl;
|
||||
@ -250,7 +250,7 @@ int main(int argc, char** argv) {
|
||||
<< "," << fragment.getEnd()
|
||||
<< "] (exampleCount,"
|
||||
<< " patternOffset, length): "
|
||||
<< fragment.getOccurences().size() << ","
|
||||
<< fragment.getOccurrences().size() << ","
|
||||
<< fragment.getPatternOffset() << ","
|
||||
<< fragment.getMatchedLength()
|
||||
<< std::endl;
|
||||
|
@ -192,9 +192,9 @@ void Concordia::_initializeIndex() {
|
||||
}
|
||||
}
|
||||
|
||||
SUFFIX_MARKER_TYPE Concordia::countOccurences(const std::string & pattern) {
|
||||
SUFFIX_MARKER_TYPE Concordia::countOccurrences(const std::string & pattern) {
|
||||
if (_T->size() > 0) {
|
||||
return _searcher->countOccurences(_hashGenerator, _T,
|
||||
return _searcher->countOccurrences(_hashGenerator, _T,
|
||||
_markers, _SA, pattern);
|
||||
} else {
|
||||
return 0;
|
||||
@ -215,7 +215,7 @@ MatchedPatternFragment Concordia::simpleSearch(
|
||||
}
|
||||
}
|
||||
|
||||
OccurencesList Concordia::fullSearch(
|
||||
OccurrencesList Concordia::fullSearch(
|
||||
const std::string & pattern,
|
||||
int limit,
|
||||
int offset,
|
||||
@ -225,7 +225,7 @@ OccurencesList Concordia::fullSearch(
|
||||
_markers, _SA, pattern, limit, offset, byWhitespace);
|
||||
} else {
|
||||
// If the index or search pattern are empty, return an empty result.
|
||||
OccurencesList result(0);
|
||||
OccurrencesList result(0);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include "concordia/common/config.hpp"
|
||||
#include "concordia/example.hpp"
|
||||
#include "concordia/matched_pattern_fragment.hpp"
|
||||
#include "concordia/occurences_list.hpp"
|
||||
#include "concordia/occurrences_list.hpp"
|
||||
#include "concordia/concordia_config.hpp"
|
||||
#include "concordia/concordia_index.hpp"
|
||||
#include "concordia/index_searcher.hpp"
|
||||
@ -121,24 +121,24 @@ public:
|
||||
For more info see \ref tutorial1_2.
|
||||
\param pattern pattern to be searched in the index
|
||||
\param byWhitespace whether to tokenize the pattern by white space
|
||||
\returns matched pattern fragment containing vector of occurences
|
||||
\returns matched pattern fragment containing vector of occurrences
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
MatchedPatternFragment simpleSearch(const std::string & pattern,
|
||||
bool byWhitespace = false);
|
||||
|
||||
/*! Performs a substring lookup in RAM-based index, returning all occurences.
|
||||
The result contains no more than "limit" occurences, starting at "offset".
|
||||
/*! Performs a substring lookup in RAM-based index, returning all occurrences.
|
||||
The result contains no more than "limit" occurrences, starting at "offset".
|
||||
\param hashGenerator hash generator to be used to convert
|
||||
input sentence to a hash
|
||||
\param pattern string pattern to be searched in the index.
|
||||
\param limit maximum number of occurences to return
|
||||
\param offset starting occurence
|
||||
\param limit maximum number of occurrences to return
|
||||
\param offset starting occurrence
|
||||
\param byWhitespace should the pattern be tokenized by white space
|
||||
\returns list of occurences of the pattern in the index
|
||||
\returns list of occurrences of the pattern in the index
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
OccurencesList fullSearch(
|
||||
OccurrencesList fullSearch(
|
||||
const std::string & pattern,
|
||||
int limit,
|
||||
int offset,
|
||||
@ -151,13 +151,13 @@ public:
|
||||
the lexicon search requires that the match is the whole example source.
|
||||
\param pattern pattern to be searched in the index
|
||||
\param byWhitespace whether to tokenize the pattern by white space
|
||||
\returns matched pattern fragment containing vector of occurences
|
||||
\returns matched pattern fragment containing vector of occurrences
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
MatchedPatternFragment lexiconSearch(const std::string & pattern,
|
||||
bool byWhitespace = false);
|
||||
|
||||
SUFFIX_MARKER_TYPE countOccurences(const std::string & pattern);
|
||||
SUFFIX_MARKER_TYPE countOccurrences(const std::string & pattern);
|
||||
|
||||
/*! \deprecated
|
||||
Finds the examples from the index, whose resemblance to the
|
||||
|
@ -32,15 +32,15 @@ void ConcordiaSearcher::concordiaSearch(
|
||||
std::vector<sauchar_t> currentPattern(
|
||||
patternVector.begin()+highResOffset, patternVector.end());
|
||||
SUFFIX_MARKER_TYPE lcpLength;
|
||||
std::vector<SubstringOccurence> occurences =
|
||||
std::vector<SubstringOccurrence> occurrences =
|
||||
lcpSearch(T, markers, SA, currentPattern, lcpLength);
|
||||
|
||||
if (occurences.size() > 0) {
|
||||
if (occurrences.size() > 0) {
|
||||
MatchedPatternFragment fragment(offset,
|
||||
lcpLength / sizeof(INDEX_CHARACTER_TYPE));
|
||||
|
||||
BOOST_FOREACH(SubstringOccurence occurence, occurences) {
|
||||
fragment.addOccurence(occurence);
|
||||
BOOST_FOREACH(SubstringOccurrence occurrence, occurrences) {
|
||||
fragment.addOccurrence(occurrence);
|
||||
}
|
||||
result->addFragment(fragment);
|
||||
}
|
||||
@ -155,7 +155,7 @@ boost::shared_ptr<TmMatchesMap> ConcordiaSearcher::getTmMatches(
|
||||
return tmMatchesMap;
|
||||
}
|
||||
|
||||
std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
|
||||
std::vector<SubstringOccurrence> ConcordiaSearcher::lcpSearch(
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
@ -185,7 +185,7 @@ std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
|
||||
SAleft += localLeft;
|
||||
} while (patternLength < pattern.size() && size > 0);
|
||||
|
||||
std::vector<SubstringOccurence> result;
|
||||
std::vector<SubstringOccurrence> result;
|
||||
|
||||
if (size == 0) {
|
||||
// The search managed to find exactly the longest common prefixes.
|
||||
@ -208,7 +208,7 @@ std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
|
||||
}
|
||||
|
||||
void ConcordiaSearcher::_collectResults(
|
||||
std::vector<SubstringOccurence> & result,
|
||||
std::vector<SubstringOccurrence> & result,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
saidx_t left, saidx_t size) {
|
||||
@ -219,7 +219,7 @@ void ConcordiaSearcher::_collectResults(
|
||||
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
||||
SUFFIX_MARKER_TYPE marker =
|
||||
markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
|
||||
result.push_back(SubstringOccurence(marker));
|
||||
result.push_back(SubstringOccurrence(marker));
|
||||
|
||||
// truncate results,
|
||||
// we don't need too many identical pattern overlays
|
||||
@ -237,54 +237,54 @@ void ConcordiaSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
SUFFIX_MARKER_TYPE totalPatternLength,
|
||||
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
||||
SUFFIX_MARKER_TYPE patternOffset) {
|
||||
SubstringOccurence occurence;
|
||||
if (_getOccurenceFromSA(SA, markers, sa_pos, occurence)) {
|
||||
_addOccurenceToMap(tmMatchesMap,
|
||||
occurence,
|
||||
SubstringOccurrence occurrence;
|
||||
if (_getOccurrenceFromSA(SA, markers, sa_pos, occurrence)) {
|
||||
_addOccurrenceToMap(tmMatchesMap,
|
||||
occurrence,
|
||||
totalPatternLength,
|
||||
matchedFragmentLength,
|
||||
patternOffset);
|
||||
}
|
||||
}
|
||||
|
||||
bool ConcordiaSearcher::_getOccurenceFromSA(
|
||||
bool ConcordiaSearcher::_getOccurrenceFromSA(
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
saidx_t sa_pos,
|
||||
SubstringOccurence & occurence) {
|
||||
SubstringOccurrence & occurrence) {
|
||||
saidx_t resultPos = SA->at(sa_pos);
|
||||
|
||||
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
||||
SUFFIX_MARKER_TYPE marker =
|
||||
markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
|
||||
occurence.enterDataFromMarker(marker);
|
||||
occurrence.enterDataFromMarker(marker);
|
||||
}
|
||||
}
|
||||
|
||||
void ConcordiaSearcher::_addOccurenceToMap(
|
||||
void ConcordiaSearcher::_addOccurrenceToMap(
|
||||
boost::shared_ptr<TmMatchesMap> tmMatchesMap,
|
||||
SubstringOccurence & occurence,
|
||||
SubstringOccurrence & occurrence,
|
||||
SUFFIX_MARKER_TYPE totalPatternLength,
|
||||
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
||||
SUFFIX_MARKER_TYPE patternOffset) {
|
||||
TmMatches * tmMatches;
|
||||
|
||||
TmMatchesMapIterator mapIterator = tmMatchesMap->find(
|
||||
occurence.getId());
|
||||
occurrence.getId());
|
||||
if (mapIterator != tmMatchesMap->end()) {
|
||||
tmMatches = mapIterator->second;
|
||||
} else {
|
||||
tmMatches = new TmMatches(occurence.getId(),
|
||||
occurence.getExampleLength(),
|
||||
tmMatches = new TmMatches(occurrence.getId(),
|
||||
occurrence.getExampleLength(),
|
||||
totalPatternLength);
|
||||
SUFFIX_MARKER_TYPE key = occurence.getId();
|
||||
SUFFIX_MARKER_TYPE key = occurrence.getId();
|
||||
tmMatchesMap->insert(key, tmMatches);
|
||||
}
|
||||
|
||||
// add intervals to tmMatches
|
||||
tmMatches->addExampleInterval(
|
||||
occurence.getOffset(),
|
||||
occurence.getOffset() + matchedFragmentLength);
|
||||
occurrence.getOffset(),
|
||||
occurrence.getOffset() + matchedFragmentLength);
|
||||
tmMatches->addPatternInterval(
|
||||
patternOffset,
|
||||
patternOffset + matchedFragmentLength);
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
#include "concordia/common/config.hpp"
|
||||
#include "concordia/common/utils.hpp"
|
||||
#include "concordia/substring_occurence.hpp"
|
||||
#include "concordia/substring_occurrence.hpp"
|
||||
#include "concordia/concordia_exception.hpp"
|
||||
#include "concordia/concordia_config.hpp"
|
||||
#include "concordia/concordia_search_result.hpp"
|
||||
@ -100,7 +100,7 @@ public:
|
||||
\returns list of locations of the longest fragments
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
std::vector<SubstringOccurence> lcpSearch(
|
||||
std::vector<SubstringOccurrence> lcpSearch(
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
@ -108,7 +108,7 @@ public:
|
||||
SUFFIX_MARKER_TYPE & length);
|
||||
|
||||
private:
|
||||
void _collectResults(std::vector<SubstringOccurence> & result,
|
||||
void _collectResults(std::vector<SubstringOccurrence> & result,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
saidx_t left, saidx_t size);
|
||||
@ -121,13 +121,13 @@ private:
|
||||
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
||||
SUFFIX_MARKER_TYPE patternOffset);
|
||||
|
||||
bool _getOccurenceFromSA(boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
bool _getOccurrenceFromSA(boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
saidx_t sa_pos,
|
||||
SubstringOccurence & occurence);
|
||||
SubstringOccurrence & occurrence);
|
||||
|
||||
void _addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
|
||||
SubstringOccurence & occurence,
|
||||
void _addOccurrenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
|
||||
SubstringOccurrence & occurrence,
|
||||
SUFFIX_MARKER_TYPE totalPatternLength,
|
||||
SUFFIX_MARKER_TYPE matchedFragmentLength,
|
||||
SUFFIX_MARKER_TYPE patternOffset);
|
||||
|
@ -42,10 +42,10 @@ MatchedPatternFragment IndexSearcher::simpleSearch(
|
||||
saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE);
|
||||
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos);
|
||||
|
||||
SubstringOccurence occurence;
|
||||
occurence.enterDataFromMarker(marker);
|
||||
result.addOccurence(occurence);
|
||||
if (result.getOccurences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
|
||||
SubstringOccurrence occurrence;
|
||||
occurrence.enterDataFromMarker(marker);
|
||||
result.addOccurrence(occurrence);
|
||||
if (result.getOccurrences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -55,7 +55,7 @@ MatchedPatternFragment IndexSearcher::simpleSearch(
|
||||
return result;
|
||||
}
|
||||
|
||||
OccurencesList IndexSearcher::fullSearch(
|
||||
OccurrencesList IndexSearcher::fullSearch(
|
||||
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
@ -74,7 +74,7 @@ OccurencesList IndexSearcher::fullSearch(
|
||||
(const sauchar_t *) patternArray, patternLength,
|
||||
SA->data(), (saidx_t) SA->size(), &left);
|
||||
|
||||
OccurencesList result(size);
|
||||
OccurrencesList result(size);
|
||||
|
||||
int returnedResults = limit;
|
||||
if ((size - offset) < limit) {
|
||||
@ -91,9 +91,9 @@ OccurencesList IndexSearcher::fullSearch(
|
||||
saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE);
|
||||
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos);
|
||||
|
||||
SubstringOccurence occurence;
|
||||
occurence.enterDataFromMarker(marker);
|
||||
result.addOccurence(occurence);
|
||||
SubstringOccurrence occurrence;
|
||||
occurrence.enterDataFromMarker(marker);
|
||||
result.addOccurrence(occurrence);
|
||||
}
|
||||
}
|
||||
|
||||
@ -148,10 +148,10 @@ MatchedPatternFragment IndexSearcher::lexiconSearch(
|
||||
// so we should look at the marker of the next character
|
||||
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos + 1);
|
||||
|
||||
SubstringOccurence occurence;
|
||||
occurence.enterDataFromMarker(marker);
|
||||
result.addOccurence(occurence);
|
||||
if (result.getOccurences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
|
||||
SubstringOccurrence occurrence;
|
||||
occurrence.enterDataFromMarker(marker);
|
||||
result.addOccurrence(occurrence);
|
||||
if (result.getOccurrences().size() >= CONCORDIA_SEARCH_MAX_RESULTS) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -161,7 +161,7 @@ MatchedPatternFragment IndexSearcher::lexiconSearch(
|
||||
return result;
|
||||
}
|
||||
|
||||
SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
|
||||
SUFFIX_MARKER_TYPE IndexSearcher::countOccurrences(
|
||||
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
@ -182,7 +182,7 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
|
||||
(const sauchar_t *) patternArray, patternLength,
|
||||
SA->data(), (saidx_t) SA->size(), &left);
|
||||
|
||||
SUFFIX_MARKER_TYPE occurencesCount = 0;
|
||||
SUFFIX_MARKER_TYPE occurrencesCount = 0;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
saidx_t resultPos = SA->at(left + i);
|
||||
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
||||
@ -191,13 +191,13 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
|
||||
// obtain accidental results exceeding the boundaries
|
||||
// of characters in hashed index. The above check
|
||||
// removes these accidental results.
|
||||
occurencesCount++;
|
||||
occurrencesCount++;
|
||||
}
|
||||
}
|
||||
|
||||
delete[] patternArray;
|
||||
|
||||
return occurencesCount;
|
||||
return occurrencesCount;
|
||||
}
|
||||
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
#include "concordia/common/config.hpp"
|
||||
#include "concordia/matched_pattern_fragment.hpp"
|
||||
#include "concordia/occurences_list.hpp"
|
||||
#include "concordia/occurrences_list.hpp"
|
||||
#include "concordia/hash_generator.hpp"
|
||||
#include "concordia/concordia_exception.hpp"
|
||||
#include "concordia/concordia_searcher.hpp"
|
||||
@ -43,7 +43,7 @@ public:
|
||||
\param markers markers array for the needs of searching
|
||||
\param SA suffix array for the needs of searching
|
||||
\param pattern string pattern to be searched in the index.
|
||||
\returns matched pattern fragment, containing occurences of the pattern in the index
|
||||
\returns matched pattern fragment, containing occurrences of the pattern in the index
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
MatchedPatternFragment simpleSearch(
|
||||
@ -54,21 +54,21 @@ public:
|
||||
const std::string & pattern,
|
||||
bool byWhitespace = false);
|
||||
|
||||
/*! Performs a substring lookup in RAM-based index, returning all occurences.
|
||||
The result contains no more than "limit" occurences, starting at "offset".
|
||||
/*! Performs a substring lookup in RAM-based index, returning all occurrences.
|
||||
The result contains no more than "limit" occurrences, starting at "offset".
|
||||
\param hashGenerator hash generator to be used to convert
|
||||
input sentence to a hash
|
||||
\param T hashed index to search in
|
||||
\param markers markers array for the needs of searching
|
||||
\param SA suffix array for the needs of searching
|
||||
\param pattern string pattern to be searched in the index.
|
||||
\param limit maximum number of occurences to return
|
||||
\param offset starting occurence
|
||||
\param limit maximum number of occurrences to return
|
||||
\param offset starting occurrence
|
||||
\param byWhitespace should the pattern be tokenized by white space
|
||||
\returns list of occurences of the pattern in the index
|
||||
\returns list of occurrences of the pattern in the index
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
OccurencesList fullSearch(
|
||||
OccurrencesList fullSearch(
|
||||
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
@ -89,7 +89,7 @@ public:
|
||||
\param markers markers array for the needs of searching
|
||||
\param SA suffix array for the needs of searching
|
||||
\param pattern string pattern to be searched in the index.
|
||||
\returns matched pattern fragment, containing occurences of the pattern in the index
|
||||
\returns matched pattern fragment, containing occurrences of the pattern in the index
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
MatchedPatternFragment lexiconSearch(
|
||||
@ -100,7 +100,7 @@ public:
|
||||
const std::string & pattern,
|
||||
bool byWhitespace = false);
|
||||
|
||||
SUFFIX_MARKER_TYPE countOccurences(
|
||||
SUFFIX_MARKER_TYPE countOccurrences(
|
||||
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
|
@ -12,7 +12,7 @@ MatchedPatternFragment::MatchedPatternFragment(
|
||||
MatchedPatternFragment::~MatchedPatternFragment() {
|
||||
}
|
||||
|
||||
void MatchedPatternFragment::addOccurence(
|
||||
const SubstringOccurence & occurence) {
|
||||
_occurences.push_back(occurence);
|
||||
void MatchedPatternFragment::addOccurrence(
|
||||
const SubstringOccurrence & occurrence) {
|
||||
_occurrences.push_back(occurrence);
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
#include "concordia/common/config.hpp"
|
||||
#include "concordia/interval.hpp"
|
||||
#include "concordia/substring_occurence.hpp"
|
||||
#include "concordia/substring_occurrence.hpp"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <boost/foreach.hpp>
|
||||
@ -30,17 +30,17 @@ public:
|
||||
*/
|
||||
virtual ~MatchedPatternFragment();
|
||||
|
||||
/*! Getter for occurences.
|
||||
\returns occurences
|
||||
/*! Getter for occurrences.
|
||||
\returns occurrences
|
||||
*/
|
||||
std::vector<SubstringOccurence> getOccurences() const {
|
||||
return _occurences;
|
||||
std::vector<SubstringOccurrence> getOccurrences() const {
|
||||
return _occurrences;
|
||||
}
|
||||
|
||||
/*! Adds an occurence to the list.
|
||||
\param fragment occurence to be added
|
||||
/*! Adds an occurrence to the list.
|
||||
\param occurrence occurrence to be added
|
||||
*/
|
||||
void addOccurence(const SubstringOccurence & occurence);
|
||||
void addOccurrence(const SubstringOccurrence & occurrence);
|
||||
|
||||
/*! Getter for pattern offset.
|
||||
\returns pattern offset
|
||||
@ -68,8 +68,8 @@ public:
|
||||
o << "fragment(patternOffset=" << fragment.getPatternOffset()
|
||||
<< ", matchedLength=" << fragment.getMatchedLength() << ") {"
|
||||
<< std::endl;
|
||||
BOOST_FOREACH(SubstringOccurence occurence, fragment.getOccurences()) {
|
||||
o << "\t" << occurence << std::endl;
|
||||
BOOST_FOREACH(SubstringOccurrence occurrence, fragment.getOccurrences()) {
|
||||
o << "\t" << occurrence << std::endl;
|
||||
}
|
||||
|
||||
o << "}";
|
||||
@ -78,7 +78,7 @@ public:
|
||||
|
||||
|
||||
private:
|
||||
std::vector<SubstringOccurence> _occurences;
|
||||
std::vector<SubstringOccurrence> _occurrences;
|
||||
|
||||
SUFFIX_MARKER_TYPE _patternOffset;
|
||||
|
||||
|
@ -1,13 +0,0 @@
|
||||
#include "concordia/occurences_list.hpp"
|
||||
|
||||
OccurencesList::OccurencesList(const SUFFIX_MARKER_TYPE & totalCount):
|
||||
_totalCount(totalCount) {
|
||||
}
|
||||
|
||||
OccurencesList::~OccurencesList() {
|
||||
}
|
||||
|
||||
void OccurencesList::addOccurence(
|
||||
const SubstringOccurence & occurence) {
|
||||
_occurences.push_back(occurence);
|
||||
}
|
@ -1,50 +0,0 @@
|
||||
#ifndef OCCURENCES_LIST_HDR
|
||||
#define OCCURENCES_LIST_HDR
|
||||
|
||||
#include "concordia/common/config.hpp"
|
||||
#include "concordia/substring_occurence.hpp"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
/*!
|
||||
Class representing the occurences list in full search. The list only
|
||||
contains as many occurences as specified in the "limit" parameter for full search.
|
||||
The "totalCount" field stores the total number of occurences available.
|
||||
|
||||
*/
|
||||
|
||||
class OccurencesList {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
explicit OccurencesList(const SUFFIX_MARKER_TYPE & totalCount);
|
||||
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~OccurencesList();
|
||||
|
||||
/*! Getter for occurences.
|
||||
\returns occurences
|
||||
*/
|
||||
std::vector<SubstringOccurence> getOccurences() const {
|
||||
return _occurences;
|
||||
}
|
||||
|
||||
SUFFIX_MARKER_TYPE getTotalCount() const {
|
||||
return _totalCount;
|
||||
}
|
||||
|
||||
/*! Adds an occurence to the list.
|
||||
\param fragment occurence to be added
|
||||
*/
|
||||
void addOccurence(const SubstringOccurence & occurence);
|
||||
|
||||
|
||||
private:
|
||||
std::vector<SubstringOccurence> _occurences;
|
||||
|
||||
SUFFIX_MARKER_TYPE _totalCount;
|
||||
};
|
||||
|
||||
#endif
|
13
concordia/occurrences_list.cpp
Normal file
13
concordia/occurrences_list.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
#include "concordia/occurrences_list.hpp"
|
||||
|
||||
OccurrencesList::OccurrencesList(const SUFFIX_MARKER_TYPE & totalCount):
|
||||
_totalCount(totalCount) {
|
||||
}
|
||||
|
||||
OccurrencesList::~OccurrencesList() {
|
||||
}
|
||||
|
||||
void OccurrencesList::addOccurrence(
|
||||
const SubstringOccurrence & occurrence) {
|
||||
_occurrences.push_back(occurrence);
|
||||
}
|
50
concordia/occurrences_list.hpp
Normal file
50
concordia/occurrences_list.hpp
Normal file
@ -0,0 +1,50 @@
|
||||
#ifndef OCCURRENCES_LIST_HDR
|
||||
#define OCCURRENCES_LIST_HDR
|
||||
|
||||
#include "concordia/common/config.hpp"
|
||||
#include "concordia/substring_occurrence.hpp"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
/*!
|
||||
Class representing the occurrences list in full search. The list only
|
||||
contains as many occurrences as specified in the "limit" parameter for full search.
|
||||
The "totalCount" field stores the total number of occurrences available.
|
||||
|
||||
*/
|
||||
|
||||
class OccurrencesList {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
explicit OccurrencesList(const SUFFIX_MARKER_TYPE & totalCount);
|
||||
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~OccurrencesList();
|
||||
|
||||
/*! Getter for occurrences.
|
||||
\returns occurrences
|
||||
*/
|
||||
std::vector<SubstringOccurrence> getOccurrences() const {
|
||||
return _occurrences;
|
||||
}
|
||||
|
||||
SUFFIX_MARKER_TYPE getTotalCount() const {
|
||||
return _totalCount;
|
||||
}
|
||||
|
||||
/*! Adds an occurrence to the list.
|
||||
\param occurrence occurrence to be added
|
||||
*/
|
||||
void addOccurrence(const SubstringOccurrence & occurrence);
|
||||
|
||||
|
||||
private:
|
||||
std::vector<SubstringOccurrence> _occurrences;
|
||||
|
||||
SUFFIX_MARKER_TYPE _totalCount;
|
||||
};
|
||||
|
||||
#endif
|
@ -1,16 +1,16 @@
|
||||
#include "concordia/substring_occurence.hpp"
|
||||
#include "concordia/substring_occurrence.hpp"
|
||||
#include "concordia/common/utils.hpp"
|
||||
|
||||
SubstringOccurence::SubstringOccurence() {
|
||||
SubstringOccurrence::SubstringOccurrence() {
|
||||
}
|
||||
|
||||
SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) {
|
||||
SubstringOccurrence::SubstringOccurrence(const SUFFIX_MARKER_TYPE & marker) {
|
||||
_id = Utils::getIdFromMarker(marker);
|
||||
_offset = Utils::getOffsetFromMarker(marker);
|
||||
_exampleLength = Utils::getLengthFromMarker(marker);
|
||||
}
|
||||
|
||||
void SubstringOccurence::enterDataFromMarker(
|
||||
void SubstringOccurrence::enterDataFromMarker(
|
||||
const SUFFIX_MARKER_TYPE & marker) {
|
||||
_id = Utils::getIdFromMarker(marker);
|
||||
_offset = Utils::getOffsetFromMarker(marker);
|
||||
@ -18,7 +18,7 @@ void SubstringOccurence::enterDataFromMarker(
|
||||
}
|
||||
|
||||
|
||||
SubstringOccurence::SubstringOccurence(
|
||||
SubstringOccurrence::SubstringOccurrence(
|
||||
const SUFFIX_MARKER_TYPE & id,
|
||||
const SUFFIX_MARKER_TYPE & offset,
|
||||
const SUFFIX_MARKER_TYPE & exampleLength):
|
||||
@ -27,6 +27,6 @@ SubstringOccurence::SubstringOccurence(
|
||||
_exampleLength(exampleLength) {
|
||||
}
|
||||
|
||||
SubstringOccurence::~SubstringOccurence() {
|
||||
SubstringOccurrence::~SubstringOccurrence() {
|
||||
}
|
||||
|
@ -1,31 +1,31 @@
|
||||
#ifndef SUBSTRING_OCCURENCE_HDR
|
||||
#define SUBSTRING_OCCURENCE_HDR
|
||||
#ifndef SUBSTRING_OCCURRENCE_HDR
|
||||
#define SUBSTRING_OCCURRENCE_HDR
|
||||
|
||||
#include "concordia/common/config.hpp"
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
/*!
|
||||
Class representing occurence of a searched substring.
|
||||
Class representing occurrence of a searched substring.
|
||||
It holds the following information:
|
||||
- id of the example where the substring was found
|
||||
- offset of the matched substring in this example
|
||||
- length of the example
|
||||
*/
|
||||
|
||||
class SubstringOccurence {
|
||||
class SubstringOccurrence {
|
||||
public:
|
||||
/*!
|
||||
Constructor.
|
||||
|
||||
*/
|
||||
SubstringOccurence();
|
||||
SubstringOccurrence();
|
||||
|
||||
/*!
|
||||
Constructor taking data from a marker.
|
||||
\param marker
|
||||
*/
|
||||
explicit SubstringOccurence(const SUFFIX_MARKER_TYPE & marker);
|
||||
explicit SubstringOccurrence(const SUFFIX_MARKER_TYPE & marker);
|
||||
|
||||
/*!
|
||||
Constructor with three arguments.
|
||||
@ -33,12 +33,12 @@ public:
|
||||
\param offset offset of the substring in the example
|
||||
\param exampleLength length of the example
|
||||
*/
|
||||
SubstringOccurence(const SUFFIX_MARKER_TYPE & id,
|
||||
SubstringOccurrence(const SUFFIX_MARKER_TYPE & id,
|
||||
const SUFFIX_MARKER_TYPE & offset,
|
||||
const SUFFIX_MARKER_TYPE & exampleLength);
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~SubstringOccurence();
|
||||
virtual ~SubstringOccurrence();
|
||||
|
||||
/*! Getter for example id.
|
||||
\returns example id
|
||||
@ -67,9 +67,9 @@ public:
|
||||
void enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker);
|
||||
|
||||
friend std::ostream & operator << (std::ostream & o,
|
||||
const SubstringOccurence & occurence) {
|
||||
return o << "occurence(exampleId=" << occurence.getId()
|
||||
<< ", offset=" << occurence.getOffset() << ")";
|
||||
const SubstringOccurrence & occurrence) {
|
||||
return o << "occurrence(exampleId=" << occurrence.getId()
|
||||
<< ", offset=" << occurrence.getOffset() << ")";
|
||||
}
|
||||
|
||||
|
@ -73,14 +73,14 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
|
||||
|
||||
concordia.clearIndex();
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 123);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 51);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 123);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 51);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getOffset(), 1);
|
||||
|
||||
// Checking pattern spanning over 2 segments
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 0);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
||||
@ -137,19 +137,19 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
||||
|
||||
concordia2.clearIndex();
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 3);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 312);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 45);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(2).getId(), 29);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(2).getOffset(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 3);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 312);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 45);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(2).getId(), 29);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(2).getOffset(), 0);
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 202);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 312);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getId(), 202);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getId(), 312);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getOffset(), 1);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
|
||||
@ -167,9 +167,9 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
|
||||
|
||||
concordia2.clearIndex();
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 312);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getOffset(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 312);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getOffset(), 2);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
|
||||
@ -185,35 +185,35 @@ BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
|
||||
|
||||
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
OccurencesList searchResult0 = concordia2.fullSearch("okno", 10, 0);
|
||||
OccurrencesList searchResult0 = concordia2.fullSearch("okno", 10, 0);
|
||||
/*
|
||||
search0
|
||||
occurence(exampleId=4, offset=1)
|
||||
occurence(exampleId=3, offset=2)
|
||||
occurence(exampleId=2, offset=2)
|
||||
occurence(exampleId=4, offset=3)
|
||||
occurence(exampleId=1, offset=2)
|
||||
occurrence(exampleId=4, offset=1)
|
||||
occurrence(exampleId=3, offset=2)
|
||||
occurrence(exampleId=2, offset=2)
|
||||
occurrence(exampleId=4, offset=3)
|
||||
occurrence(exampleId=1, offset=2)
|
||||
*/
|
||||
OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
|
||||
OccurencesList searchResult2 = concordia2.fullSearch("okno", 10, 3);
|
||||
OccurencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3);
|
||||
OccurencesList searchResult4 = concordia2.fullSearch("okno", 10, 6);
|
||||
OccurrencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
|
||||
OccurrencesList searchResult2 = concordia2.fullSearch("okno", 10, 3);
|
||||
OccurrencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3);
|
||||
OccurrencesList searchResult4 = concordia2.fullSearch("okno", 10, 6);
|
||||
|
||||
concordia2.clearIndex();
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 5);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 3);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(0).getId(), 3);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().at(1).getId(), 2);
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult2.getTotalCount(), 5);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 4);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(0).getId(), 4);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().at(1).getId(), 1);
|
||||
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult3.getTotalCount(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult4.getTotalCount(), 5);
|
||||
BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().size(), 0);
|
||||
|
||||
}
|
||||
|
||||
@ -267,16 +267,16 @@ BOOST_AUTO_TEST_CASE( ConcordiaLexiconSearch1 )
|
||||
concordia.clearIndex();
|
||||
|
||||
// first two patterns do not cover the whole example source
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().size(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurrences().size(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurrences().size(), 0);
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult3.getOccurences().size(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult3.getOccurences().at(0).getId(), 123);
|
||||
BOOST_CHECK_EQUAL(searchResult3.getOccurences().at(0).getOffset(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult3.getOccurrences().size(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult3.getOccurrences().at(0).getId(), 123);
|
||||
BOOST_CHECK_EQUAL(searchResult3.getOccurrences().at(0).getOffset(), 0);
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult4.getOccurences().at(0).getId(), 14);
|
||||
BOOST_CHECK_EQUAL(searchResult4.getOccurences().at(0).getOffset(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().size(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().at(0).getId(), 14);
|
||||
BOOST_CHECK_EQUAL(searchResult4.getOccurrences().at(0).getOffset(), 0);
|
||||
|
||||
}
|
||||
|
||||
@ -351,43 +351,43 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
|
||||
|
||||
/*
|
||||
adding fragment: offset=0, length=2
|
||||
adding occurence: example id=167, offset=2
|
||||
adding occurence: example id=45, offset=3
|
||||
adding occurence: example id=51, offset=1
|
||||
adding occurence: example id=123, offset=1
|
||||
adding occurrence: example id=167, offset=2
|
||||
adding occurrence: example id=45, offset=3
|
||||
adding occurrence: example id=51, offset=1
|
||||
adding occurrence: example id=123, offset=1
|
||||
adding fragment: offset=1, length=1
|
||||
adding occurence: example id=167, offset=3
|
||||
adding occurence: example id=45, offset=4
|
||||
adding occurence: example id=51, offset=2
|
||||
adding occurence: example id=123, offset=2
|
||||
adding occurrence: example id=167, offset=3
|
||||
adding occurrence: example id=45, offset=4
|
||||
adding occurrence: example id=51, offset=2
|
||||
adding occurrence: example id=123, offset=2
|
||||
adding fragment: offset=2, length=1
|
||||
adding occurence: example id=167, offset=1
|
||||
adding occurrence: example id=167, offset=1
|
||||
*/
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().size(), 3);
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getPatternOffset(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getMatchedLength(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getId(), 167);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getOffset(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(1).getId(), 45);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(1).getOffset(), 3);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(2).getId(), 51);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(2).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getId(), 167);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getOffset(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(1).getId(), 45);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(1).getOffset(), 3);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(2).getId(), 51);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(2).getOffset(), 1);
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getPatternOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getMatchedLength(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(0).getId(), 167);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(0).getOffset(), 3);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(1).getId(), 45);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(1).getOffset(), 4);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(2).getId(), 51);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurences().at(2).getOffset(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(0).getId(), 167);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(0).getOffset(), 3);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(1).getId(), 45);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(1).getOffset(), 4);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(2).getId(), 51);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(1).getOccurrences().at(2).getOffset(), 2);
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getPatternOffset(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getMatchedLength(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurences().at(0).getId(), 167);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurences().at(0).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurrences().at(0).getId(), 167);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(2).getOccurrences().at(0).getOffset(), 1);
|
||||
|
||||
concordia.clearIndex();
|
||||
}
|
||||
@ -443,43 +443,43 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
|
||||
|
||||
Best overlay {
|
||||
fragment(patternOffset=1, matchedLength=4) {
|
||||
occurence(exampleId=321, offset=0)
|
||||
occurrence(exampleId=321, offset=0)
|
||||
}
|
||||
fragment(patternOffset=5, matchedLength=4) {
|
||||
occurence(exampleId=14, offset=7)
|
||||
occurrence(exampleId=14, offset=7)
|
||||
}
|
||||
}
|
||||
All fragments {
|
||||
fragment(patternOffset=4, matchedLength=5) {
|
||||
occurence(exampleId=14, offset=6)
|
||||
occurrence(exampleId=14, offset=6)
|
||||
}
|
||||
fragment(patternOffset=1, matchedLength=4) {
|
||||
occurence(exampleId=321, offset=0)
|
||||
occurrence(exampleId=321, offset=0)
|
||||
}
|
||||
fragment(patternOffset=5, matchedLength=4) {
|
||||
occurence(exampleId=14, offset=7)
|
||||
occurrence(exampleId=14, offset=7)
|
||||
}
|
||||
fragment(patternOffset=2, matchedLength=3) {
|
||||
occurence(exampleId=321, offset=1)
|
||||
occurrence(exampleId=321, offset=1)
|
||||
}
|
||||
fragment(patternOffset=6, matchedLength=3) {
|
||||
occurence(exampleId=14, offset=8)
|
||||
occurrence(exampleId=14, offset=8)
|
||||
}
|
||||
fragment(patternOffset=3, matchedLength=2) {
|
||||
occurence(exampleId=321, offset=2)
|
||||
occurrence(exampleId=321, offset=2)
|
||||
}
|
||||
fragment(patternOffset=7, matchedLength=2) {
|
||||
occurence(exampleId=14, offset=9)
|
||||
occurrence(exampleId=14, offset=9)
|
||||
}
|
||||
fragment(patternOffset=8, matchedLength=1) {
|
||||
occurence(exampleId=14, offset=10)
|
||||
occurrence(exampleId=14, offset=10)
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getId(), 14);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurences().at(0).getOffset(), 6);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getId(), 14);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getOccurrences().at(0).getOffset(), 6);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getPatternOffset(), 4);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getMatchedLength(), 5);
|
||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getStart(), 4);
|
||||
@ -522,7 +522,7 @@ BOOST_AUTO_TEST_CASE( Tokenize )
|
||||
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences )
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaCountOccurrences )
|
||||
{
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
@ -556,12 +556,12 @@ BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences )
|
||||
*/
|
||||
|
||||
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada"), 0);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurences("Marysia posiada"), 0);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurences("Marysia posiada rysia"), 1);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurences("kota Ala posiada"), 0);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada kota"), 2);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurences("Ala posiada kota i psa"), 1);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada"), 0);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurrences("Marysia posiada"), 0);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurrences("Marysia posiada rysia"), 1);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurrences("kota Ala posiada"), 0);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada kota"), 2);
|
||||
BOOST_CHECK_EQUAL(concordia.countOccurrences("Ala posiada kota i psa"), 1);
|
||||
|
||||
concordia.clearIndex();
|
||||
|
||||
|
@ -135,13 +135,13 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
||||
SA->push_back(11);
|
||||
|
||||
SUFFIX_MARKER_TYPE highResLength;
|
||||
std::vector<SubstringOccurence> result = searcher.lcpSearch(T, markers, SA, pattern, highResLength);
|
||||
std::vector<SubstringOccurrence> result = searcher.lcpSearch(T, markers, SA, pattern, highResLength);
|
||||
SUFFIX_MARKER_TYPE length = highResLength / sizeof(INDEX_CHARACTER_TYPE);
|
||||
|
||||
/* Expecting to get the following results from SA:
|
||||
3: ana
|
||||
1: anana
|
||||
Which are 2 substring occurences (34,3) and (34,1) with the lcp length = 2;
|
||||
Which are 2 substring occurrences (34,3) and (34,1) with the lcp length = 2;
|
||||
*/
|
||||
|
||||
BOOST_CHECK_EQUAL(result.size(),2);
|
||||
@ -185,12 +185,12 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
||||
pattern2.push_back(2);
|
||||
|
||||
SUFFIX_MARKER_TYPE highResLength2;
|
||||
std::vector<SubstringOccurence> result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2);
|
||||
std::vector<SubstringOccurrence> result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2);
|
||||
SUFFIX_MARKER_TYPE length2 = highResLength2 / sizeof(INDEX_CHARACTER_TYPE);
|
||||
|
||||
/* Expecting to get one result from SA:
|
||||
0: banana
|
||||
Which is one substring occurence (34,0) with the lcp length = 6;
|
||||
Which is one substring occurrence (34,0) with the lcp length = 6;
|
||||
*/
|
||||
|
||||
|
||||
@ -228,12 +228,12 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
||||
pattern3.push_back(3);
|
||||
|
||||
SUFFIX_MARKER_TYPE highResLength3;
|
||||
std::vector<SubstringOccurence> result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3);
|
||||
std::vector<SubstringOccurrence> result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3);
|
||||
SUFFIX_MARKER_TYPE length3 = highResLength3 / sizeof(INDEX_CHARACTER_TYPE);
|
||||
|
||||
/* Expecting to get one result from SA:
|
||||
0: banana
|
||||
Which is one substring occurence (34,0) with the lcp length = 5;
|
||||
Which is one substring occurrence (34,0) with the lcp length = 5;
|
||||
*/
|
||||
|
||||
BOOST_CHECK_EQUAL(result3.size(),1);
|
||||
@ -265,13 +265,13 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
||||
pattern4.push_back(4);
|
||||
|
||||
SUFFIX_MARKER_TYPE highResLength4;
|
||||
std::vector<SubstringOccurence> result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4);
|
||||
std::vector<SubstringOccurrence> result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4);
|
||||
SUFFIX_MARKER_TYPE length4 = highResLength4 / sizeof(INDEX_CHARACTER_TYPE);
|
||||
|
||||
/* Expecting to get 2 results from SA:
|
||||
4: na
|
||||
2: nana
|
||||
Which are 2 substring occurences (34,4) and (34,2) with the lcp length = 2;
|
||||
Which are 2 substring occurrences (34,4) and (34,2) with the lcp length = 2;
|
||||
*/
|
||||
|
||||
BOOST_CHECK_EQUAL(result4.size(),2);
|
||||
@ -296,7 +296,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
||||
pattern5.push_back(4);
|
||||
|
||||
SUFFIX_MARKER_TYPE highResLength5;
|
||||
std::vector<SubstringOccurence> result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5);
|
||||
std::vector<SubstringOccurrence> result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5);
|
||||
SUFFIX_MARKER_TYPE length5 = highResLength5 / sizeof(INDEX_CHARACTER_TYPE);
|
||||
|
||||
/* Expecting to get 0 results from SA, lcp length = 0;
|
||||
@ -320,7 +320,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
||||
pattern6.push_back(0);
|
||||
|
||||
SUFFIX_MARKER_TYPE highResLength6;
|
||||
std::vector<SubstringOccurence> result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6);
|
||||
std::vector<SubstringOccurrence> result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6);
|
||||
SUFFIX_MARKER_TYPE length6 = highResLength6 / sizeof(INDEX_CHARACTER_TYPE);
|
||||
|
||||
/* Expecting to get 0 results from SA, lcp length = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user