getTmMatches

Former-commit-id: 94aa3db2db88195c61c6ac70006c0e1d743dc854
This commit is contained in:
rjawor 2015-04-14 20:14:30 +02:00
parent f03b4ad954
commit e02bbaa0fa
12 changed files with 297 additions and 77 deletions

View File

@ -8,7 +8,7 @@ DONE 3. Dzielenie zdań (max 255 tokenów)
DONE Anubis search się komplikuje! Przy tworzeniu obiektu tmMatches dla przykładu trzeba podać id przykładu, długość patternu i długość przykładu. Dwa pierwsze mamy, ale niestety nie ma skąd wziąć długości przykładu. Pamiętamy tylko offset sufiksu. DONE Anubis search się komplikuje! Przy tworzeniu obiektu tmMatches dla przykładu trzeba podać id przykładu, długość patternu i długość przykładu. Dwa pierwsze mamy, ale niestety nie ma skąd wziąć długości przykładu. Pamiętamy tylko offset sufiksu.
DONE 1. Bitwise operators (i stałe!) przy rozmiarze index character oraz markerów DONE 1. Bitwise operators (i stałe!) przy rozmiarze index character oraz markerów
2. Wykonać anubis search na nowych markerach z długością zdania IN PROGRESS 2. Wykonać anubis search na nowych markerach z długością zdania
3. Multi-threading? 3. Multi-threading?
- concordia-server - concordia-server
@ -19,6 +19,8 @@ DONE 1. Bitwise operators (i stałe!) przy rozmiarze index character oraz marker
zastanowić się nad optymalizacją: zastanowić się nad optymalizacją:
- unordered_map tmMatchesMap - tmMatchesMap jako normalna mapa (nie ptr_map)
- LCP array - REJECTED LCP array
- !important! rezygnacja z ptr_vector (wycieki!)
- zwracanie wektorów

View File

@ -1,15 +1,8 @@
#include "concordia/anubis_searcher.hpp" #include "concordia/anubis_searcher.hpp"
#include "concordia/tm_matches.hpp"
#include "concordia/common/logging.hpp"
#include <boost/ptr_container/ptr_map.hpp> #include "concordia/common/logging.hpp"
#include <boost/assign/ptr_map_inserter.hpp>
#include <boost/foreach.hpp> #include <boost/foreach.hpp>
#include <iostream> #include <iostream>
#include <map>
typedef boost::ptr_map<SUFFIX_MARKER_TYPE, TmMatches> TmMatchesMap;
typedef TmMatchesMap::iterator TmMatchesMapIterator;
AnubisSearcher::AnubisSearcher() { AnubisSearcher::AnubisSearcher() {
} }
@ -25,11 +18,19 @@ boost::ptr_vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern) boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
throw(ConcordiaException) { throw(ConcordiaException) {
SET_LOGGER_FILE("/tmp/concordia.log"); boost::shared_ptr<TmMatchesMap> tmMatchesMap = getTmMatches(T, markers, SA, pattern);
SET_LOGGING_LEVEL("ERROR");
INFO("AnubisSearcher::anubisSearch");
// get the tmMatches list sorted descending by score
boost::ptr_vector<AnubisSearchResult> result; boost::ptr_vector<AnubisSearchResult> result;
return result;
}
boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
throw(ConcordiaException) {
boost::shared_ptr<std::vector<sauchar_t> > patternVector = boost::shared_ptr<std::vector<sauchar_t> > patternVector =
Utils::indexVectorToSaucharVector(pattern); Utils::indexVectorToSaucharVector(pattern);
@ -39,67 +40,61 @@ boost::ptr_vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
throw ConcordiaException("Increasing pattern resolution went wrong."); throw ConcordiaException("Increasing pattern resolution went wrong.");
} }
INFO("AnubisSearcher::anubisSearch - about to create tmMatchesMap"); boost::shared_ptr<TmMatchesMap> tmMatchesMap(new TmMatchesMap());
TmMatchesMap tmMatchesMap;
for (int offset = 0; offset < pattern->size(); offset++) { for (int offset = 0; offset < pattern->size(); offset++) {
INFO("AnubisSearcher::anubisSearch - offset: ");
INFO(offset);
int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE); int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE);
INFO("AnubisSearcher::anubisSearch - high res offset: ");
INFO(highResOffset);
boost::shared_ptr<std::vector<sauchar_t> > currentPattern = boost::shared_ptr<std::vector<sauchar_t> > currentPattern =
boost::shared_ptr<std::vector<sauchar_t> > boost::shared_ptr<std::vector<sauchar_t> >
(new std::vector<sauchar_t>( (new std::vector<sauchar_t>(
patternVector->begin()+highResOffset, patternVector->end())); patternVector->begin()+highResOffset, patternVector->end()));
SUFFIX_MARKER_TYPE highResLongestPrefixesLength;
INFO("AnubisSearcher::anubisSearch - about to get longest prefixes");
boost::ptr_vector<SubstringOccurence> longestPrefixes =
lcpSearch(T, markers, SA, currentPattern, highResLongestPrefixesLength);
INFO("AnubisSearcher::anubisSearch - longest prefixes got"); saidx_t patternLength = 0;
SUFFIX_MARKER_TYPE longestPrefixesLength = highResLongestPrefixesLength / saidx_t size = SA->size();
sizeof(INDEX_CHARACTER_TYPE); saidx_t left = 0;
INFO("AnubisSearcher::anubisSearch - longest prefixes high res length");
INFO(highResLongestPrefixesLength);
INFO("AnubisSearcher::anubisSearch - longest prefixes length");
INFO(longestPrefixesLength);
if (longestPrefixesLength > 0) { sauchar_t * patternArray = currentPattern->data();
BOOST_FOREACH(SubstringOccurence & occurence, longestPrefixes) {
boost::shared_ptr<TmMatches> tmMatches;
TmMatchesMapIterator mapIterator = tmMatchesMap.find( saidx_t * SAleft = SA->data();
occurence.getId());
if (mapIterator != tmMatchesMap.end()) { saidx_t prevLeft;
tmMatches = boost::shared_ptr<TmMatches>( saidx_t prevSize;
mapIterator->second do {
); prevLeft = left;
} else { prevSize = size;
tmMatches = boost::shared_ptr<TmMatches>(
new TmMatches( patternLength += sizeof(INDEX_CHARACTER_TYPE);
occurence.getId(),
occurence.getExampleLength(), saidx_t localLeft;
patternVector->size() size = sa_search(T->data(), (saidx_t) T->size(),
)); (const sauchar_t *) patternArray, patternLength,
SAleft, size, &localLeft);
left += localLeft;
SAleft += localLeft;
if (patternLength > sizeof(INDEX_CHARACTER_TYPE)) {
// Add to tm matches map results surrounding the main stream.
// from left
for (saidx_t i = prevLeft; i < left; i++) {
_addToMap(SA, markers, tmMatchesMap, i, pattern->size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset);
}
// from right
for (saidx_t i = left+size; i < prevLeft+prevSize; i++) {
_addToMap(SA, markers, tmMatchesMap, i, pattern->size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset);
} }
// add intervals to tmMatches }
tmMatches->addExampleInterval( } while (patternLength < currentPattern->size() && size > 0);
occurence.getOffset(),
occurence.getOffset() + longestPrefixesLength if (size > 0) {
); for (saidx_t i = left; i < left+size; i++) {
tmMatches->addPatternInterval( _addToMap(SA, markers, tmMatchesMap, i, pattern->size(), patternLength / sizeof(INDEX_CHARACTER_TYPE), offset);
offset,
offset + longestPrefixesLength
);
} }
} }
} }
// get the tmMatches list sorted descending by score return tmMatchesMap;
return result;
} }
boost::ptr_vector<SubstringOccurence> AnubisSearcher::lcpSearch( boost::ptr_vector<SubstringOccurence> AnubisSearcher::lcpSearch(
@ -129,7 +124,6 @@ boost::ptr_vector<SubstringOccurence> AnubisSearcher::lcpSearch(
size = sa_search(T->data(), (saidx_t) T->size(), size = sa_search(T->data(), (saidx_t) T->size(),
(const sauchar_t *) patternArray, patternLength, (const sauchar_t *) patternArray, patternLength,
SAleft, size, &localLeft); SAleft, size, &localLeft);
left += localLeft; left += localLeft;
SAleft += localLeft; SAleft += localLeft;
} while (patternLength < pattern->size() && size > 0); } while (patternLength < pattern->size() && size > 0);
@ -170,3 +164,67 @@ void AnubisSearcher::_collectResults(
} }
} }
} }
/*!
  Registers a single suffix array hit in the tm matches map.

  The suffix array entry at sa_pos is first resolved into a
  SubstringOccurence by _getOccurenceFromSA. Only when that call
  reports success are the matched intervals recorded through
  _addOccurenceToMap.

  \param SA suffix array
  \param markers markers array
  \param tmMatchesMap map to be updated
  \param sa_pos position in the suffix array to resolve
  \param totalPatternLength length of the whole pattern
  \param matchedFragmentLength length of the matched fragment
  \param patternOffset offset of the matched fragment in the pattern
*/
void AnubisSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
                    boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
                    boost::shared_ptr<TmMatchesMap> tmMatchesMap,
                    saidx_t sa_pos,
                    SUFFIX_MARKER_TYPE totalPatternLength,
                    SUFFIX_MARKER_TYPE matchedFragmentLength,
                    SUFFIX_MARKER_TYPE patternOffset) {
    SubstringOccurence located;
    const bool resolved = _getOccurenceFromSA(SA, markers, sa_pos, located);
    if (!resolved) {
        // Nothing to record for this suffix array position.
        return;
    }

    _addOccurenceToMap(tmMatchesMap,
                       located,
                       totalPatternLength,
                       matchedFragmentLength,
                       patternOffset);
}
/*!
  Resolves the suffix stored at position sa_pos of the suffix array
  into a substring occurence.

  The suffix array value is a position in the high-resolution text.
  A marker is looked up only when that position is a multiple of
  sizeof(INDEX_CHARACTER_TYPE); other positions are rejected.

  \param SA suffix array
  \param markers markers array
  \param sa_pos position in the suffix array (bounds-checked by at())
  \param occurence output object, filled from the marker on success;
                   left untouched when false is returned
  \returns true if occurence was filled in, false otherwise
*/
bool AnubisSearcher::_getOccurenceFromSA(
                    boost::shared_ptr<std::vector<saidx_t> > SA,
                    boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
                    saidx_t sa_pos,
                    SubstringOccurence & occurence) {
    saidx_t resultPos = SA->at(sa_pos);

    // Positions that are not a multiple of the index character size
    // have no entry in the markers array.
    if (resultPos % sizeof(INDEX_CHARACTER_TYPE) != 0) {
        return false;
    }

    SUFFIX_MARKER_TYPE marker =
        markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
    occurence.enterDataFromMarker(marker);
    // The caller branches on this result, so it must be explicit:
    // flowing off the end of a bool function is undefined behaviour.
    return true;
}
/*!
  Records a matched fragment for the example the occurence belongs to.

  If the map has no TmMatches entry under the occurence id, a new one
  is created and inserted under that id. The matched fragment is then
  added to the entry both as an example interval (starting at the
  occurence offset) and as a pattern interval (starting at
  patternOffset), each of length matchedFragmentLength.

  \param tmMatchesMap map to be updated
  \param occurence occurence of the matched fragment in an example
  \param totalPatternLength length of the whole pattern
  \param matchedFragmentLength length of the matched fragment
  \param patternOffset offset of the matched fragment in the pattern
*/
void AnubisSearcher::_addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
                    SubstringOccurence & occurence,
                    SUFFIX_MARKER_TYPE totalPatternLength,
                    SUFFIX_MARKER_TYPE matchedFragmentLength,
                    SUFFIX_MARKER_TYPE patternOffset) {
    SUFFIX_MARKER_TYPE exampleId = occurence.getId();
    TmMatchesMapIterator found = tmMatchesMap->find(exampleId);

    TmMatches * entry;
    if (found == tmMatchesMap->end()) {
        // First hit for this example: create its entry and insert it.
        entry = new TmMatches(exampleId,
                              occurence.getExampleLength(),
                              totalPatternLength);
        tmMatchesMap->insert(exampleId, entry);
    } else {
        entry = found->second;
    }

    // Register the fragment on the example side...
    const SUFFIX_MARKER_TYPE exampleStart = occurence.getOffset();
    entry->addExampleInterval(exampleStart,
                              exampleStart + matchedFragmentLength);

    // ...and on the pattern side.
    entry->addPatternInterval(patternOffset,
                              patternOffset + matchedFragmentLength);
}

View File

@ -9,6 +9,7 @@
#include "concordia/substring_occurence.hpp" #include "concordia/substring_occurence.hpp"
#include "concordia/concordia_exception.hpp" #include "concordia/concordia_exception.hpp"
#include "concordia/anubis_search_result.hpp" #include "concordia/anubis_search_result.hpp"
#include "concordia/tm_matches.hpp"
#include <divsufsort.h> #include <divsufsort.h>
@ -34,6 +35,13 @@ public:
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern) boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
throw(ConcordiaException); throw(ConcordiaException);
boost::shared_ptr<TmMatchesMap> getTmMatches(
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
throw(ConcordiaException);
boost::ptr_vector<SubstringOccurence> lcpSearch( boost::ptr_vector<SubstringOccurence> lcpSearch(
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
@ -46,6 +54,25 @@ private:
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
saidx_t left, saidx_t size); saidx_t left, saidx_t size);
void _addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<TmMatchesMap> tmMatchesMap,
saidx_t sa_pos,
SUFFIX_MARKER_TYPE totalPatternLength,
SUFFIX_MARKER_TYPE matchedFragmentLength,
SUFFIX_MARKER_TYPE patternOffset);
bool _getOccurenceFromSA(boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
saidx_t sa_pos,
SubstringOccurence & occurence);
void _addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
SubstringOccurence & occurence,
SUFFIX_MARKER_TYPE totalPatternLength,
SUFFIX_MARKER_TYPE matchedFragmentLength,
SUFFIX_MARKER_TYPE patternOffset);
}; };
#endif #endif

View File

@ -28,4 +28,3 @@ typedef @SUFFIX_MARKER_TYPE@ SUFFIX_MARKER_TYPE;
//The sentence marker is built as follows: its first bytes store the //The sentence marker is built as follows: its first bytes store the
// sentence id. Next, SUFFIX_MARKER_SENTENCE_BYTES store the suffix offset // sentence id. Next, SUFFIX_MARKER_SENTENCE_BYTES store the suffix offset
// and the last SUFFIX_MARKER_SENTENCE_BYTES store the sentence length. // and the last SUFFIX_MARKER_SENTENCE_BYTES store the sentence length.

View File

@ -67,7 +67,7 @@ private:
template <typename T> template <typename T>
void Utils::printVector(boost::shared_ptr<std::vector<T> > vector) { void Utils::printVector(boost::shared_ptr<std::vector<T> > vector) {
for (int i = 0; i < vector->size(); i++) { for (int i = 0; i < vector->size(); i++) {
cout << vector->at(i) << " "; cout << static_cast<int>(vector->at(i)) << " ";
} }
cout << endl; cout << endl;
} }

View File

@ -1,7 +1,7 @@
#include "concordia/interval.hpp" #include "concordia/interval.hpp"
Interval::Interval(const unsigned char start, const unsigned char end): Interval::Interval(const SUFFIX_MARKER_TYPE start, const SUFFIX_MARKER_TYPE end):
_start(start), _start(start),
_end(end) { _end(end) {
} }
@ -14,7 +14,7 @@ bool Interval::intersects(Interval & interval) {
interval.getEnd() - 1 < _start); interval.getEnd() - 1 < _start);
} }
unsigned char Interval::getLength() { SUFFIX_MARKER_TYPE Interval::getLength() {
return _end - _start; return _end - _start;
} }

View File

@ -1,6 +1,8 @@
#ifndef INTERVAL_HDR #ifndef INTERVAL_HDR
#define INTERVAL_HDR #define INTERVAL_HDR
#include "concordia/common/config.hpp"
/*! /*!
Class representing word interval. Class representing word interval.
@ -10,7 +12,7 @@ using namespace std;
class Interval { class Interval {
public: public:
explicit Interval(const unsigned char start, const unsigned char end); explicit Interval(const SUFFIX_MARKER_TYPE start, const SUFFIX_MARKER_TYPE end);
/*! Destructor. /*! Destructor.
*/ */
@ -18,20 +20,20 @@ public:
bool intersects(Interval & interval); bool intersects(Interval & interval);
unsigned char getLength(); SUFFIX_MARKER_TYPE getLength();
unsigned char getStart() const { SUFFIX_MARKER_TYPE getStart() const {
return _start; return _start;
} }
unsigned char getEnd() const { SUFFIX_MARKER_TYPE getEnd() const {
return _end; return _end;
} }
private: private:
unsigned char _start; SUFFIX_MARKER_TYPE _start;
unsigned char _end; SUFFIX_MARKER_TYPE _end;
}; };
#endif #endif

View File

@ -1,6 +1,8 @@
#include "concordia/substring_occurence.hpp" #include "concordia/substring_occurence.hpp"
#include "concordia/common/utils.hpp" #include "concordia/common/utils.hpp"
/*! Default constructor.
  Zeroes the id, offset and example length so that an object which
  has not yet been filled by enterDataFromMarker() never exposes
  indeterminate values through its getters.
*/
SubstringOccurence::SubstringOccurence() {
    _id = 0;
    _offset = 0;
    _exampleLength = 0;
}
SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) { SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) {
_id = Utils::getIdFromMarker(marker); _id = Utils::getIdFromMarker(marker);
@ -8,6 +10,12 @@ SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) {
_exampleLength = Utils::getLengthFromMarker(marker); _exampleLength = Utils::getLengthFromMarker(marker);
} }
/*! Overwrites this occurence with the data decoded from a marker.
  The id, offset and example length are each extracted from the
  marker by the corresponding Utils helper, replacing any values
  previously held by this object.
  \param marker suffix marker to decode
*/
void SubstringOccurence::enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker) {
_id = Utils::getIdFromMarker(marker);
_offset = Utils::getOffsetFromMarker(marker);
_exampleLength = Utils::getLengthFromMarker(marker);
}
SubstringOccurence::SubstringOccurence( SubstringOccurence::SubstringOccurence(
const SUFFIX_MARKER_TYPE & id, const SUFFIX_MARKER_TYPE & id,

View File

@ -13,6 +13,8 @@ using namespace std;
class SubstringOccurence { class SubstringOccurence {
public: public:
SubstringOccurence();
explicit SubstringOccurence(const SUFFIX_MARKER_TYPE & marker); explicit SubstringOccurence(const SUFFIX_MARKER_TYPE & marker);
SubstringOccurence(const SUFFIX_MARKER_TYPE & id, SubstringOccurence(const SUFFIX_MARKER_TYPE & id,
@ -34,6 +36,8 @@ public:
return _exampleLength; return _exampleLength;
} }
void enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker);
private: private:
SUFFIX_MARKER_TYPE _id; SUFFIX_MARKER_TYPE _id;

View File

@ -1,7 +1,16 @@
#include <iostream>
#include "tests/unit-tests/unit_tests_globals.hpp" #include "tests/unit-tests/unit_tests_globals.hpp"
#include "concordia/tm_matches.hpp"
#include "concordia/anubis_searcher.hpp" #include "concordia/anubis_searcher.hpp"
#include "concordia/concordia_index.hpp"
#include "concordia/concordia_config.hpp"
#include "concordia/example.hpp"
#include "concordia/hash_generator.hpp"
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
#include "concordia/common/utils.hpp" #include "concordia/common/utils.hpp"
#include "concordia/common/logging.hpp"
#include "tests/common/test_resources_manager.hpp"
using namespace std; using namespace std;
@ -324,8 +333,108 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
} }
BOOST_AUTO_TEST_CASE( AnubisSearch1 ) BOOST_AUTO_TEST_CASE( TmMatchesTest )
{ {
AnubisSearcher searcher;
/*The test index contains 3 sentences:
14: "Ala posiada kota"
51: "Ala posiada rysia"
123: "Marysia posiada rysia"
Test word map:
Ala -> 0
posiada -> 1
kota -> 2
rysia -> 3
Marysia -> 4
Test hashed index:
n: 0 1 2 3 4 5 6 7 8 9 10 11
T[n]: 0 1 2 | 0 1 3 | 4 1 3 |
Test suffix array:
n: 0 1 2 3 4 5 6 7 8 9 10 11
SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7
*/
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX),
TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
boost::shared_ptr<ConcordiaConfig> config(
new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
boost::shared_ptr<HashGenerator> hashGenerator(new HashGenerator(config));
boost::shared_ptr<std::vector<sauchar_t> > T(new std::vector<sauchar_t>());
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers(new std::vector<SUFFIX_MARKER_TYPE>());
index.addExample(hashGenerator, T, markers, Example("Ala posiada kota",14));
index.addExample(hashGenerator, T, markers, Example("Ala posiada rysia",51));
index.addExample(hashGenerator, T, markers, Example("Marysia posiada rysia",123));
boost::shared_ptr<std::vector<saidx_t> > SA = index.generateSuffixArray(T);
// searching for pattern "Ola posiada rysia Marysia" (5 1 3 4)
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern = hashGenerator->generateHash("Ola posiada rysia Marysia");
boost::shared_ptr<TmMatchesMap> tmMatchesMap = searcher.getTmMatches(T, markers, SA, pattern);
BOOST_CHECK_EQUAL(tmMatchesMap->size(), 3);
TmMatches * tmMatches14 = tmMatchesMap->find(14)->second;
TmMatches * tmMatches51 = tmMatchesMap->find(51)->second;
TmMatches * tmMatches123 = tmMatchesMap->find(123)->second;
BOOST_CHECK_EQUAL(tmMatches14->getExampleId(), 14);
BOOST_CHECK_EQUAL(tmMatches51->getExampleId(), 51);
BOOST_CHECK_EQUAL(tmMatches123->getExampleId(), 123);
// example 14
// example interval list: [(1,2)]
boost::ptr_vector<Interval> exampleIntervals14 = tmMatches14->getExampleIntervals();
BOOST_CHECK_EQUAL(exampleIntervals14.size(), 1);
BOOST_CHECK_EQUAL(exampleIntervals14[0].getStart(), 1);
BOOST_CHECK_EQUAL(exampleIntervals14[0].getEnd(), 2);
// pattern interval list: [(1,2)]
boost::ptr_vector<Interval> patternIntervals14 = tmMatches14->getPatternIntervals();
BOOST_CHECK_EQUAL(patternIntervals14.size(), 1);
BOOST_CHECK_EQUAL(patternIntervals14[0].getStart(), 1);
BOOST_CHECK_EQUAL(patternIntervals14[0].getEnd(), 2);
// example 51
// example interval list: [(1,3)]
boost::ptr_vector<Interval> exampleIntervals51 = tmMatches51->getExampleIntervals();
BOOST_CHECK_EQUAL(exampleIntervals51.size(), 1);
BOOST_CHECK_EQUAL(exampleIntervals51[0].getStart(), 1);
BOOST_CHECK_EQUAL(exampleIntervals51[0].getEnd(), 3);
// pattern interval list: [(1,3)]
boost::ptr_vector<Interval> patternIntervals51 = tmMatches51->getPatternIntervals();
BOOST_CHECK_EQUAL(patternIntervals51.size(), 1);
BOOST_CHECK_EQUAL(patternIntervals51[0].getStart(), 1);
BOOST_CHECK_EQUAL(patternIntervals51[0].getEnd(), 3);
// example 123
// example interval list: [(1,3), (0,1)]
boost::ptr_vector<Interval> exampleIntervals123 = tmMatches123->getExampleIntervals();
BOOST_CHECK_EQUAL(exampleIntervals123.size(), 2);
BOOST_CHECK_EQUAL(exampleIntervals123[0].getStart(), 1);
BOOST_CHECK_EQUAL(exampleIntervals123[0].getEnd(), 3);
BOOST_CHECK_EQUAL(exampleIntervals123[1].getStart(), 0);
BOOST_CHECK_EQUAL(exampleIntervals123[1].getEnd(), 1);
// pattern interval list: [(1,3), (3,4)]
boost::ptr_vector<Interval> patternIntervals123 = tmMatches123->getPatternIntervals();
BOOST_CHECK_EQUAL(patternIntervals123.size(), 2);
BOOST_CHECK_EQUAL(patternIntervals123[0].getStart(), 1);
BOOST_CHECK_EQUAL(patternIntervals123[0].getEnd(), 3);
BOOST_CHECK_EQUAL(patternIntervals123[1].getStart(), 3);
BOOST_CHECK_EQUAL(patternIntervals123[1].getEnd(), 4);
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
} }

View File

@ -177,7 +177,6 @@ BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
n: 0 1 2 3 4 5 6 7 8 9 10 11 n: 0 1 2 3 4 5 6 7 8 9 10 11
SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7 SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7
*/
boost::ptr_vector<AnubisSearchResult> searchResult1 = concordia.anubisSearch("posiada rysia chyba"); boost::ptr_vector<AnubisSearchResult> searchResult1 = concordia.anubisSearch("posiada rysia chyba");
boost::ptr_vector<AnubisSearchResult> searchResult2 = concordia.anubisSearch("posiada kota Ala"); boost::ptr_vector<AnubisSearchResult> searchResult2 = concordia.anubisSearch("posiada kota Ala");

View File

@ -5,6 +5,7 @@
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
#include "concordia/interval.hpp" #include "concordia/interval.hpp"
#include <boost/ptr_container/ptr_vector.hpp> #include <boost/ptr_container/ptr_vector.hpp>
#include <boost/ptr_container/ptr_map.hpp>
/*! /*!
@ -28,6 +29,14 @@ public:
return _score; return _score;
} }
/*! Getter for the matched regions of the example.
  The container is returned by value, so the caller receives its
  own copy rather than a reference to the internal list.
  \returns list of matched example intervals
*/
boost::ptr_vector<Interval> getExampleIntervals() const {
return _exampleMatchedRegions;
}
/*! Getter for the matched regions of the pattern.
  The container is returned by value, so the caller receives its
  own copy rather than a reference to the internal list.
  \returns list of matched pattern intervals
*/
boost::ptr_vector<Interval> getPatternIntervals() const {
return _patternMatchedRegions;
}
SUFFIX_MARKER_TYPE getExampleId() const { SUFFIX_MARKER_TYPE getExampleId() const {
return _exampleId; return _exampleId;
} }
@ -61,4 +70,7 @@ private:
double _score; double _score;
}; };
typedef boost::ptr_map<SUFFIX_MARKER_TYPE, TmMatches> TmMatchesMap;
typedef TmMatchesMap::iterator TmMatchesMapIterator;
#endif #endif