getTmMatches

Former-commit-id: 94aa3db2db88195c61c6ac70006c0e1d743dc854
parent f03b4ad954
commit e02bbaa0fa
TODO.txt

@@ -8,7 +8,7 @@ DONE 3. Sentence splitting (max 255 tokens)
DONE The Anubis search is getting complicated! When creating a tmMatches object for an example we have to pass the example id, the pattern length and the example length. We have the first two, but unfortunately there is nowhere to take the example length from. We only remember the suffix offset.
DONE 1. Bitwise operators (and constants!) for the size of the index character and of the markers
2. Run the anubis search on the new markers carrying the sentence length
IN PROGRESS 2. Run the anubis search on the new markers carrying the sentence length
3. Multi-threading?
- concordia-server
@@ -19,6 +19,8 @@ DONE 1. Bitwise operators (and constants!) for the size of the index character and of the markers
think about optimizations:
- unordered_map tmMatchesMap
- LCP array
- tmMatchesMap as a regular map (not a ptr_map)
- REJECTED LCP array
- !important! drop ptr_vector (leaks!)
- returning vectors
@@ -1,15 +1,8 @@
#include "concordia/anubis_searcher.hpp"
#include "concordia/tm_matches.hpp"
#include "concordia/common/logging.hpp"

#include <boost/ptr_container/ptr_map.hpp>
#include <boost/assign/ptr_map_inserter.hpp>
#include "concordia/common/logging.hpp"
#include <boost/foreach.hpp>
#include <iostream>
#include <map>

typedef boost::ptr_map<SUFFIX_MARKER_TYPE, TmMatches> TmMatchesMap;
typedef TmMatchesMap::iterator TmMatchesMapIterator;

AnubisSearcher::AnubisSearcher() {
}
@@ -25,11 +18,19 @@ boost::ptr_vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
        boost::shared_ptr<std::vector<saidx_t> > SA,
        boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
                                                 throw(ConcordiaException) {
    SET_LOGGER_FILE("/tmp/concordia.log");
    SET_LOGGING_LEVEL("ERROR");
    INFO("AnubisSearcher::anubisSearch");

    boost::shared_ptr<TmMatchesMap> tmMatchesMap = getTmMatches(T, markers, SA, pattern);

    // get the tmMatches list sorted descending by score
    boost::ptr_vector<AnubisSearchResult> result;
    return result;
}

boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
        boost::shared_ptr<std::vector<sauchar_t> > T,
        boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
        boost::shared_ptr<std::vector<saidx_t> > SA,
        boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
                                                 throw(ConcordiaException) {

    boost::shared_ptr<std::vector<sauchar_t> > patternVector =
        Utils::indexVectorToSaucharVector(pattern);
@@ -39,67 +40,61 @@ boost::ptr_vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
        throw ConcordiaException("Increasing pattern resolution went wrong.");
    }

    INFO("AnubisSearcher::anubisSearch - about to create tmMatchesMap");
    TmMatchesMap tmMatchesMap;
    boost::shared_ptr<TmMatchesMap> tmMatchesMap(new TmMatchesMap());
    for (int offset = 0; offset < pattern->size(); offset++) {
        INFO("AnubisSearcher::anubisSearch - offset: ");
        INFO(offset);

        int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE);
        INFO("AnubisSearcher::anubisSearch - high res offset: ");
        INFO(highResOffset);
        boost::shared_ptr<std::vector<sauchar_t> > currentPattern =
            boost::shared_ptr<std::vector<sauchar_t> >
            (new std::vector<sauchar_t>(
                patternVector->begin()+highResOffset, patternVector->end()));
        SUFFIX_MARKER_TYPE highResLongestPrefixesLength;
        INFO("AnubisSearcher::anubisSearch - about to get longest prefixes");
        boost::ptr_vector<SubstringOccurence> longestPrefixes =
            lcpSearch(T, markers, SA, currentPattern, highResLongestPrefixesLength);

        INFO("AnubisSearcher::anubisSearch - longest prefixes got");
        SUFFIX_MARKER_TYPE longestPrefixesLength = highResLongestPrefixesLength /
            sizeof(INDEX_CHARACTER_TYPE);
        INFO("AnubisSearcher::anubisSearch - longest prefixes high res length");
        INFO(highResLongestPrefixesLength);
        INFO("AnubisSearcher::anubisSearch - longest prefixes length");
        INFO(longestPrefixesLength);

        saidx_t patternLength = 0;
        saidx_t size = SA->size();
        saidx_t left = 0;

        if (longestPrefixesLength > 0) {
            BOOST_FOREACH(SubstringOccurence & occurence, longestPrefixes) {
                boost::shared_ptr<TmMatches> tmMatches;
                sauchar_t * patternArray = currentPattern->data();

                TmMatchesMapIterator mapIterator = tmMatchesMap.find(
                    occurence.getId());
                if (mapIterator != tmMatchesMap.end()) {
                    tmMatches = boost::shared_ptr<TmMatches>(
                        mapIterator->second
                    );
                } else {
                    tmMatches = boost::shared_ptr<TmMatches>(
                        new TmMatches(
                            occurence.getId(),
                            occurence.getExampleLength(),
                            patternVector->size()
                        ));
                }
                saidx_t * SAleft = SA->data();

                saidx_t prevLeft;
                saidx_t prevSize;
                do {
                    prevLeft = left;
                    prevSize = size;

                    patternLength += sizeof(INDEX_CHARACTER_TYPE);

                    saidx_t localLeft;
                    size = sa_search(T->data(), (saidx_t) T->size(),
                                     (const sauchar_t *) patternArray, patternLength,
                                     SAleft, size, &localLeft);

                    left += localLeft;
                    SAleft += localLeft;

                    if (patternLength > sizeof(INDEX_CHARACTER_TYPE)) {
                        // Add to tm matches map results surrounding the main stream.
                        // from left
                        for (saidx_t i = prevLeft; i < left; i++) {
                            _addToMap(SA, markers, tmMatchesMap, i, pattern->size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) - 1, offset);
                        }
                        // from right
                        for (saidx_t i = left+size; i < prevLeft+prevSize; i++) {
                            _addToMap(SA, markers, tmMatchesMap, i, pattern->size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) - 1, offset);
                        }

                        // add intervals to tmMatches
                        tmMatches->addExampleInterval(
                            occurence.getOffset(),
                            occurence.getOffset() + longestPrefixesLength
                        );
                        tmMatches->addPatternInterval(
                            offset,
                            offset + longestPrefixesLength
                        );
                    }
                } while (patternLength < currentPattern->size() && size > 0);

                if (size > 0) {
                    for (saidx_t i = left; i < left+size; i++) {
                        _addToMap(SA, markers, tmMatchesMap, i, pattern->size(), patternLength / sizeof(INDEX_CHARACTER_TYPE), offset);
                    }
                }
            }

    // get the tmMatches list sorted descending by score

    return result;
    return tmMatchesMap;
}

boost::ptr_vector<SubstringOccurence> AnubisSearcher::lcpSearch(
@@ -128,8 +123,7 @@ boost::ptr_vector<SubstringOccurence> AnubisSearcher::lcpSearch(
        saidx_t localLeft;
        size = sa_search(T->data(), (saidx_t) T->size(),
                         (const sauchar_t *) patternArray, patternLength,
                         SAleft, size, &localLeft);

                         SAleft, size, &localLeft);
        left += localLeft;
        SAleft += localLeft;
    } while (patternLength < pattern->size() && size > 0);
@@ -170,3 +164,67 @@ void AnubisSearcher::_collectResults(
        }
    }
}

void AnubisSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
                boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
                boost::shared_ptr<TmMatchesMap> tmMatchesMap,
                saidx_t sa_pos,
                SUFFIX_MARKER_TYPE totalPatternLength,
                SUFFIX_MARKER_TYPE matchedFragmentLength,
                SUFFIX_MARKER_TYPE patternOffset) {
    SubstringOccurence occurence;
    if (_getOccurenceFromSA(SA, markers, sa_pos, occurence)) {
        _addOccurenceToMap(tmMatchesMap,
                           occurence,
                           totalPatternLength,
                           matchedFragmentLength,
                           patternOffset);
    }
}
bool AnubisSearcher::_getOccurenceFromSA(
                boost::shared_ptr<std::vector<saidx_t> > SA,
                boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
                saidx_t sa_pos,
                SubstringOccurence & occurence) {
    saidx_t resultPos = SA->at(sa_pos);

    if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
        // the suffix starts on an index character boundary, so it has a marker
        SUFFIX_MARKER_TYPE marker = markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
        occurence.enterDataFromMarker(marker);
        return true;
    }

    // the suffix starts in the middle of an index character - no occurence here
    return false;
}
void AnubisSearcher::_addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
                SubstringOccurence & occurence,
                SUFFIX_MARKER_TYPE totalPatternLength,
                SUFFIX_MARKER_TYPE matchedFragmentLength,
                SUFFIX_MARKER_TYPE patternOffset) {
    TmMatches * tmMatches;

    TmMatchesMapIterator mapIterator = tmMatchesMap->find(
        occurence.getId());
    if (mapIterator != tmMatchesMap->end()) {
        tmMatches = mapIterator->second;
    } else {
        tmMatches = new TmMatches(occurence.getId(),
                                  occurence.getExampleLength(),
                                  totalPatternLength);
        SUFFIX_MARKER_TYPE key = occurence.getId();
        tmMatchesMap->insert(key, tmMatches);
    }

    // add intervals to tmMatches
    tmMatches->addExampleInterval(
        occurence.getOffset(),
        occurence.getOffset() + matchedFragmentLength
    );
    tmMatches->addPatternInterval(
        patternOffset,
        patternOffset + matchedFragmentLength
    );
}
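Both getTmMatches and _getOccurenceFromSA above juggle two resolutions of the same data: token positions (one hashed word = one INDEX_CHARACTER_TYPE) and byte positions in the sauchar_t array T that the suffix array is built over. The following is a minimal, self-contained sketch of that conversion only; uint32_t stands in for the configured INDEX_CHARACTER_TYPE and is an assumption made purely for this example.

#include <cstddef>
#include <cstdint>
#include <iostream>

// Stand-in for the configured Concordia type (assumption for this sketch only).
typedef uint32_t index_character_t;

int main() {
    // A token-level pattern offset maps to a "high resolution" byte offset,
    // exactly like: int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE);
    std::size_t offset = 2;  // third token of the pattern
    std::size_t highResOffset = offset * sizeof(index_character_t);
    std::cout << "byte offset: " << highResOffset << std::endl;  // 8

    // Going back: a suffix array position corresponds to a whole token only
    // when it is aligned to the index character size; the token's marker then
    // sits at the token index, as in _getOccurenceFromSA above.
    std::size_t resultPos = 12;  // a position taken from SA
    if (resultPos % sizeof(index_character_t) == 0) {
        std::size_t tokenIndex = resultPos / sizeof(index_character_t);
        std::cout << "marker index: " << tokenIndex << std::endl;  // 3
    }
    return 0;
}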
@@ -9,6 +9,7 @@
#include "concordia/substring_occurence.hpp"
#include "concordia/concordia_exception.hpp"
#include "concordia/anubis_search_result.hpp"
#include "concordia/tm_matches.hpp"

#include <divsufsort.h>

@@ -34,6 +35,13 @@ public:
        boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
                                                 throw(ConcordiaException);

    boost::shared_ptr<TmMatchesMap> getTmMatches(
        boost::shared_ptr<std::vector<sauchar_t> > T,
        boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
        boost::shared_ptr<std::vector<saidx_t> > SA,
        boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
                                                 throw(ConcordiaException);

    boost::ptr_vector<SubstringOccurence> lcpSearch(
        boost::shared_ptr<std::vector<sauchar_t> > T,
        boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
@@ -46,6 +54,25 @@ private:
        boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
        boost::shared_ptr<std::vector<saidx_t> > SA,
        saidx_t left, saidx_t size);

    void _addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
        boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
        boost::shared_ptr<TmMatchesMap> tmMatchesMap,
        saidx_t sa_pos,
        SUFFIX_MARKER_TYPE totalPatternLength,
        SUFFIX_MARKER_TYPE matchedFragmentLength,
        SUFFIX_MARKER_TYPE patternOffset);

    bool _getOccurenceFromSA(boost::shared_ptr<std::vector<saidx_t> > SA,
        boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
        saidx_t sa_pos,
        SubstringOccurence & occurence);

    void _addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
        SubstringOccurence & occurence,
        SUFFIX_MARKER_TYPE totalPatternLength,
        SUFFIX_MARKER_TYPE matchedFragmentLength,
        SUFFIX_MARKER_TYPE patternOffset);
};

#endif
@@ -28,4 +28,3 @@ typedef @SUFFIX_MARKER_TYPE@ SUFFIX_MARKER_TYPE;
// The sentence marker is built as follows: its first bytes store the
// sentence id. Next, SUFFIX_MARKER_SENTENCE_BYTES store the suffix offset
// and the last SUFFIX_MARKER_SENTENCE_BYTES store the sentence length.
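The comment above describes how a suffix marker packs three fields into one integer. The sketch below illustrates that layout with bitwise operators; the 64-bit marker width and the 2-byte field width (SUFFIX_MARKER_SENTENCE_BYTES == 2) are assumptions made only for this illustration, not the project's actual configuration.

#include <cstdint>
#include <iostream>

typedef uint64_t marker_t;                        // assumed marker width
const unsigned FIELD_BITS = 2 * 8;                // assumed 2 bytes per sentence field
const marker_t FIELD_MASK = (1ULL << FIELD_BITS) - 1;

// Pack: id in the leading bytes, then the suffix offset, then the sentence length.
marker_t packMarker(marker_t sentenceId, marker_t suffixOffset, marker_t sentenceLength) {
    return (sentenceId << (2 * FIELD_BITS)) | (suffixOffset << FIELD_BITS) | sentenceLength;
}

int main() {
    marker_t marker = packMarker(123, 2, 3);      // sentence 123, offset 2, length 3
    // Unpacking mirrors what Utils::getIdFromMarker / getOffsetFromMarker /
    // getLengthFromMarker provide to SubstringOccurence::enterDataFromMarker.
    marker_t id     = marker >> (2 * FIELD_BITS);
    marker_t offset = (marker >> FIELD_BITS) & FIELD_MASK;
    marker_t length = marker & FIELD_MASK;
    std::cout << id << " " << offset << " " << length << std::endl;  // 123 2 3
    return 0;
}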
@@ -67,7 +67,7 @@ private:
template <typename T>
void Utils::printVector(boost::shared_ptr<std::vector<T> > vector) {
    for (int i = 0; i < vector->size(); i++) {
        cout << vector->at(i) << " ";
        cout << static_cast<int>(vector->at(i)) << " ";
    }
    cout << endl;
}
@@ -1,7 +1,7 @@
#include "concordia/interval.hpp"


Interval::Interval(const unsigned char start, const unsigned char end):
Interval::Interval(const SUFFIX_MARKER_TYPE start, const SUFFIX_MARKER_TYPE end):
    _start(start),
    _end(end) {
}
@@ -14,7 +14,7 @@ bool Interval::intersects(Interval & interval) {
            interval.getEnd() - 1 < _start);
}

unsigned char Interval::getLength() {
SUFFIX_MARKER_TYPE Interval::getLength() {
    return _end - _start;
}
@@ -1,6 +1,8 @@
#ifndef INTERVAL_HDR
#define INTERVAL_HDR

#include "concordia/common/config.hpp"

/*!
  Class representing a word interval.

@@ -10,7 +12,7 @@ using namespace std;

class Interval {
public:
    explicit Interval(const unsigned char start, const unsigned char end);
    explicit Interval(const SUFFIX_MARKER_TYPE start, const SUFFIX_MARKER_TYPE end);

    /*! Destructor.
    */
@@ -18,20 +20,20 @@ public:

    bool intersects(Interval & interval);

    unsigned char getLength();
    SUFFIX_MARKER_TYPE getLength();

    unsigned char getStart() const {
    SUFFIX_MARKER_TYPE getStart() const {
        return _start;
    }

    unsigned char getEnd() const {
    SUFFIX_MARKER_TYPE getEnd() const {
        return _end;
    }

private:
    unsigned char _start;
    SUFFIX_MARKER_TYPE _start;

    unsigned char _end;
    SUFFIX_MARKER_TYPE _end;
};

#endif
@@ -1,6 +1,8 @@
#include "concordia/substring_occurence.hpp"
#include "concordia/common/utils.hpp"

SubstringOccurence::SubstringOccurence() {
}

SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) {
    _id = Utils::getIdFromMarker(marker);
@@ -8,6 +10,12 @@ SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) {
    _exampleLength = Utils::getLengthFromMarker(marker);
}

void SubstringOccurence::enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker) {
    _id = Utils::getIdFromMarker(marker);
    _offset = Utils::getOffsetFromMarker(marker);
    _exampleLength = Utils::getLengthFromMarker(marker);
}


SubstringOccurence::SubstringOccurence(
    const SUFFIX_MARKER_TYPE & id,
@@ -13,6 +13,8 @@ using namespace std;

class SubstringOccurence {
public:
    SubstringOccurence();

    explicit SubstringOccurence(const SUFFIX_MARKER_TYPE & marker);

    SubstringOccurence(const SUFFIX_MARKER_TYPE & id,
@@ -33,6 +35,8 @@ public:
    SUFFIX_MARKER_TYPE getExampleLength() const {
        return _exampleLength;
    }

    void enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker);

private:
    SUFFIX_MARKER_TYPE _id;
@@ -1,7 +1,16 @@
#include <iostream>

#include "tests/unit-tests/unit_tests_globals.hpp"
#include "concordia/tm_matches.hpp"
#include "concordia/anubis_searcher.hpp"
#include "concordia/concordia_index.hpp"
#include "concordia/concordia_config.hpp"
#include "concordia/example.hpp"
#include "concordia/hash_generator.hpp"
#include "concordia/common/config.hpp"
#include "concordia/common/utils.hpp"
#include "concordia/common/logging.hpp"
#include "tests/common/test_resources_manager.hpp"

using namespace std;

@@ -324,9 +333,109 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )

}

BOOST_AUTO_TEST_CASE( AnubisSearch1 )
BOOST_AUTO_TEST_CASE( TmMatchesTest )
{
    AnubisSearcher searcher;

    /* The test index contains 3 sentences:
       14: "Ala posiada kota"
       51: "Ala posiada rysia"
       123: "Marysia posiada rysia"

       Test word map:
       Ala -> 0
       posiada -> 1
       kota -> 2
       rysia -> 3
       Marysia -> 4

       Test hashed index:
       n:     0 1 2 3 4 5 6 7 8 9 10 11
       T[n]:  0 1 2 | 0 1 3 | 4 1 3 |

       Test suffix array:
       n:     0 1 2 3 4 5 6 7 8 9 10 11
       SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7
    */
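    /* What the assertions below expect, spelled out: the pattern
       "Ola posiada rysia Marysia" hashes to 5 1 3 4 ("Ola" is a new word), so
       - example 14  (0 1 2): only "posiada" (1) matches -> example and pattern intervals (1,2);
       - example 51  (0 1 3): "posiada rysia" (1 3) matches -> example and pattern intervals (1,3);
       - example 123 (4 1 3): "posiada rysia" matches at example (1,3) / pattern (1,3),
         and "Marysia" (4) matches at example (0,1) / pattern (3,4). */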
    ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX),
                         TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
    boost::shared_ptr<ConcordiaConfig> config(
        new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
    boost::shared_ptr<HashGenerator> hashGenerator(new HashGenerator(config));


    boost::shared_ptr<std::vector<sauchar_t> > T(new std::vector<sauchar_t>());
    boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers(new std::vector<SUFFIX_MARKER_TYPE>());

    index.addExample(hashGenerator, T, markers, Example("Ala posiada kota",14));
    index.addExample(hashGenerator, T, markers, Example("Ala posiada rysia",51));
    index.addExample(hashGenerator, T, markers, Example("Marysia posiada rysia",123));

    boost::shared_ptr<std::vector<saidx_t> > SA = index.generateSuffixArray(T);


    // searching for pattern "Ola posiada rysia Marysia" (5 1 3 4)

    boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern = hashGenerator->generateHash("Ola posiada rysia Marysia");

    boost::shared_ptr<TmMatchesMap> tmMatchesMap = searcher.getTmMatches(T, markers, SA, pattern);
    BOOST_CHECK_EQUAL(tmMatchesMap->size(), 3);

    TmMatches * tmMatches14 = tmMatchesMap->find(14)->second;
    TmMatches * tmMatches51 = tmMatchesMap->find(51)->second;
    TmMatches * tmMatches123 = tmMatchesMap->find(123)->second;

    BOOST_CHECK_EQUAL(tmMatches14->getExampleId(), 14);
    BOOST_CHECK_EQUAL(tmMatches51->getExampleId(), 51);
    BOOST_CHECK_EQUAL(tmMatches123->getExampleId(), 123);

    // example 14
    // example interval list: [(1,2)]
    boost::ptr_vector<Interval> exampleIntervals14 = tmMatches14->getExampleIntervals();
    BOOST_CHECK_EQUAL(exampleIntervals14.size(), 1);
    BOOST_CHECK_EQUAL(exampleIntervals14[0].getStart(), 1);
    BOOST_CHECK_EQUAL(exampleIntervals14[0].getEnd(), 2);
    // pattern interval list: [(1,2)]
    boost::ptr_vector<Interval> patternIntervals14 = tmMatches14->getPatternIntervals();
    BOOST_CHECK_EQUAL(patternIntervals14.size(), 1);
    BOOST_CHECK_EQUAL(patternIntervals14[0].getStart(), 1);
    BOOST_CHECK_EQUAL(patternIntervals14[0].getEnd(), 2);

    // example 51
    // example interval list: [(1,3)]
    boost::ptr_vector<Interval> exampleIntervals51 = tmMatches51->getExampleIntervals();
    BOOST_CHECK_EQUAL(exampleIntervals51.size(), 1);
    BOOST_CHECK_EQUAL(exampleIntervals51[0].getStart(), 1);
    BOOST_CHECK_EQUAL(exampleIntervals51[0].getEnd(), 3);
    // pattern interval list: [(1,3)]
    boost::ptr_vector<Interval> patternIntervals51 = tmMatches51->getPatternIntervals();
    BOOST_CHECK_EQUAL(patternIntervals51.size(), 1);
    BOOST_CHECK_EQUAL(patternIntervals51[0].getStart(), 1);
    BOOST_CHECK_EQUAL(patternIntervals51[0].getEnd(), 3);

    // example 123
    // example interval list: [(1,3), (0,1)]
    boost::ptr_vector<Interval> exampleIntervals123 = tmMatches123->getExampleIntervals();
    BOOST_CHECK_EQUAL(exampleIntervals123.size(), 2);
    BOOST_CHECK_EQUAL(exampleIntervals123[0].getStart(), 1);
    BOOST_CHECK_EQUAL(exampleIntervals123[0].getEnd(), 3);
    BOOST_CHECK_EQUAL(exampleIntervals123[1].getStart(), 0);
    BOOST_CHECK_EQUAL(exampleIntervals123[1].getEnd(), 1);
    // pattern interval list: [(1,3), (3,4)]
    boost::ptr_vector<Interval> patternIntervals123 = tmMatches123->getPatternIntervals();
    BOOST_CHECK_EQUAL(patternIntervals123.size(), 2);
    BOOST_CHECK_EQUAL(patternIntervals123[0].getStart(), 1);
    BOOST_CHECK_EQUAL(patternIntervals123[0].getEnd(), 3);
    BOOST_CHECK_EQUAL(patternIntervals123[1].getStart(), 3);
    BOOST_CHECK_EQUAL(patternIntervals123[1].getEnd(), 4);


    boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
    boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
    boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));

}
@@ -177,7 +177,6 @@ BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
       n:     0 1 2 3 4 5 6 7 8 9 10 11
       SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7

    */
    boost::ptr_vector<AnubisSearchResult> searchResult1 = concordia.anubisSearch("posiada rysia chyba");
    boost::ptr_vector<AnubisSearchResult> searchResult2 = concordia.anubisSearch("posiada kota Ala");
@@ -5,6 +5,7 @@
#include "concordia/common/config.hpp"
#include "concordia/interval.hpp"
#include <boost/ptr_container/ptr_vector.hpp>
#include <boost/ptr_container/ptr_map.hpp>


/*!
@@ -28,6 +29,14 @@ public:
        return _score;
    }

    boost::ptr_vector<Interval> getExampleIntervals() const {
        return _exampleMatchedRegions;
    }

    boost::ptr_vector<Interval> getPatternIntervals() const {
        return _patternMatchedRegions;
    }

    SUFFIX_MARKER_TYPE getExampleId() const {
        return _exampleId;
    }
@@ -61,4 +70,7 @@ private:
    double _score;
};

typedef boost::ptr_map<SUFFIX_MARKER_TYPE, TmMatches> TmMatchesMap;
typedef TmMatchesMap::iterator TmMatchesMapIterator;

#endif
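The anubisSearch body earlier in this commit still leaves the "sorted descending by score" step as a comment. One possible way to write that step against the TmMatchesMap typedef above is sketched here; sortByScore is a hypothetical helper, not part of this commit, and it only relies on getScore() and getExampleId() as declared in this header.

#include <algorithm>
#include <utility>
#include <vector>

// Hypothetical helper (not part of this commit): flatten a populated
// TmMatchesMap into (score, example id) pairs, best match first.
std::vector<std::pair<double, SUFFIX_MARKER_TYPE> > sortByScore(TmMatchesMap & tmMatchesMap) {
    std::vector<std::pair<double, SUFFIX_MARKER_TYPE> > scored;
    for (TmMatchesMapIterator it = tmMatchesMap.begin();
            it != tmMatchesMap.end(); ++it) {
        TmMatches * tmMatches = it->second;
        scored.push_back(std::make_pair(tmMatches->getScore(),
                                        tmMatches->getExampleId()));
    }
    std::sort(scored.rbegin(), scored.rend());   // descending by score
    return scored;
}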