anubis search continued
Former-commit-id: 95a08f242a03311d067303bfff07bf4890796da5
This commit is contained in:
parent
e8ea5881a5
commit
8f953883bf
7
TODO.txt
7
TODO.txt
@ -4,3 +4,10 @@ DONE 3. Dzielenie zdań (max 255 tokenów)
|
|||||||
|
|
||||||
- concordia-server
|
- concordia-server
|
||||||
- zastanowić się nad empty hash examples
|
- zastanowić się nad empty hash examples
|
||||||
|
|
||||||
|
|
||||||
|
zastanowić się nad optymalizacją:
|
||||||
|
- unordered_map tmMatchesMap
|
||||||
|
- LCP array
|
||||||
|
|
||||||
|
Anubis search się komplikuje! Przy tworzeniu obiektu tmMatches dla przykładu trzeba podać id przykładu, długość patternu i długość przykładu. Dwa pierwsze mamy, ale niestety nie ma skąd wziąć długości przykładu. Pamiętamy tylko offset sufiksu.
|
||||||
|
@ -1,7 +1,13 @@
|
|||||||
#include "concordia/anubis_searcher.hpp"
|
#include "concordia/anubis_searcher.hpp"
|
||||||
|
#include "concordia/tm_matches.hpp"
|
||||||
|
|
||||||
|
#include <boost/ptr_container/ptr_map.hpp>
|
||||||
|
#include <boost/foreach.hpp>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
typedef boost::ptr_map<SUFFIX_MARKER_TYPE, TmMatches> TmMatchesMap;
|
||||||
|
typedef TmMatchesMap::iterator TmMatchesMapIterator;
|
||||||
|
|
||||||
AnubisSearcher::AnubisSearcher() {
|
AnubisSearcher::AnubisSearcher() {
|
||||||
}
|
}
|
||||||
@ -18,6 +24,39 @@ boost::ptr_vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
|
|||||||
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
|
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
|
||||||
throw(ConcordiaException) {
|
throw(ConcordiaException) {
|
||||||
boost::ptr_vector<AnubisSearchResult> result;
|
boost::ptr_vector<AnubisSearchResult> result;
|
||||||
|
|
||||||
|
boost::shared_ptr<std::vector<sauchar_t> > patternVector =
|
||||||
|
Utils::indexVectorToSaucharVector(pattern);
|
||||||
|
|
||||||
|
if (patternVector->size() != pattern->size() * sizeof(INDEX_CHARACTER_TYPE)) {
|
||||||
|
throw ConcordiaException("Increasing pattern resolution went wrong.");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
TmMatchesMap tmMatchesMap;
|
||||||
|
for (int offset = 0;offset < pattern->size(); offset++) {
|
||||||
|
int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE);
|
||||||
|
boost::shared_ptr<std::vector<sauchar_t> > currentPattern =
|
||||||
|
boost::shared_ptr<std::vector<sauchar_t> >(new std::vector<sauchar_t>(
|
||||||
|
patternVector->begin()+highResOffset,patternVector->end()));
|
||||||
|
SUFFIX_MARKER_TYPE longestPrefixesLength;
|
||||||
|
boost::ptr_vector<SubstringOccurence> longestPrefixes = lcpSearch(T, markers, SA,
|
||||||
|
currentPattern, longestPrefixesLength);
|
||||||
|
|
||||||
|
BOOST_FOREACH(SubstringOccurence & occurence, longestPrefixes) {
|
||||||
|
TmMatchesMapIterator mapIterator = tmMatchesMap.find(occurence.getId());
|
||||||
|
if(mapIterator != tmMatchesMap.end()) {
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,6 +42,19 @@ sauchar_t * Utils::indexVectorToSaucharArray(
|
|||||||
return patternArray;
|
return patternArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Converts a vector of index characters into a vector of sauchar_t.
 * Every element of the input is appended to the result, in order,
 * by means of appendCharToSaucharVector.
 *
 * \param input vector of index characters to be converted
 * \returns newly allocated vector holding the converted characters
 */
boost::shared_ptr<std::vector<sauchar_t> > Utils::indexVectorToSaucharVector(
                    boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input) {

    boost::shared_ptr<std::vector<sauchar_t> > result(
                                            new std::vector<sauchar_t>);

    // Callers expect sizeof(INDEX_CHARACTER_TYPE) output elements per
    // input element, so allocate once instead of growing repeatedly.
    // This is only a capacity hint and does not change the result.
    result->reserve(input->size() * sizeof(INDEX_CHARACTER_TYPE));

    for (std::vector<INDEX_CHARACTER_TYPE>::iterator it = input->begin();
                                            it != input->end(); ++it) {
        appendCharToSaucharVector(result, *it);
    }

    return result;
}
|
||||||
|
|
||||||
void Utils::appendCharToSaucharVector(
|
void Utils::appendCharToSaucharVector(
|
||||||
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
||||||
INDEX_CHARACTER_TYPE character) {
|
INDEX_CHARACTER_TYPE character) {
|
||||||
@ -59,3 +72,5 @@ void Utils::_insertCharToSaucharArray(sauchar_t * array,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,6 +34,9 @@ public:
|
|||||||
static sauchar_t * indexVectorToSaucharArray(
|
static sauchar_t * indexVectorToSaucharArray(
|
||||||
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input);
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input);
|
||||||
|
|
||||||
|
static boost::shared_ptr<std::vector<sauchar_t> > indexVectorToSaucharVector(
|
||||||
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input);
|
||||||
|
|
||||||
static void appendCharToSaucharVector(
|
static void appendCharToSaucharVector(
|
||||||
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
||||||
INDEX_CHARACTER_TYPE character);
|
INDEX_CHARACTER_TYPE character);
|
||||||
|
@ -60,6 +60,31 @@ BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray )
|
|||||||
BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end());
|
BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_CASE( IndexVectorToSaucharVector )
{
    // Input values together with the byte layout this test expects:
    //   123456789 (hex 75BCD15)  -> 15 cd 5b 07 -> DEC 21 205 91 7
    //   987654321 (hex 3ADE68B1) -> b1 68 de 3a -> DEC 177 104 222 58
    // NOTE(review): the expected bytes presume a 4-byte, little-endian
    // INDEX_CHARACTER_TYPE -- confirm against the type definition.
    const INDEX_CHARACTER_TYPE inputValues[] = {123456789, 987654321};
    const sauchar_t expectedBytes[] = {21, 205, 91, 7, 177, 104, 222, 58};

    const size_t inputCount = sizeof(inputValues) / sizeof(inputValues[0]);
    const size_t expectedCount = sizeof(expectedBytes) / sizeof(expectedBytes[0]);

    boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash(
        new vector<INDEX_CHARACTER_TYPE>(inputValues, inputValues + inputCount));
    boost::shared_ptr<vector<sauchar_t> > expected(
        new vector<sauchar_t>(expectedBytes, expectedBytes + expectedCount));

    boost::shared_ptr<vector<sauchar_t> > result = Utils::indexVectorToSaucharVector(hash);

    BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end());
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
BOOST_AUTO_TEST_SUITE_END()
|
||||||
|
Loading…
Reference in New Issue
Block a user