anubis search continued
Former-commit-id: 95a08f242a03311d067303bfff07bf4890796da5
This commit is contained in:
parent
e8ea5881a5
commit
8f953883bf
7
TODO.txt
7
TODO.txt
@ -4,3 +4,10 @@ DONE 3. Dzielenie zdań (max 255 tokenów)
|
||||
|
||||
- concordia-server
|
||||
- zastanowić się nad empty hash examples
|
||||
|
||||
|
||||
zastanowić się nad optymalizacją:
|
||||
- unordered_map tmMatchesMap
|
||||
- LCP array
|
||||
|
||||
Anubis search się komplikuje! Przy tworzeniu obiektu tmMatches dla przykładu trzeba podać id przykładu, długość patternu i długość przykładu. Dwa pierwsze mamy, ale niestety nie ma skąd wziąć długości przykładu. Pamiętamy tylko offset sufiksu.
|
||||
|
@ -1,7 +1,13 @@
|
||||
#include "concordia/anubis_searcher.hpp"
|
||||
#include "concordia/tm_matches.hpp"
|
||||
|
||||
#include <boost/ptr_container/ptr_map.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
|
||||
// Pointer map from a SUFFIX_MARKER_TYPE key to a TmMatches object.
// In anubisSearch the lookup key is the value of SubstringOccurence::getId().
typedef boost::ptr_map<SUFFIX_MARKER_TYPE, TmMatches> TmMatchesMap;
|
||||
// Iterator type used for lookups in TmMatchesMap.
typedef TmMatchesMap::iterator TmMatchesMapIterator;
|
||||
|
||||
// Default constructor with an empty body.
AnubisSearcher::AnubisSearcher() {}
|
||||
@ -18,6 +24,39 @@ boost::ptr_vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
|
||||
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
|
||||
throw(ConcordiaException) {
|
||||
boost::ptr_vector<AnubisSearchResult> result;
|
||||
|
||||
boost::shared_ptr<std::vector<sauchar_t> > patternVector =
|
||||
Utils::indexVectorToSaucharVector(pattern);
|
||||
|
||||
if (patternVector->size() != pattern->size() * sizeof(INDEX_CHARACTER_TYPE)) {
|
||||
throw ConcordiaException("Increasing pattern resolution went wrong.");
|
||||
}
|
||||
|
||||
|
||||
TmMatchesMap tmMatchesMap;
|
||||
for (int offset = 0;offset < pattern->size(); offset++) {
|
||||
int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE);
|
||||
boost::shared_ptr<std::vector<sauchar_t> > currentPattern =
|
||||
boost::shared_ptr<std::vector<sauchar_t> >(new std::vector<sauchar_t>(
|
||||
patternVector->begin()+highResOffset,patternVector->end()));
|
||||
SUFFIX_MARKER_TYPE longestPrefixesLength;
|
||||
boost::ptr_vector<SubstringOccurence> longestPrefixes = lcpSearch(T, markers, SA,
|
||||
currentPattern, longestPrefixesLength);
|
||||
|
||||
BOOST_FOREACH(SubstringOccurence & occurence, longestPrefixes) {
|
||||
TmMatchesMapIterator mapIterator = tmMatchesMap.find(occurence.getId());
|
||||
if(mapIterator != tmMatchesMap.end()) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,19 @@ sauchar_t * Utils::indexVectorToSaucharArray(
|
||||
return patternArray;
|
||||
}
|
||||
|
||||
// Converts a vector of index characters into a vector of sauchar_t values.
// Every element of `input` is passed, in order, to appendCharToSaucharVector,
// which appends that element's representation to the result.
//
// input   - index characters to convert
// returns - newly created vector holding the appended sauchar_t values
boost::shared_ptr<std::vector<sauchar_t> > Utils::indexVectorToSaucharVector(
                boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input) {
    boost::shared_ptr<std::vector<sauchar_t> > result(
                                            new std::vector<sauchar_t>);

    // Callers expect sizeof(INDEX_CHARACTER_TYPE) output values per input
    // character, so the final size is known up front. Reserving it avoids
    // repeated reallocation while appending; reserve() only affects capacity,
    // never the contents or size of the vector.
    result->reserve(input->size() * sizeof(INDEX_CHARACTER_TYPE));

    for (vector<INDEX_CHARACTER_TYPE>::iterator it = input->begin();
                                                it != input->end(); ++it) {
        appendCharToSaucharVector(result, *it);
    }
    return result;
}
|
||||
|
||||
void Utils::appendCharToSaucharVector(
|
||||
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
||||
INDEX_CHARACTER_TYPE character) {
|
||||
@ -59,3 +72,5 @@ void Utils::_insertCharToSaucharArray(sauchar_t * array,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -34,6 +34,9 @@ public:
|
||||
// NOTE(review): presumably converts `input` into a raw sauchar_t array; only
// the tail of the definition is visible here (it returns a local named
// patternArray) - confirm who is responsible for freeing the result.
static sauchar_t * indexVectorToSaucharArray(
|
||||
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input);
|
||||
|
||||
// Converts `input` into a shared vector of sauchar_t values. The definition
// passes every element of `input`, in order, to appendCharToSaucharVector
// and returns the vector those calls filled.
static boost::shared_ptr<std::vector<sauchar_t> > indexVectorToSaucharVector(
|
||||
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input);
|
||||
|
||||
// Appends the representation of `character` to the sauchar_t vector `vector`.
// NOTE(review): the unit test for indexVectorToSaucharVector expects four
// sauchar_t values per character, in in-memory byte order - confirm against
// the definition, which is not fully visible here.
static void appendCharToSaucharVector(
|
||||
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
||||
INDEX_CHARACTER_TYPE character);
|
||||
|
@ -60,6 +60,31 @@ BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray )
|
||||
BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end());
|
||||
}
|
||||
|
||||
// Checks that Utils::indexVectorToSaucharVector turns two index characters
// into the expected sequence of eight sauchar_t values.
BOOST_AUTO_TEST_CASE( IndexVectorToSaucharVector )
{
    boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash(new vector<INDEX_CHARACTER_TYPE>());
    // 123456789 == 0x075BCD15 -> in memory: 15 cd 5b 07 (dec: 21 205 91 7)
    hash->push_back(123456789);
    // 987654321 == 0x3ADE68B1 -> in memory: b1 68 de 3a (dec: 177 104 222 58)
    hash->push_back(987654321);

    boost::shared_ptr<vector<sauchar_t> > result = Utils::indexVectorToSaucharVector(hash);

    // Expected values for both characters, in the order listed above.
    const int expectedBytes[] = {21, 205, 91, 7, 177, 104, 222, 58};
    vector<sauchar_t> expected(
        expectedBytes,
        expectedBytes + sizeof(expectedBytes) / sizeof(expectedBytes[0]));

    BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected.begin(), expected.end());
}
|
||||
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
Loading…
Reference in New Issue
Block a user