anubis search continued

Former-commit-id: 95a08f242a03311d067303bfff07bf4890796da5
This commit is contained in:
rjawor 2014-06-24 18:23:46 +02:00
parent e8ea5881a5
commit 8f953883bf
5 changed files with 90 additions and 1 deletions

View File

@ -4,3 +4,10 @@ DONE 3. Dzielenie zdań (max 255 tokenów)
- concordia-server - concordia-server
- zastanowić się nad empty hash examples - zastanowić się nad empty hash examples
zastanowić się nad optymalizacją:
- unordered_map tmMatchesMap
- LCP array
Anubis search się komplikuje! Przy tworzeniu obiektu tmMatches dla przykładu trzeba podać id przykładu, długość patternu i długość przykładu. Dwa pierwsze mamy, ale niestety nie ma skąd wziąć długości przykładu. Pamiętamy tylko offset sufiksu.

View File

@ -1,7 +1,13 @@
#include "concordia/anubis_searcher.hpp" #include "concordia/anubis_searcher.hpp"
#include "concordia/tm_matches.hpp"
#include <boost/ptr_container/ptr_map.hpp>
#include <boost/foreach.hpp>
#include <iostream> #include <iostream>
#include <map>
// Map keyed by SUFFIX_MARKER_TYPE whose values are TmMatches objects held by
// pointer (boost::ptr_map); anubisSearch looks entries up by occurrence id.
typedef boost::ptr_map<SUFFIX_MARKER_TYPE, TmMatches> TmMatchesMap;
// Iterator over the entries of a TmMatchesMap.
typedef TmMatchesMap::iterator TmMatchesMapIterator;
AnubisSearcher::AnubisSearcher() { AnubisSearcher::AnubisSearcher() {
} }
@ -18,6 +24,39 @@ boost::ptr_vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern) boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
throw(ConcordiaException) { throw(ConcordiaException) {
boost::ptr_vector<AnubisSearchResult> result; boost::ptr_vector<AnubisSearchResult> result;
boost::shared_ptr<std::vector<sauchar_t> > patternVector =
Utils::indexVectorToSaucharVector(pattern);
if (patternVector->size() != pattern->size() * sizeof(INDEX_CHARACTER_TYPE)) {
throw ConcordiaException("Increasing pattern resolution went wrong.");
}
TmMatchesMap tmMatchesMap;
for (int offset = 0;offset < pattern->size(); offset++) {
int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE);
boost::shared_ptr<std::vector<sauchar_t> > currentPattern =
boost::shared_ptr<std::vector<sauchar_t> >(new std::vector<sauchar_t>(
patternVector->begin()+highResOffset,patternVector->end()));
SUFFIX_MARKER_TYPE longestPrefixesLength;
boost::ptr_vector<SubstringOccurence> longestPrefixes = lcpSearch(T, markers, SA,
currentPattern, longestPrefixesLength);
BOOST_FOREACH(SubstringOccurence & occurence, longestPrefixes) {
TmMatchesMapIterator mapIterator = tmMatchesMap.find(occurence.getId());
if(mapIterator != tmMatchesMap.end()) {
} else {
}
}
}
return result; return result;
} }

View File

@ -42,6 +42,19 @@ sauchar_t * Utils::indexVectorToSaucharArray(
return patternArray; return patternArray;
} }
// Converts a vector of index characters into a vector of sauchar_t values by
// appending every input character, in order, through
// appendCharToSaucharVector.
//
// input  - shared pointer to the index characters to convert; it is
//          dereferenced unconditionally, so it must not be null.
// return - shared pointer to a newly allocated vector holding everything
//          appended for the input characters (empty for an empty input).
boost::shared_ptr<std::vector<sauchar_t> > Utils::indexVectorToSaucharVector(
        boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input) {
    boost::shared_ptr<std::vector<sauchar_t> > result(
        new std::vector<sauchar_t>());
    // Each index character is expected to contribute
    // sizeof(INDEX_CHARACTER_TYPE) elements, so reserve the whole buffer once
    // instead of letting the vector reallocate while it grows. reserve() is
    // only a capacity hint and never changes the produced contents.
    result->reserve(input->size() * sizeof(INDEX_CHARACTER_TYPE));
    // The input is only read, hence the const_iterator.
    for (vector<INDEX_CHARACTER_TYPE>::const_iterator it = input->begin();
         it != input->end(); ++it) {
        appendCharToSaucharVector(result, *it);
    }
    return result;
}
void Utils::appendCharToSaucharVector( void Utils::appendCharToSaucharVector(
boost::shared_ptr<std::vector<sauchar_t> > vector, boost::shared_ptr<std::vector<sauchar_t> > vector,
INDEX_CHARACTER_TYPE character) { INDEX_CHARACTER_TYPE character) {
@ -59,3 +72,5 @@ void Utils::_insertCharToSaucharArray(sauchar_t * array,
} }
} }

View File

@ -34,6 +34,9 @@ public:
static sauchar_t * indexVectorToSaucharArray( static sauchar_t * indexVectorToSaucharArray(
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input); boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input);
// Converts a vector of index characters into a newly allocated vector of
// sauchar_t values, returned through a shared pointer.
static boost::shared_ptr<std::vector<sauchar_t> > indexVectorToSaucharVector(
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input);
static void appendCharToSaucharVector( static void appendCharToSaucharVector(
boost::shared_ptr<std::vector<sauchar_t> > vector, boost::shared_ptr<std::vector<sauchar_t> > vector,
INDEX_CHARACTER_TYPE character); INDEX_CHARACTER_TYPE character);

View File

@ -60,6 +60,31 @@ BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray )
BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end()); BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end());
} }
BOOST_AUTO_TEST_CASE( IndexVectorToSaucharVector )
{
    // Two index characters whose expected byte layout is spelled out below.
    boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash(new vector<INDEX_CHARACTER_TYPE>());
    hash->push_back(123456789);  // hex 075BCD15 -> in memory: 15 cd 5b 07
    hash->push_back(987654321);  // hex 3ADE68B1 -> in memory: b1 68 de 3a

    // The same bytes in decimal, in the order they are expected in the result.
    const sauchar_t expectedBytes[] = {
        21, 205, 91, 7,       // 123456789
        177, 104, 222, 58     // 987654321
    };

    boost::shared_ptr<vector<sauchar_t> > result = Utils::indexVectorToSaucharVector(hash);

    BOOST_CHECK_EQUAL_COLLECTIONS(
        result->begin(), result->end(),
        expectedBytes,
        expectedBytes + sizeof(expectedBytes) / sizeof(expectedBytes[0]));
}
BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END()