anubis searcher stub

Former-commit-id: fe520e255c918c889bdd421a363668b2ed76c675
This commit is contained in:
rjawor 2014-05-14 16:29:44 +02:00
parent 96f55cddf3
commit dd8b27cc23
5 changed files with 68 additions and 3 deletions

View File

@ -6,6 +6,7 @@ foreach(dir ${ALL_DIRECTORIES})
endforeach(dir) endforeach(dir)
add_library(concordia SHARED add_library(concordia SHARED
anubis_searcher.cpp
regex_replacement.cpp regex_replacement.cpp
sentence_anonymizer.cpp sentence_anonymizer.cpp
interval.cpp interval.cpp
@ -30,6 +31,7 @@ add_subdirectory(t)
install(TARGETS concordia DESTINATION lib/) install(TARGETS concordia DESTINATION lib/)
install(FILES install(FILES
anubis_searcher.hpp
regex_replacement.hpp regex_replacement.hpp
sentence_anonymizer.hpp sentence_anonymizer.hpp
interval.hpp interval.hpp

View File

@ -0,0 +1,21 @@
#include "concordia/anubis_searcher.hpp"
/*! Constructor.
  The class declares no data members, so there is nothing to initialize.
*/
AnubisSearcher::AnubisSearcher() {}
/*! Destructor.
  Intentionally empty: the searcher acquires no resources of its own.
*/
AnubisSearcher::~AnubisSearcher() {}
/*! Stub of the Anubis search.
  None of the arguments is inspected yet; the call always produces an
  empty result collection.
  \param T presumably the indexed text, as handed to sa_search by
           IndexSearcher -- TODO confirm
  \param markers suffix markers (not used by the stub; semantics to be
           confirmed against the caller)
  \param SA presumably the suffix array built over T -- TODO confirm
  \param (unnamed) hashed search pattern supplied by the caller
  \returns an empty vector of AnubisSearchResult
  \throws ConcordiaException declared in the signature; the stub itself
           contains no throw statement
*/
boost::ptr_vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
        boost::shared_ptr<std::vector<sauchar_t> > T,
        boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
        boost::shared_ptr<std::vector<saidx_t> > SA,
        boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > /* pattern */)
                                            throw(ConcordiaException) {
    // No search logic exists yet: hand back an empty container.
    return boost::ptr_vector<AnubisSearchResult>();
}

View File

@ -0,0 +1,37 @@
#ifndef ANUBIS_SEARCHER_HDR
#define ANUBIS_SEARCHER_HDR
#include <boost/shared_ptr.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
#include "concordia/common/config.hpp"
#include "concordia/substring_occurence.hpp"
#include "concordia/concordia_exception.hpp"
#include "concordia/anubis_search_result.hpp"
#include <divsufsort.h>
// NOTE(review): a using-directive at header scope is visible in every file
// that includes this header. It is kept here because other code may rely on
// it for unqualified std names -- confirm before removing.
using namespace std;

/*!
  Class for searching using the Anubis algorithm.
  The class declares no data members; all input is passed to anubisSearch().
*/
class AnubisSearcher {
public:
    /*! Constructor.
    */
    explicit AnubisSearcher();

    /*! Destructor.
    */
    virtual ~AnubisSearcher();

    /*! Runs the Anubis search.
      \param T presumably the indexed text -- TODO confirm
      \param markers suffix markers (semantics to be confirmed)
      \param SA presumably the suffix array built over T -- TODO confirm
      \param pattern hashed search pattern
      \returns vector of Anubis search results
      \throws ConcordiaException
    */
    boost::ptr_vector<AnubisSearchResult> anubisSearch(
        boost::shared_ptr<std::vector<sauchar_t> > T,
        boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
        boost::shared_ptr<std::vector<saidx_t> > SA,
        boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern)
                                            throw(ConcordiaException);
};
#endif

View File

@ -4,6 +4,8 @@
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
IndexSearcher::IndexSearcher() { IndexSearcher::IndexSearcher() {
_anubisSearcher = boost::shared_ptr<AnubisSearcher>(
new AnubisSearcher());
} }
@ -25,7 +27,7 @@ boost::ptr_vector<SubstringOccurence> IndexSearcher::simpleSearch(
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash); sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
int size = sa_search(T->data(), (saidx_t) T->size(), int size = sa_search(T->data(), (saidx_t) T->size(),
(const sauchar_t *) patternArray, patternLength, (const sauchar_t *) patternArray, patternLength,
SA->data(), (saidx_t) T->size(), &left); SA->data(), (saidx_t) SA->size(), &left);
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
saidx_t resultPos = SA->at(left + i); saidx_t resultPos = SA->at(left + i);
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) { if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
@ -55,6 +57,7 @@ boost::ptr_vector<AnubisSearchResult> IndexSearcher::anubisSearch(
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const string & pattern) throw(ConcordiaException) { const string & pattern) throw(ConcordiaException) {
boost::ptr_vector<AnubisSearchResult> result; boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash =
return result; hashGenerator->generateHash(pattern);
return _anubisSearcher->anubisSearch(T, markers, SA, hash);
} }

View File

@ -10,6 +10,7 @@
#include "concordia/substring_occurence.hpp" #include "concordia/substring_occurence.hpp"
#include "concordia/hash_generator.hpp" #include "concordia/hash_generator.hpp"
#include "concordia/concordia_exception.hpp" #include "concordia/concordia_exception.hpp"
#include "concordia/anubis_searcher.hpp"
#include "concordia/anubis_search_result.hpp" #include "concordia/anubis_search_result.hpp"
#include <divsufsort.h> #include <divsufsort.h>
@ -43,6 +44,7 @@ public:
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const string & pattern) throw(ConcordiaException); const string & pattern) throw(ConcordiaException);
private: private:
boost::shared_ptr<AnubisSearcher> _anubisSearcher;
}; };
#endif #endif