anubis searcher stub
Former-commit-id: fe520e255c918c889bdd421a363668b2ed76c675
This commit is contained in:
parent
96f55cddf3
commit
dd8b27cc23
@ -6,6 +6,7 @@ foreach(dir ${ALL_DIRECTORIES})
|
|||||||
endforeach(dir)
|
endforeach(dir)
|
||||||
|
|
||||||
add_library(concordia SHARED
|
add_library(concordia SHARED
|
||||||
|
anubis_searcher.cpp
|
||||||
regex_replacement.cpp
|
regex_replacement.cpp
|
||||||
sentence_anonymizer.cpp
|
sentence_anonymizer.cpp
|
||||||
interval.cpp
|
interval.cpp
|
||||||
@ -30,6 +31,7 @@ add_subdirectory(t)
|
|||||||
|
|
||||||
install(TARGETS concordia DESTINATION lib/)
|
install(TARGETS concordia DESTINATION lib/)
|
||||||
install(FILES
|
install(FILES
|
||||||
|
anubis_searcher.hpp
|
||||||
regex_replacement.hpp
|
regex_replacement.hpp
|
||||||
sentence_anonymizer.hpp
|
sentence_anonymizer.hpp
|
||||||
interval.hpp
|
interval.hpp
|
||||||
|
21
concordia/anubis_searcher.cpp
Normal file
21
concordia/anubis_searcher.cpp
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
#include "concordia/anubis_searcher.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
// Default constructor. The body is empty: this stub performs no initialisation.
AnubisSearcher::AnubisSearcher() {
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Destructor. The body is empty: nothing is released here.
AnubisSearcher::~AnubisSearcher() {
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Stub implementation of the Anubis search.
//
// Parameters T, markers and SA are accepted but not read by this body; the
// fourth parameter (a vector of INDEX_CHARACTER_TYPE) is left unnamed, so it
// cannot be used here at all.
//
// Returns a newly constructed, empty boost::ptr_vector<AnubisSearchResult>.
//
// The signature carries throw(ConcordiaException), but no statement in this
// body throws.
// NOTE(review): dynamic exception specifications were deprecated in C++11 and
// removed in C++17; the declaration in the header must be changed together
// with this definition if the specification is ever dropped.
boost::ptr_vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
|
||||||
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
|
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> >)
|
||||||
|
throw(ConcordiaException) {
|
||||||
|
|
||||||
|
// Placeholder result: nothing is ever inserted before it is returned.
boost::ptr_vector<AnubisSearchResult> result;
|
||||||
|
return result;
|
||||||
|
}
|
37
concordia/anubis_searcher.hpp
Normal file
37
concordia/anubis_searcher.hpp
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
#ifndef ANUBIS_SEARCHER_HDR
|
||||||
|
#define ANUBIS_SEARCHER_HDR
|
||||||
|
|
||||||
|
#include <boost/shared_ptr.hpp>
|
||||||
|
#include <boost/ptr_container/ptr_vector.hpp>
|
||||||
|
|
||||||
|
#include "concordia/common/config.hpp"
|
||||||
|
#include "concordia/substring_occurence.hpp"
|
||||||
|
#include "concordia/concordia_exception.hpp"
|
||||||
|
#include "concordia/anubis_search_result.hpp"
|
||||||
|
|
||||||
|
#include <divsufsort.h>
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Class for searching using Anubis algorithm.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Declares the Anubis searcher: a constructor, a virtual destructor and a
// single search method. The private section is empty (no data members).
class AnubisSearcher {
|
||||||
|
public:
|
||||||
|
/*! Constructor. Takes no arguments.
*/
explicit AnubisSearcher();
|
||||||
|
|
||||||
|
/*! Destructor.
|
||||||
|
*/
|
||||||
|
virtual ~AnubisSearcher();
|
||||||
|
|
||||||
|
/*! Runs an Anubis search and returns the results by value.
  \param T shared vector of sauchar_t
           (presumably the indexed text - TODO confirm)
  \param markers shared vector of SUFFIX_MARKER_TYPE
  \param SA shared vector of saidx_t
            (presumably the suffix array of T - TODO confirm)
  \param (unnamed) shared vector of INDEX_CHARACTER_TYPE;
         IndexSearcher::anubisSearch passes the hash generated
         from the search pattern in this position
  \returns vector of AnubisSearchResult objects
  \throws ConcordiaException (per the exception specification)
  NOTE(review): throw(...) specifications were removed in C++17.
*/
boost::ptr_vector<AnubisSearchResult> anubisSearch(
|
||||||
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
|
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> >) throw(ConcordiaException);
|
||||||
|
private:
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -4,6 +4,8 @@
|
|||||||
#include <boost/filesystem.hpp>
|
#include <boost/filesystem.hpp>
|
||||||
|
|
||||||
// Constructor (this span shows the old and new diff sides interleaved).
// The lines added by this commit allocate a new AnubisSearcher and store it
// in the _anubisSearcher shared pointer, which anubisSearch() later calls.
IndexSearcher::IndexSearcher() {
|
IndexSearcher::IndexSearcher() {
|
||||||
|
_anubisSearcher = boost::shared_ptr<AnubisSearcher>(
|
||||||
|
new AnubisSearcher());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -25,7 +27,7 @@ boost::ptr_vector<SubstringOccurence> IndexSearcher::simpleSearch(
|
|||||||
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
|
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
|
||||||
int size = sa_search(T->data(), (saidx_t) T->size(),
|
int size = sa_search(T->data(), (saidx_t) T->size(),
|
||||||
(const sauchar_t *) patternArray, patternLength,
|
(const sauchar_t *) patternArray, patternLength,
|
||||||
SA->data(), (saidx_t) T->size(), &left);
|
SA->data(), (saidx_t) SA->size(), &left);
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
saidx_t resultPos = SA->at(left + i);
|
saidx_t resultPos = SA->at(left + i);
|
||||||
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
||||||
@ -55,6 +57,7 @@ boost::ptr_vector<AnubisSearchResult> IndexSearcher::anubisSearch(
|
|||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
const string & pattern) throw(ConcordiaException) {
|
const string & pattern) throw(ConcordiaException) {
|
||||||
boost::ptr_vector<AnubisSearchResult> result;
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash =
|
||||||
return result;
|
hashGenerator->generateHash(pattern);
|
||||||
|
return _anubisSearcher->anubisSearch(T, markers, SA, hash);
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "concordia/substring_occurence.hpp"
|
#include "concordia/substring_occurence.hpp"
|
||||||
#include "concordia/hash_generator.hpp"
|
#include "concordia/hash_generator.hpp"
|
||||||
#include "concordia/concordia_exception.hpp"
|
#include "concordia/concordia_exception.hpp"
|
||||||
|
#include "concordia/anubis_searcher.hpp"
|
||||||
#include "concordia/anubis_search_result.hpp"
|
#include "concordia/anubis_search_result.hpp"
|
||||||
|
|
||||||
#include <divsufsort.h>
|
#include <divsufsort.h>
|
||||||
@ -43,6 +44,7 @@ public:
|
|||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
const string & pattern) throw(ConcordiaException);
|
const string & pattern) throw(ConcordiaException);
|
||||||
private:
|
private:
|
||||||
|
boost::shared_ptr<AnubisSearcher> _anubisSearcher;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user