anubis searcher -> concordia searcher

Former-commit-id: 8afe194adf3163ee62caa30732d9c9dd095df66b
This commit is contained in:
rjawor 2015-04-24 11:48:32 +02:00
parent 23aa113747
commit bb7608d05e
8 changed files with 31 additions and 29 deletions

View File

@ -1,6 +1,6 @@
---------------------------- Developer's private notes -----------------------------
- document the code
IN PROGRESS - concordia search zwraca pozycje tokenów z hash'a. Jak to odnieść do examples w korpusie?
- testy zużycia pamięci
- Prawdopodobnie długość example w markers będzie potrzebna tylko anubisowi (który, jak się okazuje, jest wolny). Pomyśleć, do czego można wykorzystać markery, bo ich idea wydaje się niezła.

View File

@ -8,7 +8,7 @@ endforeach(dir)
add_library(concordia SHARED
concordia_search_result.cpp
matched_pattern_fragment.cpp
anubis_searcher.cpp
concordia_searcher.cpp
regex_replacement.cpp
sentence_anonymizer.cpp
interval.cpp
@ -33,7 +33,9 @@ add_subdirectory(t)
install(TARGETS concordia DESTINATION lib/)
install(FILES
anubis_searcher.hpp
concordia_search_result.hpp
matched_pattern_fragment.hpp
concordia_searcher.hpp
regex_replacement.hpp
sentence_anonymizer.hpp
interval.hpp

View File

@ -1,17 +1,17 @@
#include "concordia/anubis_searcher.hpp"
#include "concordia/concordia_searcher.hpp"
#include "concordia/common/logging.hpp"
#include <boost/foreach.hpp>
#include <iostream>
AnubisSearcher::AnubisSearcher() {
ConcordiaSearcher::ConcordiaSearcher() {
}
AnubisSearcher::~AnubisSearcher() {
ConcordiaSearcher::~ConcordiaSearcher() {
}
void AnubisSearcher::concordiaSearch(
void ConcordiaSearcher::concordiaSearch(
boost::shared_ptr<ConcordiaSearchResult> result,
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
@ -51,7 +51,7 @@ void AnubisSearcher::concordiaSearch(
result->sortFragments();
}
std::vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
std::vector<AnubisSearchResult> ConcordiaSearcher::anubisSearch(
boost::shared_ptr<ConcordiaConfig> config,
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
@ -83,7 +83,7 @@ std::vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
return result;
}
boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
boost::shared_ptr<TmMatchesMap> ConcordiaSearcher::getTmMatches(
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
@ -156,7 +156,7 @@ boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
return tmMatchesMap;
}
std::vector<SubstringOccurence> AnubisSearcher::lcpSearch(
std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
@ -209,7 +209,7 @@ std::vector<SubstringOccurence> AnubisSearcher::lcpSearch(
return result;
}
void AnubisSearcher::_collectResults(
void ConcordiaSearcher::_collectResults(
std::vector<SubstringOccurence> & result,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
@ -232,7 +232,7 @@ void AnubisSearcher::_collectResults(
}
}
void AnubisSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
void ConcordiaSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<TmMatchesMap> tmMatchesMap,
saidx_t sa_pos,
@ -249,7 +249,7 @@ void AnubisSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
}
}
bool AnubisSearcher::_getOccurenceFromSA(
bool ConcordiaSearcher::_getOccurenceFromSA(
boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
saidx_t sa_pos,
@ -263,7 +263,7 @@ bool AnubisSearcher::_getOccurenceFromSA(
}
}
void AnubisSearcher::_addOccurenceToMap(
void ConcordiaSearcher::_addOccurenceToMap(
boost::shared_ptr<TmMatchesMap> tmMatchesMap,
SubstringOccurence & occurence,
SUFFIX_MARKER_TYPE totalPatternLength,

View File

@ -16,17 +16,17 @@
#include <divsufsort.h>
/*!
Class for searching using Anubis algorithm.
Class for searching using the Concordia algorithm.
*/
class AnubisSearcher {
class ConcordiaSearcher {
public:
explicit AnubisSearcher();
explicit ConcordiaSearcher();
/*! Destructor.
*/
virtual ~AnubisSearcher();
virtual ~ConcordiaSearcher();
void concordiaSearch(
boost::shared_ptr<ConcordiaSearchResult> result,

View File

@ -4,8 +4,8 @@
#include <boost/filesystem.hpp>
IndexSearcher::IndexSearcher() {
_anubisSearcher = boost::shared_ptr<AnubisSearcher>(
new AnubisSearcher());
_concordiaSearcher = boost::shared_ptr<ConcordiaSearcher>(
new ConcordiaSearcher());
}
@ -57,7 +57,7 @@ std::vector<AnubisSearchResult> IndexSearcher::anubisSearch(
const std::string & pattern) throw(ConcordiaException) {
std::vector<INDEX_CHARACTER_TYPE> hash =
hashGenerator->generateHash(pattern);
return _anubisSearcher->anubisSearch(config, T, markers, SA, hash);
return _concordiaSearcher->anubisSearch(config, T, markers, SA, hash);
}
boost::shared_ptr<ConcordiaSearchResult> IndexSearcher::concordiaSearch(
@ -72,6 +72,6 @@ boost::shared_ptr<ConcordiaSearchResult> IndexSearcher::concordiaSearch(
boost::shared_ptr<ConcordiaSearchResult>(
new ConcordiaSearchResult(hashGenerator->generateTokenVector(pattern)));
_anubisSearcher->concordiaSearch(result, T, markers, SA, hash);
_concordiaSearcher->concordiaSearch(result, T, markers, SA, hash);
return result;
}

View File

@ -10,7 +10,7 @@
#include "concordia/substring_occurence.hpp"
#include "concordia/hash_generator.hpp"
#include "concordia/concordia_exception.hpp"
#include "concordia/anubis_searcher.hpp"
#include "concordia/concordia_searcher.hpp"
#include "concordia/anubis_search_result.hpp"
#include <divsufsort.h>
@ -51,7 +51,7 @@ public:
const std::string & pattern) throw(ConcordiaException);
private:
boost::shared_ptr<AnubisSearcher> _anubisSearcher;
boost::shared_ptr<ConcordiaSearcher> _concordiaSearcher;
};
#endif

View File

@ -1,5 +1,5 @@
add_library(concordia-tests
test_anubis_searcher.cpp
test_concordia_searcher.cpp
test_sentence_anonymizer.cpp
test_text_utils.cpp
test_regex_replacement.cpp

View File

@ -2,7 +2,7 @@
#include "tests/unit-tests/unit_tests_globals.hpp"
#include "concordia/tm_matches.hpp"
#include "concordia/anubis_searcher.hpp"
#include "concordia/concordia_searcher.hpp"
#include "concordia/concordia_index.hpp"
#include "concordia/concordia_config.hpp"
#include "concordia/example.hpp"
@ -12,11 +12,11 @@
#include "concordia/common/logging.hpp"
#include "tests/common/test_resources_manager.hpp"
BOOST_AUTO_TEST_SUITE(anubis_searcher)
BOOST_AUTO_TEST_SUITE(concordia_searcher)
BOOST_AUTO_TEST_CASE( LcpSearch1 )
{
AnubisSearcher searcher;
ConcordiaSearcher searcher;
boost::shared_ptr<std::vector<sauchar_t> > T(new std::vector<sauchar_t>());
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers(new std::vector<SUFFIX_MARKER_TYPE>());
boost::shared_ptr<std::vector<saidx_t> > SA(new std::vector<saidx_t>());
@ -333,7 +333,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
BOOST_AUTO_TEST_CASE( TmMatchesTest )
{
AnubisSearcher searcher;
ConcordiaSearcher searcher;
/*The test index contains 3 sentences:
14: "Ala posiada kota"