anubis searcher -> concordia searcher
Former-commit-id: 8afe194adf3163ee62caa30732d9c9dd095df66b
This commit is contained in:
parent
23aa113747
commit
bb7608d05e
2
TODO.txt
2
TODO.txt
@ -1,6 +1,6 @@
|
||||
---------------------------- Developer's private notes -----------------------------
|
||||
|
||||
|
||||
- document the code
|
||||
IN PROGRESS - concordia search zwraca pozycje tokenów z hash'a. Jak to odnieść do examples w korpusie?
|
||||
- testy zużycia pamięci
|
||||
- Prawdopodobnie długość example w markers będzie potrzebna tylko anubisowi (który, jak się okazuje, jest wolny). Pomyśleć, do czego można wykorzystać markery, bo ich idea wydaje się niezła.
|
||||
|
@ -8,7 +8,7 @@ endforeach(dir)
|
||||
add_library(concordia SHARED
|
||||
concordia_search_result.cpp
|
||||
matched_pattern_fragment.cpp
|
||||
anubis_searcher.cpp
|
||||
concordia_searcher.cpp
|
||||
regex_replacement.cpp
|
||||
sentence_anonymizer.cpp
|
||||
interval.cpp
|
||||
@ -33,7 +33,9 @@ add_subdirectory(t)
|
||||
|
||||
install(TARGETS concordia DESTINATION lib/)
|
||||
install(FILES
|
||||
anubis_searcher.hpp
|
||||
concordia_search_result.hpp
|
||||
matched_pattern_fragment.hpp
|
||||
concordia_searcher.hpp
|
||||
regex_replacement.hpp
|
||||
sentence_anonymizer.hpp
|
||||
interval.hpp
|
||||
|
@ -1,17 +1,17 @@
|
||||
#include "concordia/anubis_searcher.hpp"
|
||||
#include "concordia/concordia_searcher.hpp"
|
||||
|
||||
#include "concordia/common/logging.hpp"
|
||||
#include <boost/foreach.hpp>
|
||||
#include <iostream>
|
||||
|
||||
AnubisSearcher::AnubisSearcher() {
|
||||
ConcordiaSearcher::ConcordiaSearcher() {
|
||||
}
|
||||
|
||||
|
||||
AnubisSearcher::~AnubisSearcher() {
|
||||
ConcordiaSearcher::~ConcordiaSearcher() {
|
||||
}
|
||||
|
||||
void AnubisSearcher::concordiaSearch(
|
||||
void ConcordiaSearcher::concordiaSearch(
|
||||
boost::shared_ptr<ConcordiaSearchResult> result,
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
@ -51,7 +51,7 @@ void AnubisSearcher::concordiaSearch(
|
||||
result->sortFragments();
|
||||
}
|
||||
|
||||
std::vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
|
||||
std::vector<AnubisSearchResult> ConcordiaSearcher::anubisSearch(
|
||||
boost::shared_ptr<ConcordiaConfig> config,
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
@ -83,7 +83,7 @@ std::vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
|
||||
return result;
|
||||
}
|
||||
|
||||
boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
|
||||
boost::shared_ptr<TmMatchesMap> ConcordiaSearcher::getTmMatches(
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
@ -156,7 +156,7 @@ boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
|
||||
return tmMatchesMap;
|
||||
}
|
||||
|
||||
std::vector<SubstringOccurence> AnubisSearcher::lcpSearch(
|
||||
std::vector<SubstringOccurence> ConcordiaSearcher::lcpSearch(
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
@ -209,7 +209,7 @@ std::vector<SubstringOccurence> AnubisSearcher::lcpSearch(
|
||||
return result;
|
||||
}
|
||||
|
||||
void AnubisSearcher::_collectResults(
|
||||
void ConcordiaSearcher::_collectResults(
|
||||
std::vector<SubstringOccurence> & result,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
@ -232,7 +232,7 @@ void AnubisSearcher::_collectResults(
|
||||
}
|
||||
}
|
||||
|
||||
void AnubisSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
void ConcordiaSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<TmMatchesMap> tmMatchesMap,
|
||||
saidx_t sa_pos,
|
||||
@ -249,7 +249,7 @@ void AnubisSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
}
|
||||
}
|
||||
|
||||
bool AnubisSearcher::_getOccurenceFromSA(
|
||||
bool ConcordiaSearcher::_getOccurenceFromSA(
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
saidx_t sa_pos,
|
||||
@ -263,7 +263,7 @@ bool AnubisSearcher::_getOccurenceFromSA(
|
||||
}
|
||||
}
|
||||
|
||||
void AnubisSearcher::_addOccurenceToMap(
|
||||
void ConcordiaSearcher::_addOccurenceToMap(
|
||||
boost::shared_ptr<TmMatchesMap> tmMatchesMap,
|
||||
SubstringOccurence & occurence,
|
||||
SUFFIX_MARKER_TYPE totalPatternLength,
|
@ -16,17 +16,17 @@
|
||||
#include <divsufsort.h>
|
||||
|
||||
/*!
|
||||
Class for searching using Anubis algorithm.
|
||||
Class for searching using Concordia algorithm.
|
||||
|
||||
*/
|
||||
|
||||
class AnubisSearcher {
|
||||
class ConcordiaSearcher {
|
||||
public:
|
||||
explicit AnubisSearcher();
|
||||
explicit ConcordiaSearcher();
|
||||
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~AnubisSearcher();
|
||||
virtual ~ConcordiaSearcher();
|
||||
|
||||
void concordiaSearch(
|
||||
boost::shared_ptr<ConcordiaSearchResult> result,
|
@ -4,8 +4,8 @@
|
||||
#include <boost/filesystem.hpp>
|
||||
|
||||
IndexSearcher::IndexSearcher() {
|
||||
_anubisSearcher = boost::shared_ptr<AnubisSearcher>(
|
||||
new AnubisSearcher());
|
||||
_concordiaSearcher = boost::shared_ptr<ConcordiaSearcher>(
|
||||
new ConcordiaSearcher());
|
||||
}
|
||||
|
||||
|
||||
@ -57,7 +57,7 @@ std::vector<AnubisSearchResult> IndexSearcher::anubisSearch(
|
||||
const std::string & pattern) throw(ConcordiaException) {
|
||||
std::vector<INDEX_CHARACTER_TYPE> hash =
|
||||
hashGenerator->generateHash(pattern);
|
||||
return _anubisSearcher->anubisSearch(config, T, markers, SA, hash);
|
||||
return _concordiaSearcher->anubisSearch(config, T, markers, SA, hash);
|
||||
}
|
||||
|
||||
boost::shared_ptr<ConcordiaSearchResult> IndexSearcher::concordiaSearch(
|
||||
@ -72,6 +72,6 @@ boost::shared_ptr<ConcordiaSearchResult> IndexSearcher::concordiaSearch(
|
||||
boost::shared_ptr<ConcordiaSearchResult>(
|
||||
new ConcordiaSearchResult(hashGenerator->generateTokenVector(pattern)));
|
||||
|
||||
_anubisSearcher->concordiaSearch(result, T, markers, SA, hash);
|
||||
_concordiaSearcher->concordiaSearch(result, T, markers, SA, hash);
|
||||
return result;
|
||||
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "concordia/substring_occurence.hpp"
|
||||
#include "concordia/hash_generator.hpp"
|
||||
#include "concordia/concordia_exception.hpp"
|
||||
#include "concordia/anubis_searcher.hpp"
|
||||
#include "concordia/concordia_searcher.hpp"
|
||||
#include "concordia/anubis_search_result.hpp"
|
||||
|
||||
#include <divsufsort.h>
|
||||
@ -51,7 +51,7 @@ public:
|
||||
const std::string & pattern) throw(ConcordiaException);
|
||||
|
||||
private:
|
||||
boost::shared_ptr<AnubisSearcher> _anubisSearcher;
|
||||
boost::shared_ptr<ConcordiaSearcher> _concordiaSearcher;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
add_library(concordia-tests
|
||||
test_anubis_searcher.cpp
|
||||
test_concordia_searcher.cpp
|
||||
test_sentence_anonymizer.cpp
|
||||
test_text_utils.cpp
|
||||
test_regex_replacement.cpp
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include "tests/unit-tests/unit_tests_globals.hpp"
|
||||
#include "concordia/tm_matches.hpp"
|
||||
#include "concordia/anubis_searcher.hpp"
|
||||
#include "concordia/concordia_searcher.hpp"
|
||||
#include "concordia/concordia_index.hpp"
|
||||
#include "concordia/concordia_config.hpp"
|
||||
#include "concordia/example.hpp"
|
||||
@ -12,11 +12,11 @@
|
||||
#include "concordia/common/logging.hpp"
|
||||
#include "tests/common/test_resources_manager.hpp"
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(anubis_searcher)
|
||||
BOOST_AUTO_TEST_SUITE(concordia_searcher)
|
||||
|
||||
BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
||||
{
|
||||
AnubisSearcher searcher;
|
||||
ConcordiaSearcher searcher;
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T(new std::vector<sauchar_t>());
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers(new std::vector<SUFFIX_MARKER_TYPE>());
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA(new std::vector<saidx_t>());
|
||||
@ -333,7 +333,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
|
||||
|
||||
BOOST_AUTO_TEST_CASE( TmMatchesTest )
|
||||
{
|
||||
AnubisSearcher searcher;
|
||||
ConcordiaSearcher searcher;
|
||||
|
||||
/*The test index contains 3 sentences:
|
||||
14: "Ala posiada kota"
|
Loading…
Reference in New Issue
Block a user