diff --git a/TODO.txt b/TODO.txt index 848cce2..5923311 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,6 +1,6 @@ ---------------------------- Developer's private notes ----------------------------- - +- document the code IN PROGRESS - concordia search zwraca pozycje tokenów z hash'a. Jak to odnieść do examples w korpusie? - testy zużycia pamięci - Prawdopodobnie długość example w markers będzie potrzebna tylko anubisowi (który, jak się okazuje, jest wolny). Pomyśleć, do czego można wykorzystać markery, bo ich idea wydaje się niezła. diff --git a/concordia/CMakeLists.txt b/concordia/CMakeLists.txt index 336672e..aac35db 100644 --- a/concordia/CMakeLists.txt +++ b/concordia/CMakeLists.txt @@ -8,7 +8,7 @@ endforeach(dir) add_library(concordia SHARED concordia_search_result.cpp matched_pattern_fragment.cpp - anubis_searcher.cpp + concordia_searcher.cpp regex_replacement.cpp sentence_anonymizer.cpp interval.cpp @@ -33,7 +33,9 @@ add_subdirectory(t) install(TARGETS concordia DESTINATION lib/) install(FILES - anubis_searcher.hpp + concordia_search_result.hpp + matched_pattern_fragment.hpp + concordia_searcher.hpp regex_replacement.hpp sentence_anonymizer.hpp interval.hpp diff --git a/concordia/anubis_searcher.cpp b/concordia/concordia_searcher.cpp similarity index 94% rename from concordia/anubis_searcher.cpp rename to concordia/concordia_searcher.cpp index 2870f97..192fb4e 100644 --- a/concordia/anubis_searcher.cpp +++ b/concordia/concordia_searcher.cpp @@ -1,17 +1,17 @@ -#include "concordia/anubis_searcher.hpp" +#include "concordia/concordia_searcher.hpp" #include "concordia/common/logging.hpp" #include #include -AnubisSearcher::AnubisSearcher() { +ConcordiaSearcher::ConcordiaSearcher() { } -AnubisSearcher::~AnubisSearcher() { +ConcordiaSearcher::~ConcordiaSearcher() { } -void AnubisSearcher::concordiaSearch( +void ConcordiaSearcher::concordiaSearch( boost::shared_ptr result, boost::shared_ptr > T, boost::shared_ptr > markers, @@ -51,7 +51,7 @@ void AnubisSearcher::concordiaSearch( result->sortFragments(); } -std::vector AnubisSearcher::anubisSearch( +std::vector ConcordiaSearcher::anubisSearch( boost::shared_ptr config, boost::shared_ptr > T, boost::shared_ptr > markers, @@ -83,7 +83,7 @@ std::vector AnubisSearcher::anubisSearch( return result; } -boost::shared_ptr AnubisSearcher::getTmMatches( +boost::shared_ptr ConcordiaSearcher::getTmMatches( boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, @@ -156,7 +156,7 @@ boost::shared_ptr AnubisSearcher::getTmMatches( return tmMatchesMap; } -std::vector AnubisSearcher::lcpSearch( +std::vector ConcordiaSearcher::lcpSearch( boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, @@ -209,7 +209,7 @@ std::vector AnubisSearcher::lcpSearch( return result; } -void AnubisSearcher::_collectResults( +void ConcordiaSearcher::_collectResults( std::vector & result, boost::shared_ptr > markers, boost::shared_ptr > SA, @@ -232,7 +232,7 @@ void AnubisSearcher::_collectResults( } } -void AnubisSearcher::_addToMap(boost::shared_ptr > SA, +void ConcordiaSearcher::_addToMap(boost::shared_ptr > SA, boost::shared_ptr > markers, boost::shared_ptr tmMatchesMap, saidx_t sa_pos, @@ -249,7 +249,7 @@ void AnubisSearcher::_addToMap(boost::shared_ptr > SA, } } -bool AnubisSearcher::_getOccurenceFromSA( +bool ConcordiaSearcher::_getOccurenceFromSA( boost::shared_ptr > SA, boost::shared_ptr > markers, saidx_t sa_pos, @@ -263,7 +263,7 @@ bool AnubisSearcher::_getOccurenceFromSA( } } -void AnubisSearcher::_addOccurenceToMap( +void ConcordiaSearcher::_addOccurenceToMap( boost::shared_ptr tmMatchesMap, SubstringOccurence & occurence, SUFFIX_MARKER_TYPE totalPatternLength, diff --git a/concordia/anubis_searcher.hpp b/concordia/concordia_searcher.hpp similarity index 96% rename from concordia/anubis_searcher.hpp rename to concordia/concordia_searcher.hpp index 1419e12..5b97aea 100644 --- a/concordia/anubis_searcher.hpp +++ b/concordia/concordia_searcher.hpp @@ -16,17 +16,17 @@ #include /*! - Class for searching using Anubis algorithm. + Class for searching using Concordia algorithm. */ -class AnubisSearcher { +class ConcordiaSearcher { public: - explicit AnubisSearcher(); + explicit ConcordiaSearcher(); /*! Destructor. */ - virtual ~AnubisSearcher(); + virtual ~ConcordiaSearcher(); void concordiaSearch( boost::shared_ptr result, diff --git a/concordia/index_searcher.cpp b/concordia/index_searcher.cpp index 8bd3a8d..56c6bf0 100644 --- a/concordia/index_searcher.cpp +++ b/concordia/index_searcher.cpp @@ -4,8 +4,8 @@ #include IndexSearcher::IndexSearcher() { - _anubisSearcher = boost::shared_ptr( - new AnubisSearcher()); + _concordiaSearcher = boost::shared_ptr( + new ConcordiaSearcher()); } @@ -57,7 +57,7 @@ std::vector IndexSearcher::anubisSearch( const std::string & pattern) throw(ConcordiaException) { std::vector hash = hashGenerator->generateHash(pattern); - return _anubisSearcher->anubisSearch(config, T, markers, SA, hash); + return _concordiaSearcher->anubisSearch(config, T, markers, SA, hash); } boost::shared_ptr IndexSearcher::concordiaSearch( @@ -72,6 +72,6 @@ boost::shared_ptr IndexSearcher::concordiaSearch( boost::shared_ptr( new ConcordiaSearchResult(hashGenerator->generateTokenVector(pattern))); - _anubisSearcher->concordiaSearch(result, T, markers, SA, hash); + _concordiaSearcher->concordiaSearch(result, T, markers, SA, hash); return result; } diff --git a/concordia/index_searcher.hpp b/concordia/index_searcher.hpp index 5803a80..bdd9377 100644 --- a/concordia/index_searcher.hpp +++ b/concordia/index_searcher.hpp @@ -10,7 +10,7 @@ #include "concordia/substring_occurence.hpp" #include "concordia/hash_generator.hpp" #include "concordia/concordia_exception.hpp" -#include "concordia/anubis_searcher.hpp" +#include "concordia/concordia_searcher.hpp" #include "concordia/anubis_search_result.hpp" #include @@ -51,7 +51,7 @@ public: const std::string & pattern) throw(ConcordiaException); private: - boost::shared_ptr _anubisSearcher; + boost::shared_ptr _concordiaSearcher; }; #endif diff --git a/concordia/t/CMakeLists.txt b/concordia/t/CMakeLists.txt index e27d6af..5886596 100644 --- a/concordia/t/CMakeLists.txt +++ b/concordia/t/CMakeLists.txt @@ -1,5 +1,5 @@ add_library(concordia-tests - test_anubis_searcher.cpp + test_concordia_searcher.cpp test_sentence_anonymizer.cpp test_text_utils.cpp test_regex_replacement.cpp diff --git a/concordia/t/test_anubis_searcher.cpp b/concordia/t/test_concordia_searcher.cpp similarity index 98% rename from concordia/t/test_anubis_searcher.cpp rename to concordia/t/test_concordia_searcher.cpp index 5910b4a..4f11024 100644 --- a/concordia/t/test_anubis_searcher.cpp +++ b/concordia/t/test_concordia_searcher.cpp @@ -2,7 +2,7 @@ #include "tests/unit-tests/unit_tests_globals.hpp" #include "concordia/tm_matches.hpp" -#include "concordia/anubis_searcher.hpp" +#include "concordia/concordia_searcher.hpp" #include "concordia/concordia_index.hpp" #include "concordia/concordia_config.hpp" #include "concordia/example.hpp" @@ -12,11 +12,11 @@ #include "concordia/common/logging.hpp" #include "tests/common/test_resources_manager.hpp" -BOOST_AUTO_TEST_SUITE(anubis_searcher) +BOOST_AUTO_TEST_SUITE(concordia_searcher) BOOST_AUTO_TEST_CASE( LcpSearch1 ) { - AnubisSearcher searcher; + ConcordiaSearcher searcher; boost::shared_ptr > T(new std::vector()); boost::shared_ptr > markers(new std::vector()); boost::shared_ptr > SA(new std::vector()); @@ -333,7 +333,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) BOOST_AUTO_TEST_CASE( TmMatchesTest ) { - AnubisSearcher searcher; + ConcordiaSearcher searcher; /*The test index contains 3 sentences: 14: "Ala posiada kota"