From ec621fb3105fdc04d141f69d1c0b38b1e315b491 Mon Sep 17 00:00:00 2001 From: rjawor Date: Wed, 9 Jan 2019 18:31:52 +0100 Subject: [PATCH] working full search --- concordia/concordia.cpp | 4 ++-- concordia/concordia.hpp | 4 ++-- concordia/index_searcher.cpp | 14 ++++++++++---- concordia/index_searcher.hpp | 4 ++-- concordia/t/test_concordia.cpp | 27 ++++++++++++++++++++++++++- 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/concordia/concordia.cpp b/concordia/concordia.cpp index f15b1b5..8501edc 100644 --- a/concordia/concordia.cpp +++ b/concordia/concordia.cpp @@ -226,8 +226,8 @@ MatchedPatternFragment Concordia::simpleSearch( OccurencesList Concordia::fullSearch( const std::string & pattern, - SUFFIX_MARKER_TYPE limit, - SUFFIX_MARKER_TYPE offset, + int limit, + int offset, bool byWhitespace) throw(ConcordiaException) { if (_T->size() > 0 && pattern.size() > 0) { diff --git a/concordia/concordia.hpp b/concordia/concordia.hpp index 67ab351..2748936 100644 --- a/concordia/concordia.hpp +++ b/concordia/concordia.hpp @@ -148,8 +148,8 @@ public: */ OccurencesList fullSearch( const std::string & pattern, - SUFFIX_MARKER_TYPE limit, - SUFFIX_MARKER_TYPE offset, + int limit, + int offset, bool byWhitespace = false) throw(ConcordiaException); /*! Performs a search useful for lexicons in the following scenario: diff --git a/concordia/index_searcher.cpp b/concordia/index_searcher.cpp index c043510..5b51ac4 100644 --- a/concordia/index_searcher.cpp +++ b/concordia/index_searcher.cpp @@ -3,6 +3,7 @@ #include "concordia/common/utils.hpp" #include "concordia/tokenized_sentence.hpp" #include +#include IndexSearcher::IndexSearcher() { _concordiaSearcher = boost::shared_ptr( @@ -60,8 +61,8 @@ OccurencesList IndexSearcher::fullSearch( boost::shared_ptr > markers, boost::shared_ptr > SA, const std::string & pattern, - SUFFIX_MARKER_TYPE limit, - SUFFIX_MARKER_TYPE offset, + int limit, + int offset, bool byWhitespace) throw(ConcordiaException) { int left; std::vector hash = @@ -74,8 +75,13 @@ OccurencesList IndexSearcher::fullSearch( SA->data(), (saidx_t) SA->size(), &left); OccurencesList result(size); - for (int i = offset; i < limit; ++i) { - saidx_t resultPos = SA->at(left + i); + + int returnedResults = limit; + if ((size - offset) < limit) { + returnedResults = size - offset; + } + for (int i = 0; i < returnedResults; ++i) { + saidx_t resultPos = SA->at(left + offset + i); if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) { // As we are looking for a pattern in an array of higher // resolution than the hashed index file, we might diff --git a/concordia/index_searcher.hpp b/concordia/index_searcher.hpp index 67bea2f..25b1e9e 100644 --- a/concordia/index_searcher.hpp +++ b/concordia/index_searcher.hpp @@ -74,8 +74,8 @@ public: boost::shared_ptr > markers, boost::shared_ptr > SA, const std::string & pattern, - SUFFIX_MARKER_TYPE limit, - SUFFIX_MARKER_TYPE offset, + int limit, + int offset, bool byWhitespace = false) throw(ConcordiaException); /*! Performs a search useful for lexicons in the following scenario: diff --git a/concordia/t/test_concordia.cpp b/concordia/t/test_concordia.cpp index 67caa73..b17f64f 100644 --- a/concordia/t/test_concordia.cpp +++ b/concordia/t/test_concordia.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -184,11 +185,35 @@ BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 ) Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(), TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); + OccurencesList searchResult0 = concordia2.fullSearch("okno", 10, 0); + /* + search0 + occurence(exampleId=4, offset=1) + occurence(exampleId=3, offset=2) + occurence(exampleId=2, offset=2) + occurence(exampleId=4, offset=3) + occurence(exampleId=1, offset=2) + */ OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1); + OccurencesList searchResult2 = concordia2.fullSearch("okno", 10, 3); + OccurencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3); + OccurencesList searchResult4 = concordia2.fullSearch("okno", 10, 6); concordia2.clearIndex(); - BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 10); + BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 5); + BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2); + BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 3); + BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 2); + + BOOST_CHECK_EQUAL(searchResult2.getTotalCount(), 5); + BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 4); + BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 1); + + + BOOST_CHECK_EQUAL(searchResult3.getTotalCount(), 0); + BOOST_CHECK_EQUAL(searchResult4.getTotalCount(), 5); + BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 0); }