working full search

This commit is contained in:
rjawor 2019-01-09 18:31:52 +01:00
parent 5a7cbbe9e9
commit ec621fb310
5 changed files with 42 additions and 11 deletions

View File

@ -226,8 +226,8 @@ MatchedPatternFragment Concordia::simpleSearch(
OccurencesList Concordia::fullSearch( OccurencesList Concordia::fullSearch(
const std::string & pattern, const std::string & pattern,
SUFFIX_MARKER_TYPE limit, int limit,
SUFFIX_MARKER_TYPE offset, int offset,
bool byWhitespace) bool byWhitespace)
throw(ConcordiaException) { throw(ConcordiaException) {
if (_T->size() > 0 && pattern.size() > 0) { if (_T->size() > 0 && pattern.size() > 0) {

View File

@ -148,8 +148,8 @@ public:
*/ */
OccurencesList fullSearch( OccurencesList fullSearch(
const std::string & pattern, const std::string & pattern,
SUFFIX_MARKER_TYPE limit, int limit,
SUFFIX_MARKER_TYPE offset, int offset,
bool byWhitespace = false) throw(ConcordiaException); bool byWhitespace = false) throw(ConcordiaException);
/*! Performs a search useful for lexicons in the following scenario: /*! Performs a search useful for lexicons in the following scenario:

View File

@ -3,6 +3,7 @@
#include "concordia/common/utils.hpp" #include "concordia/common/utils.hpp"
#include "concordia/tokenized_sentence.hpp" #include "concordia/tokenized_sentence.hpp"
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
#include <algorithm>
IndexSearcher::IndexSearcher() { IndexSearcher::IndexSearcher() {
_concordiaSearcher = boost::shared_ptr<ConcordiaSearcher>( _concordiaSearcher = boost::shared_ptr<ConcordiaSearcher>(
@ -60,8 +61,8 @@ OccurencesList IndexSearcher::fullSearch(
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const std::string & pattern, const std::string & pattern,
SUFFIX_MARKER_TYPE limit, int limit,
SUFFIX_MARKER_TYPE offset, int offset,
bool byWhitespace) throw(ConcordiaException) { bool byWhitespace) throw(ConcordiaException) {
int left; int left;
std::vector<INDEX_CHARACTER_TYPE> hash = std::vector<INDEX_CHARACTER_TYPE> hash =
@ -74,8 +75,13 @@ OccurencesList IndexSearcher::fullSearch(
SA->data(), (saidx_t) SA->size(), &left); SA->data(), (saidx_t) SA->size(), &left);
OccurencesList result(size); OccurencesList result(size);
for (int i = offset; i < limit; ++i) {
saidx_t resultPos = SA->at(left + i); int returnedResults = limit;
if ((size - offset) < limit) {
returnedResults = size - offset;
}
for (int i = 0; i < returnedResults; ++i) {
saidx_t resultPos = SA->at(left + offset + i);
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) { if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
// As we are looking for a pattern in an array of higher // As we are looking for a pattern in an array of higher
// resolution than the hashed index file, we might // resolution than the hashed index file, we might

View File

@ -74,8 +74,8 @@ public:
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const std::string & pattern, const std::string & pattern,
SUFFIX_MARKER_TYPE limit, int limit,
SUFFIX_MARKER_TYPE offset, int offset,
bool byWhitespace = false) throw(ConcordiaException); bool byWhitespace = false) throw(ConcordiaException);
/*! Performs a search useful for lexicons in the following scenario: /*! Performs a search useful for lexicons in the following scenario:

View File

@ -10,6 +10,7 @@
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/algorithm/string/predicate.hpp> #include <boost/algorithm/string/predicate.hpp>
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <string> #include <string>
#include <vector> #include <vector>
@ -184,11 +185,35 @@ BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(), Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
OccurencesList searchResult0 = concordia2.fullSearch("okno", 10, 0);
/*
search0
occurence(exampleId=4, offset=1)
occurence(exampleId=3, offset=2)
occurence(exampleId=2, offset=2)
occurence(exampleId=4, offset=3)
occurence(exampleId=1, offset=2)
*/
OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1); OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
OccurencesList searchResult2 = concordia2.fullSearch("okno", 10, 3);
OccurencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3);
OccurencesList searchResult4 = concordia2.fullSearch("okno", 10, 6);
concordia2.clearIndex(); concordia2.clearIndex();
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 10); BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 5);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 3);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 2);
BOOST_CHECK_EQUAL(searchResult2.getTotalCount(), 5);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 4);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 1);
BOOST_CHECK_EQUAL(searchResult3.getTotalCount(), 0);
BOOST_CHECK_EQUAL(searchResult4.getTotalCount(), 5);
BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 0);
} }