Make full search work: apply limit and offset correctly when paging results

This commit is contained in:
rjawor 2019-01-09 18:31:52 +01:00
parent 5a7cbbe9e9
commit ec621fb310
5 changed files with 42 additions and 11 deletions

View File

@ -226,8 +226,8 @@ MatchedPatternFragment Concordia::simpleSearch(
OccurencesList Concordia::fullSearch(
const std::string & pattern,
SUFFIX_MARKER_TYPE limit,
SUFFIX_MARKER_TYPE offset,
int limit,
int offset,
bool byWhitespace)
throw(ConcordiaException) {
if (_T->size() > 0 && pattern.size() > 0) {

View File

@ -148,8 +148,8 @@ public:
*/
OccurencesList fullSearch(
const std::string & pattern,
SUFFIX_MARKER_TYPE limit,
SUFFIX_MARKER_TYPE offset,
int limit,
int offset,
bool byWhitespace = false) throw(ConcordiaException);
/*! Performs a search useful for lexicons in the following scenario:

View File

@ -3,6 +3,7 @@
#include "concordia/common/utils.hpp"
#include "concordia/tokenized_sentence.hpp"
#include <boost/filesystem.hpp>
#include <algorithm>
IndexSearcher::IndexSearcher() {
_concordiaSearcher = boost::shared_ptr<ConcordiaSearcher>(
@ -60,8 +61,8 @@ OccurencesList IndexSearcher::fullSearch(
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
const std::string & pattern,
SUFFIX_MARKER_TYPE limit,
SUFFIX_MARKER_TYPE offset,
int limit,
int offset,
bool byWhitespace) throw(ConcordiaException) {
int left;
std::vector<INDEX_CHARACTER_TYPE> hash =
@ -74,8 +75,13 @@ OccurencesList IndexSearcher::fullSearch(
SA->data(), (saidx_t) SA->size(), &left);
OccurencesList result(size);
for (int i = offset; i < limit; ++i) {
saidx_t resultPos = SA->at(left + i);
int returnedResults = limit;
if ((size - offset) < limit) {
returnedResults = size - offset;
}
for (int i = 0; i < returnedResults; ++i) {
saidx_t resultPos = SA->at(left + offset + i);
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
// As we are looking for a pattern in an array of higher
// resolution than the hashed index file, we might

View File

@ -74,8 +74,8 @@ public:
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA,
const std::string & pattern,
SUFFIX_MARKER_TYPE limit,
SUFFIX_MARKER_TYPE offset,
int limit,
int offset,
bool byWhitespace = false) throw(ConcordiaException);
/*! Performs a search useful for lexicons in the following scenario:

View File

@ -10,6 +10,7 @@
#include <boost/shared_ptr.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <string>
#include <vector>
@ -184,11 +185,35 @@ BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
OccurencesList searchResult0 = concordia2.fullSearch("okno", 10, 0);
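/*
Complete result set for "okno" (searchResult0), in the order returned by
fullSearch. The paged searches below are checked against this ordering:
occurrence(exampleId=4, offset=1)
occurrence(exampleId=3, offset=2)
occurrence(exampleId=2, offset=2)
occurrence(exampleId=4, offset=3)
occurrence(exampleId=1, offset=2)
*/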
OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
OccurencesList searchResult2 = concordia2.fullSearch("okno", 10, 3);
OccurencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3);
OccurencesList searchResult4 = concordia2.fullSearch("okno", 10, 6);
concordia2.clearIndex();
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 10);
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 5);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 3);
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 2);
BOOST_CHECK_EQUAL(searchResult2.getTotalCount(), 5);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 4);
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 1);
BOOST_CHECK_EQUAL(searchResult3.getTotalCount(), 0);
BOOST_CHECK_EQUAL(searchResult4.getTotalCount(), 5);
BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 0);
}