working full search
This commit is contained in:
parent
5a7cbbe9e9
commit
ec621fb310
@ -226,8 +226,8 @@ MatchedPatternFragment Concordia::simpleSearch(
|
||||
|
||||
OccurencesList Concordia::fullSearch(
|
||||
const std::string & pattern,
|
||||
SUFFIX_MARKER_TYPE limit,
|
||||
SUFFIX_MARKER_TYPE offset,
|
||||
int limit,
|
||||
int offset,
|
||||
bool byWhitespace)
|
||||
throw(ConcordiaException) {
|
||||
if (_T->size() > 0 && pattern.size() > 0) {
|
||||
|
@ -148,8 +148,8 @@ public:
|
||||
*/
|
||||
OccurencesList fullSearch(
|
||||
const std::string & pattern,
|
||||
SUFFIX_MARKER_TYPE limit,
|
||||
SUFFIX_MARKER_TYPE offset,
|
||||
int limit,
|
||||
int offset,
|
||||
bool byWhitespace = false) throw(ConcordiaException);
|
||||
|
||||
/*! Performs a search useful for lexicons in the following scenario:
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "concordia/common/utils.hpp"
|
||||
#include "concordia/tokenized_sentence.hpp"
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <algorithm>
|
||||
|
||||
IndexSearcher::IndexSearcher() {
|
||||
_concordiaSearcher = boost::shared_ptr<ConcordiaSearcher>(
|
||||
@ -60,8 +61,8 @@ OccurencesList IndexSearcher::fullSearch(
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
const std::string & pattern,
|
||||
SUFFIX_MARKER_TYPE limit,
|
||||
SUFFIX_MARKER_TYPE offset,
|
||||
int limit,
|
||||
int offset,
|
||||
bool byWhitespace) throw(ConcordiaException) {
|
||||
int left;
|
||||
std::vector<INDEX_CHARACTER_TYPE> hash =
|
||||
@ -74,8 +75,13 @@ OccurencesList IndexSearcher::fullSearch(
|
||||
SA->data(), (saidx_t) SA->size(), &left);
|
||||
|
||||
OccurencesList result(size);
|
||||
for (int i = offset; i < limit; ++i) {
|
||||
saidx_t resultPos = SA->at(left + i);
|
||||
|
||||
int returnedResults = limit;
|
||||
if ((size - offset) < limit) {
|
||||
returnedResults = size - offset;
|
||||
}
|
||||
for (int i = 0; i < returnedResults; ++i) {
|
||||
saidx_t resultPos = SA->at(left + offset + i);
|
||||
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
||||
// As we are looking for a pattern in an array of higher
|
||||
// resolution than the hashed index file, we might
|
||||
|
@ -74,8 +74,8 @@ public:
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||
const std::string & pattern,
|
||||
SUFFIX_MARKER_TYPE limit,
|
||||
SUFFIX_MARKER_TYPE offset,
|
||||
int limit,
|
||||
int offset,
|
||||
bool byWhitespace = false) throw(ConcordiaException);
|
||||
|
||||
/*! Performs a search useful for lexicons in the following scenario:
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@ -184,11 +185,35 @@ BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
|
||||
|
||||
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
OccurencesList searchResult0 = concordia2.fullSearch("okno", 10, 0);
|
||||
/*
|
||||
search0
|
||||
occurence(exampleId=4, offset=1)
|
||||
occurence(exampleId=3, offset=2)
|
||||
occurence(exampleId=2, offset=2)
|
||||
occurence(exampleId=4, offset=3)
|
||||
occurence(exampleId=1, offset=2)
|
||||
*/
|
||||
OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
|
||||
OccurencesList searchResult2 = concordia2.fullSearch("okno", 10, 3);
|
||||
OccurencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3);
|
||||
OccurencesList searchResult4 = concordia2.fullSearch("okno", 10, 6);
|
||||
|
||||
concordia2.clearIndex();
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 10);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 5);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 3);
|
||||
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 2);
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult2.getTotalCount(), 5);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 4);
|
||||
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 1);
|
||||
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult3.getTotalCount(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult4.getTotalCount(), 5);
|
||||
BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 0);
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user