working full search
This commit is contained in:
parent
5a7cbbe9e9
commit
ec621fb310
@ -226,8 +226,8 @@ MatchedPatternFragment Concordia::simpleSearch(
|
|||||||
|
|
||||||
OccurencesList Concordia::fullSearch(
|
OccurencesList Concordia::fullSearch(
|
||||||
const std::string & pattern,
|
const std::string & pattern,
|
||||||
SUFFIX_MARKER_TYPE limit,
|
int limit,
|
||||||
SUFFIX_MARKER_TYPE offset,
|
int offset,
|
||||||
bool byWhitespace)
|
bool byWhitespace)
|
||||||
throw(ConcordiaException) {
|
throw(ConcordiaException) {
|
||||||
if (_T->size() > 0 && pattern.size() > 0) {
|
if (_T->size() > 0 && pattern.size() > 0) {
|
||||||
|
@ -148,8 +148,8 @@ public:
|
|||||||
*/
|
*/
|
||||||
OccurencesList fullSearch(
|
OccurencesList fullSearch(
|
||||||
const std::string & pattern,
|
const std::string & pattern,
|
||||||
SUFFIX_MARKER_TYPE limit,
|
int limit,
|
||||||
SUFFIX_MARKER_TYPE offset,
|
int offset,
|
||||||
bool byWhitespace = false) throw(ConcordiaException);
|
bool byWhitespace = false) throw(ConcordiaException);
|
||||||
|
|
||||||
/*! Performs a search useful for lexicons in the following scenario:
|
/*! Performs a search useful for lexicons in the following scenario:
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
#include "concordia/common/utils.hpp"
|
#include "concordia/common/utils.hpp"
|
||||||
#include "concordia/tokenized_sentence.hpp"
|
#include "concordia/tokenized_sentence.hpp"
|
||||||
#include <boost/filesystem.hpp>
|
#include <boost/filesystem.hpp>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
IndexSearcher::IndexSearcher() {
|
IndexSearcher::IndexSearcher() {
|
||||||
_concordiaSearcher = boost::shared_ptr<ConcordiaSearcher>(
|
_concordiaSearcher = boost::shared_ptr<ConcordiaSearcher>(
|
||||||
@ -60,8 +61,8 @@ OccurencesList IndexSearcher::fullSearch(
|
|||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
const std::string & pattern,
|
const std::string & pattern,
|
||||||
SUFFIX_MARKER_TYPE limit,
|
int limit,
|
||||||
SUFFIX_MARKER_TYPE offset,
|
int offset,
|
||||||
bool byWhitespace) throw(ConcordiaException) {
|
bool byWhitespace) throw(ConcordiaException) {
|
||||||
int left;
|
int left;
|
||||||
std::vector<INDEX_CHARACTER_TYPE> hash =
|
std::vector<INDEX_CHARACTER_TYPE> hash =
|
||||||
@ -74,8 +75,13 @@ OccurencesList IndexSearcher::fullSearch(
|
|||||||
SA->data(), (saidx_t) SA->size(), &left);
|
SA->data(), (saidx_t) SA->size(), &left);
|
||||||
|
|
||||||
OccurencesList result(size);
|
OccurencesList result(size);
|
||||||
for (int i = offset; i < limit; ++i) {
|
|
||||||
saidx_t resultPos = SA->at(left + i);
|
int returnedResults = limit;
|
||||||
|
if ((size - offset) < limit) {
|
||||||
|
returnedResults = size - offset;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < returnedResults; ++i) {
|
||||||
|
saidx_t resultPos = SA->at(left + offset + i);
|
||||||
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
||||||
// As we are looking for a pattern in an array of higher
|
// As we are looking for a pattern in an array of higher
|
||||||
// resolution than the hashed index file, we might
|
// resolution than the hashed index file, we might
|
||||||
|
@ -74,8 +74,8 @@ public:
|
|||||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
|
||||||
boost::shared_ptr<std::vector<saidx_t> > SA,
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
const std::string & pattern,
|
const std::string & pattern,
|
||||||
SUFFIX_MARKER_TYPE limit,
|
int limit,
|
||||||
SUFFIX_MARKER_TYPE offset,
|
int offset,
|
||||||
bool byWhitespace = false) throw(ConcordiaException);
|
bool byWhitespace = false) throw(ConcordiaException);
|
||||||
|
|
||||||
/*! Performs a search useful for lexicons in the following scenario:
|
/*! Performs a search useful for lexicons in the following scenario:
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include <boost/shared_ptr.hpp>
|
#include <boost/shared_ptr.hpp>
|
||||||
#include <boost/algorithm/string/predicate.hpp>
|
#include <boost/algorithm/string/predicate.hpp>
|
||||||
#include <boost/filesystem.hpp>
|
#include <boost/filesystem.hpp>
|
||||||
|
#include <boost/foreach.hpp>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -184,11 +185,35 @@ BOOST_AUTO_TEST_CASE( ConcordiaFullSearch1 )
|
|||||||
|
|
||||||
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
|
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
|
||||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||||
|
OccurencesList searchResult0 = concordia2.fullSearch("okno", 10, 0);
|
||||||
|
/*
|
||||||
|
search0
|
||||||
|
occurence(exampleId=4, offset=1)
|
||||||
|
occurence(exampleId=3, offset=2)
|
||||||
|
occurence(exampleId=2, offset=2)
|
||||||
|
occurence(exampleId=4, offset=3)
|
||||||
|
occurence(exampleId=1, offset=2)
|
||||||
|
*/
|
||||||
OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
|
OccurencesList searchResult1 = concordia2.fullSearch("okno", 2, 1);
|
||||||
|
OccurencesList searchResult2 = concordia2.fullSearch("okno", 10, 3);
|
||||||
|
OccurencesList searchResult3 = concordia2.fullSearch("xxx", 10, 3);
|
||||||
|
OccurencesList searchResult4 = concordia2.fullSearch("okno", 10, 6);
|
||||||
|
|
||||||
concordia2.clearIndex();
|
concordia2.clearIndex();
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 10);
|
BOOST_CHECK_EQUAL(searchResult1.getTotalCount(), 5);
|
||||||
|
BOOST_CHECK_EQUAL(searchResult1.getOccurences().size(), 2);
|
||||||
|
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(0).getId(), 3);
|
||||||
|
BOOST_CHECK_EQUAL(searchResult1.getOccurences().at(1).getId(), 2);
|
||||||
|
|
||||||
|
BOOST_CHECK_EQUAL(searchResult2.getTotalCount(), 5);
|
||||||
|
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(0).getId(), 4);
|
||||||
|
BOOST_CHECK_EQUAL(searchResult2.getOccurences().at(1).getId(), 1);
|
||||||
|
|
||||||
|
|
||||||
|
BOOST_CHECK_EQUAL(searchResult3.getTotalCount(), 0);
|
||||||
|
BOOST_CHECK_EQUAL(searchResult4.getTotalCount(), 5);
|
||||||
|
BOOST_CHECK_EQUAL(searchResult4.getOccurences().size(), 0);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user