std vectors

Former-commit-id: 5816e87c856f7edc242cc707851a0e2ad05aeb38
This commit is contained in:
rjawor 2015-04-15 10:55:26 +02:00
parent e02bbaa0fa
commit 3a03b01f42
22 changed files with 314 additions and 319 deletions

View File

@ -21,6 +21,8 @@ IN PROGRESS 2. Wykonać anubis search na nowych markerach z długością zdania
zastanowić się nad optymalizacją: zastanowić się nad optymalizacją:
- tmMatchesMap jako normalna mapa (nie ptr_map) - tmMatchesMap jako normalna mapa (nie ptr_map)
- REJECTED LCP array - REJECTED LCP array
- !important! rezygnacja z ptr_vector (wycieki!) - !important! rezygnacja z ptr_vector
- zwracanie wektorów - zwracanie wektorów
- powyrzucać using namespace std
- profiling

View File

@ -3,7 +3,6 @@
#include <boost/program_options.hpp> #include <boost/program_options.hpp>
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
#include <boost/date_time/posix_time/posix_time.hpp> #include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/foreach.hpp> #include <boost/foreach.hpp>
@ -68,7 +67,7 @@ int main(int argc, char** argv) {
std::cout << "\tSearching for pattern: \"" << pattern << std::cout << "\tSearching for pattern: \"" << pattern <<
"\"" << std::endl; "\"" << std::endl;
time_start = boost::posix_time::microsec_clock::local_time(); time_start = boost::posix_time::microsec_clock::local_time();
boost::ptr_vector<SubstringOccurence> result = std::vector<SubstringOccurence> result =
concordia.simpleSearch(pattern); concordia.simpleSearch(pattern);
time_end = boost::posix_time::microsec_clock::local_time(); time_end = boost::posix_time::microsec_clock::local_time();
msdiff = time_end - time_start; msdiff = time_end - time_start;
@ -89,12 +88,12 @@ int main(int argc, char** argv) {
std::string line; std::string line;
if (text_file.is_open()) { if (text_file.is_open()) {
long lineCount = 0; long lineCount = 0;
boost::ptr_vector<Example> buffer; vector<Example> buffer;
boost::posix_time::ptime timeStart = boost::posix_time::ptime timeStart =
boost::posix_time::microsec_clock::local_time(); boost::posix_time::microsec_clock::local_time();
while (getline(text_file, line)) { while (getline(text_file, line)) {
lineCount++; lineCount++;
buffer.push_back(new Example(line, lineCount)); buffer.push_back(Example(line, lineCount));
if (lineCount % READ_BUFFER_LENGTH == 0) { if (lineCount % READ_BUFFER_LENGTH == 0) {
concordia.addAllExamples(buffer); concordia.addAllExamples(buffer);
buffer.clear(); buffer.clear();

View File

@ -12,16 +12,16 @@ AnubisSearcher::~AnubisSearcher() {
} }
boost::ptr_vector<AnubisSearchResult> AnubisSearcher::anubisSearch( std::vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern) const std::vector<INDEX_CHARACTER_TYPE> & pattern)
throw(ConcordiaException) { throw(ConcordiaException) {
boost::shared_ptr<TmMatchesMap> tmMatchesMap = getTmMatches(T, markers, SA, pattern); boost::shared_ptr<TmMatchesMap> tmMatchesMap = getTmMatches(T, markers, SA, pattern);
// get the tmMatches list sorted descending by score // get the tmMatches list sorted descending by score
boost::ptr_vector<AnubisSearchResult> result; std::vector<AnubisSearchResult> result;
return result; return result;
} }
@ -29,30 +29,28 @@ boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern) const std::vector<INDEX_CHARACTER_TYPE> & pattern)
throw(ConcordiaException) { throw(ConcordiaException) {
boost::shared_ptr<std::vector<sauchar_t> > patternVector = std::vector<sauchar_t> patternVector =
Utils::indexVectorToSaucharVector(pattern); Utils::indexVectorToSaucharVector(pattern);
if (patternVector->size() != if (patternVector.size() !=
pattern->size() * sizeof(INDEX_CHARACTER_TYPE)) { pattern.size() * sizeof(INDEX_CHARACTER_TYPE)) {
throw ConcordiaException("Increasing pattern resolution went wrong."); throw ConcordiaException("Increasing pattern resolution went wrong.");
} }
boost::shared_ptr<TmMatchesMap> tmMatchesMap(new TmMatchesMap()); boost::shared_ptr<TmMatchesMap> tmMatchesMap(new TmMatchesMap());
for (int offset = 0; offset < pattern->size(); offset++) { for (int offset = 0; offset < pattern.size(); offset++) {
int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE); int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE);
boost::shared_ptr<std::vector<sauchar_t> > currentPattern = std::vector<sauchar_t> currentPattern(
boost::shared_ptr<std::vector<sauchar_t> > patternVector.begin()+highResOffset, patternVector.end());
(new std::vector<sauchar_t>(
patternVector->begin()+highResOffset, patternVector->end()));
saidx_t patternLength = 0; saidx_t patternLength = 0;
saidx_t size = SA->size(); saidx_t size = SA->size();
saidx_t left = 0; saidx_t left = 0;
sauchar_t * patternArray = currentPattern->data(); sauchar_t * patternArray = currentPattern.data();
saidx_t * SAleft = SA->data(); saidx_t * SAleft = SA->data();
@ -77,19 +75,19 @@ boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
// Add to tm matches map results surrounding the main stream. // Add to tm matches map results surrounding the main stream.
// from left // from left
for (saidx_t i = prevLeft; i < left; i++) { for (saidx_t i = prevLeft; i < left; i++) {
_addToMap(SA, markers, tmMatchesMap, i, pattern->size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset); _addToMap(SA, markers, tmMatchesMap, i, pattern.size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset);
} }
// from right // from right
for (saidx_t i = left+size; i < prevLeft+prevSize; i++) { for (saidx_t i = left+size; i < prevLeft+prevSize; i++) {
_addToMap(SA, markers, tmMatchesMap, i, pattern->size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset); _addToMap(SA, markers, tmMatchesMap, i, pattern.size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset);
} }
} }
} while (patternLength < currentPattern->size() && size > 0); } while (patternLength < currentPattern.size() && size > 0);
if (size > 0) { if (size > 0) {
for (saidx_t i = left; i < left+size; i++) { for (saidx_t i = left; i < left+size; i++) {
_addToMap(SA, markers, tmMatchesMap, i, pattern->size(), patternLength / sizeof(INDEX_CHARACTER_TYPE), offset); _addToMap(SA, markers, tmMatchesMap, i, pattern.size(), patternLength / sizeof(INDEX_CHARACTER_TYPE), offset);
} }
} }
} }
@ -97,18 +95,18 @@ boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
return tmMatchesMap; return tmMatchesMap;
} }
boost::ptr_vector<SubstringOccurence> AnubisSearcher::lcpSearch( std::vector<SubstringOccurence> AnubisSearcher::lcpSearch(
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<sauchar_t> > pattern, const std::vector<sauchar_t> & pattern,
SUFFIX_MARKER_TYPE & length) SUFFIX_MARKER_TYPE & length)
throw(ConcordiaException) { throw(ConcordiaException) {
saidx_t patternLength = 0; saidx_t patternLength = 0;
saidx_t size = SA->size(); saidx_t size = SA->size();
saidx_t left = 0; saidx_t left = 0;
sauchar_t * patternArray = pattern->data(); const sauchar_t * patternArray = pattern.data();
saidx_t * SAleft = SA->data(); saidx_t * SAleft = SA->data();
@ -126,9 +124,9 @@ boost::ptr_vector<SubstringOccurence> AnubisSearcher::lcpSearch(
SAleft, size, &localLeft); SAleft, size, &localLeft);
left += localLeft; left += localLeft;
SAleft += localLeft; SAleft += localLeft;
} while (patternLength < pattern->size() && size > 0); } while (patternLength < pattern.size() && size > 0);
boost::ptr_vector<SubstringOccurence> result; vector<SubstringOccurence> result;
if (size == 0) { if (size == 0) {
// The search managed to find exactly the longest common prefixes. // The search managed to find exactly the longest common prefixes.
@ -151,7 +149,7 @@ boost::ptr_vector<SubstringOccurence> AnubisSearcher::lcpSearch(
} }
void AnubisSearcher::_collectResults( void AnubisSearcher::_collectResults(
boost::ptr_vector<SubstringOccurence> & result, vector<SubstringOccurence> & result,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
saidx_t left, saidx_t size) { saidx_t left, saidx_t size) {
@ -160,7 +158,7 @@ void AnubisSearcher::_collectResults(
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) { if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
SUFFIX_MARKER_TYPE marker = markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE)); SUFFIX_MARKER_TYPE marker = markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
result.push_back(new SubstringOccurence(marker)); result.push_back(SubstringOccurence(marker));
} }
} }
} }

View File

@ -2,7 +2,6 @@
#define ANUBIS_SEARCHER_HDR #define ANUBIS_SEARCHER_HDR
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
#include "concordia/common/utils.hpp" #include "concordia/common/utils.hpp"
@ -28,29 +27,29 @@ public:
*/ */
virtual ~AnubisSearcher(); virtual ~AnubisSearcher();
boost::ptr_vector<AnubisSearchResult> anubisSearch( std::vector<AnubisSearchResult> anubisSearch(
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern) const std::vector<INDEX_CHARACTER_TYPE> & pattern)
throw(ConcordiaException); throw(ConcordiaException);
boost::shared_ptr<TmMatchesMap> getTmMatches( boost::shared_ptr<TmMatchesMap> getTmMatches(
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern) const std::vector<INDEX_CHARACTER_TYPE> & pattern)
throw(ConcordiaException); throw(ConcordiaException);
boost::ptr_vector<SubstringOccurence> lcpSearch( std::vector<SubstringOccurence> lcpSearch(
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<sauchar_t> > pattern, const std::vector<sauchar_t> & pattern,
SUFFIX_MARKER_TYPE & length) throw(ConcordiaException); SUFFIX_MARKER_TYPE & length) throw(ConcordiaException);
private: private:
void _collectResults(boost::ptr_vector<SubstringOccurence> & result, void _collectResults(vector<SubstringOccurence> & result,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
saidx_t left, saidx_t size); saidx_t left, saidx_t size);

View File

@ -30,31 +30,39 @@ SUFFIX_MARKER_TYPE Utils::readMarker(ifstream & file) {
} }
sauchar_t * Utils::indexVectorToSaucharArray( sauchar_t * Utils::indexVectorToSaucharArray(
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input) { const vector<INDEX_CHARACTER_TYPE> & input) {
const int kArraySize = input->size()*sizeof(INDEX_CHARACTER_TYPE); const int kArraySize = input.size()*sizeof(INDEX_CHARACTER_TYPE);
sauchar_t * patternArray = sauchar_t * patternArray =
new sauchar_t[kArraySize]; new sauchar_t[kArraySize];
int pos = 0; int pos = 0;
for (vector<INDEX_CHARACTER_TYPE>::iterator it = input->begin(); for (vector<INDEX_CHARACTER_TYPE>::const_iterator it = input.begin();
it != input->end(); ++it) { it != input.end(); ++it) {
_insertCharToSaucharArray(patternArray, *it, pos); _insertCharToSaucharArray(patternArray, *it, pos);
pos += sizeof(INDEX_CHARACTER_TYPE); pos += sizeof(INDEX_CHARACTER_TYPE);
} }
return patternArray; return patternArray;
} }
boost::shared_ptr<std::vector<sauchar_t> > Utils::indexVectorToSaucharVector( std::vector<sauchar_t> Utils::indexVectorToSaucharVector(
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input) { const vector<INDEX_CHARACTER_TYPE> & input) {
boost::shared_ptr<std::vector<sauchar_t> > result = std::vector<sauchar_t> result;
boost::shared_ptr<std::vector<sauchar_t> >(new std::vector<sauchar_t>); for (vector<INDEX_CHARACTER_TYPE>::const_iterator it = input.begin();
it != input.end(); ++it) {
for (vector<INDEX_CHARACTER_TYPE>::iterator it = input->begin();
it != input->end(); ++it) {
appendCharToSaucharVector(result, *it); appendCharToSaucharVector(result, *it);
} }
return result; return result;
} }
void Utils::appendCharToSaucharVector(
std::vector<sauchar_t> & vector,
INDEX_CHARACTER_TYPE character) {
sauchar_t * characterArray = reinterpret_cast<sauchar_t *>(&character);
for (int i = 0; i < sizeof(character); i++) {
vector.push_back(characterArray[i]);
}
}
void Utils::appendCharToSaucharVector( void Utils::appendCharToSaucharVector(
boost::shared_ptr<std::vector<sauchar_t> > vector, boost::shared_ptr<std::vector<sauchar_t> > vector,
INDEX_CHARACTER_TYPE character) { INDEX_CHARACTER_TYPE character) {

View File

@ -32,18 +32,21 @@ public:
static SUFFIX_MARKER_TYPE readMarker(ifstream & file); static SUFFIX_MARKER_TYPE readMarker(ifstream & file);
static sauchar_t * indexVectorToSaucharArray( static sauchar_t * indexVectorToSaucharArray(
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input); const vector<INDEX_CHARACTER_TYPE> & input);
static boost::shared_ptr<std::vector<sauchar_t> > static std::vector<sauchar_t> indexVectorToSaucharVector(
indexVectorToSaucharVector( const vector<INDEX_CHARACTER_TYPE> & input);
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input);
static void appendCharToSaucharVector( static void appendCharToSaucharVector(
boost::shared_ptr<std::vector<sauchar_t> > vector, boost::shared_ptr<std::vector<sauchar_t> > vector,
INDEX_CHARACTER_TYPE character); INDEX_CHARACTER_TYPE character);
static void appendCharToSaucharVector(
std::vector<sauchar_t> & vector,
INDEX_CHARACTER_TYPE character);
template <typename T> template <typename T>
static void printVector(boost::shared_ptr<std::vector<T> > vector); static void printVector(const std::vector<T> & vector);
static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker); static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker);
@ -65,9 +68,9 @@ private:
}; };
template <typename T> template <typename T>
void Utils::printVector(boost::shared_ptr<std::vector<T> > vector) { void Utils::printVector(const std::vector<T> & vector) {
for (int i = 0; i < vector->size(); i++) { for (int i = 0; i < vector.size(); i++) {
cout << static_cast<int>(vector->at(i)) << " "; cout << static_cast<int>(vector.at(i)) << " ";
} }
cout << endl; cout << endl;
} }

View File

@ -51,7 +51,7 @@ void Concordia::addExample(const Example & example)
// Sentences are written to disk and added to T. // Sentences are written to disk and added to T.
// SA is generated on command by other methods. // SA is generated on command by other methods.
void Concordia::addAllExamples(const boost::ptr_vector<Example > & examples) void Concordia::addAllExamples(const std::vector<Example> & examples)
throw(ConcordiaException) { throw(ConcordiaException) {
_index->addAllExamples(_hashGenerator, _T, _markers, examples); _index->addAllExamples(_hashGenerator, _T, _markers, examples);
} }
@ -131,26 +131,26 @@ void Concordia::_initializeIndex() throw(ConcordiaException) {
} }
} }
boost::ptr_vector<SubstringOccurence> Concordia::simpleSearch( std::vector<SubstringOccurence> Concordia::simpleSearch(
const string & pattern) const string & pattern)
throw(ConcordiaException) { throw(ConcordiaException) {
if (_T->size() > 0) { if (_T->size() > 0) {
return _searcher->simpleSearch(_hashGenerator, _T, return _searcher->simpleSearch(_hashGenerator, _T,
_markers, _SA, pattern); _markers, _SA, pattern);
} else { } else {
boost::ptr_vector<SubstringOccurence> result; std::vector<SubstringOccurence> result;
return result; return result;
} }
} }
boost::ptr_vector<AnubisSearchResult> Concordia::anubisSearch( std::vector<AnubisSearchResult> Concordia::anubisSearch(
const string & pattern) const string & pattern)
throw(ConcordiaException) { throw(ConcordiaException) {
if (_T->size() > 0) { if (_T->size() > 0) {
return _searcher->anubisSearch(_hashGenerator, _T, return _searcher->anubisSearch(_hashGenerator, _T,
_markers, _SA, pattern); _markers, _SA, pattern);
} else { } else {
boost::ptr_vector<AnubisSearchResult> result; std::vector<AnubisSearchResult> result;
return result; return result;
} }
} }

View File

@ -4,7 +4,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
@ -41,15 +40,13 @@ public:
void addExample(const Example & example) throw(ConcordiaException); void addExample(const Example & example) throw(ConcordiaException);
void addAllExamples(const boost::ptr_vector<Example > & examples) void addAllExamples(const std::vector<Example> & examples)
throw(ConcordiaException); throw(ConcordiaException);
boost::ptr_vector<SubstringOccurence> simpleSearch( std::vector<SubstringOccurence> simpleSearch(const std::string & pattern)
const std::string & pattern)
throw(ConcordiaException); throw(ConcordiaException);
boost::ptr_vector<AnubisSearchResult> anubisSearch( std::vector<AnubisSearchResult> anubisSearch(const std::string & pattern)
const std::string & pattern)
throw(ConcordiaException); throw(ConcordiaException);
void loadRAMIndexFromDisk() throw(ConcordiaException); void loadRAMIndexFromDisk() throw(ConcordiaException);

View File

@ -56,7 +56,7 @@ void ConcordiaIndex::addAllExamples(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<vector<sauchar_t> > T, boost::shared_ptr<vector<sauchar_t> > T,
boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers,
const boost::ptr_vector<Example > & examples) { const vector<Example> & examples) {
ofstream hashedIndexFile; ofstream hashedIndexFile;
hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out| hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out|
ios::app|ios::binary); ios::app|ios::binary);
@ -81,11 +81,11 @@ void ConcordiaIndex::_addSingleExample(
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers,
const Example & example) { const Example & example) {
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash vector<INDEX_CHARACTER_TYPE> hash
= hashGenerator->generateHash(example.getSentence()); = hashGenerator->generateHash(example.getSentence());
int offset = 0; int offset = 0;
for (vector<INDEX_CHARACTER_TYPE>::iterator it = hash->begin(); for (vector<INDEX_CHARACTER_TYPE>::iterator it = hash.begin();
it != hash->end(); ++it) { it != hash.end(); ++it) {
INDEX_CHARACTER_TYPE character = *it; INDEX_CHARACTER_TYPE character = *it;
Utils::writeIndexCharacter(hashedIndexFile, character); Utils::writeIndexCharacter(hashedIndexFile, character);
Utils::appendCharToSaucharVector(T, character); Utils::appendCharToSaucharVector(T, character);
@ -95,7 +95,7 @@ void ConcordiaIndex::_addSingleExample(
SUFFIX_MARKER_TYPE marker = Utils::createMarker( SUFFIX_MARKER_TYPE marker = Utils::createMarker(
example.getId(), example.getId(),
offset, offset,
hash->size()); hash.size());
Utils::writeMarker(markersFile, marker); Utils::writeMarker(markersFile, marker);
markers->push_back(marker); markers->push_back(marker);

View File

@ -2,10 +2,10 @@
#define CONCORDIA_INDEX_HDR #define CONCORDIA_INDEX_HDR
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <vector>
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
#include "concordia/example.hpp" #include "concordia/example.hpp"
@ -40,7 +40,7 @@ public:
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<vector<sauchar_t> > T, boost::shared_ptr<vector<sauchar_t> > T,
boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers,
const boost::ptr_vector<Example > & examples); const vector<Example> & examples);
boost::shared_ptr<vector<saidx_t> > generateSuffixArray( boost::shared_ptr<vector<saidx_t> > generateSuffixArray(
boost::shared_ptr<vector<sauchar_t> > T); boost::shared_ptr<vector<sauchar_t> > T);

View File

@ -25,31 +25,28 @@ HashGenerator::HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
HashGenerator::~HashGenerator() { HashGenerator::~HashGenerator() {
} }
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > HashGenerator::generateHash( vector<INDEX_CHARACTER_TYPE> HashGenerator::generateHash(
const string & sentence) throw(ConcordiaException) { const string & sentence) throw(ConcordiaException) {
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > vector<INDEX_CHARACTER_TYPE> result;
result(new vector<INDEX_CHARACTER_TYPE>()); vector<string> tokenTexts = generateTokenVector(sentence);
boost::shared_ptr<vector<string> > tokenTexts = if (tokenTexts.size() > Utils::maxSentenceSize) {
generateTokenVector(sentence);
if (tokenTexts->size() > Utils::maxSentenceSize) {
throw ConcordiaException("Trying to add too long sentence."); throw ConcordiaException("Trying to add too long sentence.");
} }
for (vector<string>::iterator it = tokenTexts->begin(); for (vector<string>::iterator it = tokenTexts.begin();
it != tokenTexts->end(); ++it) { it != tokenTexts.end(); ++it) {
string token = *it; string token = *it;
INDEX_CHARACTER_TYPE code = _wordMap->getWordCode(token); INDEX_CHARACTER_TYPE code = _wordMap->getWordCode(token);
result->push_back(code); result.push_back(code);
} }
return result; return result;
} }
boost::shared_ptr<vector<string> > vector<string> HashGenerator::generateTokenVector(const string & sentence) {
HashGenerator::generateTokenVector(const string & sentence) {
string anonymizedSentence = _sentenceAnonymizer->anonymize(sentence); string anonymizedSentence = _sentenceAnonymizer->anonymize(sentence);
boost::trim(anonymizedSentence); boost::trim(anonymizedSentence);
boost::shared_ptr<vector<string> > tokenTexts(new vector<string>()); vector<string> tokenTexts;
boost::split(*tokenTexts, anonymizedSentence, boost::is_any_of(" \t\r\n"), boost::split(tokenTexts, anonymizedSentence, boost::is_any_of(" \t\r\n"),
boost::algorithm::token_compress_on); boost::algorithm::token_compress_on);
return tokenTexts; return tokenTexts;
} }

View File

@ -29,12 +29,10 @@ public:
*/ */
virtual ~HashGenerator(); virtual ~HashGenerator();
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > vector<INDEX_CHARACTER_TYPE> generateHash(const string & sentence)
generateHash(const string & sentence)
throw(ConcordiaException); throw(ConcordiaException);
boost::shared_ptr<vector<string> > vector<string> generateTokenVector(const string & sentence);
generateTokenVector(const string & sentence);
void serializeWordMap(); void serializeWordMap();

View File

@ -12,18 +12,17 @@ IndexSearcher::IndexSearcher() {
IndexSearcher::~IndexSearcher() { IndexSearcher::~IndexSearcher() {
} }
boost::ptr_vector<SubstringOccurence> IndexSearcher::simpleSearch( vector<SubstringOccurence> IndexSearcher::simpleSearch(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const string & pattern) throw(ConcordiaException) { const string & pattern) throw(ConcordiaException) {
boost::ptr_vector<SubstringOccurence> result; vector<SubstringOccurence> result;
int left; int left;
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash = vector<INDEX_CHARACTER_TYPE> hash = hashGenerator->generateHash(pattern);
hashGenerator->generateHash(pattern); saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE);
saidx_t patternLength = hash->size()*sizeof(INDEX_CHARACTER_TYPE);
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash); sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
int size = sa_search(T->data(), (saidx_t) T->size(), int size = sa_search(T->data(), (saidx_t) T->size(),
@ -40,7 +39,7 @@ boost::ptr_vector<SubstringOccurence> IndexSearcher::simpleSearch(
saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE); saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE);
SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos); SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos);
result.push_back(new SubstringOccurence(marker)); result.push_back(SubstringOccurence(marker));
} }
} }
@ -48,13 +47,12 @@ boost::ptr_vector<SubstringOccurence> IndexSearcher::simpleSearch(
return result; return result;
} }
boost::ptr_vector<AnubisSearchResult> IndexSearcher::anubisSearch( vector<AnubisSearchResult> IndexSearcher::anubisSearch(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const string & pattern) throw(ConcordiaException) { const string & pattern) throw(ConcordiaException) {
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash = vector<INDEX_CHARACTER_TYPE> hash = hashGenerator->generateHash(pattern);
hashGenerator->generateHash(pattern);
return _anubisSearcher->anubisSearch(T, markers, SA, hash); return _anubisSearcher->anubisSearch(T, markers, SA, hash);
} }

View File

@ -2,9 +2,9 @@
#define INDEX_SEARCHER_HDR #define INDEX_SEARCHER_HDR
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <vector>
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
#include "concordia/substring_occurence.hpp" #include "concordia/substring_occurence.hpp"
@ -30,14 +30,14 @@ public:
*/ */
virtual ~IndexSearcher(); virtual ~IndexSearcher();
boost::ptr_vector<SubstringOccurence> simpleSearch( vector<SubstringOccurence> simpleSearch(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const string & pattern) throw(ConcordiaException); const string & pattern) throw(ConcordiaException);
boost::ptr_vector<AnubisSearchResult> anubisSearch( vector<AnubisSearchResult> anubisSearch(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,

View File

@ -59,7 +59,7 @@ void SentenceAnonymizer::_createNeRules(string & namedEntitiesPath) {
<< " in NE file: " << namedEntitiesPath; << " in NE file: " << namedEntitiesPath;
throw ConcordiaException(ss.str()); throw ConcordiaException(ss.str());
} else { } else {
_namedEntities.push_back(new RegexReplacement( _namedEntities.push_back(RegexReplacement(
tokenTexts->at(0), tokenTexts->at(1))); tokenTexts->at(0), tokenTexts->at(1)));
} }
} }

View File

@ -2,12 +2,12 @@
#define SENTENCE_ANONYMIZER_HDR #define SENTENCE_ANONYMIZER_HDR
#include <string> #include <string>
#include <vector>
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
#include "concordia/regex_replacement.hpp" #include "concordia/regex_replacement.hpp"
#include "concordia/concordia_config.hpp" #include "concordia/concordia_config.hpp"
#include "concordia/concordia_exception.hpp" #include "concordia/concordia_exception.hpp"
#include <boost/shared_ptr.hpp> #include <boost/shared_ptr.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
@ -39,7 +39,7 @@ private:
string replacement, string replacement,
bool wholeWord = false); bool wholeWord = false);
boost::ptr_vector<RegexReplacement> _namedEntities; vector<RegexReplacement> _namedEntities;
boost::shared_ptr<RegexReplacement> _htmlTags; boost::shared_ptr<RegexReplacement> _htmlTags;

View File

@ -22,7 +22,6 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
boost::shared_ptr<std::vector<sauchar_t> > T(new std::vector<sauchar_t>()); boost::shared_ptr<std::vector<sauchar_t> > T(new std::vector<sauchar_t>());
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers(new std::vector<SUFFIX_MARKER_TYPE>()); boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers(new std::vector<SUFFIX_MARKER_TYPE>());
boost::shared_ptr<std::vector<saidx_t> > SA(new std::vector<saidx_t>()); boost::shared_ptr<std::vector<saidx_t> > SA(new std::vector<saidx_t>());
boost::shared_ptr<std::vector<sauchar_t> > pattern(new std::vector<sauchar_t>());
/* Search in text: "banana" /* Search in text: "banana"
T = 123232 (all one sentence id=34) T = 123232 (all one sentence id=34)
@ -64,25 +63,26 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
markers->push_back(Utils::createMarker(34,i,6)); markers->push_back(Utils::createMarker(34,i,6));
} }
pattern->push_back(0); std::vector<sauchar_t> pattern;
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(2); pattern.push_back(0);
pattern.push_back(2);
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(3); pattern.push_back(3);
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(4); pattern.push_back(4);
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(0); pattern.push_back(0);
pattern->push_back(4); pattern.push_back(4);
/* Suffix array for the hashed index: 0001 0002 0003 0002 0003 0002 /* Suffix array for the hashed index: 0001 0002 0003 0002 0003 0002
0:000100020003000200030002 0:000100020003000200030002
@ -137,7 +137,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
SA->push_back(11); SA->push_back(11);
SUFFIX_MARKER_TYPE highResLength; SUFFIX_MARKER_TYPE highResLength;
boost::ptr_vector<SubstringOccurence> result = searcher.lcpSearch(T, markers, SA, pattern, highResLength); std::vector<SubstringOccurence> result = searcher.lcpSearch(T, markers, SA, pattern, highResLength);
SUFFIX_MARKER_TYPE length = highResLength / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length = highResLength / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get the following results from SA: /* Expecting to get the following results from SA:
@ -155,39 +155,39 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
//--------pattern banana //--------pattern banana
boost::shared_ptr<std::vector<sauchar_t> > pattern2(new std::vector<sauchar_t>()); std::vector<sauchar_t> pattern2;
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(1); pattern2.push_back(1);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(2); pattern2.push_back(2);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(3); pattern2.push_back(3);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(2); pattern2.push_back(2);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(3); pattern2.push_back(3);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(0); pattern2.push_back(0);
pattern2->push_back(2); pattern2.push_back(2);
SUFFIX_MARKER_TYPE highResLength2; SUFFIX_MARKER_TYPE highResLength2;
boost::ptr_vector<SubstringOccurence> result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2); vector<SubstringOccurence> result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2);
SUFFIX_MARKER_TYPE length2 = highResLength2 / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length2 = highResLength2 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get one result from SA: /* Expecting to get one result from SA:
@ -203,34 +203,34 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
//--------pattern banan //--------pattern banan
boost::shared_ptr<std::vector<sauchar_t> > pattern3(new std::vector<sauchar_t>()); std::vector<sauchar_t> pattern3;
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(1); pattern3.push_back(1);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(2); pattern3.push_back(2);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(3); pattern3.push_back(3);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(2); pattern3.push_back(2);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(0); pattern3.push_back(0);
pattern3->push_back(3); pattern3.push_back(3);
SUFFIX_MARKER_TYPE highResLength3; SUFFIX_MARKER_TYPE highResLength3;
boost::ptr_vector<SubstringOccurence> result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3); vector<SubstringOccurence> result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3);
SUFFIX_MARKER_TYPE length3 = highResLength3 / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length3 = highResLength3 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get one result from SA: /* Expecting to get one result from SA:
@ -245,29 +245,29 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
//--------pattern nazz //--------pattern nazz
boost::shared_ptr<std::vector<sauchar_t> > pattern4(new std::vector<sauchar_t>()); std::vector<sauchar_t> pattern4;
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(3); pattern4.push_back(3);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(2); pattern4.push_back(2);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(4); pattern4.push_back(4);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(0); pattern4.push_back(0);
pattern4->push_back(4); pattern4.push_back(4);
SUFFIX_MARKER_TYPE highResLength4; SUFFIX_MARKER_TYPE highResLength4;
boost::ptr_vector<SubstringOccurence> result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4); vector<SubstringOccurence> result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4);
SUFFIX_MARKER_TYPE length4 = highResLength4 / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length4 = highResLength4 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get 2 results from SA: /* Expecting to get 2 results from SA:
@ -286,19 +286,19 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
//--------pattern zz //--------pattern zz
boost::shared_ptr<std::vector<sauchar_t> > pattern5(new std::vector<sauchar_t>()); std::vector<sauchar_t> pattern5;
pattern5->push_back(0); pattern5.push_back(0);
pattern5->push_back(0); pattern5.push_back(0);
pattern5->push_back(0); pattern5.push_back(0);
pattern5->push_back(4); pattern5.push_back(4);
pattern5->push_back(0); pattern5.push_back(0);
pattern5->push_back(0); pattern5.push_back(0);
pattern5->push_back(0); pattern5.push_back(0);
pattern5->push_back(4); pattern5.push_back(4);
SUFFIX_MARKER_TYPE highResLength5; SUFFIX_MARKER_TYPE highResLength5;
boost::ptr_vector<SubstringOccurence> result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5); vector<SubstringOccurence> result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5);
SUFFIX_MARKER_TYPE length5 = highResLength5 / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length5 = highResLength5 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get 0 results from SA, lcp length = 0; /* Expecting to get 0 results from SA, lcp length = 0;
@ -309,20 +309,20 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
//--------pattern existing in the text but spanning over parts of characters //--------pattern existing in the text but spanning over parts of characters
boost::shared_ptr<std::vector<sauchar_t> > pattern6(new std::vector<sauchar_t>()); std::vector<sauchar_t> pattern6;
pattern6->push_back(0); pattern6.push_back(0);
pattern6->push_back(0); pattern6.push_back(0);
pattern6->push_back(3); pattern6.push_back(3);
pattern6->push_back(0); pattern6.push_back(0);
pattern6->push_back(0); pattern6.push_back(0);
pattern6->push_back(0); pattern6.push_back(0);
pattern6->push_back(2); pattern6.push_back(2);
pattern6->push_back(0); pattern6.push_back(0);
SUFFIX_MARKER_TYPE highResLength6; SUFFIX_MARKER_TYPE highResLength6;
boost::ptr_vector<SubstringOccurence> result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6); vector<SubstringOccurence> result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6);
SUFFIX_MARKER_TYPE length6 = highResLength6 / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length6 = highResLength6 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get 0 results from SA, lcp length = 0; /* Expecting to get 0 results from SA, lcp length = 0;
@ -378,7 +378,7 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest )
// searching for pattern "Ola posiada rysia Marysia" (5 1 3 4) // searching for pattern "Ola posiada rysia Marysia" (5 1 3 4)
boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > pattern = hashGenerator->generateHash("Ola posiada rysia Marysia"); std::vector<INDEX_CHARACTER_TYPE> pattern = hashGenerator->generateHash("Ola posiada rysia Marysia");
boost::shared_ptr<TmMatchesMap> tmMatchesMap = searcher.getTmMatches(T, markers, SA, pattern); boost::shared_ptr<TmMatchesMap> tmMatchesMap = searcher.getTmMatches(T, markers, SA, pattern);
BOOST_CHECK_EQUAL(tmMatchesMap->size(), 3); BOOST_CHECK_EQUAL(tmMatchesMap->size(), 3);
@ -393,38 +393,38 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest )
// example 14 // example 14
// example interval list: [(1,2)] // example interval list: [(1,2)]
boost::ptr_vector<Interval> exampleIntervals14 = tmMatches14->getExampleIntervals(); vector<Interval> exampleIntervals14 = tmMatches14->getExampleIntervals();
BOOST_CHECK_EQUAL(exampleIntervals14.size(), 1); BOOST_CHECK_EQUAL(exampleIntervals14.size(), 1);
BOOST_CHECK_EQUAL(exampleIntervals14[0].getStart(), 1); BOOST_CHECK_EQUAL(exampleIntervals14[0].getStart(), 1);
BOOST_CHECK_EQUAL(exampleIntervals14[0].getEnd(), 2); BOOST_CHECK_EQUAL(exampleIntervals14[0].getEnd(), 2);
// pattern interval list: [(1,2)] // pattern interval list: [(1,2)]
boost::ptr_vector<Interval> patternIntervals14 = tmMatches14->getPatternIntervals(); vector<Interval> patternIntervals14 = tmMatches14->getPatternIntervals();
BOOST_CHECK_EQUAL(patternIntervals14.size(), 1); BOOST_CHECK_EQUAL(patternIntervals14.size(), 1);
BOOST_CHECK_EQUAL(patternIntervals14[0].getStart(), 1); BOOST_CHECK_EQUAL(patternIntervals14[0].getStart(), 1);
BOOST_CHECK_EQUAL(patternIntervals14[0].getEnd(), 2); BOOST_CHECK_EQUAL(patternIntervals14[0].getEnd(), 2);
// example 51 // example 51
// example interval list: [(1,3)] // example interval list: [(1,3)]
boost::ptr_vector<Interval> exampleIntervals51 = tmMatches51->getExampleIntervals(); vector<Interval> exampleIntervals51 = tmMatches51->getExampleIntervals();
BOOST_CHECK_EQUAL(exampleIntervals51.size(), 1); BOOST_CHECK_EQUAL(exampleIntervals51.size(), 1);
BOOST_CHECK_EQUAL(exampleIntervals51[0].getStart(), 1); BOOST_CHECK_EQUAL(exampleIntervals51[0].getStart(), 1);
BOOST_CHECK_EQUAL(exampleIntervals51[0].getEnd(), 3); BOOST_CHECK_EQUAL(exampleIntervals51[0].getEnd(), 3);
// pattern interval list: [(1,3)] // pattern interval list: [(1,3)]
boost::ptr_vector<Interval> patternIntervals51 = tmMatches51->getPatternIntervals(); vector<Interval> patternIntervals51 = tmMatches51->getPatternIntervals();
BOOST_CHECK_EQUAL(patternIntervals51.size(), 1); BOOST_CHECK_EQUAL(patternIntervals51.size(), 1);
BOOST_CHECK_EQUAL(patternIntervals51[0].getStart(), 1); BOOST_CHECK_EQUAL(patternIntervals51[0].getStart(), 1);
BOOST_CHECK_EQUAL(patternIntervals51[0].getEnd(), 3); BOOST_CHECK_EQUAL(patternIntervals51[0].getEnd(), 3);
// example 123 // example 123
// example interval list: [(1,3), (0,1)] // example interval list: [(1,3), (0,1)]
boost::ptr_vector<Interval> exampleIntervals123 = tmMatches123->getExampleIntervals(); vector<Interval> exampleIntervals123 = tmMatches123->getExampleIntervals();
BOOST_CHECK_EQUAL(exampleIntervals123.size(), 2); BOOST_CHECK_EQUAL(exampleIntervals123.size(), 2);
BOOST_CHECK_EQUAL(exampleIntervals123[0].getStart(), 1); BOOST_CHECK_EQUAL(exampleIntervals123[0].getStart(), 1);
BOOST_CHECK_EQUAL(exampleIntervals123[0].getEnd(), 3); BOOST_CHECK_EQUAL(exampleIntervals123[0].getEnd(), 3);
BOOST_CHECK_EQUAL(exampleIntervals123[1].getStart(), 0); BOOST_CHECK_EQUAL(exampleIntervals123[1].getStart(), 0);
BOOST_CHECK_EQUAL(exampleIntervals123[1].getEnd(), 1); BOOST_CHECK_EQUAL(exampleIntervals123[1].getEnd(), 1);
// pattern interval list: [(1,3), (3,4)] // pattern interval list: [(1,3), (3,4)]
boost::ptr_vector<Interval> patternIntervals123 = tmMatches123->getPatternIntervals(); vector<Interval> patternIntervals123 = tmMatches123->getPatternIntervals();
BOOST_CHECK_EQUAL(patternIntervals123.size(), 2); BOOST_CHECK_EQUAL(patternIntervals123.size(), 2);
BOOST_CHECK_EQUAL(patternIntervals123[0].getStart(), 1); BOOST_CHECK_EQUAL(patternIntervals123[0].getStart(), 1);
BOOST_CHECK_EQUAL(patternIntervals123[0].getEnd(), 3); BOOST_CHECK_EQUAL(patternIntervals123[0].getEnd(), 3);

View File

@ -5,7 +5,6 @@
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
#include <boost/algorithm/string/predicate.hpp> #include <boost/algorithm/string/predicate.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
#include <string> #include <string>
@ -52,8 +51,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
*/ */
boost::ptr_vector<SubstringOccurence> searchResult1 = concordia.simpleSearch("posiada rysia"); vector<SubstringOccurence> searchResult1 = concordia.simpleSearch("posiada rysia");
boost::ptr_vector<SubstringOccurence> searchResult2 = concordia.simpleSearch("posiada kota Ala"); vector<SubstringOccurence> searchResult2 = concordia.simpleSearch("posiada kota Ala");
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
@ -74,11 +73,11 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
{ {
// modified stop words to avoid anonymization // modified stop words to avoid anonymization
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
boost::ptr_vector<Example> testExamples; vector<Example> testExamples;
testExamples.push_back(new Example("xto xjest okno",312)); testExamples.push_back(Example("xto xjest okno",312));
testExamples.push_back(new Example("czy xjest okno otwarte",202)); testExamples.push_back(Example("czy xjest okno otwarte",202));
testExamples.push_back(new Example("chyba xto xjest xtutaj",45)); testExamples.push_back(Example("chyba xto xjest xtutaj",45));
testExamples.push_back(new Example("xto xjest",29)); testExamples.push_back(Example("xto xjest",29));
concordia.addAllExamples(testExamples); concordia.addAllExamples(testExamples);
/*The test index contains 4 sentences: /*The test index contains 4 sentences:
@ -107,8 +106,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
*/ */
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
boost::ptr_vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("xto xjest"); vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("xto xjest");
boost::ptr_vector<SubstringOccurence> searchResult2 = concordia2.simpleSearch("xjest okno"); vector<SubstringOccurence> searchResult2 = concordia2.simpleSearch("xjest okno");
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
@ -132,13 +131,13 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 ) BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
boost::ptr_vector<Example> testExamples; vector<Example> testExamples;
testExamples.push_back(new Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312)); testExamples.push_back(Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312));
testExamples.push_back(new Example("czy xjest żółte otwarte",202)); testExamples.push_back(Example("czy xjest żółte otwarte",202));
concordia.addAllExamples(testExamples); concordia.addAllExamples(testExamples);
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
boost::ptr_vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia"); vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia");
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
@ -177,8 +176,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
n: 0 1 2 3 4 5 6 7 8 9 10 11 n: 0 1 2 3 4 5 6 7 8 9 10 11
SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7 SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7
boost::ptr_vector<AnubisSearchResult> searchResult1 = concordia.anubisSearch("posiada rysia chyba"); vector<AnubisSearchResult> searchResult1 = concordia.anubisSearch("posiada rysia chyba");
boost::ptr_vector<AnubisSearchResult> searchResult2 = concordia.anubisSearch("posiada kota Ala"); vector<AnubisSearchResult> searchResult2 = concordia.anubisSearch("posiada kota Ala");
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));

View File

@ -23,13 +23,13 @@ BOOST_AUTO_TEST_CASE( SimpleHashTest )
HashGenerator hashGenerator = HashGenerator(config); HashGenerator hashGenerator = HashGenerator(config);
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash = hashGenerator.generateHash("Ala posiada kota"); vector<INDEX_CHARACTER_TYPE> hash = hashGenerator.generateHash("Ala posiada kota");
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > expected(new vector<INDEX_CHARACTER_TYPE>()); vector<INDEX_CHARACTER_TYPE> expected;
expected->push_back(0); expected.push_back(0);
expected->push_back(1); expected.push_back(1);
expected->push_back(2); expected.push_back(2);
BOOST_CHECK_EQUAL_COLLECTIONS(hash->begin(), hash->end(), expected->begin(), expected->end()); BOOST_CHECK_EQUAL_COLLECTIONS(hash.begin(), hash.end(), expected.begin(), expected.end());
} }
/* Commented out - the test takes too long. Run it once whenever the SUFFIX_MARKER_SENTENCE_BYTES parameter changes. /* Commented out - the test takes too long. Run it once whenever the SUFFIX_MARKER_SENTENCE_BYTES parameter changes.
@ -76,22 +76,22 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
HashGenerator hashGenerator1 = HashGenerator(config); HashGenerator hashGenerator1 = HashGenerator(config);
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash1 = hashGenerator1.generateHash("Ala posiada kota"); vector<INDEX_CHARACTER_TYPE> hash1 = hashGenerator1.generateHash("Ala posiada kota");
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > expected1(new vector<INDEX_CHARACTER_TYPE>()); vector<INDEX_CHARACTER_TYPE> expected1;
expected1->push_back(0); expected1.push_back(0);
expected1->push_back(1); expected1.push_back(1);
expected1->push_back(2); expected1.push_back(2);
BOOST_CHECK_EQUAL_COLLECTIONS(hash1->begin(), hash1->end(), expected1->begin(), expected1->end()); BOOST_CHECK_EQUAL_COLLECTIONS(hash1.begin(), hash1.end(), expected1.begin(), expected1.end());
hashGenerator1.serializeWordMap(); hashGenerator1.serializeWordMap();
HashGenerator hashGenerator2 = HashGenerator(config); HashGenerator hashGenerator2 = HashGenerator(config);
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash2 = hashGenerator2.generateHash("Ala posiada psa"); vector<INDEX_CHARACTER_TYPE> hash2 = hashGenerator2.generateHash("Ala posiada psa");
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > expected2(new vector<INDEX_CHARACTER_TYPE>()); vector<INDEX_CHARACTER_TYPE> expected2;
expected2->push_back(0); expected2.push_back(0);
expected2->push_back(1); expected2.push_back(1);
expected2->push_back(3); expected2.push_back(3);
BOOST_CHECK_EQUAL_COLLECTIONS(hash2->begin(), hash2->end(), expected2->begin(), expected2->end()); BOOST_CHECK_EQUAL_COLLECTIONS(hash2.begin(), hash2.end(), expected2.begin(), expected2.end());
boost::filesystem::remove(config->getWordMapFilePath()); boost::filesystem::remove(config->getWordMapFilePath());
} }
@ -106,23 +106,23 @@ BOOST_AUTO_TEST_CASE( TokenVectorTest )
HashGenerator hashGenerator = HashGenerator(config); HashGenerator hashGenerator = HashGenerator(config);
boost::shared_ptr<vector<string> > tokenVector = hashGenerator.generateTokenVector("12.02.2014 o godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że kierowca zaparkował samochód."); vector<string> tokenVector = hashGenerator.generateTokenVector("12.02.2014 o godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że kierowca zaparkował samochód.");
boost::shared_ptr<vector<string> > expected(new vector<string>()); vector<string> expected;
expected->push_back("ne_date"); expected.push_back("ne_date");
expected->push_back("godzinie"); expected.push_back("godzinie");
expected->push_back("ne_number"); expected.push_back("ne_number");
expected->push_back("ne_number"); expected.push_back("ne_number");
expected->push_back("doszło"); expected.push_back("doszło");
expected->push_back("kolizji"); expected.push_back("kolizji");
expected->push_back("ulicy"); expected.push_back("ulicy");
expected->push_back("grobla"); expected.push_back("grobla");
expected->push_back("policjanci"); expected.push_back("policjanci");
expected->push_back("ustalili"); expected.push_back("ustalili");
expected->push_back("kierowca"); expected.push_back("kierowca");
expected->push_back("zaparkował"); expected.push_back("zaparkował");
expected->push_back("samochód"); expected.push_back("samochód");
BOOST_CHECK_EQUAL_COLLECTIONS(tokenVector->begin(), tokenVector->end(), expected->begin(), expected->end()); BOOST_CHECK_EQUAL_COLLECTIONS(tokenVector.begin(), tokenVector.end(), expected.begin(), expected.end());
} }
BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END()

View File

@ -31,58 +31,58 @@ BOOST_AUTO_TEST_CASE( WriteReadSingleCharacter )
BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray ) BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray )
{ {
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash(new vector<INDEX_CHARACTER_TYPE>()); vector<INDEX_CHARACTER_TYPE> hash;
hash->push_back(123456789); // in hex: 75BCD15 hash.push_back(123456789); // in hex: 75BCD15
// in memory: 15 cd 5b 07 // in memory: 15 cd 5b 07
// in memory DEC: 21 205 91 7 // in memory DEC: 21 205 91 7
hash->push_back(987654321); // in hex: 3ADE68B1 hash.push_back(987654321); // in hex: 3ADE68B1
// in memory: b1 68 de 3a // in memory: b1 68 de 3a
// in memory DEC: 177 104 222 58 // in memory DEC: 177 104 222 58
sauchar_t * dataArray = Utils::indexVectorToSaucharArray(hash); sauchar_t * dataArray = Utils::indexVectorToSaucharArray(hash);
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > result(new vector<INDEX_CHARACTER_TYPE>()); vector<INDEX_CHARACTER_TYPE> result;
for (int i=0;i<8;i++) { for (int i=0;i<8;i++) {
INDEX_CHARACTER_TYPE a = dataArray[i]; INDEX_CHARACTER_TYPE a = dataArray[i];
result->push_back(a); result.push_back(a);
} }
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > expected(new vector<INDEX_CHARACTER_TYPE>()); vector<INDEX_CHARACTER_TYPE> expected;
expected->push_back(21); expected.push_back(21);
expected->push_back(205); expected.push_back(205);
expected->push_back(91); expected.push_back(91);
expected->push_back(7); expected.push_back(7);
expected->push_back(177); expected.push_back(177);
expected->push_back(104); expected.push_back(104);
expected->push_back(222); expected.push_back(222);
expected->push_back(58); expected.push_back(58);
BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end()); BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());
} }
BOOST_AUTO_TEST_CASE( IndexVectorToSaucharVector ) BOOST_AUTO_TEST_CASE( IndexVectorToSaucharVector )
{ {
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash(new vector<INDEX_CHARACTER_TYPE>()); vector<INDEX_CHARACTER_TYPE> hash;
hash->push_back(123456789); // in hex: 75BCD15 hash.push_back(123456789); // in hex: 75BCD15
// in memory: 15 cd 5b 07 // in memory: 15 cd 5b 07
// in memory DEC: 21 205 91 7 // in memory DEC: 21 205 91 7
hash->push_back(987654321); // in hex: 3ADE68B1 hash.push_back(987654321); // in hex: 3ADE68B1
// in memory: b1 68 de 3a // in memory: b1 68 de 3a
// in memory DEC: 177 104 222 58 // in memory DEC: 177 104 222 58
boost::shared_ptr<vector<sauchar_t> > result = Utils::indexVectorToSaucharVector(hash); vector<sauchar_t> result = Utils::indexVectorToSaucharVector(hash);
boost::shared_ptr<vector<sauchar_t> > expected(new vector<sauchar_t>()); vector<sauchar_t> expected;
expected->push_back(21); expected.push_back(21);
expected->push_back(205); expected.push_back(205);
expected->push_back(91); expected.push_back(91);
expected->push_back(7); expected.push_back(7);
expected->push_back(177); expected.push_back(177);
expected->push_back(104); expected.push_back(104);
expected->push_back(222); expected.push_back(222);
expected->push_back(58); expected.push_back(58);
BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end()); BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());
} }
BOOST_AUTO_TEST_CASE( MaxSentenceSize ) BOOST_AUTO_TEST_CASE( MaxSentenceSize )

View File

@ -40,37 +40,34 @@ void TmMatches::calculateSimpleScore() {
void TmMatches::addExampleInterval(int start, int end) { void TmMatches::addExampleInterval(int start, int end) {
if (!_alreadyIntersects(_exampleMatchedRegions, start, end)) { if (!_alreadyIntersects(_exampleMatchedRegions, start, end)) {
_exampleMatchedRegions.push_back(new Interval(start, end)); _exampleMatchedRegions.push_back(Interval(start, end));
} }
} }
void TmMatches::addPatternInterval(int start, int end) { void TmMatches::addPatternInterval(int start, int end) {
if (!_alreadyIntersects(_patternMatchedRegions, start, end)) { if (!_alreadyIntersects(_patternMatchedRegions, start, end)) {
_patternMatchedRegions.push_back(new Interval(start, end)); _patternMatchedRegions.push_back(Interval(start, end));
} }
} }
bool TmMatches::_alreadyIntersects( bool TmMatches::_alreadyIntersects(
boost::ptr_vector<Interval> intervalList, const vector<Interval> & intervalList,
int start, int end) { int start, int end) {
Interval * tempInterval = new Interval(start, end); Interval tempInterval(start, end);
BOOST_FOREACH(Interval & oldInterval, intervalList) { BOOST_FOREACH(Interval oldInterval, intervalList) {
if (oldInterval.intersects(*tempInterval)) { if (oldInterval.intersects(tempInterval)) {
delete tempInterval;
return true; return true;
} }
} }
delete tempInterval;
return false; return false;
} }
double TmMatches::_getLogarithmicOverlay( double TmMatches::_getLogarithmicOverlay(
boost::ptr_vector<Interval> intervalList, const vector<Interval> & intervalList,
unsigned char sentenceSize, unsigned char sentenceSize,
double k) { double k) {
double overlayScore = 0; double overlayScore = 0;
BOOST_FOREACH(Interval & interval, intervalList) { BOOST_FOREACH(Interval interval, intervalList) {
double intervalOverlay = static_cast<double>(interval.getLength()) double intervalOverlay = static_cast<double>(interval.getLength())
/ static_cast<double>(sentenceSize); / static_cast<double>(sentenceSize);
double significanceFactor = pow(log(interval.getLength()+1) double significanceFactor = pow(log(interval.getLength()+1)

View File

@ -2,9 +2,9 @@
#define TM_MATCHES_HDR #define TM_MATCHES_HDR
#include <string> #include <string>
#include <vector>
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
#include "concordia/interval.hpp" #include "concordia/interval.hpp"
#include <boost/ptr_container/ptr_vector.hpp>
#include <boost/ptr_container/ptr_map.hpp> #include <boost/ptr_container/ptr_map.hpp>
@ -29,11 +29,11 @@ public:
return _score; return _score;
} }
boost::ptr_vector<Interval> getExampleIntervals() const { vector<Interval> getExampleIntervals() const {
return _exampleMatchedRegions; return _exampleMatchedRegions;
} }
boost::ptr_vector<Interval> getPatternIntervals() const { vector<Interval> getPatternIntervals() const {
return _patternMatchedRegions; return _patternMatchedRegions;
} }
@ -50,18 +50,18 @@ public:
void addPatternInterval(int start, int end); void addPatternInterval(int start, int end);
private: private:
bool _alreadyIntersects(boost::ptr_vector<Interval> intervalList, bool _alreadyIntersects(const vector<Interval> & intervalList,
int start, int end); int start, int end);
double _getLogarithmicOverlay(boost::ptr_vector<Interval> intervalList, double _getLogarithmicOverlay(const vector<Interval> & intervalList,
unsigned char sentenceSize, unsigned char sentenceSize,
double k); double k);
SUFFIX_MARKER_TYPE _exampleId; SUFFIX_MARKER_TYPE _exampleId;
boost::ptr_vector<Interval> _exampleMatchedRegions; vector<Interval> _exampleMatchedRegions;
boost::ptr_vector<Interval> _patternMatchedRegions; vector<Interval> _patternMatchedRegions;
unsigned char _patternSize; unsigned char _patternSize;