From 3a03b01f42a03bb1be6d4f5029aa3ac924256b49 Mon Sep 17 00:00:00 2001 From: rjawor Date: Wed, 15 Apr 2015 10:55:26 +0200 Subject: [PATCH] std vectors Former-commit-id: 5816e87c856f7edc242cc707851a0e2ad05aeb38 --- TODO.txt | 4 +- concordia-console/concordia-console.cpp | 7 +- concordia/anubis_searcher.cpp | 46 +++-- concordia/anubis_searcher.hpp | 13 +- concordia/common/utils.cpp | 30 ++-- concordia/common/utils.hpp | 19 +- concordia/concordia.cpp | 12 +- concordia/concordia.hpp | 13 +- concordia/concordia_index.cpp | 10 +- concordia/concordia_index.hpp | 4 +- concordia/hash_generator.cpp | 23 ++- concordia/hash_generator.hpp | 6 +- concordia/index_searcher.cpp | 16 +- concordia/index_searcher.hpp | 6 +- concordia/sentence_anonymizer.cpp | 2 +- concordia/sentence_anonymizer.hpp | 4 +- concordia/t/test_anubis_searcher.cpp | 222 ++++++++++++------------ concordia/t/test_concordia.cpp | 31 ++-- concordia/t/test_hash_generator.cpp | 68 ++++---- concordia/t/test_utils.cpp | 58 +++---- concordia/tm_matches.cpp | 19 +- concordia/tm_matches.hpp | 20 +-- 22 files changed, 314 insertions(+), 319 deletions(-) diff --git a/TODO.txt b/TODO.txt index cc60029..c0c2581 100644 --- a/TODO.txt +++ b/TODO.txt @@ -21,6 +21,8 @@ IN PROGRESS 2. Wykonać anubis search na nowych markerach z długością zdania zastanowić się nad optymalizacją: - tmMatchesMap jako normalna mapa (nie ptr_map) - REJECTED LCP array -- !important! rezygnacja z ptr_vector (wycieki!) +- !important! rezygnacja z ptr_vector - zwracanie wektorów +- powyrzucać using namespace std +- profiling diff --git a/concordia-console/concordia-console.cpp b/concordia-console/concordia-console.cpp index ab0e3c0..aaf9e1e 100644 --- a/concordia-console/concordia-console.cpp +++ b/concordia-console/concordia-console.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -68,7 +67,7 @@ int main(int argc, char** argv) { std::cout << "\tSearching for pattern: \"" << pattern << "\"" << std::endl; time_start = boost::posix_time::microsec_clock::local_time(); - boost::ptr_vector result = + std::vector result = concordia.simpleSearch(pattern); time_end = boost::posix_time::microsec_clock::local_time(); msdiff = time_end - time_start; @@ -89,12 +88,12 @@ int main(int argc, char** argv) { std::string line; if (text_file.is_open()) { long lineCount = 0; - boost::ptr_vector buffer; + vector buffer; boost::posix_time::ptime timeStart = boost::posix_time::microsec_clock::local_time(); while (getline(text_file, line)) { lineCount++; - buffer.push_back(new Example(line, lineCount)); + buffer.push_back(Example(line, lineCount)); if (lineCount % READ_BUFFER_LENGTH == 0) { concordia.addAllExamples(buffer); buffer.clear(); diff --git a/concordia/anubis_searcher.cpp b/concordia/anubis_searcher.cpp index ab367ee..7aa739f 100644 --- a/concordia/anubis_searcher.cpp +++ b/concordia/anubis_searcher.cpp @@ -12,16 +12,16 @@ AnubisSearcher::~AnubisSearcher() { } -boost::ptr_vector AnubisSearcher::anubisSearch( +std::vector AnubisSearcher::anubisSearch( boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, - boost::shared_ptr > pattern) + const std::vector & pattern) throw(ConcordiaException) { boost::shared_ptr tmMatchesMap = getTmMatches(T, markers, SA, pattern); // get the tmMatches list sorted descending by score - boost::ptr_vector result; + std::vector result; return result; } @@ -29,30 +29,28 @@ boost::shared_ptr AnubisSearcher::getTmMatches( boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, - boost::shared_ptr > pattern) + const std::vector & pattern) throw(ConcordiaException) { - boost::shared_ptr > patternVector = + std::vector patternVector = Utils::indexVectorToSaucharVector(pattern); - if (patternVector->size() != - pattern->size() * sizeof(INDEX_CHARACTER_TYPE)) { + if (patternVector.size() != + pattern.size() * sizeof(INDEX_CHARACTER_TYPE)) { throw ConcordiaException("Increasing pattern resolution went wrong."); } boost::shared_ptr tmMatchesMap(new TmMatchesMap()); - for (int offset = 0; offset < pattern->size(); offset++) { + for (int offset = 0; offset < pattern.size(); offset++) { int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE); - boost::shared_ptr > currentPattern = - boost::shared_ptr > - (new std::vector( - patternVector->begin()+highResOffset, patternVector->end())); + std::vector currentPattern( + patternVector.begin()+highResOffset, patternVector.end()); saidx_t patternLength = 0; saidx_t size = SA->size(); saidx_t left = 0; - sauchar_t * patternArray = currentPattern->data(); + sauchar_t * patternArray = currentPattern.data(); saidx_t * SAleft = SA->data(); @@ -77,19 +75,19 @@ boost::shared_ptr AnubisSearcher::getTmMatches( // Add to tm matches map results surrounding the main stream. // from left for (saidx_t i = prevLeft; i < left; i++) { - _addToMap(SA, markers, tmMatchesMap, i, pattern->size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset); + _addToMap(SA, markers, tmMatchesMap, i, pattern.size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset); } // from right for (saidx_t i = left+size; i < prevLeft+prevSize; i++) { - _addToMap(SA, markers, tmMatchesMap, i, pattern->size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset); + _addToMap(SA, markers, tmMatchesMap, i, pattern.size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset); } } - } while (patternLength < currentPattern->size() && size > 0); + } while (patternLength < currentPattern.size() && size > 0); if (size > 0) { for (saidx_t i = left; i < left+size; i++) { - _addToMap(SA, markers, tmMatchesMap, i, pattern->size(), patternLength / sizeof(INDEX_CHARACTER_TYPE), offset); + _addToMap(SA, markers, tmMatchesMap, i, pattern.size(), patternLength / sizeof(INDEX_CHARACTER_TYPE), offset); } } } @@ -97,18 +95,18 @@ boost::shared_ptr AnubisSearcher::getTmMatches( return tmMatchesMap; } -boost::ptr_vector AnubisSearcher::lcpSearch( +std::vector AnubisSearcher::lcpSearch( boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, - boost::shared_ptr > pattern, + const std::vector & pattern, SUFFIX_MARKER_TYPE & length) throw(ConcordiaException) { saidx_t patternLength = 0; saidx_t size = SA->size(); saidx_t left = 0; - sauchar_t * patternArray = pattern->data(); + const sauchar_t * patternArray = pattern.data(); saidx_t * SAleft = SA->data(); @@ -126,9 +124,9 @@ boost::ptr_vector AnubisSearcher::lcpSearch( SAleft, size, &localLeft); left += localLeft; SAleft += localLeft; - } while (patternLength < pattern->size() && size > 0); + } while (patternLength < pattern.size() && size > 0); - boost::ptr_vector result; + vector result; if (size == 0) { // The search managed to find exactly the longest common prefixes. @@ -151,7 +149,7 @@ boost::ptr_vector AnubisSearcher::lcpSearch( } void AnubisSearcher::_collectResults( - boost::ptr_vector & result, + vector & result, boost::shared_ptr > markers, boost::shared_ptr > SA, saidx_t left, saidx_t size) { @@ -160,7 +158,7 @@ void AnubisSearcher::_collectResults( if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) { SUFFIX_MARKER_TYPE marker = markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE)); - result.push_back(new SubstringOccurence(marker)); + result.push_back(SubstringOccurence(marker)); } } } diff --git a/concordia/anubis_searcher.hpp b/concordia/anubis_searcher.hpp index 6067acf..ef2a58a 100644 --- a/concordia/anubis_searcher.hpp +++ b/concordia/anubis_searcher.hpp @@ -2,7 +2,6 @@ #define ANUBIS_SEARCHER_HDR #include -#include #include "concordia/common/config.hpp" #include "concordia/common/utils.hpp" @@ -28,29 +27,29 @@ public: */ virtual ~AnubisSearcher(); - boost::ptr_vector anubisSearch( + std::vector anubisSearch( boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, - boost::shared_ptr > pattern) + const std::vector & pattern) throw(ConcordiaException); boost::shared_ptr getTmMatches( boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, - boost::shared_ptr > pattern) + const std::vector & pattern) throw(ConcordiaException); - boost::ptr_vector lcpSearch( + std::vector lcpSearch( boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, - boost::shared_ptr > pattern, + const std::vector & pattern, SUFFIX_MARKER_TYPE & length) throw(ConcordiaException); private: - void _collectResults(boost::ptr_vector & result, + void _collectResults(vector & result, boost::shared_ptr > markers, boost::shared_ptr > SA, saidx_t left, saidx_t size); diff --git a/concordia/common/utils.cpp b/concordia/common/utils.cpp index c94a04b..6d30008 100644 --- a/concordia/common/utils.cpp +++ b/concordia/common/utils.cpp @@ -30,31 +30,39 @@ SUFFIX_MARKER_TYPE Utils::readMarker(ifstream & file) { } sauchar_t * Utils::indexVectorToSaucharArray( - boost::shared_ptr > input) { - const int kArraySize = input->size()*sizeof(INDEX_CHARACTER_TYPE); + const vector & input) { + const int kArraySize = input.size()*sizeof(INDEX_CHARACTER_TYPE); sauchar_t * patternArray = new sauchar_t[kArraySize]; int pos = 0; - for (vector::iterator it = input->begin(); - it != input->end(); ++it) { + for (vector::const_iterator it = input.begin(); + it != input.end(); ++it) { _insertCharToSaucharArray(patternArray, *it, pos); pos += sizeof(INDEX_CHARACTER_TYPE); } return patternArray; } -boost::shared_ptr > Utils::indexVectorToSaucharVector( - boost::shared_ptr > input) { - boost::shared_ptr > result = - boost::shared_ptr >(new std::vector); - - for (vector::iterator it = input->begin(); - it != input->end(); ++it) { +std::vector Utils::indexVectorToSaucharVector( + const vector & input) { + std::vector result; + for (vector::const_iterator it = input.begin(); + it != input.end(); ++it) { appendCharToSaucharVector(result, *it); } return result; } + +void Utils::appendCharToSaucharVector( + std::vector & vector, + INDEX_CHARACTER_TYPE character) { + sauchar_t * characterArray = reinterpret_cast(&character); + for (int i = 0; i < sizeof(character); i++) { + vector.push_back(characterArray[i]); + } +} + void Utils::appendCharToSaucharVector( boost::shared_ptr > vector, INDEX_CHARACTER_TYPE character) { diff --git a/concordia/common/utils.hpp b/concordia/common/utils.hpp index 00c14aa..9175a60 100644 --- a/concordia/common/utils.hpp +++ b/concordia/common/utils.hpp @@ -32,18 +32,21 @@ public: static SUFFIX_MARKER_TYPE readMarker(ifstream & file); static sauchar_t * indexVectorToSaucharArray( - boost::shared_ptr > input); + const vector & input); - static boost::shared_ptr > - indexVectorToSaucharVector( - boost::shared_ptr > input); + static std::vector indexVectorToSaucharVector( + const vector & input); static void appendCharToSaucharVector( boost::shared_ptr > vector, INDEX_CHARACTER_TYPE character); + static void appendCharToSaucharVector( + std::vector & vector, + INDEX_CHARACTER_TYPE character); + template - static void printVector(boost::shared_ptr > vector); + static void printVector(const std::vector & vector); static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker); @@ -65,9 +68,9 @@ private: }; template -void Utils::printVector(boost::shared_ptr > vector) { - for (int i = 0; i < vector->size(); i++) { - cout << static_cast(vector->at(i)) << " "; +void Utils::printVector(const std::vector & vector) { + for (int i = 0; i < vector.size(); i++) { + cout << static_cast(vector.at(i)) << " "; } cout << endl; } diff --git a/concordia/concordia.cpp b/concordia/concordia.cpp index d0d1e47..d07338d 100644 --- a/concordia/concordia.cpp +++ b/concordia/concordia.cpp @@ -51,7 +51,7 @@ void Concordia::addExample(const Example & example) // Sentences are written to disk and added to T. // SA is generated on command by other methods. -void Concordia::addAllExamples(const boost::ptr_vector & examples) +void Concordia::addAllExamples(const std::vector & examples) throw(ConcordiaException) { _index->addAllExamples(_hashGenerator, _T, _markers, examples); } @@ -131,26 +131,26 @@ void Concordia::_initializeIndex() throw(ConcordiaException) { } } -boost::ptr_vector Concordia::simpleSearch( +std::vector Concordia::simpleSearch( const string & pattern) throw(ConcordiaException) { if (_T->size() > 0) { return _searcher->simpleSearch(_hashGenerator, _T, _markers, _SA, pattern); } else { - boost::ptr_vector result; + std::vector result; return result; } } -boost::ptr_vector Concordia::anubisSearch( +std::vector Concordia::anubisSearch( const string & pattern) throw(ConcordiaException) { if (_T->size() > 0) { return _searcher->anubisSearch(_hashGenerator, _T, - _markers, _SA, pattern); + _markers, _SA, pattern); } else { - boost::ptr_vector result; + std::vector result; return result; } } diff --git a/concordia/concordia.hpp b/concordia/concordia.hpp index de1199b..d72362a 100644 --- a/concordia/concordia.hpp +++ b/concordia/concordia.hpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include "concordia/common/config.hpp" @@ -41,16 +40,14 @@ public: void addExample(const Example & example) throw(ConcordiaException); - void addAllExamples(const boost::ptr_vector & examples) + void addAllExamples(const std::vector & examples) throw(ConcordiaException); - boost::ptr_vector simpleSearch( - const std::string & pattern) - throw(ConcordiaException); + std::vector simpleSearch(const std::string & pattern) + throw(ConcordiaException); - boost::ptr_vector anubisSearch( - const std::string & pattern) - throw(ConcordiaException); + std::vector anubisSearch(const std::string & pattern) + throw(ConcordiaException); void loadRAMIndexFromDisk() throw(ConcordiaException); diff --git a/concordia/concordia_index.cpp b/concordia/concordia_index.cpp index 35bb438..ca3254a 100644 --- a/concordia/concordia_index.cpp +++ b/concordia/concordia_index.cpp @@ -56,7 +56,7 @@ void ConcordiaIndex::addAllExamples( boost::shared_ptr hashGenerator, boost::shared_ptr > T, boost::shared_ptr > markers, - const boost::ptr_vector & examples) { + const vector & examples) { ofstream hashedIndexFile; hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out| ios::app|ios::binary); @@ -81,11 +81,11 @@ void ConcordiaIndex::_addSingleExample( boost::shared_ptr > T, boost::shared_ptr > markers, const Example & example) { - boost::shared_ptr > hash + vector hash = hashGenerator->generateHash(example.getSentence()); int offset = 0; - for (vector::iterator it = hash->begin(); - it != hash->end(); ++it) { + for (vector::iterator it = hash.begin(); + it != hash.end(); ++it) { INDEX_CHARACTER_TYPE character = *it; Utils::writeIndexCharacter(hashedIndexFile, character); Utils::appendCharToSaucharVector(T, character); @@ -95,7 +95,7 @@ void ConcordiaIndex::_addSingleExample( SUFFIX_MARKER_TYPE marker = Utils::createMarker( example.getId(), offset, - hash->size()); + hash.size()); Utils::writeMarker(markersFile, marker); markers->push_back(marker); diff --git a/concordia/concordia_index.hpp b/concordia/concordia_index.hpp index 0f34a0f..d6bb412 100644 --- a/concordia/concordia_index.hpp +++ b/concordia/concordia_index.hpp @@ -2,10 +2,10 @@ #define CONCORDIA_INDEX_HDR #include -#include #include #include #include +#include #include "concordia/common/config.hpp" #include "concordia/example.hpp" @@ -40,7 +40,7 @@ public: boost::shared_ptr hashGenerator, boost::shared_ptr > T, boost::shared_ptr > markers, - const boost::ptr_vector & examples); + const vector & examples); boost::shared_ptr > generateSuffixArray( boost::shared_ptr > T); diff --git a/concordia/hash_generator.cpp b/concordia/hash_generator.cpp index ee32874..9317e24 100644 --- a/concordia/hash_generator.cpp +++ b/concordia/hash_generator.cpp @@ -25,31 +25,28 @@ HashGenerator::HashGenerator(boost::shared_ptr config) HashGenerator::~HashGenerator() { } -boost::shared_ptr > HashGenerator::generateHash( +vector HashGenerator::generateHash( const string & sentence) throw(ConcordiaException) { - boost::shared_ptr > - result(new vector()); - boost::shared_ptr > tokenTexts = - generateTokenVector(sentence); - if (tokenTexts->size() > Utils::maxSentenceSize) { + vector result; + vector tokenTexts = generateTokenVector(sentence); + if (tokenTexts.size() > Utils::maxSentenceSize) { throw ConcordiaException("Trying to add too long sentence."); } - for (vector::iterator it = tokenTexts->begin(); - it != tokenTexts->end(); ++it) { + for (vector::iterator it = tokenTexts.begin(); + it != tokenTexts.end(); ++it) { string token = *it; INDEX_CHARACTER_TYPE code = _wordMap->getWordCode(token); - result->push_back(code); + result.push_back(code); } return result; } -boost::shared_ptr > - HashGenerator::generateTokenVector(const string & sentence) { +vector HashGenerator::generateTokenVector(const string & sentence) { string anonymizedSentence = _sentenceAnonymizer->anonymize(sentence); boost::trim(anonymizedSentence); - boost::shared_ptr > tokenTexts(new vector()); - boost::split(*tokenTexts, anonymizedSentence, boost::is_any_of(" \t\r\n"), + vector tokenTexts; + boost::split(tokenTexts, anonymizedSentence, boost::is_any_of(" \t\r\n"), boost::algorithm::token_compress_on); return tokenTexts; } diff --git a/concordia/hash_generator.hpp b/concordia/hash_generator.hpp index d88c1a9..ad041c7 100644 --- a/concordia/hash_generator.hpp +++ b/concordia/hash_generator.hpp @@ -29,12 +29,10 @@ public: */ virtual ~HashGenerator(); - boost::shared_ptr > - generateHash(const string & sentence) + vector generateHash(const string & sentence) throw(ConcordiaException); - boost::shared_ptr > - generateTokenVector(const string & sentence); + vector generateTokenVector(const string & sentence); void serializeWordMap(); diff --git a/concordia/index_searcher.cpp b/concordia/index_searcher.cpp index 0ef61c5..685a876 100644 --- a/concordia/index_searcher.cpp +++ b/concordia/index_searcher.cpp @@ -12,18 +12,17 @@ IndexSearcher::IndexSearcher() { IndexSearcher::~IndexSearcher() { } -boost::ptr_vector IndexSearcher::simpleSearch( +vector IndexSearcher::simpleSearch( boost::shared_ptr hashGenerator, boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, const string & pattern) throw(ConcordiaException) { - boost::ptr_vector result; + vector result; int left; - boost::shared_ptr > hash = - hashGenerator->generateHash(pattern); - saidx_t patternLength = hash->size()*sizeof(INDEX_CHARACTER_TYPE); + vector hash = hashGenerator->generateHash(pattern); + saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE); sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash); int size = sa_search(T->data(), (saidx_t) T->size(), @@ -40,7 +39,7 @@ boost::ptr_vector IndexSearcher::simpleSearch( saidx_t actualResultPos = resultPos / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE marker = markers->at(actualResultPos); - result.push_back(new SubstringOccurence(marker)); + result.push_back(SubstringOccurence(marker)); } } @@ -48,13 +47,12 @@ boost::ptr_vector IndexSearcher::simpleSearch( return result; } -boost::ptr_vector IndexSearcher::anubisSearch( +vector IndexSearcher::anubisSearch( boost::shared_ptr hashGenerator, boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, const string & pattern) throw(ConcordiaException) { - boost::shared_ptr > hash = - hashGenerator->generateHash(pattern); + vector hash = hashGenerator->generateHash(pattern); return _anubisSearcher->anubisSearch(T, markers, SA, hash); } diff --git a/concordia/index_searcher.hpp b/concordia/index_searcher.hpp index b8813a1..4e1331d 100644 --- a/concordia/index_searcher.hpp +++ b/concordia/index_searcher.hpp @@ -2,9 +2,9 @@ #define INDEX_SEARCHER_HDR #include -#include #include #include +#include #include "concordia/common/config.hpp" #include "concordia/substring_occurence.hpp" @@ -30,14 +30,14 @@ public: */ virtual ~IndexSearcher(); - boost::ptr_vector simpleSearch( + vector simpleSearch( boost::shared_ptr hashGenerator, boost::shared_ptr > T, boost::shared_ptr > markers, boost::shared_ptr > SA, const string & pattern) throw(ConcordiaException); - boost::ptr_vector anubisSearch( + vector anubisSearch( boost::shared_ptr hashGenerator, boost::shared_ptr > T, boost::shared_ptr > markers, diff --git a/concordia/sentence_anonymizer.cpp b/concordia/sentence_anonymizer.cpp index 79c46bf..f6d6067 100644 --- a/concordia/sentence_anonymizer.cpp +++ b/concordia/sentence_anonymizer.cpp @@ -59,7 +59,7 @@ void SentenceAnonymizer::_createNeRules(string & namedEntitiesPath) { << " in NE file: " << namedEntitiesPath; throw ConcordiaException(ss.str()); } else { - _namedEntities.push_back(new RegexReplacement( + _namedEntities.push_back(RegexReplacement( tokenTexts->at(0), tokenTexts->at(1))); } } diff --git a/concordia/sentence_anonymizer.hpp b/concordia/sentence_anonymizer.hpp index c16c04c..edd1ef8 100644 --- a/concordia/sentence_anonymizer.hpp +++ b/concordia/sentence_anonymizer.hpp @@ -2,12 +2,12 @@ #define SENTENCE_ANONYMIZER_HDR #include +#include #include "concordia/common/config.hpp" #include "concordia/regex_replacement.hpp" #include "concordia/concordia_config.hpp" #include "concordia/concordia_exception.hpp" #include -#include #include @@ -39,7 +39,7 @@ private: string replacement, bool wholeWord = false); - boost::ptr_vector _namedEntities; + vector _namedEntities; boost::shared_ptr _htmlTags; diff --git a/concordia/t/test_anubis_searcher.cpp b/concordia/t/test_anubis_searcher.cpp index 49ccdac..14808d4 100644 --- a/concordia/t/test_anubis_searcher.cpp +++ b/concordia/t/test_anubis_searcher.cpp @@ -22,7 +22,6 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) boost::shared_ptr > T(new std::vector()); boost::shared_ptr > markers(new std::vector()); boost::shared_ptr > SA(new std::vector()); - boost::shared_ptr > pattern(new std::vector()); /* Search in text: "banana" T = 123232 (all one sentence id=34) @@ -64,25 +63,26 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) markers->push_back(Utils::createMarker(34,i,6)); } - pattern->push_back(0); - pattern->push_back(0); - pattern->push_back(0); - pattern->push_back(2); + std::vector pattern; + pattern.push_back(0); + pattern.push_back(0); + pattern.push_back(0); + pattern.push_back(2); - pattern->push_back(0); - pattern->push_back(0); - pattern->push_back(0); - pattern->push_back(3); + pattern.push_back(0); + pattern.push_back(0); + pattern.push_back(0); + pattern.push_back(3); - pattern->push_back(0); - pattern->push_back(0); - pattern->push_back(0); - pattern->push_back(4); + pattern.push_back(0); + pattern.push_back(0); + pattern.push_back(0); + pattern.push_back(4); - pattern->push_back(0); - pattern->push_back(0); - pattern->push_back(0); - pattern->push_back(4); + pattern.push_back(0); + pattern.push_back(0); + pattern.push_back(0); + pattern.push_back(4); /* Suffix array for the hashed index: 0001 0002 0003 0002 0003 0002 0:000100020003000200030002 @@ -137,7 +137,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) SA->push_back(11); SUFFIX_MARKER_TYPE highResLength; - boost::ptr_vector result = searcher.lcpSearch(T, markers, SA, pattern, highResLength); + std::vector result = searcher.lcpSearch(T, markers, SA, pattern, highResLength); SUFFIX_MARKER_TYPE length = highResLength / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get the following results from SA: @@ -155,39 +155,39 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) //--------pattern banana - boost::shared_ptr > pattern2(new std::vector()); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(1); + std::vector pattern2; + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(1); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(2); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(2); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(3); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(3); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(2); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(2); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(3); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(3); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(0); - pattern2->push_back(2); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(0); + pattern2.push_back(2); SUFFIX_MARKER_TYPE highResLength2; - boost::ptr_vector result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2); + vector result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2); SUFFIX_MARKER_TYPE length2 = highResLength2 / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get one result from SA: @@ -203,34 +203,34 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) //--------pattern banan - boost::shared_ptr > pattern3(new std::vector()); - pattern3->push_back(0); - pattern3->push_back(0); - pattern3->push_back(0); - pattern3->push_back(1); + std::vector pattern3; + pattern3.push_back(0); + pattern3.push_back(0); + pattern3.push_back(0); + pattern3.push_back(1); - pattern3->push_back(0); - pattern3->push_back(0); - pattern3->push_back(0); - pattern3->push_back(2); + pattern3.push_back(0); + pattern3.push_back(0); + pattern3.push_back(0); + pattern3.push_back(2); - pattern3->push_back(0); - pattern3->push_back(0); - pattern3->push_back(0); - pattern3->push_back(3); + pattern3.push_back(0); + pattern3.push_back(0); + pattern3.push_back(0); + pattern3.push_back(3); - pattern3->push_back(0); - pattern3->push_back(0); - pattern3->push_back(0); - pattern3->push_back(2); + pattern3.push_back(0); + pattern3.push_back(0); + pattern3.push_back(0); + pattern3.push_back(2); - pattern3->push_back(0); - pattern3->push_back(0); - pattern3->push_back(0); - pattern3->push_back(3); + pattern3.push_back(0); + pattern3.push_back(0); + pattern3.push_back(0); + pattern3.push_back(3); SUFFIX_MARKER_TYPE highResLength3; - boost::ptr_vector result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3); + vector result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3); SUFFIX_MARKER_TYPE length3 = highResLength3 / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get one result from SA: @@ -245,29 +245,29 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) //--------pattern nazz - boost::shared_ptr > pattern4(new std::vector()); - pattern4->push_back(0); - pattern4->push_back(0); - pattern4->push_back(0); - pattern4->push_back(3); + std::vector pattern4; + pattern4.push_back(0); + pattern4.push_back(0); + pattern4.push_back(0); + pattern4.push_back(3); - pattern4->push_back(0); - pattern4->push_back(0); - pattern4->push_back(0); - pattern4->push_back(2); + pattern4.push_back(0); + pattern4.push_back(0); + pattern4.push_back(0); + pattern4.push_back(2); - pattern4->push_back(0); - pattern4->push_back(0); - pattern4->push_back(0); - pattern4->push_back(4); + pattern4.push_back(0); + pattern4.push_back(0); + pattern4.push_back(0); + pattern4.push_back(4); - pattern4->push_back(0); - pattern4->push_back(0); - pattern4->push_back(0); - pattern4->push_back(4); + pattern4.push_back(0); + pattern4.push_back(0); + pattern4.push_back(0); + pattern4.push_back(4); SUFFIX_MARKER_TYPE highResLength4; - boost::ptr_vector result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4); + vector result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4); SUFFIX_MARKER_TYPE length4 = highResLength4 / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get 2 results from SA: @@ -286,19 +286,19 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) //--------pattern zz - boost::shared_ptr > pattern5(new std::vector()); - pattern5->push_back(0); - pattern5->push_back(0); - pattern5->push_back(0); - pattern5->push_back(4); + std::vector pattern5; + pattern5.push_back(0); + pattern5.push_back(0); + pattern5.push_back(0); + pattern5.push_back(4); - pattern5->push_back(0); - pattern5->push_back(0); - pattern5->push_back(0); - pattern5->push_back(4); + pattern5.push_back(0); + pattern5.push_back(0); + pattern5.push_back(0); + pattern5.push_back(4); SUFFIX_MARKER_TYPE highResLength5; - boost::ptr_vector result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5); + vector result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5); SUFFIX_MARKER_TYPE length5 = highResLength5 / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get 0 results from SA, lcp length = 0; @@ -309,20 +309,20 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 ) //--------pattern existing in the text but spanning over parts of characters - boost::shared_ptr > pattern6(new std::vector()); - pattern6->push_back(0); - pattern6->push_back(0); - pattern6->push_back(3); + std::vector pattern6; + pattern6.push_back(0); + pattern6.push_back(0); + pattern6.push_back(3); - pattern6->push_back(0); - pattern6->push_back(0); - pattern6->push_back(0); - pattern6->push_back(2); + pattern6.push_back(0); + pattern6.push_back(0); + pattern6.push_back(0); + pattern6.push_back(2); - pattern6->push_back(0); + pattern6.push_back(0); SUFFIX_MARKER_TYPE highResLength6; - boost::ptr_vector result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6); + vector result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6); SUFFIX_MARKER_TYPE length6 = highResLength6 / sizeof(INDEX_CHARACTER_TYPE); /* Expecting to get 0 results from SA, lcp length = 0; @@ -378,7 +378,7 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest ) // searching for pattern "Ola posiada rysia Marysia" (5 1 3 4) - boost::shared_ptr > pattern = hashGenerator->generateHash("Ola posiada rysia Marysia"); + std::vector pattern = hashGenerator->generateHash("Ola posiada rysia Marysia"); boost::shared_ptr tmMatchesMap = searcher.getTmMatches(T, markers, SA, pattern); BOOST_CHECK_EQUAL(tmMatchesMap->size(), 3); @@ -393,38 +393,38 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest ) // example 14 // example interval list: [(1,2)] - boost::ptr_vector exampleIntervals14 = tmMatches14->getExampleIntervals(); + vector exampleIntervals14 = tmMatches14->getExampleIntervals(); BOOST_CHECK_EQUAL(exampleIntervals14.size(), 1); BOOST_CHECK_EQUAL(exampleIntervals14[0].getStart(), 1); BOOST_CHECK_EQUAL(exampleIntervals14[0].getEnd(), 2); // pattern interval list: [(1,2)] - boost::ptr_vector patternIntervals14 = tmMatches14->getPatternIntervals(); + vector patternIntervals14 = tmMatches14->getPatternIntervals(); BOOST_CHECK_EQUAL(patternIntervals14.size(), 1); BOOST_CHECK_EQUAL(patternIntervals14[0].getStart(), 1); BOOST_CHECK_EQUAL(patternIntervals14[0].getEnd(), 2); // example 51 // example interval list: [(1,3)] - boost::ptr_vector exampleIntervals51 = tmMatches51->getExampleIntervals(); + vector exampleIntervals51 = tmMatches51->getExampleIntervals(); BOOST_CHECK_EQUAL(exampleIntervals51.size(), 1); BOOST_CHECK_EQUAL(exampleIntervals51[0].getStart(), 1); BOOST_CHECK_EQUAL(exampleIntervals51[0].getEnd(), 3); // pattern interval list: [(1,3)] - boost::ptr_vector patternIntervals51 = tmMatches51->getPatternIntervals(); + vector patternIntervals51 = tmMatches51->getPatternIntervals(); BOOST_CHECK_EQUAL(patternIntervals51.size(), 1); BOOST_CHECK_EQUAL(patternIntervals51[0].getStart(), 1); BOOST_CHECK_EQUAL(patternIntervals51[0].getEnd(), 3); // example 123 // example interval list: [(1,3), (0,1)] - boost::ptr_vector exampleIntervals123 = tmMatches123->getExampleIntervals(); + vector exampleIntervals123 = tmMatches123->getExampleIntervals(); BOOST_CHECK_EQUAL(exampleIntervals123.size(), 2); BOOST_CHECK_EQUAL(exampleIntervals123[0].getStart(), 1); BOOST_CHECK_EQUAL(exampleIntervals123[0].getEnd(), 3); BOOST_CHECK_EQUAL(exampleIntervals123[1].getStart(), 0); BOOST_CHECK_EQUAL(exampleIntervals123[1].getEnd(), 1); // pattern interval list: [(1,3), (3,4)] - boost::ptr_vector patternIntervals123 = tmMatches123->getPatternIntervals(); + vector patternIntervals123 = tmMatches123->getPatternIntervals(); BOOST_CHECK_EQUAL(patternIntervals123.size(), 2); BOOST_CHECK_EQUAL(patternIntervals123[0].getStart(), 1); BOOST_CHECK_EQUAL(patternIntervals123[0].getEnd(), 3); diff --git a/concordia/t/test_concordia.cpp b/concordia/t/test_concordia.cpp index 4fc31b7..8c4b315 100644 --- a/concordia/t/test_concordia.cpp +++ b/concordia/t/test_concordia.cpp @@ -5,7 +5,6 @@ #include "concordia/common/config.hpp" #include -#include #include #include @@ -52,8 +51,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 ) */ - boost::ptr_vector searchResult1 = concordia.simpleSearch("posiada rysia"); - boost::ptr_vector searchResult2 = concordia.simpleSearch("posiada kota Ala"); + vector searchResult1 = concordia.simpleSearch("posiada rysia"); + vector searchResult2 = concordia.simpleSearch("posiada kota Ala"); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); @@ -74,11 +73,11 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 ) { // modified stop words to avoid anonymization Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); - boost::ptr_vector testExamples; - testExamples.push_back(new Example("xto xjest okno",312)); - testExamples.push_back(new Example("czy xjest okno otwarte",202)); - testExamples.push_back(new Example("chyba xto xjest xtutaj",45)); - testExamples.push_back(new Example("xto xjest",29)); + vector testExamples; + testExamples.push_back(Example("xto xjest okno",312)); + testExamples.push_back(Example("czy xjest okno otwarte",202)); + testExamples.push_back(Example("chyba xto xjest xtutaj",45)); + testExamples.push_back(Example("xto xjest",29)); concordia.addAllExamples(testExamples); /*The test index contains 4 sentences: @@ -107,8 +106,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 ) */ Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); - boost::ptr_vector searchResult1 = concordia2.simpleSearch("xto xjest"); - boost::ptr_vector searchResult2 = concordia2.simpleSearch("xjest okno"); + vector searchResult1 = concordia2.simpleSearch("xto xjest"); + vector searchResult2 = concordia2.simpleSearch("xjest okno"); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); @@ -132,13 +131,13 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 ) BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 ) { Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); - boost::ptr_vector testExamples; - testExamples.push_back(new Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312)); - testExamples.push_back(new Example("czy xjest żółte otwarte",202)); + vector testExamples; + testExamples.push_back(Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312)); + testExamples.push_back(Example("czy xjest żółte otwarte",202)); concordia.addAllExamples(testExamples); Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); - boost::ptr_vector searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia"); + vector searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia"); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); @@ -177,8 +176,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 ) n: 0 1 2 3 4 5 6 7 8 9 10 11 SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7 - boost::ptr_vector searchResult1 = concordia.anubisSearch("posiada rysia chyba"); - boost::ptr_vector searchResult2 = concordia.anubisSearch("posiada kota Ala"); + vector searchResult1 = concordia.anubisSearch("posiada rysia chyba"); + vector searchResult2 = concordia.anubisSearch("posiada kota Ala"); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); diff --git a/concordia/t/test_hash_generator.cpp b/concordia/t/test_hash_generator.cpp index 6e08879..5f0f29e 100644 --- a/concordia/t/test_hash_generator.cpp +++ b/concordia/t/test_hash_generator.cpp @@ -23,13 +23,13 @@ BOOST_AUTO_TEST_CASE( SimpleHashTest ) HashGenerator hashGenerator = HashGenerator(config); - boost::shared_ptr > hash = hashGenerator.generateHash("Ala posiada kota"); - boost::shared_ptr > expected(new vector()); - expected->push_back(0); - expected->push_back(1); - expected->push_back(2); + vector hash = hashGenerator.generateHash("Ala posiada kota"); + vector expected; + expected.push_back(0); + expected.push_back(1); + expected.push_back(2); - BOOST_CHECK_EQUAL_COLLECTIONS(hash->begin(), hash->end(), expected->begin(), expected->end()); + BOOST_CHECK_EQUAL_COLLECTIONS(hash.begin(), hash.end(), expected.begin(), expected.end()); } /* Commentet out - the test takes too long. Run it once whenever the SUFFIX_MARKER_SENTENCE_BYTES parameter changes. @@ -76,22 +76,22 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest ) HashGenerator hashGenerator1 = HashGenerator(config); - boost::shared_ptr > hash1 = hashGenerator1.generateHash("Ala posiada kota"); - boost::shared_ptr > expected1(new vector()); - expected1->push_back(0); - expected1->push_back(1); - expected1->push_back(2); - BOOST_CHECK_EQUAL_COLLECTIONS(hash1->begin(), hash1->end(), expected1->begin(), expected1->end()); + vector hash1 = hashGenerator1.generateHash("Ala posiada kota"); + vector expected1; + expected1.push_back(0); + expected1.push_back(1); + expected1.push_back(2); + BOOST_CHECK_EQUAL_COLLECTIONS(hash1.begin(), hash1.end(), expected1.begin(), expected1.end()); hashGenerator1.serializeWordMap(); HashGenerator hashGenerator2 = HashGenerator(config); - boost::shared_ptr > hash2 = hashGenerator2.generateHash("Ala posiada psa"); - boost::shared_ptr > expected2(new vector()); - expected2->push_back(0); - expected2->push_back(1); - expected2->push_back(3); - BOOST_CHECK_EQUAL_COLLECTIONS(hash2->begin(), hash2->end(), expected2->begin(), expected2->end()); + vector hash2 = hashGenerator2.generateHash("Ala posiada psa"); + vector expected2; + expected2.push_back(0); + expected2.push_back(1); + expected2.push_back(3); + BOOST_CHECK_EQUAL_COLLECTIONS(hash2.begin(), hash2.end(), expected2.begin(), expected2.end()); boost::filesystem::remove(config->getWordMapFilePath()); } @@ -106,23 +106,23 @@ BOOST_AUTO_TEST_CASE( TokenVectorTest ) HashGenerator hashGenerator = HashGenerator(config); - boost::shared_ptr > tokenVector = hashGenerator.generateTokenVector("12.02.2014 o godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że kierowca zaparkował samochód."); - boost::shared_ptr > expected(new vector()); - expected->push_back("ne_date"); - expected->push_back("godzinie"); - expected->push_back("ne_number"); - expected->push_back("ne_number"); - expected->push_back("doszło"); - expected->push_back("kolizji"); - expected->push_back("ulicy"); - expected->push_back("grobla"); - expected->push_back("policjanci"); - expected->push_back("ustalili"); - expected->push_back("kierowca"); - expected->push_back("zaparkował"); - expected->push_back("samochód"); + vector tokenVector = hashGenerator.generateTokenVector("12.02.2014 o godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że kierowca zaparkował samochód."); + vector expected; + expected.push_back("ne_date"); + expected.push_back("godzinie"); + expected.push_back("ne_number"); + expected.push_back("ne_number"); + expected.push_back("doszło"); + expected.push_back("kolizji"); + expected.push_back("ulicy"); + expected.push_back("grobla"); + expected.push_back("policjanci"); + expected.push_back("ustalili"); + expected.push_back("kierowca"); + expected.push_back("zaparkował"); + expected.push_back("samochód"); - BOOST_CHECK_EQUAL_COLLECTIONS(tokenVector->begin(), tokenVector->end(), expected->begin(), expected->end()); + BOOST_CHECK_EQUAL_COLLECTIONS(tokenVector.begin(), tokenVector.end(), expected.begin(), expected.end()); } BOOST_AUTO_TEST_SUITE_END() diff --git a/concordia/t/test_utils.cpp b/concordia/t/test_utils.cpp index e8ec08c..7ca2fb0 100644 --- a/concordia/t/test_utils.cpp +++ b/concordia/t/test_utils.cpp @@ -31,58 +31,58 @@ BOOST_AUTO_TEST_CASE( WriteReadSingleCharacter ) BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray ) { - boost::shared_ptr > hash(new vector()); - hash->push_back(123456789); // in hex: 75BCD15 + vector hash; + hash.push_back(123456789); // in hex: 75BCD15 // in memory: 15 cd 5b 07 // in memory DEC: 21 205 91 7 - hash->push_back(987654321); // in hex: 3ADE68B1 + hash.push_back(987654321); // in hex: 3ADE68B1 // in memory: b1 68 de 3a // in memory DEC: 177 104 222 58 sauchar_t * dataArray = Utils::indexVectorToSaucharArray(hash); - boost::shared_ptr > result(new vector()); + vector result; for (int i=0;i<8;i++) { INDEX_CHARACTER_TYPE a = dataArray[i]; - result->push_back(a); + result.push_back(a); } - boost::shared_ptr > expected(new vector()); - expected->push_back(21); - expected->push_back(205); - expected->push_back(91); - expected->push_back(7); - expected->push_back(177); - expected->push_back(104); - expected->push_back(222); - expected->push_back(58); + vector expected; + expected.push_back(21); + expected.push_back(205); + expected.push_back(91); + expected.push_back(7); + expected.push_back(177); + expected.push_back(104); + expected.push_back(222); + expected.push_back(58); - BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end()); + BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end()); } BOOST_AUTO_TEST_CASE( IndexVectorToSaucharVector ) { - boost::shared_ptr > hash(new vector()); - hash->push_back(123456789); // in hex: 75BCD15 + vector hash; + hash.push_back(123456789); // in hex: 75BCD15 // in memory: 15 cd 5b 07 // in memory DEC: 21 205 91 7 - hash->push_back(987654321); // in hex: 3ADE68B1 + hash.push_back(987654321); // in hex: 3ADE68B1 // in memory: b1 68 de 3a // in memory DEC: 177 104 222 58 - boost::shared_ptr > result = Utils::indexVectorToSaucharVector(hash); + vector result = Utils::indexVectorToSaucharVector(hash); - boost::shared_ptr > expected(new vector()); - expected->push_back(21); - expected->push_back(205); - expected->push_back(91); - expected->push_back(7); - expected->push_back(177); - expected->push_back(104); - expected->push_back(222); - expected->push_back(58); + vector expected; + expected.push_back(21); + expected.push_back(205); + expected.push_back(91); + expected.push_back(7); + expected.push_back(177); + expected.push_back(104); + expected.push_back(222); + expected.push_back(58); - BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end()); + BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end()); } BOOST_AUTO_TEST_CASE( MaxSentenceSize ) diff --git a/concordia/tm_matches.cpp b/concordia/tm_matches.cpp index 77ebfb2..35e79a6 100644 --- a/concordia/tm_matches.cpp +++ b/concordia/tm_matches.cpp @@ -40,37 +40,34 @@ void TmMatches::calculateSimpleScore() { void TmMatches::addExampleInterval(int start, int end) { if (!_alreadyIntersects(_exampleMatchedRegions, start, end)) { - _exampleMatchedRegions.push_back(new Interval(start, end)); + _exampleMatchedRegions.push_back(Interval(start, end)); } } void TmMatches::addPatternInterval(int start, int end) { if (!_alreadyIntersects(_patternMatchedRegions, start, end)) { - _patternMatchedRegions.push_back(new Interval(start, end)); + _patternMatchedRegions.push_back(Interval(start, end)); } } bool TmMatches::_alreadyIntersects( - boost::ptr_vector intervalList, + const vector & intervalList, int start, int end) { - Interval * tempInterval = new Interval(start, end); - BOOST_FOREACH(Interval & oldInterval, intervalList) { - if (oldInterval.intersects(*tempInterval)) { - delete tempInterval; + Interval tempInterval(start, end); + BOOST_FOREACH(Interval oldInterval, intervalList) { + if (oldInterval.intersects(tempInterval)) { return true; } } - delete tempInterval; - return false; } double TmMatches::_getLogarithmicOverlay( - boost::ptr_vector intervalList, + const vector & intervalList, unsigned char sentenceSize, double k) { double overlayScore = 0; - BOOST_FOREACH(Interval & interval, intervalList) { + BOOST_FOREACH(Interval interval, intervalList) { double intervalOverlay = static_cast(interval.getLength()) / static_cast(sentenceSize); double significanceFactor = pow(log(interval.getLength()+1) diff --git a/concordia/tm_matches.hpp b/concordia/tm_matches.hpp index 8d1bbb2..ba660af 100644 --- a/concordia/tm_matches.hpp +++ b/concordia/tm_matches.hpp @@ -2,9 +2,9 @@ #define TM_MATCHES_HDR #include +#include #include "concordia/common/config.hpp" #include "concordia/interval.hpp" -#include #include @@ -29,11 +29,11 @@ public: return _score; } - boost::ptr_vector getExampleIntervals() const { + vector getExampleIntervals() const { return _exampleMatchedRegions; } - boost::ptr_vector getPatternIntervals() const { + vector getPatternIntervals() const { return _patternMatchedRegions; } @@ -50,18 +50,18 @@ public: void addPatternInterval(int start, int end); private: - bool _alreadyIntersects(boost::ptr_vector intervalList, - int start, int end); + bool _alreadyIntersects(const vector & intervalList, + int start, int end); - double _getLogarithmicOverlay(boost::ptr_vector intervalList, - unsigned char sentenceSize, - double k); + double _getLogarithmicOverlay(const vector & intervalList, + unsigned char sentenceSize, + double k); SUFFIX_MARKER_TYPE _exampleId; - boost::ptr_vector _exampleMatchedRegions; + vector _exampleMatchedRegions; - boost::ptr_vector _patternMatchedRegions; + vector _patternMatchedRegions; unsigned char _patternSize;