From b31877075204d7872a969e5a7da8ab34de7b9023 Mon Sep 17 00:00:00 2001 From: rjawor Date: Sat, 14 Dec 2013 15:23:17 +0100 Subject: [PATCH] redesigned project Former-commit-id: d35841126fda627a04a1a16a26b91943401b6fcf --- concordia-console/concordia-console.cpp | 59 +++++----- concordia-runner-large.sh | 9 +- concordia-runner.sh | 6 +- concordia/common/utils.cpp | 21 ++-- concordia/common/utils.hpp | 9 +- concordia/concordia.cpp | 84 ++++++++++++--- concordia/concordia.hpp | 20 +++- concordia/concordia_index.cpp | 137 ++++++++--------------- concordia/concordia_index.hpp | 28 +++-- concordia/hash_generator.cpp | 15 +-- concordia/hash_generator.hpp | 3 +- concordia/index_searcher.cpp | 83 +++----------- concordia/index_searcher.hpp | 20 +--- concordia/t/CMakeLists.txt | 1 - concordia/t/test_concordia.cpp | 58 +++++----- concordia/t/test_concordia_index.cpp | 84 ++++++--------- concordia/t/test_hash_generator.cpp | 36 +++---- concordia/t/test_index_searcher.cpp | 75 ------------- concordia/t/test_utils.cpp | 138 ++++-------------------- 19 files changed, 331 insertions(+), 555 deletions(-) delete mode 100644 concordia/t/test_index_searcher.cpp diff --git a/concordia-console/concordia-console.cpp b/concordia-console/concordia-console.cpp index 4240c3d..ec7c184 100644 --- a/concordia-console/concordia-console.cpp +++ b/concordia-console/concordia-console.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "concordia/concordia.hpp" #include "concordia/common/config.hpp" @@ -20,8 +21,6 @@ int main(int argc, char** argv) { ("help,h", "Display this message") ("config,c", boost::program_options::value(), "Concordia configuration file (required)") - ("generate-index,g", "Generate suffix array based index out of " - "added sentences") ("simple-search,s", boost::program_options::value(), "Pattern to be searched in the index") ("silent,n", "While searching, do not output search results") @@ -48,45 +47,36 @@ int main(int argc, char** argv) { try { + std::cout << "\tInitializing concordia..." << std::endl; + boost::posix_time::ptime time_start = + boost::posix_time::microsec_clock::local_time(); Concordia concordia(configFile); - std::cout << "Welcome to Concordia. Version = " - << concordia.getVersion() << std::endl; - if (cli.count("generate-index")) { - std::cout << "\tGenerating index..." << std::endl; - boost::posix_time::ptime time_start = - boost::posix_time::microsec_clock::local_time(); - concordia.generateIndex(); - boost::posix_time::ptime time_end = - boost::posix_time::microsec_clock::local_time(); - boost::posix_time::time_duration msdiff = time_end - time_start; - std::cout << "\tIndex generated in: " << - msdiff.total_milliseconds() << "ms." << std::endl; - } else if (cli.count("simple-search")) { - std::cout << "\tLoading index..." << std::endl; - boost::posix_time::ptime time_start = - boost::posix_time::microsec_clock::local_time(); - concordia.loadIndex(); - boost::posix_time::ptime time_end = - boost::posix_time::microsec_clock::local_time(); - boost::posix_time::time_duration msdiff = time_end - time_start; - std::cout << "\tIndex loaded in: " << - msdiff.total_milliseconds() << "ms." << std::endl; + boost::posix_time::ptime time_end = + boost::posix_time::microsec_clock::local_time(); + boost::posix_time::time_duration msdiff = time_end - time_start; + std::cout << "\tInitialization (loading index from disk " + << "and regenerating SA) took: " + << msdiff.total_milliseconds() << "ms." << std::endl; + std::cout << "\tWelcome to Concordia. Version = " + << concordia.getVersion() << std::endl; + if (cli.count("simple-search")) { std::string pattern = cli["simple-search"].as(); std::cout << "\tSearching for pattern: \"" << pattern << "\"" << std::endl; time_start = boost::posix_time::microsec_clock::local_time(); - vector result = concordia.simpleSearch(pattern); + boost::shared_ptr > result = + concordia.simpleSearch(pattern); time_end = boost::posix_time::microsec_clock::local_time(); msdiff = time_end - time_start; - std::cout << "\tFound: " << result.size() << " matches. " + std::cout << "\tFound: " << result->size() << " matches. " << "Search took: " << msdiff.total_milliseconds() << "ms." << std::endl; if (!cli.count("silent")) { - for (vector::iterator it = result.begin(); - it != result.end(); ++it) { + for (vector::iterator it = result->begin(); + it != result->end(); ++it) { std::cout << "\t\tfound match on word number: " << *it - << std::endl; + << std::endl; } } } else if (cli.count("read-file")) { @@ -97,15 +87,16 @@ int main(int argc, char** argv) { std::string line; if (text_file.is_open()) { long lineCount = 0; - vector buffer; + boost::shared_ptr > + buffer(new std::vector()); boost::posix_time::ptime timeStart = boost::posix_time::microsec_clock::local_time(); while (getline(text_file, line)) { lineCount++; - buffer.push_back(line); + buffer->push_back(line); if (lineCount % READ_BUFFER_LENGTH == 0) { concordia.addAllSentences(buffer); - buffer.clear(); + buffer->clear(); boost::posix_time::ptime timeEnd = boost::posix_time::microsec_clock::local_time(); boost::posix_time::time_duration msdiff = @@ -119,7 +110,7 @@ int main(int argc, char** argv) { " sentences per second" << std::endl; } } - if (buffer.size() > 0) { + if (buffer->size() > 0) { concordia.addAllSentences(buffer); } text_file.close(); @@ -146,7 +137,7 @@ int main(int argc, char** argv) { return 1; } - std::cout << "Concordia operation completed without errors." + std::cout << "\tConcordia operation completed without errors." << std::endl; } catch(ConcordiaException & e) { std::cerr << "ConcordiaException caught with message: " diff --git a/concordia-runner-large.sh b/concordia-runner-large.sh index 814eeb3..60dcb0c 100755 --- a/concordia-runner-large.sh +++ b/concordia-runner-large.sh @@ -1,15 +1,18 @@ #!/bin/sh -echo "Decompressing test file" +echo "CONCORDIA RUNNER: Decompressing test file" bunzip2 --keep prod/resources/text-files/large.txt.bz2 -echo "Running Concordia" +echo "CONCORDIA RUNNER: Running Concordia" rm prod/resources/temp/* + +echo "CONCORDIA RUNNER: reading from file" ./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -r prod/resources/text-files/large.txt -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -g +echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\"" ./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n +echo "CONCORDIA RUNNER: searching for pattern: \"it is\"" ./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "it is" -n rm prod/resources/text-files/large.txt diff --git a/concordia-runner.sh b/concordia-runner.sh index e4f93bb..63898d6 100755 --- a/concordia-runner.sh +++ b/concordia-runner.sh @@ -1,9 +1,11 @@ #!/bin/sh -echo "Running Concordia" +echo "CONCORDIA RUNNER: Running Concordia" rm prod/resources/temp/* +echo "CONCORDIA RUNNER: reading from file" ./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -r prod/resources/text-files/medium.txt -./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -g +echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\"" ./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "drawn from his own" +echo "CONCORDIA RUNNER: searching for pattern: \"it is\"" ./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "it is" -n diff --git a/concordia/common/utils.cpp b/concordia/common/utils.cpp index 41dfb72..95c8ef0 100644 --- a/concordia/common/utils.cpp +++ b/concordia/common/utils.cpp @@ -18,20 +18,29 @@ INDEX_CHARACTER_TYPE Utils::readIndexCharacter(ifstream & file) { } sauchar_t * Utils::indexVectorToSaucharArray( - vector & input) { - const int kArraySize = input.size()*sizeof(INDEX_CHARACTER_TYPE); + boost::shared_ptr > input) { + const int kArraySize = input->size()*sizeof(INDEX_CHARACTER_TYPE); sauchar_t * patternArray = new sauchar_t[kArraySize]; int pos = 0; - for (vector::iterator it = input.begin(); - it != input.end(); ++it) { - insertCharToSaucharArray(patternArray, *it, pos); + for (vector::iterator it = input->begin(); + it != input->end(); ++it) { + _insertCharToSaucharArray(patternArray, *it, pos); pos += sizeof(INDEX_CHARACTER_TYPE); } return patternArray; } -void Utils::insertCharToSaucharArray(sauchar_t * array, +void Utils::appendCharToSaucharVector( + boost::shared_ptr > vector, + INDEX_CHARACTER_TYPE character) { + sauchar_t * characterArray = reinterpret_cast(&character); + for (int i = 0; i < sizeof(character); i++) { + vector->push_back(characterArray[i]); + } +} + +void Utils::_insertCharToSaucharArray(sauchar_t * array, INDEX_CHARACTER_TYPE character, int pos) { sauchar_t * characterArray = reinterpret_cast(&character); for (int i = pos; i < pos+sizeof(character); i++) { diff --git a/concordia/common/utils.hpp b/concordia/common/utils.hpp index e2636be..96bbf51 100644 --- a/concordia/common/utils.hpp +++ b/concordia/common/utils.hpp @@ -26,11 +26,14 @@ public: static INDEX_CHARACTER_TYPE readIndexCharacter(ifstream & file); static sauchar_t * indexVectorToSaucharArray( - vector & input); + boost::shared_ptr > input); - static void insertCharToSaucharArray(sauchar_t * array, - INDEX_CHARACTER_TYPE character, int pos); + static void appendCharToSaucharVector( + boost::shared_ptr > vector, + INDEX_CHARACTER_TYPE character); private: + static void _insertCharToSaucharArray(sauchar_t * array, + INDEX_CHARACTER_TYPE character, int pos); }; #endif diff --git a/concordia/concordia.cpp b/concordia/concordia.cpp index 95fc05c..550c53d 100644 --- a/concordia/concordia.cpp +++ b/concordia/concordia.cpp @@ -2,6 +2,7 @@ #include "concordia/concordia.hpp" #include "concordia/common/config.hpp" +#include "concordia/common/utils.hpp" // =========================================== @@ -18,10 +19,9 @@ Concordia::Concordia(const std::string & configFilePath) _config = boost::shared_ptr ( new ConcordiaConfig(configFilePath)); _index = boost::shared_ptr( - new ConcordiaIndex(_config->getWordMapFilePath(), - _config->getHashedIndexFilePath(), - _config->getSuffixArrayFilePath())); + new ConcordiaIndex(_config->getHashedIndexFilePath())); _searcher = boost::shared_ptr(new IndexSearcher()); + _initializeIndex(); } Concordia::~Concordia() { @@ -41,29 +41,83 @@ std::string _createLibraryVersion() { return version.str(); } +// Sentences are written to disk and added to T. +// SA is generated on command by different methods. void Concordia::addSentence(const std::string & sentence) throw(ConcordiaException) { - _index->addSentence(sentence); + _index->addSentence(_hashGenerator, _T, sentence); } -void Concordia::addAllSentences(vector & sentences) - throw(ConcordiaException) { - _index->addAllSentences(sentences); +// Sentences are written to disk and added to T. +// SA is generated on command by different methods. +void Concordia::addAllSentences( + boost::shared_ptr > sentences) + throw(ConcordiaException) { + _index->addAllSentences(_hashGenerator, _T, sentences); } -void Concordia::generateIndex() throw(ConcordiaException) { - _index->generateSuffixArray(); +void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) { + if (boost::filesystem::exists(_config->getWordMapFilePath()) + && boost::filesystem::exists(_config->getHashedIndexFilePath())) { + // reading index from files + _T->clear(); + ifstream hashedIndexFile; + hashedIndexFile.open(_config->getHashedIndexFilePath().c_str(), ios::in + | ios::ate | ios::binary); + saidx_t fileSize = hashedIndexFile.tellg(); + if (fileSize > 0) { + hashedIndexFile.seekg(0, ios::beg); + + while (!hashedIndexFile.eof()) { + INDEX_CHARACTER_TYPE character = + Utils::readIndexCharacter(hashedIndexFile); + Utils::appendCharToSaucharVector(_T, character); + } + hashedIndexFile.close(); + + // generating suffix array + _SA = _index->generateSuffixArray(_hashGenerator, _T); + } else { + throw ConcordiaException("Index corrupt: empty hash index file"); + } + } else { + throw ConcordiaException("Index corrupt: missing files"); + } } -void Concordia::loadIndex() throw(ConcordiaException) { - _searcher->loadIndex(_config->getWordMapFilePath(), - _config->getHashedIndexFilePath(), - _config->getSuffixArrayFilePath()); +void Concordia::refreshSAfromRAM() throw(ConcordiaException) { + _SA = _index->generateSuffixArray(_hashGenerator, _T); } -std::vector Concordia::simpleSearch(const std::string & pattern) + +void Concordia::_initializeIndex() throw(ConcordiaException) { + _hashGenerator = boost::shared_ptr( + new HashGenerator(_config->getWordMapFilePath())); + _T = boost::shared_ptr >( + new std::vector); + if (boost::filesystem::exists(_config->getWordMapFilePath()) + && boost::filesystem::exists(_config->getHashedIndexFilePath())) { + loadRAMIndexFromDisk(); + } else if (!boost::filesystem::exists(_config->getWordMapFilePath()) + && !boost::filesystem::exists(_config->getHashedIndexFilePath())) { + // empty index + _SA = boost::shared_ptr >( + new std::vector); + } else { + throw ConcordiaException("Index corrupt: missing files"); + } +} + +boost::shared_ptr > Concordia::simpleSearch( + const string & pattern) throw(ConcordiaException) { - return _searcher->simpleSearch(pattern); + if (_T->size() > 0) { + return _searcher->simpleSearch(_hashGenerator, _T, _SA, pattern); + } else { + boost::shared_ptr > result = + boost::shared_ptr >(new std::vector); + return result; + } } diff --git a/concordia/concordia.hpp b/concordia/concordia.hpp index db2ebda..d2dc5fe 100644 --- a/concordia/concordia.hpp +++ b/concordia/concordia.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "concordia/concordia_config.hpp" #include "concordia/concordia_index.hpp" @@ -35,17 +36,20 @@ public: void addSentence(const std::string & sentence) throw(ConcordiaException); - void addAllSentences(vector & sentences) + void addAllSentences(boost::shared_ptr > sentences) throw(ConcordiaException); - void generateIndex() throw(ConcordiaException); + boost::shared_ptr > simpleSearch( + const std::string & pattern) + throw(ConcordiaException); - void loadIndex() throw(ConcordiaException); + void loadRAMIndexFromDisk() throw(ConcordiaException); - std::vector simpleSearch(const std::string & pattern) - throw(ConcordiaException); + void refreshSAfromRAM() throw(ConcordiaException); private: + void _initializeIndex() throw(ConcordiaException); + static std::string _libraryVersion; boost::shared_ptr _config; @@ -53,6 +57,12 @@ private: boost::shared_ptr _index; boost::shared_ptr _searcher; + + boost::shared_ptr _hashGenerator; + + boost::shared_ptr > _T; + + boost::shared_ptr > _SA; }; #endif diff --git a/concordia/concordia_index.cpp b/concordia/concordia_index.cpp index 4c5cdec..abb335d 100644 --- a/concordia/concordia_index.cpp +++ b/concordia/concordia_index.cpp @@ -4,118 +4,71 @@ #include #include -ConcordiaIndex::ConcordiaIndex(const string & wordMapFilePath, - const string & hashedIndexFilePath, - const string & suffixArrayFilePath) +ConcordiaIndex::ConcordiaIndex(const string & hashedIndexFilePath) throw(ConcordiaException) : - _hashedIndexFilePath(hashedIndexFilePath), - _suffixArrayFilePath(suffixArrayFilePath) { - if (boost::filesystem::exists(wordMapFilePath)) { - if (!boost::filesystem::exists(hashedIndexFilePath)) { - throw ConcordiaException("E01: Word map file exists " - "but hashed index file absent."); - } - } else { // WordMap file does not exist - if (boost::filesystem::exists(hashedIndexFilePath)) { - throw ConcordiaException("E02: Hashed index file exists " - "but word map file absent."); - } - } - _hashGenerator = boost::shared_ptr( - new HashGenerator(wordMapFilePath)); + _hashedIndexFilePath(hashedIndexFilePath) { } ConcordiaIndex::~ConcordiaIndex() { } -void ConcordiaIndex::_serializeWordMap() { - _hashGenerator->serializeWordMap(); -} - -void ConcordiaIndex::generateSuffixArray() { - if (boost::filesystem::exists(_hashedIndexFilePath.c_str())) { - ifstream hashedIndexFile; - hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::in| - ios::ate|ios::binary); - - /* Get the file size. */ - saidx_t n = hashedIndexFile.tellg(); - if (n > 0) { - sauchar_t *T; - saidx_t *SA; - - /* Read n bytes of data. */ - hashedIndexFile.seekg(0, ios::beg); - T = new sauchar_t[n]; - int pos = 0; - while (!hashedIndexFile.eof()) { - INDEX_CHARACTER_TYPE character = - Utils::readIndexCharacter(hashedIndexFile); - Utils::insertCharToSaucharArray(T, character, pos); - pos+=sizeof(character); - } - hashedIndexFile.close(); - - SA = new saidx_t[n]; - - /* Construct the suffix array. */ - if (divsufsort(T, SA, (saidx_t)n) != 0) { - throw ConcordiaException("Error creating suffix array."); - } - - /* Write the suffix array. */ - - ofstream suffixArrayFile; - suffixArrayFile.open(_suffixArrayFilePath.c_str(), - ios::out|ios::binary); - - for (int i = 0; i < n; i++) { - suffixArrayFile.write(reinterpret_cast(&SA[i]), - sizeof(saidx_t)); - } - suffixArrayFile.close(); - - /* Deallocate memory. */ - delete[] T; - delete[] SA; - } else { - throw ConcordiaException("Can not generate suffix array: " - "hashed index file is empty"); - } - } else { - throw ConcordiaException("Can not generate suffix array: " - "hashed index file does not exist"); +boost::shared_ptr > ConcordiaIndex::generateSuffixArray( + boost::shared_ptr hashGenerator, + boost::shared_ptr > T) { + saidx_t * SA_array = new saidx_t[T->size()]; + if (divsufsort(T->data(), SA_array, (saidx_t) T->size()) != 0) { + throw ConcordiaException("Error creating suffix array."); } + + boost::shared_ptr > result = + boost::shared_ptr >(new vector); + for (int i = 0; i < T->size(); i++) { + result->push_back(SA_array[i]); + } + + delete[] SA_array; + return result; } -void ConcordiaIndex::addSentence(const string & sentence) { - vector hash = _hashGenerator->generateHash(sentence); +void ConcordiaIndex::addSentence(boost::shared_ptr hashGenerator, + boost::shared_ptr > T, + const string & sentence) { ofstream hashedIndexFile; hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out| ios::app|ios::binary); - for (vector::iterator it = hash.begin(); - it != hash.end(); ++it) { - Utils::writeIndexCharacter(hashedIndexFile, *it); - } + _addSingleSentence(hashedIndexFile, hashGenerator, T, sentence); hashedIndexFile.close(); - _serializeWordMap(); + hashGenerator->serializeWordMap(); } -void ConcordiaIndex::addAllSentences(vector & sentences) { +void ConcordiaIndex::addAllSentences( + boost::shared_ptr hashGenerator, + boost::shared_ptr > T, + boost::shared_ptr > sentences) { ofstream hashedIndexFile; hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out| ios::app|ios::binary); - for (vector::iterator sent_it = sentences.begin(); - sent_it != sentences.end(); ++sent_it) { + for (vector::iterator sent_it = sentences->begin(); + sent_it != sentences->end(); ++sent_it) { string sentence = *sent_it; - vector hash = - _hashGenerator->generateHash(sentence); - for (vector::iterator it = hash.begin(); - it != hash.end(); ++it) { - Utils::writeIndexCharacter(hashedIndexFile, *it); - } + _addSingleSentence(hashedIndexFile, hashGenerator, T, sentence); } hashedIndexFile.close(); - _serializeWordMap(); + hashGenerator->serializeWordMap(); +} + +void ConcordiaIndex::_addSingleSentence( + ofstream & hashedIndexFile, + boost::shared_ptr hashGenerator, + boost::shared_ptr > T, + const string & sentence) { + boost::shared_ptr > hash + = hashGenerator->generateHash(sentence); + for (vector::iterator it = hash->begin(); + it != hash->end(); ++it) { + INDEX_CHARACTER_TYPE character = *it; + Utils::writeIndexCharacter(hashedIndexFile, character); + Utils::appendCharToSaucharVector(T, character); + } } diff --git a/concordia/concordia_index.hpp b/concordia/concordia_index.hpp index 6d97692..8a47fd6 100644 --- a/concordia/concordia_index.hpp +++ b/concordia/concordia_index.hpp @@ -19,29 +19,35 @@ using namespace std; class ConcordiaIndex { public: - explicit ConcordiaIndex(const string & wordMapFilePath, - const string & hashedIndexFilePath, - const string & suffixArrayFilePath) + explicit ConcordiaIndex(const string & hashedIndexFilePath) throw(ConcordiaException); /*! Destructor. */ virtual ~ConcordiaIndex(); - void addSentence(const string & sentence); + void addSentence( + boost::shared_ptr hashGenerator, + boost::shared_ptr > T, + const string & sentence); - void addAllSentences(vector & sentences); + void addAllSentences( + boost::shared_ptr hashGenerator, + boost::shared_ptr > T, + boost::shared_ptr > sentences); - void generateSuffixArray(); + boost::shared_ptr > generateSuffixArray( + boost::shared_ptr hashGenerator, + boost::shared_ptr > T); private: - void _serializeWordMap(); - - boost::shared_ptr _hashGenerator; + // Add sentence to disk index and update RAM index. + void _addSingleSentence(ofstream & hashedIndexFile, + boost::shared_ptr hashGenerator, + boost::shared_ptr > T, + const string & sentence); string _hashedIndexFilePath; - - string _suffixArrayFilePath; }; #endif diff --git a/concordia/hash_generator.cpp b/concordia/hash_generator.cpp index 593530d..fb545f2 100644 --- a/concordia/hash_generator.cpp +++ b/concordia/hash_generator.cpp @@ -20,17 +20,18 @@ HashGenerator::HashGenerator(const string & wordMapFilePath) HashGenerator::~HashGenerator() { } -vector HashGenerator::generateHash( +boost::shared_ptr > HashGenerator::generateHash( const string & sentence) { - vector result; - vector tokenTexts; - boost::split(tokenTexts, sentence, boost::is_any_of(" ")); + boost::shared_ptr > + result(new vector()); + boost::shared_ptr > tokenTexts(new vector()); + boost::split(*tokenTexts, sentence, boost::is_any_of(" ")); - for (vector::iterator it = tokenTexts.begin(); - it != tokenTexts.end(); ++it) { + for (vector::iterator it = tokenTexts->begin(); + it != tokenTexts->end(); ++it) { string token = *it; INDEX_CHARACTER_TYPE code = _wordMap->getWordCode(token); - result.push_back(code); + result->push_back(code); } return result; diff --git a/concordia/hash_generator.hpp b/concordia/hash_generator.hpp index a0468a2..c458343 100644 --- a/concordia/hash_generator.hpp +++ b/concordia/hash_generator.hpp @@ -26,7 +26,8 @@ public: */ virtual ~HashGenerator(); - vector generateHash(const string & sentence); + boost::shared_ptr > + generateHash(const string & sentence); void serializeWordMap(); diff --git a/concordia/index_searcher.cpp b/concordia/index_searcher.cpp index 4530d53..43be3ca 100644 --- a/concordia/index_searcher.cpp +++ b/concordia/index_searcher.cpp @@ -3,89 +3,38 @@ #include "concordia/common/utils.hpp" #include -IndexSearcher::IndexSearcher(): - _T(NULL), - _SA(NULL), - _n(0) { +IndexSearcher::IndexSearcher() { } IndexSearcher::~IndexSearcher() { } - -void IndexSearcher::loadIndex(const string & wordMapFilepath, - const string & hashedIndexFilepath, - const string & suffixArrayFilepath) - throw(ConcordiaException) { - if (!boost::filesystem::exists(wordMapFilepath)) { - throw ConcordiaException("E06: Failed to open word map " - "file for reading."); - } - - if (!boost::filesystem::exists(hashedIndexFilepath)) { - throw ConcordiaException("E07: Failed to open hashed index file " - "for reading."); - } - - if (!boost::filesystem::exists(suffixArrayFilepath)) { - throw ConcordiaException("E08: Failed to open suffix array file " - "for reading."); - } - - _hashGenerator = boost::shared_ptr( - new HashGenerator(wordMapFilepath)); - - ifstream hashedIndexFile; - hashedIndexFile.open(hashedIndexFilepath.c_str(), ios::in - | ios::ate | ios::binary); - _n = hashedIndexFile.tellg(); - hashedIndexFile.seekg(0, ios::beg); - _T = new sauchar_t[_n]; - int pos = 0; - while (!hashedIndexFile.eof()) { - INDEX_CHARACTER_TYPE character = - Utils::readIndexCharacter(hashedIndexFile); - Utils::insertCharToSaucharArray(_T, character, pos); - pos+=sizeof(character); - } - hashedIndexFile.close(); - - _SA = new saidx_t[_n]; - - ifstream suffixArrayFile; - suffixArrayFile.open(suffixArrayFilepath.c_str(), ios::in | ios::binary); - - saidx_t saidx_buff; - pos = 0; - while (!suffixArrayFile.eof() && pos < _n) { - suffixArrayFile.read(reinterpret_cast(&saidx_buff), - sizeof(saidx_t)); - _SA[pos++] = saidx_buff; - } - suffixArrayFile.close(); -} - -vector IndexSearcher::simpleSearch(const string & pattern) - throw(ConcordiaException) { - vector result; +boost::shared_ptr > IndexSearcher::simpleSearch( + boost::shared_ptr hashGenerator, + boost::shared_ptr > T, + boost::shared_ptr > SA, + const string & pattern) throw(ConcordiaException) { + boost::shared_ptr > result = + boost::shared_ptr >(new vector()); int left; - vector hash = _hashGenerator->generateHash(pattern); - saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE); + boost::shared_ptr > hash = + hashGenerator->generateHash(pattern); + saidx_t patternLength = hash->size()*sizeof(INDEX_CHARACTER_TYPE); sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash); - int size = sa_search(_T, (saidx_t) _n, - (const sauchar_t *) patternArray, patternLength, - _SA, (saidx_t) _n, &left); + int size = sa_search(T->data(), (saidx_t) T->size(), + (const sauchar_t *) patternArray, patternLength, + SA->data(), (saidx_t) T->size(), &left); for (int i = 0; i < size; ++i) { - saidx_t result_pos = _SA[left + i]; + saidx_t result_pos = SA->at(left + i); if (result_pos % sizeof(INDEX_CHARACTER_TYPE) == 0) { // As we are looking for a pattern in an array of higher // resolution than the hashed index file, we might // obtain accidental results exceeding the boundaries // of characters in hashed index. The above check // removes these accidental results. - result.push_back(result_pos / sizeof(INDEX_CHARACTER_TYPE)); + result->push_back(result_pos / sizeof(INDEX_CHARACTER_TYPE)); } } diff --git a/concordia/index_searcher.hpp b/concordia/index_searcher.hpp index a5f7961..f96f33d 100644 --- a/concordia/index_searcher.hpp +++ b/concordia/index_searcher.hpp @@ -25,22 +25,12 @@ public: */ virtual ~IndexSearcher(); - void loadIndex(const string & wordMapFilepath, - const string & hashedIndexFilepath, - const string & suffixArrayFilepath) - throw(ConcordiaException); - - vector simpleSearch(const string & pattern) - throw(ConcordiaException); - + boost::shared_ptr > simpleSearch( + boost::shared_ptr hashGenerator, + boost::shared_ptr > T, + boost::shared_ptr > SA, + const string & pattern) throw(ConcordiaException); private: - boost::shared_ptr _hashGenerator; - - sauchar_t * _T; - - saidx_t * _SA; - - saidx_t _n; }; #endif diff --git a/concordia/t/CMakeLists.txt b/concordia/t/CMakeLists.txt index eda9038..bbb2a9a 100644 --- a/concordia/t/CMakeLists.txt +++ b/concordia/t/CMakeLists.txt @@ -4,7 +4,6 @@ add_library(concordia-tests test_word_map.cpp test_hash_generator.cpp test_concordia_index.cpp - test_index_searcher.cpp test_concordia_config.cpp test_concordia.cpp ) diff --git a/concordia/t/test_concordia.cpp b/concordia/t/test_concordia.cpp index aa844ff..96bcc42 100644 --- a/concordia/t/test_concordia.cpp +++ b/concordia/t/test_concordia.cpp @@ -25,8 +25,7 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 ) concordia.addSentence("Ala ma kota"); concordia.addSentence("Ala ma rysia"); concordia.addSentence("Marysia ma rysia"); - - concordia.generateIndex(); + concordia.refreshSAfromRAM(); /*The test index contains 3 sentences: "Ala ma kota" @@ -50,34 +49,30 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 ) */ - vector expectedResult1; - expectedResult1.push_back(7); - expectedResult1.push_back(4); + boost::shared_ptr > expectedResult1(new std::vector()); + expectedResult1->push_back(7); + expectedResult1->push_back(4); - concordia.loadIndex(); - vector searchResult1 = concordia.simpleSearch("ma rysia"); + boost::shared_ptr > searchResult1 = concordia.simpleSearch("ma rysia"); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX)); - boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_SUFFIX_ARRAY)); - BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1.begin(), searchResult1.end(), - expectedResult1.begin(), expectedResult1.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1->begin(), searchResult1->end(), + expectedResult1->begin(), expectedResult1->end()); } BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 ) { Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); - vector testSentences; - testSentences.push_back("to jest okno"); - testSentences.push_back("czy jest okno otwarte"); - testSentences.push_back("chyba to jest tutaj"); - testSentences.push_back("to jest"); + boost::shared_ptr > testSentences (new vector()); + testSentences->push_back("to jest okno"); + testSentences->push_back("czy jest okno otwarte"); + testSentences->push_back("chyba to jest tutaj"); + testSentences->push_back("to jest"); concordia.addAllSentences(testSentences); - concordia.generateIndex(); - /*The test index contains 4 sentences: "to jest okno" "czy jest okno otwarte" @@ -103,27 +98,26 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 ) */ - vector expectedResult1; - expectedResult1.push_back(11); - expectedResult1.push_back(0); - expectedResult1.push_back(8); + boost::shared_ptr > expectedResult1(new vector()); + expectedResult1->push_back(11); + expectedResult1->push_back(0); + expectedResult1->push_back(8); - vector expectedResult2; - expectedResult2.push_back(1); - expectedResult2.push_back(4); + boost::shared_ptr > expectedResult2(new vector()); + expectedResult2->push_back(1); + expectedResult2->push_back(4); - concordia.loadIndex(); - vector searchResult1 = concordia.simpleSearch("to jest"); - vector searchResult2 = concordia.simpleSearch("jest okno"); + Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); + boost::shared_ptr > searchResult1 = concordia2.simpleSearch("to jest"); + boost::shared_ptr > searchResult2 = concordia2.simpleSearch("jest okno"); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX)); - boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_SUFFIX_ARRAY)); - BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1.begin(), searchResult1.end(), - expectedResult1.begin(), expectedResult1.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(searchResult2.begin(), searchResult2.end(), - expectedResult2.begin(), expectedResult2.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1->begin(), searchResult1->end(), + expectedResult1->begin(), expectedResult1->end()); + BOOST_CHECK_EQUAL_COLLECTIONS(searchResult2->begin(), searchResult2->end(), + expectedResult2->begin(), expectedResult2->end()); } diff --git a/concordia/t/test_concordia_index.cpp b/concordia/t/test_concordia_index.cpp index 29f6728..7e8ed44 100644 --- a/concordia/t/test_concordia_index.cpp +++ b/concordia/t/test_concordia_index.cpp @@ -12,60 +12,42 @@ using namespace std; BOOST_AUTO_TEST_SUITE(concordia_index) -BOOST_AUTO_TEST_CASE( ResourcesExistenceTest1 ) -{ - bool exceptionThrown = false; - string message = ""; - - try { - ConcordiaIndex index(TestResourcesManager::getTestFilePath("concordia-index","mock_word_map.bin"), - TestResourcesManager::getTestFilePath("concordia-index","nonexistent.bin"), - TestResourcesManager::getTestFilePath("concordia-index","test_SA.bin")); - } catch (ConcordiaException & e) { - exceptionThrown = true; - message = e.what(); - } - - BOOST_CHECK(exceptionThrown); - BOOST_CHECK_EQUAL(boost::starts_with(message, "E01"), true); -} - -BOOST_AUTO_TEST_CASE( ResourcesExistenceTest2 ) -{ - bool exceptionThrown = false; - string message = ""; - - try { - ConcordiaIndex index(TestResourcesManager::getTestFilePath("concordia-index","nonexistent.bin"), - TestResourcesManager::getTestFilePath("concordia-index","mock_hash_index.bin"), - TestResourcesManager::getTestFilePath("concordia-index","test_SA.bin")); - } catch (ConcordiaException & e) { - exceptionThrown = true; - message = e.what(); - } - - BOOST_CHECK(exceptionThrown); - BOOST_CHECK_EQUAL(boost::starts_with(message, "E02"), true); -} - BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest ) { - ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"), - TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"), - TestResourcesManager::getTestFilePath("temp","test_SA.bin")); - index.addSentence("Ala ma kota"); - index.addSentence("Ala ma rysia"); - index.addSentence("Marysia ma rysia"); - - index.generateSuffixArray(); - - BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"))); - BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"))); - BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_SA.bin"))); + boost::shared_ptr hashGenerator (new HashGenerator("nonexistent")); - boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_word_map.bin")); - boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin")); - boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_SA.bin")); + ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin")); + boost::shared_ptr > T = boost::shared_ptr >(new vector()); + // Test hashed index: + // n: 0 1 2 3 4 5 6 7 8 + // T[n]: 0 1 2 0 1 3 4 1 3 + T->push_back(0); + T->push_back(1); + T->push_back(2); + T->push_back(0); + T->push_back(1); + T->push_back(3); + T->push_back(4); + T->push_back(1); + T->push_back(3); + + // Test suffix array: + // n: 0 1 2 3 4 5 6 7 8 + //SA[n]: 0 3 1 7 4 2 8 5 6 + + boost::shared_ptr > SA = index.generateSuffixArray(hashGenerator, T); + + boost::shared_ptr > expectedSA = boost::shared_ptr >(new vector()); + expectedSA->push_back(0); + expectedSA->push_back(3); + expectedSA->push_back(1); + expectedSA->push_back(7); + expectedSA->push_back(4); + expectedSA->push_back(2); + expectedSA->push_back(8); + expectedSA->push_back(5); + expectedSA->push_back(6); + BOOST_CHECK_EQUAL_COLLECTIONS(SA->begin(), SA->end(), expectedSA->begin(), expectedSA->end()); } BOOST_AUTO_TEST_SUITE_END() diff --git a/concordia/t/test_hash_generator.cpp b/concordia/t/test_hash_generator.cpp index 9b09882..ccabd86 100644 --- a/concordia/t/test_hash_generator.cpp +++ b/concordia/t/test_hash_generator.cpp @@ -19,13 +19,13 @@ BOOST_AUTO_TEST_CASE( SimpleHashTest ) HashGenerator hashGenerator = HashGenerator(TEST_WORD_MAP_PATH); - vector hash = hashGenerator.generateHash("Ala ma kota"); - vector expected; - expected.push_back(0); - expected.push_back(1); - expected.push_back(2); + boost::shared_ptr > hash = hashGenerator.generateHash("Ala ma kota"); + boost::shared_ptr > expected(new vector()); + expected->push_back(0); + expected->push_back(1); + expected->push_back(2); - BOOST_CHECK_EQUAL_COLLECTIONS(hash.begin(), hash.end(), expected.begin(), expected.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(hash->begin(), hash->end(), expected->begin(), expected->end()); } BOOST_AUTO_TEST_CASE( HashSerializationTest ) @@ -35,22 +35,22 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest ) } HashGenerator hashGenerator1 = HashGenerator(TEST_WORD_MAP_PATH); - vector hash1 = hashGenerator1.generateHash("Ala ma kota"); - vector expected1; - expected1.push_back(0); - expected1.push_back(1); - expected1.push_back(2); - BOOST_CHECK_EQUAL_COLLECTIONS(hash1.begin(), hash1.end(), expected1.begin(), expected1.end()); + boost::shared_ptr > hash1 = hashGenerator1.generateHash("Ala ma kota"); + boost::shared_ptr > expected1(new vector()); + expected1->push_back(0); + expected1->push_back(1); + expected1->push_back(2); + BOOST_CHECK_EQUAL_COLLECTIONS(hash1->begin(), hash1->end(), expected1->begin(), expected1->end()); hashGenerator1.serializeWordMap(); HashGenerator hashGenerator2 = HashGenerator(TEST_WORD_MAP_PATH); - vector hash2 = hashGenerator2.generateHash("Ala ma psa"); - vector expected2; - expected2.push_back(0); - expected2.push_back(1); - expected2.push_back(3); - BOOST_CHECK_EQUAL_COLLECTIONS(hash2.begin(), hash2.end(), expected2.begin(), expected2.end()); + boost::shared_ptr > hash2 = hashGenerator2.generateHash("Ala ma psa"); + boost::shared_ptr > expected2(new vector()); + expected2->push_back(0); + expected2->push_back(1); + expected2->push_back(3); + BOOST_CHECK_EQUAL_COLLECTIONS(hash2->begin(), hash2->end(), expected2->begin(), expected2->end()); boost::filesystem::remove(TEST_WORD_MAP_PATH); } diff --git a/concordia/t/test_index_searcher.cpp b/concordia/t/test_index_searcher.cpp deleted file mode 100644 index 957aee3..0000000 --- a/concordia/t/test_index_searcher.cpp +++ /dev/null @@ -1,75 +0,0 @@ -#include "tests/unit-tests/unit_tests_globals.hpp" - -#include "concordia/index_searcher.hpp" -#include "concordia/concordia_index.hpp" -#include "concordia/concordia_exception.hpp" -#include "tests/common/test_resources_manager.hpp" - -#include -#include - -using namespace std; - -BOOST_AUTO_TEST_SUITE(index_searcher) - - -BOOST_AUTO_TEST_CASE( SimpleSearchTest ) -{ - -ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"), - TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"), - TestResourcesManager::getTestFilePath("temp","test_SA.bin")); - index.addSentence("Ala ma kota"); - index.addSentence("Ala ma rysia"); - index.addSentence("Marysia ma rysia"); - - index.generateSuffixArray(); - - BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"))); - BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"))); - BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_SA.bin"))); - - IndexSearcher searcher; - searcher.loadIndex(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"), - TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"), - TestResourcesManager::getTestFilePath("temp","test_SA.bin")); - - /*The test index contains 3 sentences: - "Ala ma kota" - "Ala ma rysia" - "Marysia ma rysia" - - Test word map: - Ala -> 0 - ma -> 1 - kota -> 2 - rysia -> 3 - Marysia -> 4 - - Test hashed index: - n: 0 1 2 3 4 5 6 7 8 - T[n]: 0 1 2 0 1 3 4 1 3 - - Test suffix array: - n: 0 1 2 3 4 5 6 7 8 - SA[n]: 0 3 1 7 4 2 8 5 6 - - */ - - vector expectedResult1; - expectedResult1.push_back(7); - expectedResult1.push_back(4); - - vector searchResult1 = searcher.simpleSearch("ma rysia"); - - boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_word_map.bin")); - boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin")); - boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_SA.bin")); - - BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1.begin(), searchResult1.end(), - expectedResult1.begin(), expectedResult1.end()); - - -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/concordia/t/test_utils.cpp b/concordia/t/test_utils.cpp index bbde206..0ad09d4 100644 --- a/concordia/t/test_utils.cpp +++ b/concordia/t/test_utils.cpp @@ -11,7 +11,7 @@ using namespace std; BOOST_AUTO_TEST_SUITE(utils) -BOOST_AUTO_TEST_CASE( UtilsTest1 ) +BOOST_AUTO_TEST_CASE( WriteReadSingleCharacter ) { ofstream testFileOutput; testFileOutput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(), @@ -29,133 +29,37 @@ BOOST_AUTO_TEST_CASE( UtilsTest1 ) boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","temp_file.bin")); } -BOOST_AUTO_TEST_CASE( UtilsTest2 ) +BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray ) { - ofstream testFileOutput; - testFileOutput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(), - ios::out|ios::binary); - Utils::writeIndexCharacter(testFileOutput,123456789); //in hex: 75BCD15 - //in memory: 15 cd 5b 07 - // in DEC: 21 205 91 7 - - Utils::writeIndexCharacter(testFileOutput,987654321); //in hex: 3ADE68B1 - //in memory: b1 68 de 3a - // in DEC: 177 104 222 58 - testFileOutput.close(); - - sauchar_t * dataArray = new sauchar_t[8]; - ifstream testFileInput; - testFileInput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),ios::in|ios::binary); - - INDEX_CHARACTER_TYPE retrievedCharacter1 = Utils::readIndexCharacter(testFileInput); - BOOST_CHECK_EQUAL(retrievedCharacter1, 123456789); - Utils::insertCharToSaucharArray(dataArray, retrievedCharacter1, 0); + boost::shared_ptr > hash(new vector()); + hash->push_back(123456789); // in hex: 75BCD15 + // in memory: 15 cd 5b 07 + // in memory DEC: 21 205 91 7 - INDEX_CHARACTER_TYPE retrievedCharacter2 = Utils::readIndexCharacter(testFileInput); - BOOST_CHECK_EQUAL(retrievedCharacter2, 987654321); - Utils::insertCharToSaucharArray(dataArray, retrievedCharacter2, 4); - - testFileInput.close(); - - vector expected; - expected.push_back(21); - expected.push_back(205); - expected.push_back(91); - expected.push_back(7); - expected.push_back(177); - expected.push_back(104); - expected.push_back(222); - expected.push_back(58); - - vector result; - for (int i=0;i<8;i++) { - INDEX_CHARACTER_TYPE a = dataArray[i]; - result.push_back(a); - } - - boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","temp_file.bin")); - - BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end()); -} - -BOOST_AUTO_TEST_CASE( UtilsTest3 ) -{ - vector hash; - hash.push_back(123456789); - hash.push_back(987654321); - + hash->push_back(987654321); // in hex: 3ADE68B1 + // in memory: b1 68 de 3a + // in memory DEC: 177 104 222 58 sauchar_t * dataArray = Utils::indexVectorToSaucharArray(hash); - vector result; + boost::shared_ptr > result(new vector()); for (int i=0;i<8;i++) { INDEX_CHARACTER_TYPE a = dataArray[i]; - result.push_back(a); + result->push_back(a); } - vector expected; - expected.push_back(21); - expected.push_back(205); - expected.push_back(91); - expected.push_back(7); - expected.push_back(177); - expected.push_back(104); - expected.push_back(222); - expected.push_back(58); + boost::shared_ptr > expected(new vector()); + expected->push_back(21); + expected->push_back(205); + expected->push_back(91); + expected->push_back(7); + expected->push_back(177); + expected->push_back(104); + expected->push_back(222); + expected->push_back(58); - BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end()); } -/* -BOOST_AUTO_TEST_CASE( UtilsTest4 ) -{ - ofstream testFileOutput; - testFileOutput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(), - ios::out|ios::binary); - Utils::writeIndexCharacter(testFileOutput,123456789); //in hex: 75BCD15 - //in memory: 15 cd 5b 07 - // in DEC: 21 205 91 7 - - Utils::writeIndexCharacter(testFileOutput,987654321); //in hex: 3ADE68B1 - //in memory: b1 68 de 3a - // in DEC: 177 104 222 58 - testFileOutput.close(); - - sauchar_t * dataArray = Utils::readIndexFromFile( - ifstream testFileInput; - testFileInput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),ios::in|ios::binary); - - INDEX_CHARACTER_TYPE retrievedCharacter1 = Utils::readIndexCharacter(testFileInput); - BOOST_CHECK_EQUAL(retrievedCharacter1, 123456789); - Utils::insertCharToSaucharArray(dataArray, retrievedCharacter1, 0); - - INDEX_CHARACTER_TYPE retrievedCharacter2 = Utils::readIndexCharacter(testFileInput); - BOOST_CHECK_EQUAL(retrievedCharacter2, 987654321); - Utils::insertCharToSaucharArray(dataArray, retrievedCharacter2, 4); - - testFileInput.close(); - - vector expected; - expected.push_back(21); - expected.push_back(205); - expected.push_back(91); - expected.push_back(7); - expected.push_back(177); - expected.push_back(104); - expected.push_back(222); - expected.push_back(58); - - vector result; - for (int i=0;i<8;i++) { - INDEX_CHARACTER_TYPE a = dataArray[i]; - result.push_back(a); - } - - boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","temp_file.bin")); - - BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end()); -} -*/ - BOOST_AUTO_TEST_SUITE_END()