redesigned project
Former-commit-id: d35841126fda627a04a1a16a26b91943401b6fcf
This commit is contained in:
parent
9ff5f05205
commit
b318770752
@ -3,6 +3,7 @@
|
|||||||
#include <boost/program_options.hpp>
|
#include <boost/program_options.hpp>
|
||||||
#include <boost/algorithm/string.hpp>
|
#include <boost/algorithm/string.hpp>
|
||||||
#include <boost/date_time/posix_time/posix_time.hpp>
|
#include <boost/date_time/posix_time/posix_time.hpp>
|
||||||
|
#include <boost/shared_ptr.hpp>
|
||||||
|
|
||||||
#include "concordia/concordia.hpp"
|
#include "concordia/concordia.hpp"
|
||||||
#include "concordia/common/config.hpp"
|
#include "concordia/common/config.hpp"
|
||||||
@ -20,8 +21,6 @@ int main(int argc, char** argv) {
|
|||||||
("help,h", "Display this message")
|
("help,h", "Display this message")
|
||||||
("config,c", boost::program_options::value<std::string>(),
|
("config,c", boost::program_options::value<std::string>(),
|
||||||
"Concordia configuration file (required)")
|
"Concordia configuration file (required)")
|
||||||
("generate-index,g", "Generate suffix array based index out of "
|
|
||||||
"added sentences")
|
|
||||||
("simple-search,s", boost::program_options::value<std::string>(),
|
("simple-search,s", boost::program_options::value<std::string>(),
|
||||||
"Pattern to be searched in the index")
|
"Pattern to be searched in the index")
|
||||||
("silent,n", "While searching, do not output search results")
|
("silent,n", "While searching, do not output search results")
|
||||||
@ -48,45 +47,36 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
std::cout << "\tInitializing concordia..." << std::endl;
|
||||||
|
boost::posix_time::ptime time_start =
|
||||||
|
boost::posix_time::microsec_clock::local_time();
|
||||||
Concordia concordia(configFile);
|
Concordia concordia(configFile);
|
||||||
std::cout << "Welcome to Concordia. Version = "
|
boost::posix_time::ptime time_end =
|
||||||
<< concordia.getVersion() << std::endl;
|
boost::posix_time::microsec_clock::local_time();
|
||||||
if (cli.count("generate-index")) {
|
boost::posix_time::time_duration msdiff = time_end - time_start;
|
||||||
std::cout << "\tGenerating index..." << std::endl;
|
std::cout << "\tInitialization (loading index from disk "
|
||||||
boost::posix_time::ptime time_start =
|
<< "and regenerating SA) took: "
|
||||||
boost::posix_time::microsec_clock::local_time();
|
<< msdiff.total_milliseconds() << "ms." << std::endl;
|
||||||
concordia.generateIndex();
|
|
||||||
boost::posix_time::ptime time_end =
|
|
||||||
boost::posix_time::microsec_clock::local_time();
|
|
||||||
boost::posix_time::time_duration msdiff = time_end - time_start;
|
|
||||||
std::cout << "\tIndex generated in: " <<
|
|
||||||
msdiff.total_milliseconds() << "ms." << std::endl;
|
|
||||||
} else if (cli.count("simple-search")) {
|
|
||||||
std::cout << "\tLoading index..." << std::endl;
|
|
||||||
boost::posix_time::ptime time_start =
|
|
||||||
boost::posix_time::microsec_clock::local_time();
|
|
||||||
concordia.loadIndex();
|
|
||||||
boost::posix_time::ptime time_end =
|
|
||||||
boost::posix_time::microsec_clock::local_time();
|
|
||||||
boost::posix_time::time_duration msdiff = time_end - time_start;
|
|
||||||
std::cout << "\tIndex loaded in: " <<
|
|
||||||
msdiff.total_milliseconds() << "ms." << std::endl;
|
|
||||||
|
|
||||||
|
std::cout << "\tWelcome to Concordia. Version = "
|
||||||
|
<< concordia.getVersion() << std::endl;
|
||||||
|
if (cli.count("simple-search")) {
|
||||||
std::string pattern = cli["simple-search"].as<std::string>();
|
std::string pattern = cli["simple-search"].as<std::string>();
|
||||||
std::cout << "\tSearching for pattern: \"" << pattern <<
|
std::cout << "\tSearching for pattern: \"" << pattern <<
|
||||||
"\"" << std::endl;
|
"\"" << std::endl;
|
||||||
time_start = boost::posix_time::microsec_clock::local_time();
|
time_start = boost::posix_time::microsec_clock::local_time();
|
||||||
vector<saidx_t> result = concordia.simpleSearch(pattern);
|
boost::shared_ptr<vector<saidx_t> > result =
|
||||||
|
concordia.simpleSearch(pattern);
|
||||||
time_end = boost::posix_time::microsec_clock::local_time();
|
time_end = boost::posix_time::microsec_clock::local_time();
|
||||||
msdiff = time_end - time_start;
|
msdiff = time_end - time_start;
|
||||||
std::cout << "\tFound: " << result.size() << " matches. "
|
std::cout << "\tFound: " << result->size() << " matches. "
|
||||||
<< "Search took: " <<
|
<< "Search took: " <<
|
||||||
msdiff.total_milliseconds() << "ms." << std::endl;
|
msdiff.total_milliseconds() << "ms." << std::endl;
|
||||||
if (!cli.count("silent")) {
|
if (!cli.count("silent")) {
|
||||||
for (vector<saidx_t>::iterator it = result.begin();
|
for (vector<saidx_t>::iterator it = result->begin();
|
||||||
it != result.end(); ++it) {
|
it != result->end(); ++it) {
|
||||||
std::cout << "\t\tfound match on word number: " << *it
|
std::cout << "\t\tfound match on word number: " << *it
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (cli.count("read-file")) {
|
} else if (cli.count("read-file")) {
|
||||||
@ -97,15 +87,16 @@ int main(int argc, char** argv) {
|
|||||||
std::string line;
|
std::string line;
|
||||||
if (text_file.is_open()) {
|
if (text_file.is_open()) {
|
||||||
long lineCount = 0;
|
long lineCount = 0;
|
||||||
vector<std::string> buffer;
|
boost::shared_ptr<std::vector<std::string> >
|
||||||
|
buffer(new std::vector<std::string>());
|
||||||
boost::posix_time::ptime timeStart =
|
boost::posix_time::ptime timeStart =
|
||||||
boost::posix_time::microsec_clock::local_time();
|
boost::posix_time::microsec_clock::local_time();
|
||||||
while (getline(text_file, line)) {
|
while (getline(text_file, line)) {
|
||||||
lineCount++;
|
lineCount++;
|
||||||
buffer.push_back(line);
|
buffer->push_back(line);
|
||||||
if (lineCount % READ_BUFFER_LENGTH == 0) {
|
if (lineCount % READ_BUFFER_LENGTH == 0) {
|
||||||
concordia.addAllSentences(buffer);
|
concordia.addAllSentences(buffer);
|
||||||
buffer.clear();
|
buffer->clear();
|
||||||
boost::posix_time::ptime timeEnd =
|
boost::posix_time::ptime timeEnd =
|
||||||
boost::posix_time::microsec_clock::local_time();
|
boost::posix_time::microsec_clock::local_time();
|
||||||
boost::posix_time::time_duration msdiff =
|
boost::posix_time::time_duration msdiff =
|
||||||
@ -119,7 +110,7 @@ int main(int argc, char** argv) {
|
|||||||
" sentences per second" << std::endl;
|
" sentences per second" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (buffer.size() > 0) {
|
if (buffer->size() > 0) {
|
||||||
concordia.addAllSentences(buffer);
|
concordia.addAllSentences(buffer);
|
||||||
}
|
}
|
||||||
text_file.close();
|
text_file.close();
|
||||||
@ -146,7 +137,7 @@ int main(int argc, char** argv) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "Concordia operation completed without errors."
|
std::cout << "\tConcordia operation completed without errors."
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
} catch(ConcordiaException & e) {
|
} catch(ConcordiaException & e) {
|
||||||
std::cerr << "ConcordiaException caught with message: "
|
std::cerr << "ConcordiaException caught with message: "
|
||||||
|
@ -1,15 +1,18 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
echo "Decompressing test file"
|
echo "CONCORDIA RUNNER: Decompressing test file"
|
||||||
|
|
||||||
bunzip2 --keep prod/resources/text-files/large.txt.bz2
|
bunzip2 --keep prod/resources/text-files/large.txt.bz2
|
||||||
|
|
||||||
echo "Running Concordia"
|
echo "CONCORDIA RUNNER: Running Concordia"
|
||||||
|
|
||||||
rm prod/resources/temp/*
|
rm prod/resources/temp/*
|
||||||
|
|
||||||
|
echo "CONCORDIA RUNNER: reading from file"
|
||||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -r prod/resources/text-files/large.txt
|
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -r prod/resources/text-files/large.txt
|
||||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -g
|
echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\""
|
||||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n
|
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n
|
||||||
|
echo "CONCORDIA RUNNER: searching for pattern: \"it is\""
|
||||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "it is" -n
|
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "it is" -n
|
||||||
|
|
||||||
rm prod/resources/text-files/large.txt
|
rm prod/resources/text-files/large.txt
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
echo "Running Concordia"
|
echo "CONCORDIA RUNNER: Running Concordia"
|
||||||
|
|
||||||
rm prod/resources/temp/*
|
rm prod/resources/temp/*
|
||||||
|
echo "CONCORDIA RUNNER: reading from file"
|
||||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -r prod/resources/text-files/medium.txt
|
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -r prod/resources/text-files/medium.txt
|
||||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -g
|
echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\""
|
||||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "drawn from his own"
|
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "drawn from his own"
|
||||||
|
echo "CONCORDIA RUNNER: searching for pattern: \"it is\""
|
||||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "it is" -n
|
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "it is" -n
|
||||||
|
@ -18,20 +18,29 @@ INDEX_CHARACTER_TYPE Utils::readIndexCharacter(ifstream & file) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sauchar_t * Utils::indexVectorToSaucharArray(
|
sauchar_t * Utils::indexVectorToSaucharArray(
|
||||||
vector<INDEX_CHARACTER_TYPE> & input) {
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input) {
|
||||||
const int kArraySize = input.size()*sizeof(INDEX_CHARACTER_TYPE);
|
const int kArraySize = input->size()*sizeof(INDEX_CHARACTER_TYPE);
|
||||||
sauchar_t * patternArray =
|
sauchar_t * patternArray =
|
||||||
new sauchar_t[kArraySize];
|
new sauchar_t[kArraySize];
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
for (vector<INDEX_CHARACTER_TYPE>::iterator it = input.begin();
|
for (vector<INDEX_CHARACTER_TYPE>::iterator it = input->begin();
|
||||||
it != input.end(); ++it) {
|
it != input->end(); ++it) {
|
||||||
insertCharToSaucharArray(patternArray, *it, pos);
|
_insertCharToSaucharArray(patternArray, *it, pos);
|
||||||
pos += sizeof(INDEX_CHARACTER_TYPE);
|
pos += sizeof(INDEX_CHARACTER_TYPE);
|
||||||
}
|
}
|
||||||
return patternArray;
|
return patternArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Utils::insertCharToSaucharArray(sauchar_t * array,
|
void Utils::appendCharToSaucharVector(
|
||||||
|
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
||||||
|
INDEX_CHARACTER_TYPE character) {
|
||||||
|
sauchar_t * characterArray = reinterpret_cast<sauchar_t *>(&character);
|
||||||
|
for (int i = 0; i < sizeof(character); i++) {
|
||||||
|
vector->push_back(characterArray[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Utils::_insertCharToSaucharArray(sauchar_t * array,
|
||||||
INDEX_CHARACTER_TYPE character, int pos) {
|
INDEX_CHARACTER_TYPE character, int pos) {
|
||||||
sauchar_t * characterArray = reinterpret_cast<sauchar_t *>(&character);
|
sauchar_t * characterArray = reinterpret_cast<sauchar_t *>(&character);
|
||||||
for (int i = pos; i < pos+sizeof(character); i++) {
|
for (int i = pos; i < pos+sizeof(character); i++) {
|
||||||
|
@ -26,11 +26,14 @@ public:
|
|||||||
static INDEX_CHARACTER_TYPE readIndexCharacter(ifstream & file);
|
static INDEX_CHARACTER_TYPE readIndexCharacter(ifstream & file);
|
||||||
|
|
||||||
static sauchar_t * indexVectorToSaucharArray(
|
static sauchar_t * indexVectorToSaucharArray(
|
||||||
vector<INDEX_CHARACTER_TYPE> & input);
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > input);
|
||||||
|
|
||||||
static void insertCharToSaucharArray(sauchar_t * array,
|
static void appendCharToSaucharVector(
|
||||||
INDEX_CHARACTER_TYPE character, int pos);
|
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
||||||
|
INDEX_CHARACTER_TYPE character);
|
||||||
private:
|
private:
|
||||||
|
static void _insertCharToSaucharArray(sauchar_t * array,
|
||||||
|
INDEX_CHARACTER_TYPE character, int pos);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include "concordia/concordia.hpp"
|
#include "concordia/concordia.hpp"
|
||||||
#include "concordia/common/config.hpp"
|
#include "concordia/common/config.hpp"
|
||||||
|
#include "concordia/common/utils.hpp"
|
||||||
|
|
||||||
// ===========================================
|
// ===========================================
|
||||||
|
|
||||||
@ -18,10 +19,9 @@ Concordia::Concordia(const std::string & configFilePath)
|
|||||||
_config = boost::shared_ptr<ConcordiaConfig> (
|
_config = boost::shared_ptr<ConcordiaConfig> (
|
||||||
new ConcordiaConfig(configFilePath));
|
new ConcordiaConfig(configFilePath));
|
||||||
_index = boost::shared_ptr<ConcordiaIndex>(
|
_index = boost::shared_ptr<ConcordiaIndex>(
|
||||||
new ConcordiaIndex(_config->getWordMapFilePath(),
|
new ConcordiaIndex(_config->getHashedIndexFilePath()));
|
||||||
_config->getHashedIndexFilePath(),
|
|
||||||
_config->getSuffixArrayFilePath()));
|
|
||||||
_searcher = boost::shared_ptr<IndexSearcher>(new IndexSearcher());
|
_searcher = boost::shared_ptr<IndexSearcher>(new IndexSearcher());
|
||||||
|
_initializeIndex();
|
||||||
}
|
}
|
||||||
|
|
||||||
Concordia::~Concordia() {
|
Concordia::~Concordia() {
|
||||||
@ -41,29 +41,83 @@ std::string _createLibraryVersion() {
|
|||||||
return version.str();
|
return version.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sentences are written to disk and added to T.
|
||||||
|
// SA is generated on command by different methods.
|
||||||
void Concordia::addSentence(const std::string & sentence)
|
void Concordia::addSentence(const std::string & sentence)
|
||||||
throw(ConcordiaException) {
|
throw(ConcordiaException) {
|
||||||
_index->addSentence(sentence);
|
_index->addSentence(_hashGenerator, _T, sentence);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Concordia::addAllSentences(vector<std::string> & sentences)
|
// Sentences are written to disk and added to T.
|
||||||
throw(ConcordiaException) {
|
// SA is generated on command by different methods.
|
||||||
_index->addAllSentences(sentences);
|
void Concordia::addAllSentences(
|
||||||
|
boost::shared_ptr<std::vector<std::string> > sentences)
|
||||||
|
throw(ConcordiaException) {
|
||||||
|
_index->addAllSentences(_hashGenerator, _T, sentences);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Concordia::generateIndex() throw(ConcordiaException) {
|
void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) {
|
||||||
_index->generateSuffixArray();
|
if (boost::filesystem::exists(_config->getWordMapFilePath())
|
||||||
|
&& boost::filesystem::exists(_config->getHashedIndexFilePath())) {
|
||||||
|
// reading index from files
|
||||||
|
_T->clear();
|
||||||
|
ifstream hashedIndexFile;
|
||||||
|
hashedIndexFile.open(_config->getHashedIndexFilePath().c_str(), ios::in
|
||||||
|
| ios::ate | ios::binary);
|
||||||
|
saidx_t fileSize = hashedIndexFile.tellg();
|
||||||
|
if (fileSize > 0) {
|
||||||
|
hashedIndexFile.seekg(0, ios::beg);
|
||||||
|
|
||||||
|
while (!hashedIndexFile.eof()) {
|
||||||
|
INDEX_CHARACTER_TYPE character =
|
||||||
|
Utils::readIndexCharacter(hashedIndexFile);
|
||||||
|
Utils::appendCharToSaucharVector(_T, character);
|
||||||
|
}
|
||||||
|
hashedIndexFile.close();
|
||||||
|
|
||||||
|
// generating suffix array
|
||||||
|
_SA = _index->generateSuffixArray(_hashGenerator, _T);
|
||||||
|
} else {
|
||||||
|
throw ConcordiaException("Index corrupt: empty hash index file");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw ConcordiaException("Index corrupt: missing files");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Concordia::loadIndex() throw(ConcordiaException) {
|
void Concordia::refreshSAfromRAM() throw(ConcordiaException) {
|
||||||
_searcher->loadIndex(_config->getWordMapFilePath(),
|
_SA = _index->generateSuffixArray(_hashGenerator, _T);
|
||||||
_config->getHashedIndexFilePath(),
|
|
||||||
_config->getSuffixArrayFilePath());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<saidx_t> Concordia::simpleSearch(const std::string & pattern)
|
|
||||||
|
void Concordia::_initializeIndex() throw(ConcordiaException) {
|
||||||
|
_hashGenerator = boost::shared_ptr<HashGenerator>(
|
||||||
|
new HashGenerator(_config->getWordMapFilePath()));
|
||||||
|
_T = boost::shared_ptr<std::vector<sauchar_t> >(
|
||||||
|
new std::vector<sauchar_t>);
|
||||||
|
if (boost::filesystem::exists(_config->getWordMapFilePath())
|
||||||
|
&& boost::filesystem::exists(_config->getHashedIndexFilePath())) {
|
||||||
|
loadRAMIndexFromDisk();
|
||||||
|
} else if (!boost::filesystem::exists(_config->getWordMapFilePath())
|
||||||
|
&& !boost::filesystem::exists(_config->getHashedIndexFilePath())) {
|
||||||
|
// empty index
|
||||||
|
_SA = boost::shared_ptr<std::vector<saidx_t> >(
|
||||||
|
new std::vector<saidx_t>);
|
||||||
|
} else {
|
||||||
|
throw ConcordiaException("Index corrupt: missing files");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boost::shared_ptr<std::vector<saidx_t> > Concordia::simpleSearch(
|
||||||
|
const string & pattern)
|
||||||
throw(ConcordiaException) {
|
throw(ConcordiaException) {
|
||||||
return _searcher->simpleSearch(pattern);
|
if (_T->size() > 0) {
|
||||||
|
return _searcher->simpleSearch(_hashGenerator, _T, _SA, pattern);
|
||||||
|
} else {
|
||||||
|
boost::shared_ptr<std::vector<saidx_t> > result =
|
||||||
|
boost::shared_ptr<std::vector<saidx_t> >(new std::vector<saidx_t>);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <boost/shared_ptr.hpp>
|
#include <boost/shared_ptr.hpp>
|
||||||
|
#include <boost/filesystem.hpp>
|
||||||
|
|
||||||
#include "concordia/concordia_config.hpp"
|
#include "concordia/concordia_config.hpp"
|
||||||
#include "concordia/concordia_index.hpp"
|
#include "concordia/concordia_index.hpp"
|
||||||
@ -35,17 +36,20 @@ public:
|
|||||||
|
|
||||||
void addSentence(const std::string & sentence) throw(ConcordiaException);
|
void addSentence(const std::string & sentence) throw(ConcordiaException);
|
||||||
|
|
||||||
void addAllSentences(vector<std::string> & sentences)
|
void addAllSentences(boost::shared_ptr<std::vector<std::string> > sentences)
|
||||||
throw(ConcordiaException);
|
throw(ConcordiaException);
|
||||||
|
|
||||||
void generateIndex() throw(ConcordiaException);
|
boost::shared_ptr<std::vector<saidx_t> > simpleSearch(
|
||||||
|
const std::string & pattern)
|
||||||
|
throw(ConcordiaException);
|
||||||
|
|
||||||
void loadIndex() throw(ConcordiaException);
|
void loadRAMIndexFromDisk() throw(ConcordiaException);
|
||||||
|
|
||||||
std::vector<saidx_t> simpleSearch(const std::string & pattern)
|
void refreshSAfromRAM() throw(ConcordiaException);
|
||||||
throw(ConcordiaException);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void _initializeIndex() throw(ConcordiaException);
|
||||||
|
|
||||||
static std::string _libraryVersion;
|
static std::string _libraryVersion;
|
||||||
|
|
||||||
boost::shared_ptr<ConcordiaConfig> _config;
|
boost::shared_ptr<ConcordiaConfig> _config;
|
||||||
@ -53,6 +57,12 @@ private:
|
|||||||
boost::shared_ptr<ConcordiaIndex> _index;
|
boost::shared_ptr<ConcordiaIndex> _index;
|
||||||
|
|
||||||
boost::shared_ptr<IndexSearcher> _searcher;
|
boost::shared_ptr<IndexSearcher> _searcher;
|
||||||
|
|
||||||
|
boost::shared_ptr<HashGenerator> _hashGenerator;
|
||||||
|
|
||||||
|
boost::shared_ptr<std::vector<sauchar_t> > _T;
|
||||||
|
|
||||||
|
boost::shared_ptr<std::vector<saidx_t> > _SA;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -4,118 +4,71 @@
|
|||||||
#include <boost/filesystem.hpp>
|
#include <boost/filesystem.hpp>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
ConcordiaIndex::ConcordiaIndex(const string & wordMapFilePath,
|
ConcordiaIndex::ConcordiaIndex(const string & hashedIndexFilePath)
|
||||||
const string & hashedIndexFilePath,
|
|
||||||
const string & suffixArrayFilePath)
|
|
||||||
throw(ConcordiaException) :
|
throw(ConcordiaException) :
|
||||||
_hashedIndexFilePath(hashedIndexFilePath),
|
_hashedIndexFilePath(hashedIndexFilePath) {
|
||||||
_suffixArrayFilePath(suffixArrayFilePath) {
|
|
||||||
if (boost::filesystem::exists(wordMapFilePath)) {
|
|
||||||
if (!boost::filesystem::exists(hashedIndexFilePath)) {
|
|
||||||
throw ConcordiaException("E01: Word map file exists "
|
|
||||||
"but hashed index file absent.");
|
|
||||||
}
|
|
||||||
} else { // WordMap file does not exist
|
|
||||||
if (boost::filesystem::exists(hashedIndexFilePath)) {
|
|
||||||
throw ConcordiaException("E02: Hashed index file exists "
|
|
||||||
"but word map file absent.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_hashGenerator = boost::shared_ptr<HashGenerator>(
|
|
||||||
new HashGenerator(wordMapFilePath));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ConcordiaIndex::~ConcordiaIndex() {
|
ConcordiaIndex::~ConcordiaIndex() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConcordiaIndex::_serializeWordMap() {
|
boost::shared_ptr<vector<saidx_t> > ConcordiaIndex::generateSuffixArray(
|
||||||
_hashGenerator->serializeWordMap();
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
}
|
boost::shared_ptr<vector<sauchar_t> > T) {
|
||||||
|
saidx_t * SA_array = new saidx_t[T->size()];
|
||||||
void ConcordiaIndex::generateSuffixArray() {
|
if (divsufsort(T->data(), SA_array, (saidx_t) T->size()) != 0) {
|
||||||
if (boost::filesystem::exists(_hashedIndexFilePath.c_str())) {
|
throw ConcordiaException("Error creating suffix array.");
|
||||||
ifstream hashedIndexFile;
|
|
||||||
hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::in|
|
|
||||||
ios::ate|ios::binary);
|
|
||||||
|
|
||||||
/* Get the file size. */
|
|
||||||
saidx_t n = hashedIndexFile.tellg();
|
|
||||||
if (n > 0) {
|
|
||||||
sauchar_t *T;
|
|
||||||
saidx_t *SA;
|
|
||||||
|
|
||||||
/* Read n bytes of data. */
|
|
||||||
hashedIndexFile.seekg(0, ios::beg);
|
|
||||||
T = new sauchar_t[n];
|
|
||||||
int pos = 0;
|
|
||||||
while (!hashedIndexFile.eof()) {
|
|
||||||
INDEX_CHARACTER_TYPE character =
|
|
||||||
Utils::readIndexCharacter(hashedIndexFile);
|
|
||||||
Utils::insertCharToSaucharArray(T, character, pos);
|
|
||||||
pos+=sizeof(character);
|
|
||||||
}
|
|
||||||
hashedIndexFile.close();
|
|
||||||
|
|
||||||
SA = new saidx_t[n];
|
|
||||||
|
|
||||||
/* Construct the suffix array. */
|
|
||||||
if (divsufsort(T, SA, (saidx_t)n) != 0) {
|
|
||||||
throw ConcordiaException("Error creating suffix array.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Write the suffix array. */
|
|
||||||
|
|
||||||
ofstream suffixArrayFile;
|
|
||||||
suffixArrayFile.open(_suffixArrayFilePath.c_str(),
|
|
||||||
ios::out|ios::binary);
|
|
||||||
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
suffixArrayFile.write(reinterpret_cast<char *>(&SA[i]),
|
|
||||||
sizeof(saidx_t));
|
|
||||||
}
|
|
||||||
suffixArrayFile.close();
|
|
||||||
|
|
||||||
/* Deallocate memory. */
|
|
||||||
delete[] T;
|
|
||||||
delete[] SA;
|
|
||||||
} else {
|
|
||||||
throw ConcordiaException("Can not generate suffix array: "
|
|
||||||
"hashed index file is empty");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw ConcordiaException("Can not generate suffix array: "
|
|
||||||
"hashed index file does not exist");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boost::shared_ptr<vector<saidx_t> > result =
|
||||||
|
boost::shared_ptr<vector<saidx_t> >(new vector<saidx_t>);
|
||||||
|
for (int i = 0; i < T->size(); i++) {
|
||||||
|
result->push_back(SA_array[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] SA_array;
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConcordiaIndex::addSentence(const string & sentence) {
|
void ConcordiaIndex::addSentence(boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
vector<INDEX_CHARACTER_TYPE> hash = _hashGenerator->generateHash(sentence);
|
boost::shared_ptr<vector<sauchar_t> > T,
|
||||||
|
const string & sentence) {
|
||||||
ofstream hashedIndexFile;
|
ofstream hashedIndexFile;
|
||||||
hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out|
|
hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out|
|
||||||
ios::app|ios::binary);
|
ios::app|ios::binary);
|
||||||
for (vector<INDEX_CHARACTER_TYPE>::iterator it = hash.begin();
|
_addSingleSentence(hashedIndexFile, hashGenerator, T, sentence);
|
||||||
it != hash.end(); ++it) {
|
|
||||||
Utils::writeIndexCharacter(hashedIndexFile, *it);
|
|
||||||
}
|
|
||||||
hashedIndexFile.close();
|
hashedIndexFile.close();
|
||||||
_serializeWordMap();
|
hashGenerator->serializeWordMap();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConcordiaIndex::addAllSentences(vector<std::string> & sentences) {
|
void ConcordiaIndex::addAllSentences(
|
||||||
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
|
boost::shared_ptr<vector<sauchar_t> > T,
|
||||||
|
boost::shared_ptr<vector<string> > sentences) {
|
||||||
ofstream hashedIndexFile;
|
ofstream hashedIndexFile;
|
||||||
hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out|
|
hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out|
|
||||||
ios::app|ios::binary);
|
ios::app|ios::binary);
|
||||||
for (vector<string>::iterator sent_it = sentences.begin();
|
for (vector<string>::iterator sent_it = sentences->begin();
|
||||||
sent_it != sentences.end(); ++sent_it) {
|
sent_it != sentences->end(); ++sent_it) {
|
||||||
string sentence = *sent_it;
|
string sentence = *sent_it;
|
||||||
vector<INDEX_CHARACTER_TYPE> hash =
|
_addSingleSentence(hashedIndexFile, hashGenerator, T, sentence);
|
||||||
_hashGenerator->generateHash(sentence);
|
|
||||||
for (vector<INDEX_CHARACTER_TYPE>::iterator it = hash.begin();
|
|
||||||
it != hash.end(); ++it) {
|
|
||||||
Utils::writeIndexCharacter(hashedIndexFile, *it);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
hashedIndexFile.close();
|
hashedIndexFile.close();
|
||||||
_serializeWordMap();
|
hashGenerator->serializeWordMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ConcordiaIndex::_addSingleSentence(
|
||||||
|
ofstream & hashedIndexFile,
|
||||||
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
|
const string & sentence) {
|
||||||
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash
|
||||||
|
= hashGenerator->generateHash(sentence);
|
||||||
|
for (vector<INDEX_CHARACTER_TYPE>::iterator it = hash->begin();
|
||||||
|
it != hash->end(); ++it) {
|
||||||
|
INDEX_CHARACTER_TYPE character = *it;
|
||||||
|
Utils::writeIndexCharacter(hashedIndexFile, character);
|
||||||
|
Utils::appendCharToSaucharVector(T, character);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,29 +19,35 @@ using namespace std;
|
|||||||
|
|
||||||
class ConcordiaIndex {
|
class ConcordiaIndex {
|
||||||
public:
|
public:
|
||||||
explicit ConcordiaIndex(const string & wordMapFilePath,
|
explicit ConcordiaIndex(const string & hashedIndexFilePath)
|
||||||
const string & hashedIndexFilePath,
|
|
||||||
const string & suffixArrayFilePath)
|
|
||||||
throw(ConcordiaException);
|
throw(ConcordiaException);
|
||||||
|
|
||||||
/*! Destructor.
|
/*! Destructor.
|
||||||
*/
|
*/
|
||||||
virtual ~ConcordiaIndex();
|
virtual ~ConcordiaIndex();
|
||||||
|
|
||||||
void addSentence(const string & sentence);
|
void addSentence(
|
||||||
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
|
boost::shared_ptr<vector<sauchar_t> > T,
|
||||||
|
const string & sentence);
|
||||||
|
|
||||||
void addAllSentences(vector<string> & sentences);
|
void addAllSentences(
|
||||||
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
|
boost::shared_ptr<vector<sauchar_t> > T,
|
||||||
|
boost::shared_ptr<vector<string> > sentences);
|
||||||
|
|
||||||
void generateSuffixArray();
|
boost::shared_ptr<vector<saidx_t> > generateSuffixArray(
|
||||||
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
|
boost::shared_ptr<vector<sauchar_t> > T);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void _serializeWordMap();
|
// Add sentence to disk index and update RAM index.
|
||||||
|
void _addSingleSentence(ofstream & hashedIndexFile,
|
||||||
boost::shared_ptr<HashGenerator> _hashGenerator;
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
|
const string & sentence);
|
||||||
|
|
||||||
string _hashedIndexFilePath;
|
string _hashedIndexFilePath;
|
||||||
|
|
||||||
string _suffixArrayFilePath;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -20,17 +20,18 @@ HashGenerator::HashGenerator(const string & wordMapFilePath)
|
|||||||
HashGenerator::~HashGenerator() {
|
HashGenerator::~HashGenerator() {
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<INDEX_CHARACTER_TYPE> HashGenerator::generateHash(
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > HashGenerator::generateHash(
|
||||||
const string & sentence) {
|
const string & sentence) {
|
||||||
vector<INDEX_CHARACTER_TYPE> result;
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> >
|
||||||
vector<string> tokenTexts;
|
result(new vector<INDEX_CHARACTER_TYPE>());
|
||||||
boost::split(tokenTexts, sentence, boost::is_any_of(" "));
|
boost::shared_ptr<vector<string> > tokenTexts(new vector<string>());
|
||||||
|
boost::split(*tokenTexts, sentence, boost::is_any_of(" "));
|
||||||
|
|
||||||
for (vector<string>::iterator it = tokenTexts.begin();
|
for (vector<string>::iterator it = tokenTexts->begin();
|
||||||
it != tokenTexts.end(); ++it) {
|
it != tokenTexts->end(); ++it) {
|
||||||
string token = *it;
|
string token = *it;
|
||||||
INDEX_CHARACTER_TYPE code = _wordMap->getWordCode(token);
|
INDEX_CHARACTER_TYPE code = _wordMap->getWordCode(token);
|
||||||
result.push_back(code);
|
result->push_back(code);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
@ -26,7 +26,8 @@ public:
|
|||||||
*/
|
*/
|
||||||
virtual ~HashGenerator();
|
virtual ~HashGenerator();
|
||||||
|
|
||||||
vector<INDEX_CHARACTER_TYPE> generateHash(const string & sentence);
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> >
|
||||||
|
generateHash(const string & sentence);
|
||||||
|
|
||||||
void serializeWordMap();
|
void serializeWordMap();
|
||||||
|
|
||||||
|
@ -3,89 +3,38 @@
|
|||||||
#include "concordia/common/utils.hpp"
|
#include "concordia/common/utils.hpp"
|
||||||
#include <boost/filesystem.hpp>
|
#include <boost/filesystem.hpp>
|
||||||
|
|
||||||
IndexSearcher::IndexSearcher():
|
IndexSearcher::IndexSearcher() {
|
||||||
_T(NULL),
|
|
||||||
_SA(NULL),
|
|
||||||
_n(0) {
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
IndexSearcher::~IndexSearcher() {
|
IndexSearcher::~IndexSearcher() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boost::shared_ptr<vector<saidx_t> > IndexSearcher::simpleSearch(
|
||||||
void IndexSearcher::loadIndex(const string & wordMapFilepath,
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
const string & hashedIndexFilepath,
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
const string & suffixArrayFilepath)
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
throw(ConcordiaException) {
|
const string & pattern) throw(ConcordiaException) {
|
||||||
if (!boost::filesystem::exists(wordMapFilepath)) {
|
boost::shared_ptr<vector<saidx_t> > result =
|
||||||
throw ConcordiaException("E06: Failed to open word map "
|
boost::shared_ptr<vector<saidx_t> >(new vector<saidx_t>());
|
||||||
"file for reading.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!boost::filesystem::exists(hashedIndexFilepath)) {
|
|
||||||
throw ConcordiaException("E07: Failed to open hashed index file "
|
|
||||||
"for reading.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!boost::filesystem::exists(suffixArrayFilepath)) {
|
|
||||||
throw ConcordiaException("E08: Failed to open suffix array file "
|
|
||||||
"for reading.");
|
|
||||||
}
|
|
||||||
|
|
||||||
_hashGenerator = boost::shared_ptr<HashGenerator>(
|
|
||||||
new HashGenerator(wordMapFilepath));
|
|
||||||
|
|
||||||
ifstream hashedIndexFile;
|
|
||||||
hashedIndexFile.open(hashedIndexFilepath.c_str(), ios::in
|
|
||||||
| ios::ate | ios::binary);
|
|
||||||
_n = hashedIndexFile.tellg();
|
|
||||||
hashedIndexFile.seekg(0, ios::beg);
|
|
||||||
_T = new sauchar_t[_n];
|
|
||||||
int pos = 0;
|
|
||||||
while (!hashedIndexFile.eof()) {
|
|
||||||
INDEX_CHARACTER_TYPE character =
|
|
||||||
Utils::readIndexCharacter(hashedIndexFile);
|
|
||||||
Utils::insertCharToSaucharArray(_T, character, pos);
|
|
||||||
pos+=sizeof(character);
|
|
||||||
}
|
|
||||||
hashedIndexFile.close();
|
|
||||||
|
|
||||||
_SA = new saidx_t[_n];
|
|
||||||
|
|
||||||
ifstream suffixArrayFile;
|
|
||||||
suffixArrayFile.open(suffixArrayFilepath.c_str(), ios::in | ios::binary);
|
|
||||||
|
|
||||||
saidx_t saidx_buff;
|
|
||||||
pos = 0;
|
|
||||||
while (!suffixArrayFile.eof() && pos < _n) {
|
|
||||||
suffixArrayFile.read(reinterpret_cast<char *>(&saidx_buff),
|
|
||||||
sizeof(saidx_t));
|
|
||||||
_SA[pos++] = saidx_buff;
|
|
||||||
}
|
|
||||||
suffixArrayFile.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<saidx_t> IndexSearcher::simpleSearch(const string & pattern)
|
|
||||||
throw(ConcordiaException) {
|
|
||||||
vector<saidx_t> result;
|
|
||||||
|
|
||||||
int left;
|
int left;
|
||||||
vector<INDEX_CHARACTER_TYPE> hash = _hashGenerator->generateHash(pattern);
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash =
|
||||||
saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE);
|
hashGenerator->generateHash(pattern);
|
||||||
|
saidx_t patternLength = hash->size()*sizeof(INDEX_CHARACTER_TYPE);
|
||||||
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
|
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
|
||||||
int size = sa_search(_T, (saidx_t) _n,
|
int size = sa_search(T->data(), (saidx_t) T->size(),
|
||||||
(const sauchar_t *) patternArray, patternLength,
|
(const sauchar_t *) patternArray, patternLength,
|
||||||
_SA, (saidx_t) _n, &left);
|
SA->data(), (saidx_t) T->size(), &left);
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
saidx_t result_pos = _SA[left + i];
|
saidx_t result_pos = SA->at(left + i);
|
||||||
if (result_pos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
if (result_pos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
|
||||||
// As we are looking for a pattern in an array of higher
|
// As we are looking for a pattern in an array of higher
|
||||||
// resolution than the hashed index file, we might
|
// resolution than the hashed index file, we might
|
||||||
// obtain accidental results exceeding the boundaries
|
// obtain accidental results exceeding the boundaries
|
||||||
// of characters in hashed index. The above check
|
// of characters in hashed index. The above check
|
||||||
// removes these accidental results.
|
// removes these accidental results.
|
||||||
result.push_back(result_pos / sizeof(INDEX_CHARACTER_TYPE));
|
result->push_back(result_pos / sizeof(INDEX_CHARACTER_TYPE));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,22 +25,12 @@ public:
|
|||||||
*/
|
*/
|
||||||
virtual ~IndexSearcher();
|
virtual ~IndexSearcher();
|
||||||
|
|
||||||
void loadIndex(const string & wordMapFilepath,
|
boost::shared_ptr<vector<saidx_t> > simpleSearch(
|
||||||
const string & hashedIndexFilepath,
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
||||||
const string & suffixArrayFilepath)
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
||||||
throw(ConcordiaException);
|
boost::shared_ptr<std::vector<saidx_t> > SA,
|
||||||
|
const string & pattern) throw(ConcordiaException);
|
||||||
vector<saidx_t> simpleSearch(const string & pattern)
|
|
||||||
throw(ConcordiaException);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
boost::shared_ptr<HashGenerator> _hashGenerator;
|
|
||||||
|
|
||||||
sauchar_t * _T;
|
|
||||||
|
|
||||||
saidx_t * _SA;
|
|
||||||
|
|
||||||
saidx_t _n;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -4,7 +4,6 @@ add_library(concordia-tests
|
|||||||
test_word_map.cpp
|
test_word_map.cpp
|
||||||
test_hash_generator.cpp
|
test_hash_generator.cpp
|
||||||
test_concordia_index.cpp
|
test_concordia_index.cpp
|
||||||
test_index_searcher.cpp
|
|
||||||
test_concordia_config.cpp
|
test_concordia_config.cpp
|
||||||
test_concordia.cpp
|
test_concordia.cpp
|
||||||
)
|
)
|
||||||
|
@ -25,8 +25,7 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
|
|||||||
concordia.addSentence("Ala ma kota");
|
concordia.addSentence("Ala ma kota");
|
||||||
concordia.addSentence("Ala ma rysia");
|
concordia.addSentence("Ala ma rysia");
|
||||||
concordia.addSentence("Marysia ma rysia");
|
concordia.addSentence("Marysia ma rysia");
|
||||||
|
concordia.refreshSAfromRAM();
|
||||||
concordia.generateIndex();
|
|
||||||
|
|
||||||
/*The test index contains 3 sentences:
|
/*The test index contains 3 sentences:
|
||||||
"Ala ma kota"
|
"Ala ma kota"
|
||||||
@ -50,34 +49,30 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
vector<saidx_t> expectedResult1;
|
boost::shared_ptr<std::vector<saidx_t> > expectedResult1(new std::vector<saidx_t>());
|
||||||
expectedResult1.push_back(7);
|
expectedResult1->push_back(7);
|
||||||
expectedResult1.push_back(4);
|
expectedResult1->push_back(4);
|
||||||
|
|
||||||
concordia.loadIndex();
|
boost::shared_ptr<std::vector<saidx_t> > searchResult1 = concordia.simpleSearch("ma rysia");
|
||||||
vector<saidx_t> searchResult1 = concordia.simpleSearch("ma rysia");
|
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_SUFFIX_ARRAY));
|
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1.begin(), searchResult1.end(),
|
BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1->begin(), searchResult1->end(),
|
||||||
expectedResult1.begin(), expectedResult1.end());
|
expectedResult1->begin(), expectedResult1->end());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
||||||
{
|
{
|
||||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||||
vector<string> testSentences;
|
boost::shared_ptr<vector<string> > testSentences (new vector<string>());
|
||||||
testSentences.push_back("to jest okno");
|
testSentences->push_back("to jest okno");
|
||||||
testSentences.push_back("czy jest okno otwarte");
|
testSentences->push_back("czy jest okno otwarte");
|
||||||
testSentences.push_back("chyba to jest tutaj");
|
testSentences->push_back("chyba to jest tutaj");
|
||||||
testSentences.push_back("to jest");
|
testSentences->push_back("to jest");
|
||||||
concordia.addAllSentences(testSentences);
|
concordia.addAllSentences(testSentences);
|
||||||
|
|
||||||
concordia.generateIndex();
|
|
||||||
|
|
||||||
/*The test index contains 4 sentences:
|
/*The test index contains 4 sentences:
|
||||||
"to jest okno"
|
"to jest okno"
|
||||||
"czy jest okno otwarte"
|
"czy jest okno otwarte"
|
||||||
@ -103,27 +98,26 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
vector<saidx_t> expectedResult1;
|
boost::shared_ptr<vector<saidx_t> > expectedResult1(new vector<saidx_t>());
|
||||||
expectedResult1.push_back(11);
|
expectedResult1->push_back(11);
|
||||||
expectedResult1.push_back(0);
|
expectedResult1->push_back(0);
|
||||||
expectedResult1.push_back(8);
|
expectedResult1->push_back(8);
|
||||||
|
|
||||||
vector<saidx_t> expectedResult2;
|
boost::shared_ptr<vector<saidx_t> > expectedResult2(new vector<saidx_t>());
|
||||||
expectedResult2.push_back(1);
|
expectedResult2->push_back(1);
|
||||||
expectedResult2.push_back(4);
|
expectedResult2->push_back(4);
|
||||||
|
|
||||||
concordia.loadIndex();
|
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||||
vector<saidx_t> searchResult1 = concordia.simpleSearch("to jest");
|
boost::shared_ptr<vector<saidx_t> > searchResult1 = concordia2.simpleSearch("to jest");
|
||||||
vector<saidx_t> searchResult2 = concordia.simpleSearch("jest okno");
|
boost::shared_ptr<vector<saidx_t> > searchResult2 = concordia2.simpleSearch("jest okno");
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_SUFFIX_ARRAY));
|
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1.begin(), searchResult1.end(),
|
BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1->begin(), searchResult1->end(),
|
||||||
expectedResult1.begin(), expectedResult1.end());
|
expectedResult1->begin(), expectedResult1->end());
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(searchResult2.begin(), searchResult2.end(),
|
BOOST_CHECK_EQUAL_COLLECTIONS(searchResult2->begin(), searchResult2->end(),
|
||||||
expectedResult2.begin(), expectedResult2.end());
|
expectedResult2->begin(), expectedResult2->end());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,60 +12,42 @@ using namespace std;
|
|||||||
BOOST_AUTO_TEST_SUITE(concordia_index)
|
BOOST_AUTO_TEST_SUITE(concordia_index)
|
||||||
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ResourcesExistenceTest1 )
|
|
||||||
{
|
|
||||||
bool exceptionThrown = false;
|
|
||||||
string message = "";
|
|
||||||
|
|
||||||
try {
|
|
||||||
ConcordiaIndex index(TestResourcesManager::getTestFilePath("concordia-index","mock_word_map.bin"),
|
|
||||||
TestResourcesManager::getTestFilePath("concordia-index","nonexistent.bin"),
|
|
||||||
TestResourcesManager::getTestFilePath("concordia-index","test_SA.bin"));
|
|
||||||
} catch (ConcordiaException & e) {
|
|
||||||
exceptionThrown = true;
|
|
||||||
message = e.what();
|
|
||||||
}
|
|
||||||
|
|
||||||
BOOST_CHECK(exceptionThrown);
|
|
||||||
BOOST_CHECK_EQUAL(boost::starts_with(message, "E01"), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ResourcesExistenceTest2 )
|
|
||||||
{
|
|
||||||
bool exceptionThrown = false;
|
|
||||||
string message = "";
|
|
||||||
|
|
||||||
try {
|
|
||||||
ConcordiaIndex index(TestResourcesManager::getTestFilePath("concordia-index","nonexistent.bin"),
|
|
||||||
TestResourcesManager::getTestFilePath("concordia-index","mock_hash_index.bin"),
|
|
||||||
TestResourcesManager::getTestFilePath("concordia-index","test_SA.bin"));
|
|
||||||
} catch (ConcordiaException & e) {
|
|
||||||
exceptionThrown = true;
|
|
||||||
message = e.what();
|
|
||||||
}
|
|
||||||
|
|
||||||
BOOST_CHECK(exceptionThrown);
|
|
||||||
BOOST_CHECK_EQUAL(boost::starts_with(message, "E02"), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest )
|
BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest )
|
||||||
{
|
{
|
||||||
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"),
|
boost::shared_ptr<HashGenerator> hashGenerator (new HashGenerator("nonexistent"));
|
||||||
TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"),
|
|
||||||
TestResourcesManager::getTestFilePath("temp","test_SA.bin"));
|
|
||||||
index.addSentence("Ala ma kota");
|
|
||||||
index.addSentence("Ala ma rysia");
|
|
||||||
index.addSentence("Marysia ma rysia");
|
|
||||||
|
|
||||||
index.generateSuffixArray();
|
|
||||||
|
|
||||||
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_word_map.bin")));
|
|
||||||
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin")));
|
|
||||||
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_SA.bin")));
|
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"));
|
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"));
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"));
|
boost::shared_ptr<vector<sauchar_t> > T = boost::shared_ptr<vector<sauchar_t> >(new vector<sauchar_t>());
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_SA.bin"));
|
// Test hashed index:
|
||||||
|
// n: 0 1 2 3 4 5 6 7 8
|
||||||
|
// T[n]: 0 1 2 0 1 3 4 1 3
|
||||||
|
T->push_back(0);
|
||||||
|
T->push_back(1);
|
||||||
|
T->push_back(2);
|
||||||
|
T->push_back(0);
|
||||||
|
T->push_back(1);
|
||||||
|
T->push_back(3);
|
||||||
|
T->push_back(4);
|
||||||
|
T->push_back(1);
|
||||||
|
T->push_back(3);
|
||||||
|
|
||||||
|
// Test suffix array:
|
||||||
|
// n: 0 1 2 3 4 5 6 7 8
|
||||||
|
//SA[n]: 0 3 1 7 4 2 8 5 6
|
||||||
|
|
||||||
|
boost::shared_ptr<std::vector<saidx_t> > SA = index.generateSuffixArray(hashGenerator, T);
|
||||||
|
|
||||||
|
boost::shared_ptr<vector<saidx_t> > expectedSA = boost::shared_ptr<vector<saidx_t> >(new vector<saidx_t>());
|
||||||
|
expectedSA->push_back(0);
|
||||||
|
expectedSA->push_back(3);
|
||||||
|
expectedSA->push_back(1);
|
||||||
|
expectedSA->push_back(7);
|
||||||
|
expectedSA->push_back(4);
|
||||||
|
expectedSA->push_back(2);
|
||||||
|
expectedSA->push_back(8);
|
||||||
|
expectedSA->push_back(5);
|
||||||
|
expectedSA->push_back(6);
|
||||||
|
BOOST_CHECK_EQUAL_COLLECTIONS(SA->begin(), SA->end(), expectedSA->begin(), expectedSA->end());
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
BOOST_AUTO_TEST_SUITE_END()
|
||||||
|
@ -19,13 +19,13 @@ BOOST_AUTO_TEST_CASE( SimpleHashTest )
|
|||||||
|
|
||||||
HashGenerator hashGenerator = HashGenerator(TEST_WORD_MAP_PATH);
|
HashGenerator hashGenerator = HashGenerator(TEST_WORD_MAP_PATH);
|
||||||
|
|
||||||
vector<INDEX_CHARACTER_TYPE> hash = hashGenerator.generateHash("Ala ma kota");
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash = hashGenerator.generateHash("Ala ma kota");
|
||||||
vector<INDEX_CHARACTER_TYPE> expected;
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > expected(new vector<INDEX_CHARACTER_TYPE>());
|
||||||
expected.push_back(0);
|
expected->push_back(0);
|
||||||
expected.push_back(1);
|
expected->push_back(1);
|
||||||
expected.push_back(2);
|
expected->push_back(2);
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(hash.begin(), hash.end(), expected.begin(), expected.end());
|
BOOST_CHECK_EQUAL_COLLECTIONS(hash->begin(), hash->end(), expected->begin(), expected->end());
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( HashSerializationTest )
|
BOOST_AUTO_TEST_CASE( HashSerializationTest )
|
||||||
@ -35,22 +35,22 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
|
|||||||
}
|
}
|
||||||
|
|
||||||
HashGenerator hashGenerator1 = HashGenerator(TEST_WORD_MAP_PATH);
|
HashGenerator hashGenerator1 = HashGenerator(TEST_WORD_MAP_PATH);
|
||||||
vector<INDEX_CHARACTER_TYPE> hash1 = hashGenerator1.generateHash("Ala ma kota");
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash1 = hashGenerator1.generateHash("Ala ma kota");
|
||||||
vector<INDEX_CHARACTER_TYPE> expected1;
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > expected1(new vector<INDEX_CHARACTER_TYPE>());
|
||||||
expected1.push_back(0);
|
expected1->push_back(0);
|
||||||
expected1.push_back(1);
|
expected1->push_back(1);
|
||||||
expected1.push_back(2);
|
expected1->push_back(2);
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(hash1.begin(), hash1.end(), expected1.begin(), expected1.end());
|
BOOST_CHECK_EQUAL_COLLECTIONS(hash1->begin(), hash1->end(), expected1->begin(), expected1->end());
|
||||||
|
|
||||||
hashGenerator1.serializeWordMap();
|
hashGenerator1.serializeWordMap();
|
||||||
|
|
||||||
HashGenerator hashGenerator2 = HashGenerator(TEST_WORD_MAP_PATH);
|
HashGenerator hashGenerator2 = HashGenerator(TEST_WORD_MAP_PATH);
|
||||||
vector<INDEX_CHARACTER_TYPE> hash2 = hashGenerator2.generateHash("Ala ma psa");
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash2 = hashGenerator2.generateHash("Ala ma psa");
|
||||||
vector<INDEX_CHARACTER_TYPE> expected2;
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > expected2(new vector<INDEX_CHARACTER_TYPE>());
|
||||||
expected2.push_back(0);
|
expected2->push_back(0);
|
||||||
expected2.push_back(1);
|
expected2->push_back(1);
|
||||||
expected2.push_back(3);
|
expected2->push_back(3);
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(hash2.begin(), hash2.end(), expected2.begin(), expected2.end());
|
BOOST_CHECK_EQUAL_COLLECTIONS(hash2->begin(), hash2->end(), expected2->begin(), expected2->end());
|
||||||
|
|
||||||
boost::filesystem::remove(TEST_WORD_MAP_PATH);
|
boost::filesystem::remove(TEST_WORD_MAP_PATH);
|
||||||
}
|
}
|
||||||
|
@ -1,75 +0,0 @@
|
|||||||
#include "tests/unit-tests/unit_tests_globals.hpp"
|
|
||||||
|
|
||||||
#include "concordia/index_searcher.hpp"
|
|
||||||
#include "concordia/concordia_index.hpp"
|
|
||||||
#include "concordia/concordia_exception.hpp"
|
|
||||||
#include "tests/common/test_resources_manager.hpp"
|
|
||||||
|
|
||||||
#include <boost/algorithm/string/predicate.hpp>
|
|
||||||
#include <boost/filesystem.hpp>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE(index_searcher)
|
|
||||||
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( SimpleSearchTest )
|
|
||||||
{
|
|
||||||
|
|
||||||
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"),
|
|
||||||
TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"),
|
|
||||||
TestResourcesManager::getTestFilePath("temp","test_SA.bin"));
|
|
||||||
index.addSentence("Ala ma kota");
|
|
||||||
index.addSentence("Ala ma rysia");
|
|
||||||
index.addSentence("Marysia ma rysia");
|
|
||||||
|
|
||||||
index.generateSuffixArray();
|
|
||||||
|
|
||||||
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_word_map.bin")));
|
|
||||||
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin")));
|
|
||||||
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_SA.bin")));
|
|
||||||
|
|
||||||
IndexSearcher searcher;
|
|
||||||
searcher.loadIndex(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"),
|
|
||||||
TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"),
|
|
||||||
TestResourcesManager::getTestFilePath("temp","test_SA.bin"));
|
|
||||||
|
|
||||||
/*The test index contains 3 sentences:
|
|
||||||
"Ala ma kota"
|
|
||||||
"Ala ma rysia"
|
|
||||||
"Marysia ma rysia"
|
|
||||||
|
|
||||||
Test word map:
|
|
||||||
Ala -> 0
|
|
||||||
ma -> 1
|
|
||||||
kota -> 2
|
|
||||||
rysia -> 3
|
|
||||||
Marysia -> 4
|
|
||||||
|
|
||||||
Test hashed index:
|
|
||||||
n: 0 1 2 3 4 5 6 7 8
|
|
||||||
T[n]: 0 1 2 0 1 3 4 1 3
|
|
||||||
|
|
||||||
Test suffix array:
|
|
||||||
n: 0 1 2 3 4 5 6 7 8
|
|
||||||
SA[n]: 0 3 1 7 4 2 8 5 6
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
vector<saidx_t> expectedResult1;
|
|
||||||
expectedResult1.push_back(7);
|
|
||||||
expectedResult1.push_back(4);
|
|
||||||
|
|
||||||
vector<saidx_t> searchResult1 = searcher.simpleSearch("ma rysia");
|
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"));
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"));
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_SA.bin"));
|
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1.begin(), searchResult1.end(),
|
|
||||||
expectedResult1.begin(), expectedResult1.end());
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
|
@ -11,7 +11,7 @@ using namespace std;
|
|||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE(utils)
|
BOOST_AUTO_TEST_SUITE(utils)
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( UtilsTest1 )
|
BOOST_AUTO_TEST_CASE( WriteReadSingleCharacter )
|
||||||
{
|
{
|
||||||
ofstream testFileOutput;
|
ofstream testFileOutput;
|
||||||
testFileOutput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),
|
testFileOutput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),
|
||||||
@ -29,133 +29,37 @@ BOOST_AUTO_TEST_CASE( UtilsTest1 )
|
|||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","temp_file.bin"));
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","temp_file.bin"));
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( UtilsTest2 )
|
BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray )
|
||||||
{
|
{
|
||||||
ofstream testFileOutput;
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash(new vector<INDEX_CHARACTER_TYPE>());
|
||||||
testFileOutput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),
|
hash->push_back(123456789); // in hex: 75BCD15
|
||||||
ios::out|ios::binary);
|
// in memory: 15 cd 5b 07
|
||||||
Utils::writeIndexCharacter(testFileOutput,123456789); //in hex: 75BCD15
|
// in memory DEC: 21 205 91 7
|
||||||
//in memory: 15 cd 5b 07
|
|
||||||
// in DEC: 21 205 91 7
|
|
||||||
|
|
||||||
Utils::writeIndexCharacter(testFileOutput,987654321); //in hex: 3ADE68B1
|
|
||||||
//in memory: b1 68 de 3a
|
|
||||||
// in DEC: 177 104 222 58
|
|
||||||
testFileOutput.close();
|
|
||||||
|
|
||||||
sauchar_t * dataArray = new sauchar_t[8];
|
|
||||||
ifstream testFileInput;
|
|
||||||
testFileInput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),ios::in|ios::binary);
|
|
||||||
|
|
||||||
INDEX_CHARACTER_TYPE retrievedCharacter1 = Utils::readIndexCharacter(testFileInput);
|
|
||||||
BOOST_CHECK_EQUAL(retrievedCharacter1, 123456789);
|
|
||||||
Utils::insertCharToSaucharArray(dataArray, retrievedCharacter1, 0);
|
|
||||||
|
|
||||||
INDEX_CHARACTER_TYPE retrievedCharacter2 = Utils::readIndexCharacter(testFileInput);
|
hash->push_back(987654321); // in hex: 3ADE68B1
|
||||||
BOOST_CHECK_EQUAL(retrievedCharacter2, 987654321);
|
// in memory: b1 68 de 3a
|
||||||
Utils::insertCharToSaucharArray(dataArray, retrievedCharacter2, 4);
|
// in memory DEC: 177 104 222 58
|
||||||
|
|
||||||
testFileInput.close();
|
|
||||||
|
|
||||||
vector<INDEX_CHARACTER_TYPE> expected;
|
|
||||||
expected.push_back(21);
|
|
||||||
expected.push_back(205);
|
|
||||||
expected.push_back(91);
|
|
||||||
expected.push_back(7);
|
|
||||||
expected.push_back(177);
|
|
||||||
expected.push_back(104);
|
|
||||||
expected.push_back(222);
|
|
||||||
expected.push_back(58);
|
|
||||||
|
|
||||||
vector<INDEX_CHARACTER_TYPE> result;
|
|
||||||
for (int i=0;i<8;i++) {
|
|
||||||
INDEX_CHARACTER_TYPE a = dataArray[i];
|
|
||||||
result.push_back(a);
|
|
||||||
}
|
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","temp_file.bin"));
|
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( UtilsTest3 )
|
|
||||||
{
|
|
||||||
vector<INDEX_CHARACTER_TYPE> hash;
|
|
||||||
hash.push_back(123456789);
|
|
||||||
hash.push_back(987654321);
|
|
||||||
|
|
||||||
sauchar_t * dataArray = Utils::indexVectorToSaucharArray(hash);
|
sauchar_t * dataArray = Utils::indexVectorToSaucharArray(hash);
|
||||||
|
|
||||||
vector<INDEX_CHARACTER_TYPE> result;
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > result(new vector<INDEX_CHARACTER_TYPE>());
|
||||||
for (int i=0;i<8;i++) {
|
for (int i=0;i<8;i++) {
|
||||||
INDEX_CHARACTER_TYPE a = dataArray[i];
|
INDEX_CHARACTER_TYPE a = dataArray[i];
|
||||||
result.push_back(a);
|
result->push_back(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<INDEX_CHARACTER_TYPE> expected;
|
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > expected(new vector<INDEX_CHARACTER_TYPE>());
|
||||||
expected.push_back(21);
|
expected->push_back(21);
|
||||||
expected.push_back(205);
|
expected->push_back(205);
|
||||||
expected.push_back(91);
|
expected->push_back(91);
|
||||||
expected.push_back(7);
|
expected->push_back(7);
|
||||||
expected.push_back(177);
|
expected->push_back(177);
|
||||||
expected.push_back(104);
|
expected->push_back(104);
|
||||||
expected.push_back(222);
|
expected->push_back(222);
|
||||||
expected.push_back(58);
|
expected->push_back(58);
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());
|
BOOST_CHECK_EQUAL_COLLECTIONS(result->begin(), result->end(), expected->begin(), expected->end());
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
BOOST_AUTO_TEST_CASE( UtilsTest4 )
|
|
||||||
{
|
|
||||||
ofstream testFileOutput;
|
|
||||||
testFileOutput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),
|
|
||||||
ios::out|ios::binary);
|
|
||||||
Utils::writeIndexCharacter(testFileOutput,123456789); //in hex: 75BCD15
|
|
||||||
//in memory: 15 cd 5b 07
|
|
||||||
// in DEC: 21 205 91 7
|
|
||||||
|
|
||||||
Utils::writeIndexCharacter(testFileOutput,987654321); //in hex: 3ADE68B1
|
|
||||||
//in memory: b1 68 de 3a
|
|
||||||
// in DEC: 177 104 222 58
|
|
||||||
testFileOutput.close();
|
|
||||||
|
|
||||||
sauchar_t * dataArray = Utils::readIndexFromFile(
|
|
||||||
ifstream testFileInput;
|
|
||||||
testFileInput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),ios::in|ios::binary);
|
|
||||||
|
|
||||||
INDEX_CHARACTER_TYPE retrievedCharacter1 = Utils::readIndexCharacter(testFileInput);
|
|
||||||
BOOST_CHECK_EQUAL(retrievedCharacter1, 123456789);
|
|
||||||
Utils::insertCharToSaucharArray(dataArray, retrievedCharacter1, 0);
|
|
||||||
|
|
||||||
INDEX_CHARACTER_TYPE retrievedCharacter2 = Utils::readIndexCharacter(testFileInput);
|
|
||||||
BOOST_CHECK_EQUAL(retrievedCharacter2, 987654321);
|
|
||||||
Utils::insertCharToSaucharArray(dataArray, retrievedCharacter2, 4);
|
|
||||||
|
|
||||||
testFileInput.close();
|
|
||||||
|
|
||||||
vector<INDEX_CHARACTER_TYPE> expected;
|
|
||||||
expected.push_back(21);
|
|
||||||
expected.push_back(205);
|
|
||||||
expected.push_back(91);
|
|
||||||
expected.push_back(7);
|
|
||||||
expected.push_back(177);
|
|
||||||
expected.push_back(104);
|
|
||||||
expected.push_back(222);
|
|
||||||
expected.push_back(58);
|
|
||||||
|
|
||||||
vector<INDEX_CHARACTER_TYPE> result;
|
|
||||||
for (int i=0;i<8;i++) {
|
|
||||||
INDEX_CHARACTER_TYPE a = dataArray[i];
|
|
||||||
result.push_back(a);
|
|
||||||
}
|
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","temp_file.bin"));
|
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
BOOST_AUTO_TEST_SUITE_END()
|
||||||
|
Loading…
Reference in New Issue
Block a user