suffix array simple search
This commit is contained in:
parent
d3cccff654
commit
0d8a057278
@ -20,11 +20,17 @@ set (PROD_PUDDLE_TAGSET_PATH "${PROD_RESOURCES_DIRECTORY}/puddle/tagset.txt")
|
|||||||
|
|
||||||
set (TEST_RESOURCES_DIRECTORY "${concordia_SOURCE_DIR}/tests/resources")
|
set (TEST_RESOURCES_DIRECTORY "${concordia_SOURCE_DIR}/tests/resources")
|
||||||
set (TEST_PUDDLE_TAGSET_PATH "${TEST_RESOURCES_DIRECTORY}/puddle/basic-tagset.txt")
|
set (TEST_PUDDLE_TAGSET_PATH "${TEST_RESOURCES_DIRECTORY}/puddle/basic-tagset.txt")
|
||||||
|
set (TEMP_WORD_MAP "temp_word_map.bin")
|
||||||
|
set (TEMP_HASHED_INDEX "temp_hashed_index.bin")
|
||||||
|
set (TEMP_SUFFIX_ARRAY "temp_suffix_array.bin")
|
||||||
|
|
||||||
|
|
||||||
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
|
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
|
||||||
|
|
||||||
set(BASE_TARGETS concordia)
|
set(BASE_TARGETS concordia)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ================================================
|
# ================================================
|
||||||
# Third-party libraries
|
# Third-party libraries
|
||||||
# ================================================
|
# ================================================
|
||||||
@ -99,11 +105,6 @@ configure_file (
|
|||||||
"${concordia_SOURCE_DIR}/concordia/common/config.hpp"
|
"${concordia_SOURCE_DIR}/concordia/common/config.hpp"
|
||||||
)
|
)
|
||||||
|
|
||||||
configure_file (
|
|
||||||
"${concordia_SOURCE_DIR}/prod/resources/concordia-config/concordia.cfg.in"
|
|
||||||
"${concordia_SOURCE_DIR}/prod/resources/concordia-config/concordia.cfg"
|
|
||||||
)
|
|
||||||
|
|
||||||
configure_file (
|
configure_file (
|
||||||
"${concordia_SOURCE_DIR}/tests/resources/concordia-config/concordia.cfg.in"
|
"${concordia_SOURCE_DIR}/tests/resources/concordia-config/concordia.cfg.in"
|
||||||
"${concordia_SOURCE_DIR}/tests/resources/concordia-config/concordia.cfg"
|
"${concordia_SOURCE_DIR}/tests/resources/concordia-config/concordia.cfg"
|
||||||
|
@ -6,6 +6,7 @@ foreach(dir ${ALL_DIRECTORIES})
|
|||||||
endforeach(dir)
|
endforeach(dir)
|
||||||
|
|
||||||
add_library(concordia SHARED
|
add_library(concordia SHARED
|
||||||
|
index_searcher.cpp
|
||||||
concordia_index.cpp
|
concordia_index.cpp
|
||||||
word_map.cpp
|
word_map.cpp
|
||||||
hash_generator.cpp
|
hash_generator.cpp
|
||||||
|
@ -2,6 +2,9 @@
|
|||||||
#define CONCORDIA_VERSION_MINOR @CONCORDIA_VERSION_MINOR@
|
#define CONCORDIA_VERSION_MINOR @CONCORDIA_VERSION_MINOR@
|
||||||
|
|
||||||
#define TEST_RESOURCES_DIRECTORY "@TEST_RESOURCES_DIRECTORY@"
|
#define TEST_RESOURCES_DIRECTORY "@TEST_RESOURCES_DIRECTORY@"
|
||||||
|
#define TEMP_WORD_MAP "@TEMP_WORD_MAP@"
|
||||||
|
#define TEMP_HASHED_INDEX "@TEMP_HASHED_INDEX@"
|
||||||
|
#define TEMP_SUFFIX_ARRAY "@TEMP_SUFFIX_ARRAY@"
|
||||||
|
|
||||||
#define PROD_RESOURCES_DIRECTORY "@PROD_RESOURCES_DIRECTORY@"
|
#define PROD_RESOURCES_DIRECTORY "@PROD_RESOURCES_DIRECTORY@"
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
|
#include <sstream>
|
||||||
|
|
||||||
#include "concordia/concordia.hpp"
|
#include "concordia/concordia.hpp"
|
||||||
#include "concordia/common/config.hpp"
|
#include "concordia/common/config.hpp"
|
||||||
|
|
||||||
#include <sstream>
|
|
||||||
|
|
||||||
// ===========================================
|
// ===========================================
|
||||||
|
|
||||||
std::string _createLibraryVersion();
|
std::string _createLibraryVersion();
|
||||||
@ -13,9 +13,15 @@ std::string Concordia::_libraryVersion = _createLibraryVersion();
|
|||||||
|
|
||||||
// ===========================================
|
// ===========================================
|
||||||
|
|
||||||
Concordia::Concordia(const string & configFilePath) throw(ConcordiaException) {
|
Concordia::Concordia(const std::string & configFilePath)
|
||||||
boost::shared_ptr<ConcordiaConfig> _config(
|
throw(ConcordiaException) {
|
||||||
|
_config = boost::shared_ptr<ConcordiaConfig> (
|
||||||
new ConcordiaConfig(configFilePath));
|
new ConcordiaConfig(configFilePath));
|
||||||
|
_index = boost::shared_ptr<ConcordiaIndex>(
|
||||||
|
new ConcordiaIndex(_config->getWordMapFilePath(),
|
||||||
|
_config->getHashedIndexFilePath(),
|
||||||
|
_config->getSuffixArrayFilePath()));
|
||||||
|
_searcher = boost::shared_ptr<IndexSearcher>(new IndexSearcher());
|
||||||
}
|
}
|
||||||
|
|
||||||
Concordia::~Concordia() {
|
Concordia::~Concordia() {
|
||||||
@ -35,3 +41,22 @@ std::string _createLibraryVersion() {
|
|||||||
return version.str();
|
return version.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Concordia::addSentence(const std::string & sentence)
|
||||||
|
throw(ConcordiaException) {
|
||||||
|
_index->addSentence(sentence);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Concordia::generateIndex() throw(ConcordiaException) {
|
||||||
|
_index->generateSuffixArray();
|
||||||
|
_index->serializeWordMap();
|
||||||
|
_searcher->loadIndex(_config->getWordMapFilePath(),
|
||||||
|
_config->getHashedIndexFilePath(),
|
||||||
|
_config->getSuffixArrayFilePath());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<saidx_t> Concordia::simpleSearch(const std::string & pattern)
|
||||||
|
throw(ConcordiaException) {
|
||||||
|
return _searcher->simpleSearch(pattern);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,9 +2,14 @@
|
|||||||
#define CONCORDIA_HDR
|
#define CONCORDIA_HDR
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
#include <boost/shared_ptr.hpp>
|
#include <boost/shared_ptr.hpp>
|
||||||
|
|
||||||
#include "concordia/concordia_config.hpp"
|
#include "concordia/concordia_config.hpp"
|
||||||
|
#include "concordia/concordia_index.hpp"
|
||||||
|
#include "concordia/index_searcher.hpp"
|
||||||
|
#include <divsufsort.h>
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
The Concordia class is the main access point to the library.
|
The Concordia class is the main access point to the library.
|
||||||
@ -28,10 +33,21 @@ public:
|
|||||||
*/
|
*/
|
||||||
std::string & getVersion();
|
std::string & getVersion();
|
||||||
|
|
||||||
|
void addSentence(const std::string & sentence) throw(ConcordiaException);
|
||||||
|
|
||||||
|
void generateIndex() throw(ConcordiaException);
|
||||||
|
|
||||||
|
std::vector<saidx_t> simpleSearch(const std::string & pattern)
|
||||||
|
throw(ConcordiaException);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static std::string _libraryVersion;
|
static std::string _libraryVersion;
|
||||||
|
|
||||||
boost::shared_ptr<ConcordiaConfig> _config;
|
boost::shared_ptr<ConcordiaConfig> _config;
|
||||||
|
|
||||||
|
boost::shared_ptr<ConcordiaIndex> _index;
|
||||||
|
|
||||||
|
boost::shared_ptr<IndexSearcher> _searcher;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -3,6 +3,9 @@
|
|||||||
#include "concordia/common/logging.hpp"
|
#include "concordia/common/logging.hpp"
|
||||||
|
|
||||||
#define PUDDLE_TAGSET_PARAM "puddle_tagset_path"
|
#define PUDDLE_TAGSET_PARAM "puddle_tagset_path"
|
||||||
|
#define WORD_MAP_PARAM "word_map_path"
|
||||||
|
#define HASHED_INDEX_PARAM "hashed_index_path"
|
||||||
|
#define SUFFIX_ARRAY_PARAM "suffix_array_path"
|
||||||
|
|
||||||
ConcordiaConfig::ConcordiaConfig(const string & configFilePath)
|
ConcordiaConfig::ConcordiaConfig(const string & configFilePath)
|
||||||
throw(ConcordiaException) {
|
throw(ConcordiaException) {
|
||||||
@ -17,6 +20,12 @@ ConcordiaConfig::ConcordiaConfig(const string & configFilePath)
|
|||||||
|
|
||||||
_puddleTagsetFilePath =
|
_puddleTagsetFilePath =
|
||||||
ConcordiaConfig::_readConfigParameterStr(PUDDLE_TAGSET_PARAM);
|
ConcordiaConfig::_readConfigParameterStr(PUDDLE_TAGSET_PARAM);
|
||||||
|
_wordMapFilePath =
|
||||||
|
ConcordiaConfig::_readConfigParameterStr(WORD_MAP_PARAM);
|
||||||
|
_hashedIndexFilePath =
|
||||||
|
ConcordiaConfig::_readConfigParameterStr(HASHED_INDEX_PARAM);
|
||||||
|
_suffixArrayFilePath =
|
||||||
|
ConcordiaConfig::_readConfigParameterStr(SUFFIX_ARRAY_PARAM);
|
||||||
}
|
}
|
||||||
|
|
||||||
ConcordiaConfig::~ConcordiaConfig() {
|
ConcordiaConfig::~ConcordiaConfig() {
|
||||||
|
@ -34,11 +34,29 @@ public:
|
|||||||
return _puddleTagsetFilePath;
|
return _puddleTagsetFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for the word map file path.
  \returns reference to the stored word map file path
*/
string & getWordMapFilePath() {
    return _wordMapFilePath;
}
|
||||||
|
|
||||||
|
/*! Getter for the hashed index file path.
  \returns reference to the stored hashed index file path
*/
string & getHashedIndexFilePath() {
    return _hashedIndexFilePath;
}
|
||||||
|
|
||||||
|
/*! Getter for the suffix array file path.
  \returns reference to the stored suffix array file path
*/
string & getSuffixArrayFilePath() {
    return _suffixArrayFilePath;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Config _config;
|
Config _config;
|
||||||
|
|
||||||
string _puddleTagsetFilePath;
|
string _puddleTagsetFilePath;
|
||||||
|
|
||||||
|
string _wordMapFilePath;
|
||||||
|
|
||||||
|
string _hashedIndexFilePath;
|
||||||
|
|
||||||
|
string _suffixArrayFilePath;
|
||||||
|
|
||||||
string _readConfigParameterStr(const string & name)
|
string _readConfigParameterStr(const string & name)
|
||||||
throw(ConcordiaException);
|
throw(ConcordiaException);
|
||||||
};
|
};
|
||||||
|
@ -1,45 +1,27 @@
|
|||||||
#include "concordia/concordia_index.hpp"
|
#include "concordia/concordia_index.hpp"
|
||||||
|
|
||||||
#include <boost/filesystem.hpp>
|
#include <boost/filesystem.hpp>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
ConcordiaIndex::ConcordiaIndex(const string & wordMapFilepath,
|
ConcordiaIndex::ConcordiaIndex(const string & wordMapFilePath,
|
||||||
const string & hashedIndexFilepath,
|
const string & hashedIndexFilePath,
|
||||||
const string & suffixArrayFilepath)
|
const string & suffixArrayFilePath)
|
||||||
throw(ConcordiaException) {
|
throw(ConcordiaException) :
|
||||||
if (boost::filesystem::exists(wordMapFilepath)) {
|
_hashedIndexFilePath(hashedIndexFilePath),
|
||||||
if (boost::filesystem::exists(hashedIndexFilepath)) {
|
_suffixArrayFilePath(suffixArrayFilePath) {
|
||||||
_hashedIndexFile.open(hashedIndexFilepath.c_str(), ios::out |
|
if (boost::filesystem::exists(wordMapFilePath)) {
|
||||||
ios::app | ios::binary);
|
if (!boost::filesystem::exists(hashedIndexFilePath)) {
|
||||||
if (!_hashedIndexFile.is_open()) {
|
|
||||||
throw ConcordiaException("E03: Failed to open hashed index "
|
|
||||||
"file for appending.");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw ConcordiaException("E01: Word map file exists "
|
throw ConcordiaException("E01: Word map file exists "
|
||||||
"but hashed index file absent.");
|
"but hashed index file absent.");
|
||||||
}
|
}
|
||||||
} else { // WordMap file does not exist
|
} else { // WordMap file does not exist
|
||||||
if (boost::filesystem::exists(hashedIndexFilepath)) {
|
if (boost::filesystem::exists(hashedIndexFilePath)) {
|
||||||
throw ConcordiaException("E02: Hashed index file exists "
|
throw ConcordiaException("E02: Hashed index file exists "
|
||||||
"but word map file absent.");
|
"but word map file absent.");
|
||||||
} else {
|
|
||||||
_hashedIndexFile.open(hashedIndexFilepath.c_str(), ios::out |
|
|
||||||
ios::binary);
|
|
||||||
if (!_hashedIndexFile.is_open()) {
|
|
||||||
throw ConcordiaException("E04: Failed to open hashed index "
|
|
||||||
"file for writing.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_suffixArrayFile.open(suffixArrayFilepath.c_str(), ios::out |
|
|
||||||
ios::binary);
|
|
||||||
if (!_hashedIndexFile.is_open()) {
|
|
||||||
throw ConcordiaException("E05: Failed to open suffix array "
|
|
||||||
"file for writing.");
|
|
||||||
}
|
|
||||||
_hashGenerator = boost::shared_ptr<HashGenerator>(
|
_hashGenerator = boost::shared_ptr<HashGenerator>(
|
||||||
new HashGenerator(wordMapFilepath));
|
new HashGenerator(wordMapFilePath));
|
||||||
}
|
}
|
||||||
|
|
||||||
ConcordiaIndex::~ConcordiaIndex() {
|
ConcordiaIndex::~ConcordiaIndex() {
|
||||||
@ -50,23 +32,30 @@ void ConcordiaIndex::serializeWordMap() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ConcordiaIndex::generateSuffixArray() {
|
void ConcordiaIndex::generateSuffixArray() {
|
||||||
/* Get the file size. */
|
ifstream hashedIndexFile;
|
||||||
long n = _hashedIndexFile.tellg();
|
hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::in|
|
||||||
|
ios::ate|ios::binary);
|
||||||
|
|
||||||
|
/* Get the file size. */
|
||||||
|
long n = hashedIndexFile.tellg();
|
||||||
|
|
||||||
/* Allocate 5blocksize bytes of memory. */
|
|
||||||
sauchar_t *T;
|
sauchar_t *T;
|
||||||
saidx_t *SA;
|
saidx_t *SA;
|
||||||
|
|
||||||
T = reinterpret_cast<sauchar_t *> (malloc((size_t)n * sizeof(sauchar_t)));
|
T = new sauchar_t[n];
|
||||||
SA = reinterpret_cast<saidx_t *> (malloc((size_t)n * sizeof(saidx_t)));
|
SA = new saidx_t[n];
|
||||||
if ((T == NULL) || (SA == NULL)) {
|
|
||||||
throw ConcordiaException("Cannot allocate memory.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Read n bytes of data. */
|
/* Read n bytes of data. */
|
||||||
|
hashedIndexFile.seekg(0, ios::beg);
|
||||||
|
|
||||||
_hashedIndexFile.seekg(0, ios::beg);
|
sauchar_t buff;
|
||||||
_hashedIndexFile.read(reinterpret_cast<char*> (T), (size_t)n);
|
int pos = 0;
|
||||||
|
while (!hashedIndexFile.eof()) {
|
||||||
|
hashedIndexFile.read(reinterpret_cast<char *>(&buff),
|
||||||
|
sizeof(sauchar_t));
|
||||||
|
T[pos++] = buff;
|
||||||
|
}
|
||||||
|
hashedIndexFile.close();
|
||||||
|
|
||||||
/* Construct the suffix array. */
|
/* Construct the suffix array. */
|
||||||
if (divsufsort(T, SA, (saidx_t)n) != 0) {
|
if (divsufsort(T, SA, (saidx_t)n) != 0) {
|
||||||
@ -74,18 +63,32 @@ void ConcordiaIndex::generateSuffixArray() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Write the suffix array. */
|
/* Write the suffix array. */
|
||||||
_suffixArrayFile << *SA;
|
|
||||||
|
ofstream suffixArrayFile;
|
||||||
|
suffixArrayFile.open(_suffixArrayFilePath.c_str(), ios::out|ios::binary);
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
suffixArrayFile.write(reinterpret_cast<char *>(&SA[i]),
|
||||||
|
sizeof(saidx_t));
|
||||||
|
}
|
||||||
|
suffixArrayFile.close();
|
||||||
|
|
||||||
/* Deallocate memory. */
|
/* Deallocate memory. */
|
||||||
free(SA);
|
delete[] T;
|
||||||
free(T);
|
delete[] SA;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConcordiaIndex::addSentence(const string & sentence) {
|
void ConcordiaIndex::addSentence(const string & sentence) {
|
||||||
vector<sauchar_t> hash = _hashGenerator->generateHash(sentence);
|
vector<sauchar_t> hash = _hashGenerator->generateHash(sentence);
|
||||||
|
ofstream hashedIndexFile;
|
||||||
|
hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out|
|
||||||
|
ios::app|ios::binary);
|
||||||
for (vector<sauchar_t>::iterator it = hash.begin();
|
for (vector<sauchar_t>::iterator it = hash.begin();
|
||||||
it != hash.end(); ++it) {
|
it != hash.end(); ++it) {
|
||||||
_hashedIndexFile << *it;
|
sauchar_t buff = *it;
|
||||||
|
hashedIndexFile.write(reinterpret_cast<char *>(&buff),
|
||||||
|
sizeof(sauchar_t));
|
||||||
}
|
}
|
||||||
|
hashedIndexFile.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,9 +19,9 @@ using namespace std;
|
|||||||
|
|
||||||
class ConcordiaIndex {
|
class ConcordiaIndex {
|
||||||
public:
|
public:
|
||||||
explicit ConcordiaIndex(const string & wordMapFilepath,
|
explicit ConcordiaIndex(const string & wordMapFilePath,
|
||||||
const string & hashedIndexFilepath,
|
const string & hashedIndexFilePath,
|
||||||
const string & suffixArrayFilepath)
|
const string & suffixArrayFilePath)
|
||||||
throw(ConcordiaException);
|
throw(ConcordiaException);
|
||||||
|
|
||||||
/*! Destructor.
|
/*! Destructor.
|
||||||
@ -37,9 +37,9 @@ public:
|
|||||||
private:
|
private:
|
||||||
boost::shared_ptr<HashGenerator> _hashGenerator;
|
boost::shared_ptr<HashGenerator> _hashGenerator;
|
||||||
|
|
||||||
fstream _hashedIndexFile;
|
string _hashedIndexFilePath;
|
||||||
|
|
||||||
ofstream _suffixArrayFile;
|
string _suffixArrayFilePath;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
90
concordia/index_searcher.cpp
Normal file
90
concordia/index_searcher.cpp
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
#include "concordia/index_searcher.hpp"
|
||||||
|
|
||||||
|
#include <boost/filesystem.hpp>
|
||||||
|
|
||||||
|
// Creates a searcher with no index loaded: both data pointers are NULL
// and the index length is zero until loadIndex() is called.
IndexSearcher::IndexSearcher()
    : _T(NULL), _SA(NULL), _n(0) {
}
|
||||||
|
|
||||||
|
|
||||||
|
// Releases the in-memory copies of the hashed index and the suffix array.
//
// Both buffers are allocated with new[] in loadIndex(), so they must be
// released with delete[]. Deleting a NULL pointer is a no-op, which
// covers a searcher on which loadIndex() was never called.
//
// NOTE(review): the class has the implicit copy constructor and copy
// assignment; a copied IndexSearcher would share these buffers and free
// them twice. No copy is made in the code visible here - confirm before
// copying instances elsewhere.
IndexSearcher::~IndexSearcher() {
    delete[] _T;
    delete[] _SA;
}
|
||||||
|
|
||||||
|
|
||||||
|
void IndexSearcher::loadIndex(const string & wordMapFilepath,
|
||||||
|
const string & hashedIndexFilepath,
|
||||||
|
const string & suffixArrayFilepath)
|
||||||
|
throw(ConcordiaException) {
|
||||||
|
if (!boost::filesystem::exists(wordMapFilepath)) {
|
||||||
|
throw ConcordiaException("E06: Failed to open word map "
|
||||||
|
"file for reading.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!boost::filesystem::exists(hashedIndexFilepath)) {
|
||||||
|
throw ConcordiaException("E07: Failed to open hashed index file "
|
||||||
|
"for reading.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!boost::filesystem::exists(suffixArrayFilepath)) {
|
||||||
|
throw ConcordiaException("E08: Failed to open suffix array file "
|
||||||
|
"for reading.");
|
||||||
|
}
|
||||||
|
|
||||||
|
_hashGenerator = boost::shared_ptr<HashGenerator>(
|
||||||
|
new HashGenerator(wordMapFilepath));
|
||||||
|
|
||||||
|
ifstream hashedIndexFile;
|
||||||
|
hashedIndexFile.open(hashedIndexFilepath.c_str(), ios::in
|
||||||
|
| ios::ate | ios::binary);
|
||||||
|
_n = hashedIndexFile.tellg();
|
||||||
|
_T = new sauchar_t[_n];
|
||||||
|
|
||||||
|
hashedIndexFile.seekg(0, ios::beg);
|
||||||
|
hashedIndexFile.read(reinterpret_cast<char*> (_T), _n);
|
||||||
|
hashedIndexFile.close();
|
||||||
|
|
||||||
|
_SA = new saidx_t[_n];
|
||||||
|
|
||||||
|
ifstream suffixArrayFile;
|
||||||
|
suffixArrayFile.open(suffixArrayFilepath.c_str(), ios::in | ios::binary);
|
||||||
|
|
||||||
|
saidx_t buff;
|
||||||
|
int pos = 0;
|
||||||
|
while (!suffixArrayFile.eof() && pos < _n) {
|
||||||
|
suffixArrayFile.read(reinterpret_cast<char *>(&buff), sizeof(saidx_t));
|
||||||
|
_SA[pos++] = buff;
|
||||||
|
}
|
||||||
|
suffixArrayFile.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finds every occurrence of the pattern in the loaded index.
//
// pattern - text to search for; it is converted to a hash sequence with
//           the HashGenerator created in loadIndex().
//
// Returns the suffix array entries of the matching range, in suffix
// array order. The result is empty when sa_search reports no match or a
// negative count.
//
// Throws ConcordiaException when no index has been loaded yet; without
// this check the call would dereference an empty shared_ptr.
vector<saidx_t> IndexSearcher::simpleSearch(const string & pattern)
                                             throw(ConcordiaException) {
    if (!_hashGenerator || _T == NULL || _SA == NULL) {
        throw ConcordiaException("Index not loaded: call loadIndex() "
                                 "before searching.");
    }

    vector<sauchar_t> hash = _hashGenerator->generateHash(pattern);

    // The vector's storage is contiguous, so it can be handed to
    // sa_search directly instead of being copied to a heap array.
    // For an empty hash a dummy element keeps the pointer non-NULL, as
    // it was with the heap copy used previously.
    sauchar_t emptyPattern = 0;
    const sauchar_t * patternData = hash.empty() ? &emptyPattern : &hash[0];

    saidx_t left = 0;
    const saidx_t matches = sa_search(_T, static_cast<saidx_t>(_n),
                                      patternData,
                                      static_cast<saidx_t>(hash.size()),
                                      _SA, static_cast<saidx_t>(_n),
                                      &left);

    vector<saidx_t> result;
    if (matches > 0) {
        // Matches occupy the contiguous range [left, left + matches).
        result.assign(_SA + left, _SA + left + matches);
    }
    return result;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
45
concordia/index_searcher.hpp
Normal file
45
concordia/index_searcher.hpp
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
#ifndef INDEX_SEARCHER_HDR
|
||||||
|
#define INDEX_SEARCHER_HDR
|
||||||
|
|
||||||
|
#include <divsufsort.h>
|
||||||
|
#include <boost/shared_ptr.hpp>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "concordia/hash_generator.hpp"
|
||||||
|
#include "concordia/concordia_exception.hpp"
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Class for searching the index with a sentence.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
class IndexSearcher {
|
||||||
|
public:
|
||||||
|
explicit IndexSearcher();
|
||||||
|
|
||||||
|
/*! Destructor.
|
||||||
|
*/
|
||||||
|
virtual ~IndexSearcher();
|
||||||
|
|
||||||
|
void loadIndex(const string & wordMapFilepath,
|
||||||
|
const string & hashedIndexFilepath,
|
||||||
|
const string & suffixArrayFilepath)
|
||||||
|
throw(ConcordiaException);
|
||||||
|
|
||||||
|
vector<saidx_t> simpleSearch(const string & pattern)
|
||||||
|
throw(ConcordiaException);
|
||||||
|
|
||||||
|
private:
|
||||||
|
boost::shared_ptr<HashGenerator> _hashGenerator;
|
||||||
|
|
||||||
|
sauchar_t * _T;
|
||||||
|
|
||||||
|
saidx_t * _SA;
|
||||||
|
|
||||||
|
size_t _n;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -4,6 +4,7 @@ add_library(concordia-tests
|
|||||||
test_word_map.cpp
|
test_word_map.cpp
|
||||||
test_hash_generator.cpp
|
test_hash_generator.cpp
|
||||||
test_concordia_index.cpp
|
test_concordia_index.cpp
|
||||||
|
test_index_searcher.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(concordia-tests concordia ${LIBCONFIG_LIB} concordia-tests-common)
|
target_link_libraries(concordia-tests concordia ${LIBCONFIG_LIB} concordia-tests-common)
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
#include "tests/unit-tests/unit_tests_globals.hpp"
|
#include "tests/unit-tests/unit_tests_globals.hpp"
|
||||||
#include "concordia/concordia.hpp"
|
#include "concordia/concordia.hpp"
|
||||||
#include "tests/common/test_resources_manager.hpp"
|
#include "tests/common/test_resources_manager.hpp"
|
||||||
|
#include "concordia/common/config.hpp"
|
||||||
|
|
||||||
|
#include <boost/algorithm/string/predicate.hpp>
|
||||||
|
#include <boost/filesystem.hpp>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@ -16,4 +19,108 @@ BOOST_AUTO_TEST_CASE( ConcordiaVersion )
|
|||||||
BOOST_CHECK_EQUAL( version , "0.1");
|
BOOST_CHECK_EQUAL( version , "0.1");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// End-to-end check through the Concordia facade: add three sentences,
// generate the index, then search for a two-word pattern.
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
{
    Concordia concordia(
        TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
    concordia.addSentence("Ala ma kota");
    concordia.addSentence("Ala ma rysia");
    concordia.addSentence("Marysia ma rysia");

    concordia.generateIndex();

    /* The test index contains 3 sentences:
       "Ala ma kota"
       "Ala ma rysia"
       "Marysia ma rysia"

       Test word map:
       Ala     -> 0
       ma      -> 1
       kota    -> 2
       rysia   -> 3
       Marysia -> 4

       Test hashed index:
          n: 0 1 2 3 4 5 6 7 8
       T[n]: 0 1 2 0 1 3 4 1 3

       Test suffix array:
           n: 0 1 2 3 4 5 6 7 8
       SA[n]: 0 3 1 7 4 2 8 5 6

       "ma rysia" hashes to (1 3), which starts at positions 4 and 7;
       in suffix array order these come out as 7, 4.
    */
    const saidx_t expectedPositions[] = {7, 4};
    std::vector<saidx_t> expectedResult1(expectedPositions,
                                         expectedPositions + 2);

    std::vector<saidx_t> searchResult1 = concordia.simpleSearch("ma rysia");

    // Remove the generated files before asserting, as the index file is
    // opened in append mode and leftovers would change later runs.
    boost::filesystem::remove(
        TestResourcesManager::getTestFilePath("temp", TEMP_WORD_MAP));
    boost::filesystem::remove(
        TestResourcesManager::getTestFilePath("temp", TEMP_HASHED_INDEX));
    boost::filesystem::remove(
        TestResourcesManager::getTestFilePath("temp", TEMP_SUFFIX_ARRAY));

    BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1.begin(), searchResult1.end(),
                                  expectedResult1.begin(), expectedResult1.end());
}
|
||||||
|
|
||||||
|
// End-to-end check through the Concordia facade with four sentences and
// two different search patterns.
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
{
    Concordia concordia(
        TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
    concordia.addSentence("to jest okno");
    concordia.addSentence("czy jest okno otwarte");
    concordia.addSentence("chyba to jest tutaj");
    concordia.addSentence("to jest");

    concordia.generateIndex();

    /* The test index contains 4 sentences:
       "to jest okno"
       "czy jest okno otwarte"
       "chyba to jest tutaj"
       "to jest"

       Test word map:
       to      -> 0
       jest    -> 1
       okno    -> 2
       czy     -> 3
       otwarte -> 4
       chyba   -> 5
       tutaj   -> 6

       Test hashed index:
          n: 0 1 2 3 4 5 6 7 8 9 10 11 12
       T[n]: 0 1 2 3 1 2 4 5 0 1  6  0  1

       Test suffix array:
           n:  0 1 2  3 4 5 6 7 8 9 10 11 12
       SA[n]: 11 0 8 12 1 4 9 2 5 3  6  7 10

       "to jest"   hashes to (0 1): suffix array order 11, 0, 8.
       "jest okno" hashes to (1 2): suffix array order 1, 4.
    */
    const saidx_t expectedToJest[] = {11, 0, 8};
    std::vector<saidx_t> expectedResult1(expectedToJest, expectedToJest + 3);

    const saidx_t expectedJestOkno[] = {1, 4};
    std::vector<saidx_t> expectedResult2(expectedJestOkno,
                                         expectedJestOkno + 2);

    std::vector<saidx_t> searchResult1 = concordia.simpleSearch("to jest");
    std::vector<saidx_t> searchResult2 = concordia.simpleSearch("jest okno");

    // Remove the generated files before asserting, as the index file is
    // opened in append mode and leftovers would change later runs.
    boost::filesystem::remove(
        TestResourcesManager::getTestFilePath("temp", TEMP_WORD_MAP));
    boost::filesystem::remove(
        TestResourcesManager::getTestFilePath("temp", TEMP_HASHED_INDEX));
    boost::filesystem::remove(
        TestResourcesManager::getTestFilePath("temp", TEMP_SUFFIX_ARRAY));

    BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1.begin(), searchResult1.end(),
                                  expectedResult1.begin(), expectedResult1.end());
    BOOST_CHECK_EQUAL_COLLECTIONS(searchResult2.begin(), searchResult2.end(),
                                  expectedResult2.begin(), expectedResult2.end());
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
BOOST_AUTO_TEST_SUITE_END()
|
||||||
|
@ -13,8 +13,11 @@ BOOST_AUTO_TEST_SUITE(concordia_config)
|
|||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ConfigParameters )
|
BOOST_AUTO_TEST_CASE( ConfigParameters )
|
||||||
{
|
{
|
||||||
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("concordia-test.cfg"));
|
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("concordia-mock.cfg"));
|
||||||
BOOST_CHECK_EQUAL( config.getPuddleTagsetFilePath() , "puddle/tagset.txt" );
|
BOOST_CHECK_EQUAL( config.getPuddleTagsetFilePath() , "puddle/tagset.txt" );
|
||||||
|
BOOST_CHECK_EQUAL( config.getWordMapFilePath() , "tmp/wm.bin" );
|
||||||
|
BOOST_CHECK_EQUAL( config.getHashedIndexFilePath() , "tmp/hi.bin" );
|
||||||
|
BOOST_CHECK_EQUAL( config.getSuffixArrayFilePath() , "tmp/sa.bin" );
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( NonexistentConfigTest )
|
BOOST_AUTO_TEST_CASE( NonexistentConfigTest )
|
||||||
|
@ -13,9 +13,9 @@ BOOST_AUTO_TEST_SUITE(concordia_index)
|
|||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ResourcesExistenceTest1 )
|
BOOST_AUTO_TEST_CASE( ResourcesExistenceTest1 )
|
||||||
{
|
{
|
||||||
ConcordiaIndex index(TestResourcesManager::getTestWordMapFilePath("mock_word_map.bin"),
|
ConcordiaIndex index(TestResourcesManager::getTestFilePath("concordia-index","mock_word_map.bin"),
|
||||||
TestResourcesManager::getTestHashIndexFilePath("mock_hash_index.bin"),
|
TestResourcesManager::getTestFilePath("concordia-index","mock_hash_index.bin"),
|
||||||
TestResourcesManager::getTestSuffixArrayFilePath());
|
TestResourcesManager::getTestFilePath("concordia-index","test_SA.bin"));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -26,9 +26,9 @@ BOOST_AUTO_TEST_CASE( ResourcesExistenceTest2 )
|
|||||||
string message = "";
|
string message = "";
|
||||||
|
|
||||||
try {
|
try {
|
||||||
ConcordiaIndex index(TestResourcesManager::getTestWordMapFilePath("mock_word_map.bin"),
|
ConcordiaIndex index(TestResourcesManager::getTestFilePath("concordia-index","mock_word_map.bin"),
|
||||||
TestResourcesManager::getTestHashIndexFilePath("nonexistent.bin"),
|
TestResourcesManager::getTestFilePath("concordia-index","nonexistent.bin"),
|
||||||
TestResourcesManager::getTestSuffixArrayFilePath());
|
TestResourcesManager::getTestFilePath("concordia-index","test_SA.bin"));
|
||||||
} catch (ConcordiaException & e) {
|
} catch (ConcordiaException & e) {
|
||||||
exceptionThrown = true;
|
exceptionThrown = true;
|
||||||
message = e.what();
|
message = e.what();
|
||||||
@ -44,9 +44,9 @@ BOOST_AUTO_TEST_CASE( ResourcesExistenceTest3 )
|
|||||||
string message = "";
|
string message = "";
|
||||||
|
|
||||||
try {
|
try {
|
||||||
ConcordiaIndex index(TestResourcesManager::getTestWordMapFilePath("nonexistent.bin"),
|
ConcordiaIndex index(TestResourcesManager::getTestFilePath("concordia-index","nonexistent.bin"),
|
||||||
TestResourcesManager::getTestHashIndexFilePath("mock_hash_index.bin"),
|
TestResourcesManager::getTestFilePath("concordia-index","mock_hash_index.bin"),
|
||||||
TestResourcesManager::getTestSuffixArrayFilePath());
|
TestResourcesManager::getTestFilePath("concordia-index","test_SA.bin"));
|
||||||
} catch (ConcordiaException & e) {
|
} catch (ConcordiaException & e) {
|
||||||
exceptionThrown = true;
|
exceptionThrown = true;
|
||||||
message = e.what();
|
message = e.what();
|
||||||
@ -58,20 +58,23 @@ BOOST_AUTO_TEST_CASE( ResourcesExistenceTest3 )
|
|||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest )
|
BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest )
|
||||||
{
|
{
|
||||||
ConcordiaIndex index(TestResourcesManager::getTestWordMapFilePath("test_word_map.bin"),
|
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"),
|
||||||
TestResourcesManager::getTestHashIndexFilePath("test_hash_index.bin"),
|
TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"),
|
||||||
TestResourcesManager::getTestSuffixArrayFilePath());
|
TestResourcesManager::getTestFilePath("temp","test_SA.bin"));
|
||||||
index.addSentence("Ala ma kota");
|
index.addSentence("Ala ma kota");
|
||||||
|
index.addSentence("Ala ma rysia");
|
||||||
|
index.addSentence("Marysia ma rysia");
|
||||||
|
|
||||||
index.generateSuffixArray();
|
index.generateSuffixArray();
|
||||||
index.serializeWordMap();
|
index.serializeWordMap();
|
||||||
|
|
||||||
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestWordMapFilePath("test_word_map.bin")));
|
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_word_map.bin")));
|
||||||
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestHashIndexFilePath("test_hash_index.bin")));
|
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin")));
|
||||||
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestSuffixArrayFilePath()));
|
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_SA.bin")));
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestWordMapFilePath("test_word_map.bin"));
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"));
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestHashIndexFilePath("test_hash_index.bin"));
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"));
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestSuffixArrayFilePath());
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_SA.bin"));
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
BOOST_AUTO_TEST_SUITE_END()
|
||||||
|
76
concordia/t/test_index_searcher.cpp
Normal file
76
concordia/t/test_index_searcher.cpp
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
#include "tests/unit-tests/unit_tests_globals.hpp"
|
||||||
|
|
||||||
|
#include "concordia/index_searcher.hpp"
|
||||||
|
#include "concordia/concordia_index.hpp"
|
||||||
|
#include "concordia/concordia_exception.hpp"
|
||||||
|
#include "tests/common/test_resources_manager.hpp"
|
||||||
|
|
||||||
|
#include <boost/algorithm/string/predicate.hpp>
|
||||||
|
#include <boost/filesystem.hpp>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_SUITE(index_searcher)
|
||||||
|
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_CASE( SimpleSearchTest )
|
||||||
|
{
|
||||||
|
|
||||||
|
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"),
|
||||||
|
TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"),
|
||||||
|
TestResourcesManager::getTestFilePath("temp","test_SA.bin"));
|
||||||
|
index.addSentence("Ala ma kota");
|
||||||
|
index.addSentence("Ala ma rysia");
|
||||||
|
index.addSentence("Marysia ma rysia");
|
||||||
|
|
||||||
|
index.generateSuffixArray();
|
||||||
|
index.serializeWordMap();
|
||||||
|
|
||||||
|
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_word_map.bin")));
|
||||||
|
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin")));
|
||||||
|
BOOST_CHECK(boost::filesystem::exists(TestResourcesManager::getTestFilePath("temp","test_SA.bin")));
|
||||||
|
|
||||||
|
IndexSearcher searcher;
|
||||||
|
searcher.loadIndex(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"),
|
||||||
|
TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"),
|
||||||
|
TestResourcesManager::getTestFilePath("temp","test_SA.bin"));
|
||||||
|
|
||||||
|
/*The test index contains 3 sentences:
|
||||||
|
"Ala ma kota"
|
||||||
|
"Ala ma rysia"
|
||||||
|
"Marysia ma rysia"
|
||||||
|
|
||||||
|
Test word map:
|
||||||
|
Ala -> 0
|
||||||
|
ma -> 1
|
||||||
|
kota -> 2
|
||||||
|
rysia -> 3
|
||||||
|
Marysia -> 4
|
||||||
|
|
||||||
|
Test hashed index:
|
||||||
|
n: 0 1 2 3 4 5 6 7 8
|
||||||
|
T[n]: 0 1 2 0 1 3 4 1 3
|
||||||
|
|
||||||
|
Test suffix array:
|
||||||
|
n: 0 1 2 3 4 5 6 7 8
|
||||||
|
SA[n]: 0 3 1 7 4 2 8 5 6
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
vector<saidx_t> expectedResult1;
|
||||||
|
expectedResult1.push_back(7);
|
||||||
|
expectedResult1.push_back(4);
|
||||||
|
|
||||||
|
vector<saidx_t> searchResult1 = searcher.simpleSearch("ma rysia");
|
||||||
|
|
||||||
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_word_map.bin"));
|
||||||
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"));
|
||||||
|
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp","test_SA.bin"));
|
||||||
|
|
||||||
|
BOOST_CHECK_EQUAL_COLLECTIONS(searchResult1.begin(), searchResult1.end(),
|
||||||
|
expectedResult1.begin(), expectedResult1.end());
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_SUITE_END()
|
@ -3,7 +3,6 @@
|
|||||||
#define PUDDLE_TEST_DIRECTORY "puddle"
|
#define PUDDLE_TEST_DIRECTORY "puddle"
|
||||||
#define CONCORDIA_TAGSET_DIRECTORY "concordia-tagset"
|
#define CONCORDIA_TAGSET_DIRECTORY "concordia-tagset"
|
||||||
#define CONCORDIA_CONFIG_DIRECTORY "concordia-config"
|
#define CONCORDIA_CONFIG_DIRECTORY "concordia-config"
|
||||||
#define CONCORDIA_INDEX_DIRECTORY "concordia-index"
|
|
||||||
|
|
||||||
string TestResourcesManager::getPuddleFilePath(const string & filename) {
|
string TestResourcesManager::getPuddleFilePath(const string & filename) {
|
||||||
string result = string(TEST_RESOURCES_DIRECTORY);
|
string result = string(TEST_RESOURCES_DIRECTORY);
|
||||||
@ -16,23 +15,13 @@ string TestResourcesManager::getTestConcordiaConfigFilePath(const string & filen
|
|||||||
return result + "/" + CONCORDIA_CONFIG_DIRECTORY + "/" + filename;
|
return result + "/" + CONCORDIA_CONFIG_DIRECTORY + "/" + filename;
|
||||||
}
|
}
|
||||||
|
|
||||||
string TestResourcesManager::getTestWordMapFilePath(const string & filename) {
|
|
||||||
string result = string(TEST_RESOURCES_DIRECTORY);
|
|
||||||
return result + "/" + CONCORDIA_INDEX_DIRECTORY + "/" + filename;
|
|
||||||
}
|
|
||||||
|
|
||||||
string TestResourcesManager::getTestHashIndexFilePath(const string & filename) {
|
|
||||||
string result = string(TEST_RESOURCES_DIRECTORY);
|
|
||||||
return result + "/" + CONCORDIA_INDEX_DIRECTORY + "/" + filename;
|
|
||||||
}
|
|
||||||
|
|
||||||
string TestResourcesManager::getTestSuffixArrayFilePath() {
|
|
||||||
string result = string(TEST_RESOURCES_DIRECTORY);
|
|
||||||
return result + "/" + CONCORDIA_INDEX_DIRECTORY + "/test_SA.bin";
|
|
||||||
}
|
|
||||||
|
|
||||||
string TestResourcesManager::getProdConcordiaConfigFilePath(const string & filename) {
|
string TestResourcesManager::getProdConcordiaConfigFilePath(const string & filename) {
|
||||||
string result = string(PROD_RESOURCES_DIRECTORY);
|
string result = string(PROD_RESOURCES_DIRECTORY);
|
||||||
return result + "/" + CONCORDIA_CONFIG_DIRECTORY + "/" + filename;
|
return result + "/" + CONCORDIA_CONFIG_DIRECTORY + "/" + filename;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string TestResourcesManager::getTestFilePath(const string & module, const string & filename) {
|
||||||
|
string result = string(TEST_RESOURCES_DIRECTORY);
|
||||||
|
return result + "/" + module + "/" + filename;
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -14,13 +14,10 @@ public:
|
|||||||
|
|
||||||
static string getTestConcordiaConfigFilePath(const string & filename);
|
static string getTestConcordiaConfigFilePath(const string & filename);
|
||||||
|
|
||||||
static string getTestWordMapFilePath(const string & filename);
|
|
||||||
|
|
||||||
static string getTestHashIndexFilePath(const string & filename);
|
|
||||||
|
|
||||||
static string getTestSuffixArrayFilePath();
|
|
||||||
|
|
||||||
static string getProdConcordiaConfigFilePath(const string & filename);
|
static string getProdConcordiaConfigFilePath(const string & filename);
|
||||||
|
|
||||||
|
static string getTestFilePath(const string & module, const string & filename);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,9 +1,15 @@
|
|||||||
#----------------------------
|
#----------------------------
|
||||||
# Concordia configuration file
|
# Concordia mock configuration file
|
||||||
#---------------------------
|
#---------------------------
|
||||||
#
|
#
|
||||||
|
|
||||||
#Path to the Puddle tagset
|
#Path to the Puddle tagset
|
||||||
puddle_tagset_path = "puddle/tagset.txt";
|
puddle_tagset_path = "puddle/tagset.txt";
|
||||||
|
|
||||||
|
word_map_path = "tmp/wm.bin"
|
||||||
|
|
||||||
|
hashed_index_path = "tmp/hi.bin"
|
||||||
|
|
||||||
|
suffix_array_path = "tmp/sa.bin"
|
||||||
|
|
||||||
### eof
|
### eof
|
@ -6,4 +6,22 @@
|
|||||||
#Path to the Puddle tagset
|
#Path to the Puddle tagset
|
||||||
puddle_tagset_path = "@TEST_PUDDLE_TAGSET_PATH@";
|
puddle_tagset_path = "@TEST_PUDDLE_TAGSET_PATH@";
|
||||||
|
|
||||||
|
#-------------------------------------------------------------------------------
|
||||||
|
#Word map, hashed index and suffix array files are in a temporary directory
|
||||||
|
#and should be deleted at the end of each test procedure.
|
||||||
|
|
||||||
|
#Word map file containing unique codes for tokens
|
||||||
|
|
||||||
|
word_map_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_WORD_MAP@"
|
||||||
|
|
||||||
|
#File containing the "text" for suffix array searching, i.e. sequence of codes
|
||||||
|
|
||||||
|
hashed_index_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_HASHED_INDEX@"
|
||||||
|
|
||||||
|
#Binarized suffix array
|
||||||
|
|
||||||
|
suffix_array_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_SUFFIX_ARRAY@"
|
||||||
|
#-------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
### eof
|
### eof
|
||||||
|
BIN
tests/resources/index-searcher/test_SA.bin
Normal file
BIN
tests/resources/index-searcher/test_SA.bin
Normal file
Binary file not shown.
BIN
tests/resources/index-searcher/test_hash_index.bin
Normal file
BIN
tests/resources/index-searcher/test_hash_index.bin
Normal file
Binary file not shown.
BIN
tests/resources/index-searcher/test_word_map.bin
Normal file
BIN
tests/resources/index-searcher/test_word_map.bin
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user