add index path as required argument to concordia constructor

This commit is contained in:
rjawor 2015-10-16 22:14:11 +02:00
parent f585ff9e01
commit 1adabf4833
32 changed files with 143 additions and 171 deletions

View File

@ -57,10 +57,6 @@ set (PROD_RESOURCES_DIRECTORY "${concordia_SOURCE_DIR}/prod/resources")
# ============================== #
set (TEST_RESOURCES_DIRECTORY "${concordia_SOURCE_DIR}/tests/resources")
set (TEMP_WORD_MAP "temp_word_map.bin")
set (TEMP_HASHED_INDEX "temp_hashed_index.bin")
set (TEMP_MARKERS "temp_markers.bin")
set (TEMP_SUFFIX_ARRAY "temp_suffix_array.bin")
file(MAKE_DIRECTORY ${TEST_RESOURCES_DIRECTORY}/temp)
file(MAKE_DIRECTORY ${PROD_RESOURCES_DIRECTORY}/temp)

View File

@ -108,6 +108,8 @@ int main(int argc, char** argv) {
desc.add_options()
("help,h", "Display this message")
("index,i", boost::program_options::value<std::string>(),
"Index directory (required)")
("config,c", boost::program_options::value<std::string>(),
"Concordia configuration file (required)")
("simple-search,s", boost::program_options::value<std::string>(),
@ -144,12 +146,20 @@ int main(int argc, char** argv) {
return 1;
}
std::string indexDirectory;
if (cli.count("index")) {
indexDirectory = cli["index"].as<std::string>();
} else {
std::cerr << "No index directory path given. Terminating."
<< std::endl;
return 1;
}
try {
std::cout << "\tInitializing concordia..." << std::endl;
boost::posix_time::ptime time_start =
boost::posix_time::microsec_clock::local_time();
Concordia concordia(configFile);
Concordia concordia(indexDirectory, configFile);
boost::posix_time::ptime time_end =
boost::posix_time::microsec_clock::local_time();
boost::posix_time::time_duration msdiff = time_end - time_start;

View File

@ -2,10 +2,6 @@
#define CONCORDIA_VERSION_MINOR @CONCORDIA_VERSION_MINOR@
#define TEST_RESOURCES_DIRECTORY "@TEST_RESOURCES_DIRECTORY@"
#define TEMP_WORD_MAP "@TEMP_WORD_MAP@"
#define TEMP_HASHED_INDEX "@TEMP_HASHED_INDEX@"
#define TEMP_MARKERS "@TEMP_MARKERS@"
#define TEMP_SUFFIX_ARRAY "@TEMP_SUFFIX_ARRAY@"
#define PROD_RESOURCES_DIRECTORY "@PROD_RESOURCES_DIRECTORY@"
@ -30,3 +26,7 @@ typedef @SUFFIX_MARKER_TYPE@ SUFFIX_MARKER_TYPE;
// and the last SUFFIX_MARKER_SENTENCE_BYTES store the sentence length.
#define CONCORDIA_SEARCH_MAX_RESULTS 3
#define WORD_MAP_FILE_NAME "word_map.bin"
#define MARKERS_FILE_NAME "markers.bin"
#define HASHED_INDEX_FILE_NAME "hashed_index.bin"

View File

@ -15,13 +15,15 @@ std::string Concordia::_libraryVersion = _createLibraryVersion();
// ===========================================
Concordia::Concordia(const std::string & configFilePath)
throw(ConcordiaException) {
Concordia::Concordia(const std::string & indexPath,
const std::string & configFilePath)
throw(ConcordiaException) :
_indexPath(indexPath) {
_config = boost::shared_ptr<ConcordiaConfig> (
new ConcordiaConfig(configFilePath));
_index = boost::shared_ptr<ConcordiaIndex>(
new ConcordiaIndex(_config->getHashedIndexFilePath(),
_config->getMarkersFilePath()));
new ConcordiaIndex(_getHashedIndexFilePath(),
_getMarkersFilePath()));
_searcher = boost::shared_ptr<IndexSearcher>(new IndexSearcher());
_initializeIndex();
}
@ -101,14 +103,14 @@ std::vector<TokenizedSentence> Concordia::addAllExamples(
}
void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) {
if (boost::filesystem::exists(_config->getWordMapFilePath())
&& boost::filesystem::exists(_config->getHashedIndexFilePath())
&& boost::filesystem::exists(_config->getMarkersFilePath())) {
if (boost::filesystem::exists(_getWordMapFilePath())
&& boost::filesystem::exists(_getHashedIndexFilePath())
&& boost::filesystem::exists(_getMarkersFilePath())) {
// reading index from file
_T->clear();
std::ifstream hashedIndexFile;
hashedIndexFile.open(
_config->getHashedIndexFilePath().c_str(), std::ios::in
_getHashedIndexFilePath().c_str(), std::ios::in
| std::ios::ate | std::ios::binary);
saidx_t hiFileSize = hashedIndexFile.tellg();
if (hiFileSize > 0) {
@ -128,7 +130,7 @@ void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) {
// reading markers from file
_markers->clear();
std::ifstream markersFile;
markersFile.open(_config->getMarkersFilePath().c_str(), std::ios::in
markersFile.open(_getMarkersFilePath().c_str(), std::ios::in
| std::ios::ate | std::ios::binary);
saidx_t maFileSize = markersFile.tellg();
if (maFileSize > 0) {
@ -158,16 +160,17 @@ void Concordia::refreshSAfromRAM() throw(ConcordiaException) {
void Concordia::_initializeIndex() throw(ConcordiaException) {
_hashGenerator = boost::shared_ptr<HashGenerator>(
new HashGenerator(_config));
new HashGenerator(_indexPath,
_config));
_T = boost::shared_ptr<std::vector<sauchar_t> >(
new std::vector<sauchar_t>);
_markers = boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> >(
new std::vector<SUFFIX_MARKER_TYPE>);
if (boost::filesystem::exists(_config->getWordMapFilePath())
&& boost::filesystem::exists(_config->getHashedIndexFilePath())) {
if (boost::filesystem::exists(_getWordMapFilePath())
&& boost::filesystem::exists(_getHashedIndexFilePath())) {
loadRAMIndexFromDisk();
} else if (!boost::filesystem::exists(_config->getWordMapFilePath())
&& !boost::filesystem::exists(_config->getHashedIndexFilePath())) {
} else if (!boost::filesystem::exists(_getWordMapFilePath())
&& !boost::filesystem::exists(_getHashedIndexFilePath())) {
// empty index
_SA = boost::shared_ptr<std::vector<saidx_t> >(
new std::vector<saidx_t>);
@ -233,7 +236,19 @@ void Concordia::clearIndex() throw(ConcordiaException) {
_SA = boost::shared_ptr<std::vector<saidx_t> >(
new std::vector<saidx_t>);
boost::filesystem::remove(_config->getHashedIndexFilePath());
boost::filesystem::remove(_config->getMarkersFilePath());
boost::filesystem::remove(_getHashedIndexFilePath());
boost::filesystem::remove(_getMarkersFilePath());
}
// Builds the location of the word map file inside the index directory
// by joining _indexPath and WORD_MAP_FILE_NAME with a "/" separator.
std::string Concordia::_getWordMapFilePath() {
    std::string wordMapPath(_indexPath);
    wordMapPath += "/";
    wordMapPath += WORD_MAP_FILE_NAME;
    return wordMapPath;
}
// Builds the location of the hashed index file inside the index directory
// by joining _indexPath and HASHED_INDEX_FILE_NAME with a "/" separator.
std::string Concordia::_getHashedIndexFilePath() {
    return std::string(_indexPath)
        .append("/")
        .append(HASHED_INDEX_FILE_NAME);
}
// Builds the location of the markers file inside the index directory
// by joining _indexPath and MARKERS_FILE_NAME with a "/" separator.
std::string Concordia::_getMarkersFilePath() {
    const std::string separator("/");
    const std::string markersFileName(MARKERS_FILE_NAME);
    return _indexPath + separator + markersFileName;
}

View File

@ -38,10 +38,12 @@
class Concordia {
public:
/*! Constructor.
\param indexPath path to the index directory
\param configFilePath path to the Concordia configuration file
\throws ConcordiaException
*/
explicit Concordia(const std::string & configFilePath)
explicit Concordia(const std::string & indexPath,
const std::string & configFilePath)
throw(ConcordiaException);
/*! Destructor.
*/
@ -163,10 +165,18 @@ public:
void clearIndex() throw(ConcordiaException);
private:
std::string _getWordMapFilePath();
std::string _getHashedIndexFilePath();
std::string _getMarkersFilePath();
void _initializeIndex() throw(ConcordiaException);
static std::string _libraryVersion;
std::string _indexPath;
boost::shared_ptr<ConcordiaConfig> _config;
boost::shared_ptr<ConcordiaIndex> _index;

View File

@ -4,9 +4,6 @@
#include "concordia/common/logging.hpp"
#define PUDDLE_TAGSET_PARAM "puddle_tagset_path"
#define WORD_MAP_PARAM "word_map_path"
#define HASHED_INDEX_PARAM "hashed_index_path"
#define MARKERS_PARAM "markers_path"
#define SUFFIX_ARRAY_PARAM "suffix_array_path"
#define HTML_TAGS_PARAM "html_tags_path"
#define STOP_WORDS_ENABLED_PARAM "stop_words_enabled"
@ -25,12 +22,6 @@ ConcordiaConfig::ConcordiaConfig(const std::string & configFilePath)
+configFilePath);
}
_wordMapFilePath =
ConcordiaConfig::_readConfigParameterStr(WORD_MAP_PARAM);
_hashedIndexFilePath =
ConcordiaConfig::_readConfigParameterStr(HASHED_INDEX_PARAM);
_markersFilePath =
ConcordiaConfig::_readConfigParameterStr(MARKERS_PARAM);
_htmlTagsFilePath =
ConcordiaConfig::_readConfigParameterStr(HTML_TAGS_PARAM);
_stopWordsEnabled =

View File

@ -24,30 +24,6 @@ public:
*/
virtual ~ConcordiaConfig();
/*! Getter for word map file path.
For more information see \ref tutorial3.
\returns word map file path
*/
std::string & getWordMapFilePath() {
return _wordMapFilePath;
}
/*! Getter for hashed index file path.
For more information see \ref tutorial3.
\returns hashed index file path
*/
std::string & getHashedIndexFilePath() {
return _hashedIndexFilePath;
}
/*! Getter for markers file path.
For more information see \ref tutorial3.
\returns markers file path
*/
std::string & getMarkersFilePath() {
return _markersFilePath;
}
/*! Getter for html tags file path.
For more information see \ref tutorial3.
\returns html tags file path
@ -91,12 +67,6 @@ public:
private:
libconfig::Config _config;
std::string _wordMapFilePath;
std::string _hashedIndexFilePath;
std::string _markersFilePath;
std::string _htmlTagsFilePath;
bool _stopWordsEnabled;

View File

@ -10,9 +10,10 @@
#include <fstream>
HashGenerator::HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
HashGenerator::HashGenerator(std::string indexPath,
boost::shared_ptr<ConcordiaConfig> config)
throw(ConcordiaException) :
_wordMapFilePath(config->getWordMapFilePath()),
_wordMapFilePath(indexPath+"/"+WORD_MAP_FILE_NAME),
_wordMap(boost::shared_ptr<WordMap>(new WordMap)),
_sentenceTokenizer(boost::shared_ptr<SentenceTokenizer>(
new SentenceTokenizer(config))) {

View File

@ -30,9 +30,11 @@ class HashGenerator {
public:
/*!
Constructor.
\param indexPath path to the index directory
\param config pointer to current config object
*/
explicit HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
explicit HashGenerator(std::string indexPath,
boost::shared_ptr<ConcordiaConfig> config)
throw(ConcordiaException);
/*! Destructor.

View File

@ -62,16 +62,17 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
std::vector<INDEX_CHARACTER_TYPE> hash =
hashGenerator->generateHash(pattern).getCodes();
// append sentence boundary marker, as we are looking only for exact sentence matches
// append sentence boundary marker,
// as we are looking only for exact sentence matches
hash.push_back(INDEX_CHARACTER_TYPE_MAX_VALUE);
saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE);
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
int size = sa_search(T->data(), (saidx_t) T->size(),
(const sauchar_t *) patternArray, patternLength,
SA->data(), (saidx_t) SA->size(), &left);
SUFFIX_MARKER_TYPE occurencesCount = 0;
for (int i = 0; i < size; ++i) {
saidx_t resultPos = SA->at(left + i);
@ -86,7 +87,7 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
}
delete[] patternArray;
return occurencesCount;
}

View File

@ -18,7 +18,8 @@ BOOST_AUTO_TEST_SUITE(concordia_main)
BOOST_AUTO_TEST_CASE( ConcordiaVersion )
{
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
std::string version = concordia.getVersion();
BOOST_CHECK_EQUAL( version , "1.0");
}
@ -26,7 +27,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaVersion )
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
{
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
TokenizedSentence ts = concordia.addExample(Example("Ala posiada kota",14));
/*
0,3 type: 1 value: ala
@ -85,7 +87,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
{
// modified stop words to avoid anonymization
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
std::vector<Example> testExamples;
testExamples.push_back(Example("xto xjest okno",312));
testExamples.push_back(Example("czy xjest okno otwarte",202));
@ -128,7 +131,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
*/
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
std::vector<MatchedPatternFragment> searchResult1 = concordia2.simpleSearch("xto xjest");
std::vector<MatchedPatternFragment> searchResult2 = concordia2.simpleSearch("xjest okno");
@ -156,13 +160,15 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
{
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
std::vector<Example> testExamples;
testExamples.push_back(Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312));
testExamples.push_back(Example("czy xjest żółte otwarte",202));
concordia.addAllExamples(testExamples);
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
std::vector<MatchedPatternFragment> searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia");
concordia2.clearIndex();
@ -175,7 +181,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
{
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
concordia.addExample(Example("Ala posiada kota",14));
concordia.addExample(Example("Ala posiada rysia",51));
concordia.addExample(Example("Marysia posiada rysia",123));
@ -220,7 +227,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
{
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
concordia.addExample(Example("Ala posiada kota",14));
concordia.addExample(Example("Ala posiada rysia",51));
concordia.addExample(Example("Marysia posiada rysia",123));
@ -292,7 +300,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
{
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
/*
concordia.addExample(Example("Alice has a cat", 56));
concordia.addExample(Example("Alice has a dog", 23));
@ -349,7 +358,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
BOOST_AUTO_TEST_CASE( Tokenize )
{
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
TokenizedSentence ts = concordia.tokenize(" Ala posiada kota");
/*
0,3 type: 1 value: ala
@ -382,7 +392,8 @@ BOOST_AUTO_TEST_CASE( Tokenize )
BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences )
{
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
concordia.addExample(Example("Ala posiada kota",14));
concordia.addExample(Example("Ala posiada rysia",51));
concordia.addExample(Example("Ala posiada kota",16));

View File

@ -12,9 +12,6 @@ BOOST_AUTO_TEST_SUITE(concordia_config)
BOOST_AUTO_TEST_CASE( ConfigParameters )
{
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("concordia-mock.cfg"));
BOOST_CHECK_EQUAL( config.getWordMapFilePath() , "/tmp/wm.bin" );
BOOST_CHECK_EQUAL( config.getHashedIndexFilePath() , "/tmp/hi.bin" );
BOOST_CHECK_EQUAL( config.getMarkersFilePath() , "/tmp/ma.bin" );
BOOST_CHECK_EQUAL( config.getHtmlTagsFilePath() , "/tmp/html_tags.txt" );
BOOST_CHECK_EQUAL( config.getStopWordsFilePath() , "/tmp/stop_words.txt" );
BOOST_CHECK_EQUAL( config.getNamedEntitiesFilePath() , "/tmp/named_entities.txt" );

View File

@ -356,11 +356,11 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest )
SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7
*/
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX),
TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
ConcordiaIndex index(TestResourcesManager::getTempPath()+"/"+HASHED_INDEX_FILE_NAME,
TestResourcesManager::getTempPath()+"/"+MARKERS_FILE_NAME);
boost::shared_ptr<ConcordiaConfig> config(
new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
boost::shared_ptr<HashGenerator> hashGenerator(new HashGenerator(config));
boost::shared_ptr<HashGenerator> hashGenerator(new HashGenerator(TestResourcesManager::getTempPath(), config));
boost::shared_ptr<std::vector<sauchar_t> > T(new std::vector<sauchar_t>());
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers(new std::vector<SUFFIX_MARKER_TYPE>());
@ -428,9 +428,9 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest )
BOOST_CHECK_EQUAL(patternIntervals123[1].getEnd(), 4);
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+HASHED_INDEX_FILE_NAME);
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+MARKERS_FILE_NAME);
}

View File

@ -17,11 +17,11 @@ BOOST_AUTO_TEST_CASE( SimpleHashTest )
{
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
if (boost::filesystem::exists(config->getWordMapFilePath())) {
boost::filesystem::remove(config->getWordMapFilePath());
if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
}
HashGenerator hashGenerator = HashGenerator(config);
HashGenerator hashGenerator = HashGenerator(TestResourcesManager::getTempPath(), config);
std::vector<INDEX_CHARACTER_TYPE> hash = hashGenerator.generateHash("Ala posiada kota").getCodes();
std::vector<INDEX_CHARACTER_TYPE> expected;
@ -38,11 +38,11 @@ BOOST_AUTO_TEST_CASE( TooLongHashTest )
{
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
if (boost::filesystem::exists(config->getWordMapFilePath())) {
boost::filesystem::remove(config->getWordMapFilePath());
if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
}
HashGenerator hashGenerator = HashGenerator(config);
HashGenerator hashGenerator = HashGenerator(TestResourcesManager::getTempPath(), config);
std::stringstream ss;
for (int i=0;i<65537;i++) {
@ -70,11 +70,11 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
{
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
if (boost::filesystem::exists(config->getWordMapFilePath())) {
boost::filesystem::remove(config->getWordMapFilePath());
if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
}
HashGenerator hashGenerator1 = HashGenerator(config);
HashGenerator hashGenerator1 = HashGenerator(TestResourcesManager::getTempPath(), config);
std::vector<INDEX_CHARACTER_TYPE> hash1 = hashGenerator1.generateHash("Ala posiada kota").getCodes();
std::vector<INDEX_CHARACTER_TYPE> expected1;
@ -85,7 +85,7 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
hashGenerator1.serializeWordMap();
HashGenerator hashGenerator2 = HashGenerator(config);
HashGenerator hashGenerator2 = HashGenerator(TestResourcesManager::getTempPath(), config);
std::vector<INDEX_CHARACTER_TYPE> hash2 = hashGenerator2.generateHash("Ala posiada psa").getCodes();
std::vector<INDEX_CHARACTER_TYPE> expected2;
expected2.push_back(0);
@ -100,11 +100,11 @@ BOOST_AUTO_TEST_CASE( TokenVectorTest )
{
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
if (boost::filesystem::exists(config->getWordMapFilePath())) {
boost::filesystem::remove(config->getWordMapFilePath());
if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
}
HashGenerator hashGenerator = HashGenerator(config);
HashGenerator hashGenerator = HashGenerator(TestResourcesManager::getTempPath(), config);
TokenizedSentence tokenizedSentence = hashGenerator.generateHash("12.02.2014 o godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że <b>kierowca</b> zaparkował samochód.");

View File

@ -29,7 +29,7 @@ File first.cpp:
using namespace std;
int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
cout << concordia.getVersion() << endl;
}
\endverbatim
@ -52,7 +52,7 @@ File simple_search.cpp:
using namespace std;
int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
// adding sentences to index
concordia.addExample(Example("Alice has a cat", 56));
@ -116,7 +116,7 @@ File concordia_searching.cpp:
using namespace std;
int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
TokenizedSentence ts = concordia.addExample(Example("Alice has a cat", 56));
cout << "Added the following tokens: " << endl;
@ -209,17 +209,6 @@ Every option is documented in comments within the configuration file.
#---------------------------
#
#-------------------------------------------------------------------------------
# The below set the paths for hashed index, markers array and word map files.
# If all the files pointed by these paths exist, Concordia reads them to its
# RAM index. When none of these files exist, a new empty index is created.
# However, if any of these files exist and any other is missing, the index
# is considered corrupt and Concordia does not start.
hashed_index_path = "<CONCORDIA_HOME>/tests/resources/temp/temp_hashed_index.bin"
markers_path = "<CONCORDIA_HOME>/tests/resources/temp/temp_markers.bin"
word_map_path = "<CONCORDIA_HOME>/tests/resources/temp/temp_word_map.bin"
#-------------------------------------------------------------------------------
# The following settings control the sentence tokenizer mechanism. Tokenizer
# takes into account html tags, substitutes predefined symbols
@ -260,6 +249,7 @@ The full list of program options is given below:
\verbatim
-h [ --help ] Display this message
-c [ --config ] arg Concordia configuration file (required)
-i [ --index ] arg Index directory (required)
-s [ --simple-search ] arg Pattern to be searched in the index
-n [ --silent ] While searching, do not
output search results
@ -277,12 +267,12 @@ From <CONCORDIA_HOME> directory:
Read sentences from file sentences.txt
\verbatim
./build/concordia-console/concordia-console -c tests/resources/concordia-config/concordia.cfg -r ~/sentences.txt
./build/concordia-console/concordia-console -i /tmp -c tests/resources/concordia-config/concordia.cfg -r ~/sentences.txt
\endverbatim
Run concordia search on the index
\verbatim
./build/concordia-console/concordia-console -c tests/resources/concordia-config/concordia.cfg -x "some pattern"
./build/concordia-console/concordia-console -i /tmp -c tests/resources/concordia-config/concordia.cfg -x "some pattern"
\endverbatim
*/

View File

@ -12,7 +12,7 @@
using namespace std;
int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
TokenizedSentence ts = concordia.addExample(Example("Alice has a cat", 56));
cout << "Added the following tokens: " << endl;

View File

@ -7,6 +7,6 @@
using namespace std;
int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
cout << concordia.getVersion() << endl;
}

View File

@ -10,7 +10,7 @@
using namespace std;
int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
// adding sentences to index
concordia.addExample(Example("Alice has a cat", 56));

View File

@ -3,17 +3,6 @@
#---------------------------
#
#-------------------------------------------------------------------------------
# The below set the paths for hashed index, markers array and word map files.
# If all the files pointed by these paths exist, Concordia reads them to its
# RAM index. When none of these files exist, a new empty index is created.
# However, if any of these files exist and any other is missing, the index
# is considered corrupt and Concordia does not start.
hashed_index_path = "@PROD_RESOURCES_DIRECTORY@/temp/@TEMP_HASHED_INDEX@"
markers_path = "@PROD_RESOURCES_DIRECTORY@/temp/@TEMP_MARKERS@"
word_map_path = "@PROD_RESOURCES_DIRECTORY@/temp/@TEMP_WORD_MAP@"
#-------------------------------------------------------------------------------
# The following settings control the sentence anonymizer mechanism. It is used to
# remove unnecessary symbols and possibly words from sentences added to index

View File

@ -1,5 +1,5 @@
#!/bin/sh
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -a "$1"
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -a "$1"

View File

@ -1,5 +1,5 @@
#!/bin/sh
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "$1"
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "$1"

View File

@ -9,6 +9,6 @@ echo "CONCORDIA INDEXER: Running Concordia"
rm ../prod/resources/temp/*
echo "CONCORDIA INDEXER: reading from file"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt

View File

@ -9,22 +9,22 @@ echo "CONCORDIA RUNNER: Running Concordia"
rm ../prod/resources/temp/*
echo "CONCORDIA RUNNER: reading from file"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt
echo "CONCORDIA RUNNER: concordia searching for pattern: \"Współpraca Państw Członkowskich i Komisji Europejskiej\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "Współpraca Państw Członkowskich i Komisji Europejskiej"
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "Współpraca Państw Członkowskich i Komisji Europejskiej"
echo "CONCORDIA RUNNER: concordia searching for pattern: \"8. W odniesieniu do artykułu 45 ustęp 12\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "8. W odniesieniu do artykułu 45 ustęp 12"
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "8. W odniesieniu do artykułu 45 ustęp 12"
echo "CONCORDIA RUNNER: concordia searching for pattern: \"Prawo europejskie umożliwia handel zagraniczny\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "Prawo europejskie umożliwia handel zagraniczny"
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "Prawo europejskie umożliwia handel zagraniczny"
echo "CONCORDIA RUNNER: searching for pattern: \"Parlamentu Europejskiego\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Parlamentu Europejskiego" -n
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "Parlamentu Europejskiego" -n
echo "CONCORDIA RUNNER: searching for pattern: \"Dostęp do zatrudnienia\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Dostęp do zatrudnienia" -n
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "Dostęp do zatrudnienia" -n
echo "CONCORDIA RUNNER: searching for pattern: \"Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem" -n
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem" -n
rm ../prod/resources/text-files/jrc_smaller.txt

View File

@ -9,10 +9,10 @@ echo "CONCORDIA RUNNER: Running Concordia"
rm ../prod/resources/temp/*
echo "CONCORDIA RUNNER: reading from file"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/large.txt
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/large.txt
echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n
echo "CONCORDIA RUNNER: searching for pattern: \"it is\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n
rm ../prod/resources/text-files/large.txt

View File

@ -4,8 +4,8 @@ echo "CONCORDIA RUNNER: Running Concordia"
rm ../prod/resources/temp/*
echo "CONCORDIA RUNNER: reading from file"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/medium.txt
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/medium.txt
echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own"
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own"
echo "CONCORDIA RUNNER: searching for pattern: \"it is\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n

View File

@ -1,5 +1,5 @@
#!/bin/sh
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "$1" -n
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "$1" -n

View File

@ -9,6 +9,6 @@ echo "CONCORDIA RUNNER: Running Concordia"
rm ../prod/resources/temp/*
echo "CONCORDIA RUNNER: testing"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/jrc_smaller.txt
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/jrc_smaller.txt
rm ../prod/resources/text-files/jrc_smaller.txt

View File

@ -5,5 +5,5 @@ echo "CONCORDIA RUNNER: Running Concordia"
rm ../prod/resources/temp/*
echo "CONCORDIA RUNNER: testing"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/medium.txt
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/medium.txt

View File

@ -25,3 +25,8 @@ std::string TestResourcesManager::getTestFilePath(const std::string & module, co
return result + "/" + module + "/" + filename;
}
// Returns the "temp" subdirectory of TEST_RESOURCES_DIRECTORY as a string.
std::string TestResourcesManager::getTempPath() {
    return std::string(TEST_RESOURCES_DIRECTORY) + "/temp";
}

View File

@ -16,6 +16,7 @@ public:
static std::string getTestFilePath(const std::string & module, const std::string & filename);
static std::string getTempPath();
};
#endif

View File

@ -6,12 +6,6 @@
# Anubis score threshold
anubis_threshold = "0.3"
word_map_path = "/tmp/wm.bin"
hashed_index_path = "/tmp/hi.bin"
markers_path = "/tmp/ma.bin"
html_tags_path = "/tmp/html_tags.txt"
stop_words_enabled = "true"

View File

@ -3,17 +3,6 @@
#---------------------------
#
#-------------------------------------------------------------------------------
# The below set the paths for hashed index, markers array and word map files.
# If all the files pointed by these paths exist, Concordia reads them to its
# RAM index. When none of these files exist, a new empty index is created.
# However, if any of these files exist and any other is missing, the index
# is considered corrupt and Concordia does not start.
hashed_index_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_HASHED_INDEX@"
markers_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_MARKERS@"
word_map_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_WORD_MAP@"
#-------------------------------------------------------------------------------
# The following settings control the sentence anonymizer mechanism. It is used to
# remove unnecessary symbols and possibly words from sentences added to index