add index path as required argument to concordia constructor

This commit is contained in:
rjawor 2015-10-16 22:14:11 +02:00
parent f585ff9e01
commit 1adabf4833
32 changed files with 143 additions and 171 deletions

View File

@ -57,10 +57,6 @@ set (PROD_RESOURCES_DIRECTORY "${concordia_SOURCE_DIR}/prod/resources")
# ============================== # # ============================== #
set (TEST_RESOURCES_DIRECTORY "${concordia_SOURCE_DIR}/tests/resources") set (TEST_RESOURCES_DIRECTORY "${concordia_SOURCE_DIR}/tests/resources")
set (TEMP_WORD_MAP "temp_word_map.bin")
set (TEMP_HASHED_INDEX "temp_hashed_index.bin")
set (TEMP_MARKERS "temp_markers.bin")
set (TEMP_SUFFIX_ARRAY "temp_suffix_array.bin")
file(MAKE_DIRECTORY ${TEST_RESOURCES_DIRECTORY}/temp) file(MAKE_DIRECTORY ${TEST_RESOURCES_DIRECTORY}/temp)
file(MAKE_DIRECTORY ${PROD_RESOURCES_DIRECTORY}/temp) file(MAKE_DIRECTORY ${PROD_RESOURCES_DIRECTORY}/temp)

View File

@ -108,6 +108,8 @@ int main(int argc, char** argv) {
desc.add_options() desc.add_options()
("help,h", "Display this message") ("help,h", "Display this message")
("index,i", boost::program_options::value<std::string>(),
"Index directory (required)")
("config,c", boost::program_options::value<std::string>(), ("config,c", boost::program_options::value<std::string>(),
"Concordia configuration file (required)") "Concordia configuration file (required)")
("simple-search,s", boost::program_options::value<std::string>(), ("simple-search,s", boost::program_options::value<std::string>(),
@ -144,12 +146,20 @@ int main(int argc, char** argv) {
return 1; return 1;
} }
std::string indexDirectory;
if (cli.count("index")) {
indexDirectory = cli["index"].as<std::string>();
} else {
std::cerr << "No index directory path given. Terminating."
<< std::endl;
return 1;
}
try { try {
std::cout << "\tInitializing concordia..." << std::endl; std::cout << "\tInitializing concordia..." << std::endl;
boost::posix_time::ptime time_start = boost::posix_time::ptime time_start =
boost::posix_time::microsec_clock::local_time(); boost::posix_time::microsec_clock::local_time();
Concordia concordia(configFile); Concordia concordia(indexDirectory, configFile);
boost::posix_time::ptime time_end = boost::posix_time::ptime time_end =
boost::posix_time::microsec_clock::local_time(); boost::posix_time::microsec_clock::local_time();
boost::posix_time::time_duration msdiff = time_end - time_start; boost::posix_time::time_duration msdiff = time_end - time_start;

View File

@ -2,10 +2,6 @@
#define CONCORDIA_VERSION_MINOR @CONCORDIA_VERSION_MINOR@ #define CONCORDIA_VERSION_MINOR @CONCORDIA_VERSION_MINOR@
#define TEST_RESOURCES_DIRECTORY "@TEST_RESOURCES_DIRECTORY@" #define TEST_RESOURCES_DIRECTORY "@TEST_RESOURCES_DIRECTORY@"
#define TEMP_WORD_MAP "@TEMP_WORD_MAP@"
#define TEMP_HASHED_INDEX "@TEMP_HASHED_INDEX@"
#define TEMP_MARKERS "@TEMP_MARKERS@"
#define TEMP_SUFFIX_ARRAY "@TEMP_SUFFIX_ARRAY@"
#define PROD_RESOURCES_DIRECTORY "@PROD_RESOURCES_DIRECTORY@" #define PROD_RESOURCES_DIRECTORY "@PROD_RESOURCES_DIRECTORY@"
@ -30,3 +26,7 @@ typedef @SUFFIX_MARKER_TYPE@ SUFFIX_MARKER_TYPE;
// and the last SUFFIX_MARKER_SENTENCE_BYTES store the sentence length. // and the last SUFFIX_MARKER_SENTENCE_BYTES store the sentence length.
#define CONCORDIA_SEARCH_MAX_RESULTS 3 #define CONCORDIA_SEARCH_MAX_RESULTS 3
#define WORD_MAP_FILE_NAME "word_map.bin"
#define MARKERS_FILE_NAME "markers.bin"
#define HASHED_INDEX_FILE_NAME "hashed_index.bin"

View File

@ -15,13 +15,15 @@ std::string Concordia::_libraryVersion = _createLibraryVersion();
// =========================================== // ===========================================
Concordia::Concordia(const std::string & configFilePath) Concordia::Concordia(const std::string & indexPath,
throw(ConcordiaException) { const std::string & configFilePath)
throw(ConcordiaException) :
_indexPath(indexPath) {
_config = boost::shared_ptr<ConcordiaConfig> ( _config = boost::shared_ptr<ConcordiaConfig> (
new ConcordiaConfig(configFilePath)); new ConcordiaConfig(configFilePath));
_index = boost::shared_ptr<ConcordiaIndex>( _index = boost::shared_ptr<ConcordiaIndex>(
new ConcordiaIndex(_config->getHashedIndexFilePath(), new ConcordiaIndex(_getHashedIndexFilePath(),
_config->getMarkersFilePath())); _getMarkersFilePath()));
_searcher = boost::shared_ptr<IndexSearcher>(new IndexSearcher()); _searcher = boost::shared_ptr<IndexSearcher>(new IndexSearcher());
_initializeIndex(); _initializeIndex();
} }
@ -101,14 +103,14 @@ std::vector<TokenizedSentence> Concordia::addAllExamples(
} }
void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) { void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) {
if (boost::filesystem::exists(_config->getWordMapFilePath()) if (boost::filesystem::exists(_getWordMapFilePath())
&& boost::filesystem::exists(_config->getHashedIndexFilePath()) && boost::filesystem::exists(_getHashedIndexFilePath())
&& boost::filesystem::exists(_config->getMarkersFilePath())) { && boost::filesystem::exists(_getMarkersFilePath())) {
// reading index from file // reading index from file
_T->clear(); _T->clear();
std::ifstream hashedIndexFile; std::ifstream hashedIndexFile;
hashedIndexFile.open( hashedIndexFile.open(
_config->getHashedIndexFilePath().c_str(), std::ios::in _getHashedIndexFilePath().c_str(), std::ios::in
| std::ios::ate | std::ios::binary); | std::ios::ate | std::ios::binary);
saidx_t hiFileSize = hashedIndexFile.tellg(); saidx_t hiFileSize = hashedIndexFile.tellg();
if (hiFileSize > 0) { if (hiFileSize > 0) {
@ -128,7 +130,7 @@ void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) {
// reading markers from file // reading markers from file
_markers->clear(); _markers->clear();
std::ifstream markersFile; std::ifstream markersFile;
markersFile.open(_config->getMarkersFilePath().c_str(), std::ios::in markersFile.open(_getMarkersFilePath().c_str(), std::ios::in
| std::ios::ate | std::ios::binary); | std::ios::ate | std::ios::binary);
saidx_t maFileSize = markersFile.tellg(); saidx_t maFileSize = markersFile.tellg();
if (maFileSize > 0) { if (maFileSize > 0) {
@ -158,16 +160,17 @@ void Concordia::refreshSAfromRAM() throw(ConcordiaException) {
void Concordia::_initializeIndex() throw(ConcordiaException) { void Concordia::_initializeIndex() throw(ConcordiaException) {
_hashGenerator = boost::shared_ptr<HashGenerator>( _hashGenerator = boost::shared_ptr<HashGenerator>(
new HashGenerator(_config)); new HashGenerator(_indexPath,
_config));
_T = boost::shared_ptr<std::vector<sauchar_t> >( _T = boost::shared_ptr<std::vector<sauchar_t> >(
new std::vector<sauchar_t>); new std::vector<sauchar_t>);
_markers = boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> >( _markers = boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> >(
new std::vector<SUFFIX_MARKER_TYPE>); new std::vector<SUFFIX_MARKER_TYPE>);
if (boost::filesystem::exists(_config->getWordMapFilePath()) if (boost::filesystem::exists(_getWordMapFilePath())
&& boost::filesystem::exists(_config->getHashedIndexFilePath())) { && boost::filesystem::exists(_getHashedIndexFilePath())) {
loadRAMIndexFromDisk(); loadRAMIndexFromDisk();
} else if (!boost::filesystem::exists(_config->getWordMapFilePath()) } else if (!boost::filesystem::exists(_getWordMapFilePath())
&& !boost::filesystem::exists(_config->getHashedIndexFilePath())) { && !boost::filesystem::exists(_getHashedIndexFilePath())) {
// empty index // empty index
_SA = boost::shared_ptr<std::vector<saidx_t> >( _SA = boost::shared_ptr<std::vector<saidx_t> >(
new std::vector<saidx_t>); new std::vector<saidx_t>);
@ -233,7 +236,19 @@ void Concordia::clearIndex() throw(ConcordiaException) {
_SA = boost::shared_ptr<std::vector<saidx_t> >( _SA = boost::shared_ptr<std::vector<saidx_t> >(
new std::vector<saidx_t>); new std::vector<saidx_t>);
boost::filesystem::remove(_config->getHashedIndexFilePath()); boost::filesystem::remove(_getHashedIndexFilePath());
boost::filesystem::remove(_config->getMarkersFilePath()); boost::filesystem::remove(_getMarkersFilePath());
}
std::string Concordia::_getWordMapFilePath() {
    // The word map file lives directly inside the index directory:
    // "<index path>/<WORD_MAP_FILE_NAME>".
    std::string wordMapPath(_indexPath);
    wordMapPath += "/";
    wordMapPath += WORD_MAP_FILE_NAME;
    return wordMapPath;
}
std::string Concordia::_getHashedIndexFilePath() {
    // The hashed index file lives directly inside the index directory:
    // "<index path>/<HASHED_INDEX_FILE_NAME>".
    std::string hashedIndexPath(_indexPath);
    hashedIndexPath += "/";
    hashedIndexPath += HASHED_INDEX_FILE_NAME;
    return hashedIndexPath;
}
std::string Concordia::_getMarkersFilePath() {
return _indexPath+"/"+MARKERS_FILE_NAME;
} }

View File

@ -38,10 +38,12 @@
class Concordia { class Concordia {
public: public:
/*! Constructor. /*! Constructor.
\param indexPath path to the index directory
\param configFilePath path to the Concordia configuration file \param configFilePath path to the Concordia configuration file
\throws ConcordiaException \throws ConcordiaException
*/ */
explicit Concordia(const std::string & configFilePath) explicit Concordia(const std::string & indexPath,
const std::string & configFilePath)
throw(ConcordiaException); throw(ConcordiaException);
/*! Destructor. /*! Destructor.
*/ */
@ -163,10 +165,18 @@ public:
void clearIndex() throw(ConcordiaException); void clearIndex() throw(ConcordiaException);
private: private:
std::string _getWordMapFilePath();
std::string _getHashedIndexFilePath();
std::string _getMarkersFilePath();
void _initializeIndex() throw(ConcordiaException); void _initializeIndex() throw(ConcordiaException);
static std::string _libraryVersion; static std::string _libraryVersion;
std::string _indexPath;
boost::shared_ptr<ConcordiaConfig> _config; boost::shared_ptr<ConcordiaConfig> _config;
boost::shared_ptr<ConcordiaIndex> _index; boost::shared_ptr<ConcordiaIndex> _index;

View File

@ -4,9 +4,6 @@
#include "concordia/common/logging.hpp" #include "concordia/common/logging.hpp"
#define PUDDLE_TAGSET_PARAM "puddle_tagset_path" #define PUDDLE_TAGSET_PARAM "puddle_tagset_path"
#define WORD_MAP_PARAM "word_map_path"
#define HASHED_INDEX_PARAM "hashed_index_path"
#define MARKERS_PARAM "markers_path"
#define SUFFIX_ARRAY_PARAM "suffix_array_path" #define SUFFIX_ARRAY_PARAM "suffix_array_path"
#define HTML_TAGS_PARAM "html_tags_path" #define HTML_TAGS_PARAM "html_tags_path"
#define STOP_WORDS_ENABLED_PARAM "stop_words_enabled" #define STOP_WORDS_ENABLED_PARAM "stop_words_enabled"
@ -25,12 +22,6 @@ ConcordiaConfig::ConcordiaConfig(const std::string & configFilePath)
+configFilePath); +configFilePath);
} }
_wordMapFilePath =
ConcordiaConfig::_readConfigParameterStr(WORD_MAP_PARAM);
_hashedIndexFilePath =
ConcordiaConfig::_readConfigParameterStr(HASHED_INDEX_PARAM);
_markersFilePath =
ConcordiaConfig::_readConfigParameterStr(MARKERS_PARAM);
_htmlTagsFilePath = _htmlTagsFilePath =
ConcordiaConfig::_readConfigParameterStr(HTML_TAGS_PARAM); ConcordiaConfig::_readConfigParameterStr(HTML_TAGS_PARAM);
_stopWordsEnabled = _stopWordsEnabled =

View File

@ -24,30 +24,6 @@ public:
*/ */
virtual ~ConcordiaConfig(); virtual ~ConcordiaConfig();
/*! Getter for word map file path.
For more information see \ref tutorial3.
\returns word map file path
*/
std::string & getWordMapFilePath() {
return _wordMapFilePath;
}
/*! Getter for hashed index file path.
For more information see \ref tutorial3.
\returns hashed index file path
*/
std::string & getHashedIndexFilePath() {
return _hashedIndexFilePath;
}
/*! Getter for markers file path.
For more information see \ref tutorial3.
\returns markers file path
*/
std::string & getMarkersFilePath() {
return _markersFilePath;
}
/*! Getter for html tags file path. /*! Getter for html tags file path.
For more information see \ref tutorial3. For more information see \ref tutorial3.
\returns html tags file path \returns html tags file path
@ -91,12 +67,6 @@ public:
private: private:
libconfig::Config _config; libconfig::Config _config;
std::string _wordMapFilePath;
std::string _hashedIndexFilePath;
std::string _markersFilePath;
std::string _htmlTagsFilePath; std::string _htmlTagsFilePath;
bool _stopWordsEnabled; bool _stopWordsEnabled;

View File

@ -10,9 +10,10 @@
#include <fstream> #include <fstream>
HashGenerator::HashGenerator(boost::shared_ptr<ConcordiaConfig> config) HashGenerator::HashGenerator(std::string indexPath,
boost::shared_ptr<ConcordiaConfig> config)
throw(ConcordiaException) : throw(ConcordiaException) :
_wordMapFilePath(config->getWordMapFilePath()), _wordMapFilePath(indexPath+"/"+WORD_MAP_FILE_NAME),
_wordMap(boost::shared_ptr<WordMap>(new WordMap)), _wordMap(boost::shared_ptr<WordMap>(new WordMap)),
_sentenceTokenizer(boost::shared_ptr<SentenceTokenizer>( _sentenceTokenizer(boost::shared_ptr<SentenceTokenizer>(
new SentenceTokenizer(config))) { new SentenceTokenizer(config))) {

View File

@ -30,9 +30,11 @@ class HashGenerator {
public: public:
/*! /*!
Constructor. Constructor.
\param indexPath path to the index directory
\param config pointer to current config object \param config pointer to current config object
*/ */
explicit HashGenerator(boost::shared_ptr<ConcordiaConfig> config) explicit HashGenerator(std::string indexPath,
boost::shared_ptr<ConcordiaConfig> config)
throw(ConcordiaException); throw(ConcordiaException);
/*! Destructor. /*! Destructor.

View File

@ -62,16 +62,17 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
std::vector<INDEX_CHARACTER_TYPE> hash = std::vector<INDEX_CHARACTER_TYPE> hash =
hashGenerator->generateHash(pattern).getCodes(); hashGenerator->generateHash(pattern).getCodes();
// append sentence boundary marker, as we are looking only for exact sentence matches // append sentence boundary marker,
// as we are looking only for exact sentence matches
hash.push_back(INDEX_CHARACTER_TYPE_MAX_VALUE); hash.push_back(INDEX_CHARACTER_TYPE_MAX_VALUE);
saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE); saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE);
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash); sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
int size = sa_search(T->data(), (saidx_t) T->size(), int size = sa_search(T->data(), (saidx_t) T->size(),
(const sauchar_t *) patternArray, patternLength, (const sauchar_t *) patternArray, patternLength,
SA->data(), (saidx_t) SA->size(), &left); SA->data(), (saidx_t) SA->size(), &left);
SUFFIX_MARKER_TYPE occurencesCount = 0; SUFFIX_MARKER_TYPE occurencesCount = 0;
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
saidx_t resultPos = SA->at(left + i); saidx_t resultPos = SA->at(left + i);
@ -86,7 +87,7 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
} }
delete[] patternArray; delete[] patternArray;
return occurencesCount; return occurencesCount;
} }

View File

@ -18,7 +18,8 @@ BOOST_AUTO_TEST_SUITE(concordia_main)
BOOST_AUTO_TEST_CASE( ConcordiaVersion ) BOOST_AUTO_TEST_CASE( ConcordiaVersion )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
std::string version = concordia.getVersion(); std::string version = concordia.getVersion();
BOOST_CHECK_EQUAL( version , "1.0"); BOOST_CHECK_EQUAL( version , "1.0");
} }
@ -26,7 +27,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaVersion )
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 ) BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
TokenizedSentence ts = concordia.addExample(Example("Ala posiada kota",14)); TokenizedSentence ts = concordia.addExample(Example("Ala posiada kota",14));
/* /*
0,3 type: 1 value: ala 0,3 type: 1 value: ala
@ -85,7 +87,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 ) BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
{ {
// modified stop words to avoid anonymization // modified stop words to avoid anonymization
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
std::vector<Example> testExamples; std::vector<Example> testExamples;
testExamples.push_back(Example("xto xjest okno",312)); testExamples.push_back(Example("xto xjest okno",312));
testExamples.push_back(Example("czy xjest okno otwarte",202)); testExamples.push_back(Example("czy xjest okno otwarte",202));
@ -128,7 +131,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
*/ */
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
std::vector<MatchedPatternFragment> searchResult1 = concordia2.simpleSearch("xto xjest"); std::vector<MatchedPatternFragment> searchResult1 = concordia2.simpleSearch("xto xjest");
std::vector<MatchedPatternFragment> searchResult2 = concordia2.simpleSearch("xjest okno"); std::vector<MatchedPatternFragment> searchResult2 = concordia2.simpleSearch("xjest okno");
@ -156,13 +160,15 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 ) BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
std::vector<Example> testExamples; std::vector<Example> testExamples;
testExamples.push_back(Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312)); testExamples.push_back(Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312));
testExamples.push_back(Example("czy xjest żółte otwarte",202)); testExamples.push_back(Example("czy xjest żółte otwarte",202));
concordia.addAllExamples(testExamples); concordia.addAllExamples(testExamples);
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
std::vector<MatchedPatternFragment> searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia"); std::vector<MatchedPatternFragment> searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia");
concordia2.clearIndex(); concordia2.clearIndex();
@ -175,7 +181,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 ) BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
concordia.addExample(Example("Ala posiada kota",14)); concordia.addExample(Example("Ala posiada kota",14));
concordia.addExample(Example("Ala posiada rysia",51)); concordia.addExample(Example("Ala posiada rysia",51));
concordia.addExample(Example("Marysia posiada rysia",123)); concordia.addExample(Example("Marysia posiada rysia",123));
@ -220,7 +227,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
BOOST_AUTO_TEST_CASE( ConcordiaSearch1 ) BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
concordia.addExample(Example("Ala posiada kota",14)); concordia.addExample(Example("Ala posiada kota",14));
concordia.addExample(Example("Ala posiada rysia",51)); concordia.addExample(Example("Ala posiada rysia",51));
concordia.addExample(Example("Marysia posiada rysia",123)); concordia.addExample(Example("Marysia posiada rysia",123));
@ -292,7 +300,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
BOOST_AUTO_TEST_CASE( ConcordiaSearch2 ) BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
/* /*
concordia.addExample(Example("Alice has a cat", 56)); concordia.addExample(Example("Alice has a cat", 56));
concordia.addExample(Example("Alice has a dog", 23)); concordia.addExample(Example("Alice has a dog", 23));
@ -349,7 +358,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
BOOST_AUTO_TEST_CASE( Tokenize ) BOOST_AUTO_TEST_CASE( Tokenize )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
TokenizedSentence ts = concordia.tokenize(" Ala posiada kota"); TokenizedSentence ts = concordia.tokenize(" Ala posiada kota");
/* /*
0,3 type: 1 value: ala 0,3 type: 1 value: ala
@ -382,7 +392,8 @@ BOOST_AUTO_TEST_CASE( Tokenize )
BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences ) BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
concordia.addExample(Example("Ala posiada kota",14)); concordia.addExample(Example("Ala posiada kota",14));
concordia.addExample(Example("Ala posiada rysia",51)); concordia.addExample(Example("Ala posiada rysia",51));
concordia.addExample(Example("Ala posiada kota",16)); concordia.addExample(Example("Ala posiada kota",16));

View File

@ -12,9 +12,6 @@ BOOST_AUTO_TEST_SUITE(concordia_config)
BOOST_AUTO_TEST_CASE( ConfigParameters ) BOOST_AUTO_TEST_CASE( ConfigParameters )
{ {
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("concordia-mock.cfg")); ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("concordia-mock.cfg"));
BOOST_CHECK_EQUAL( config.getWordMapFilePath() , "/tmp/wm.bin" );
BOOST_CHECK_EQUAL( config.getHashedIndexFilePath() , "/tmp/hi.bin" );
BOOST_CHECK_EQUAL( config.getMarkersFilePath() , "/tmp/ma.bin" );
BOOST_CHECK_EQUAL( config.getHtmlTagsFilePath() , "/tmp/html_tags.txt" ); BOOST_CHECK_EQUAL( config.getHtmlTagsFilePath() , "/tmp/html_tags.txt" );
BOOST_CHECK_EQUAL( config.getStopWordsFilePath() , "/tmp/stop_words.txt" ); BOOST_CHECK_EQUAL( config.getStopWordsFilePath() , "/tmp/stop_words.txt" );
BOOST_CHECK_EQUAL( config.getNamedEntitiesFilePath() , "/tmp/named_entities.txt" ); BOOST_CHECK_EQUAL( config.getNamedEntitiesFilePath() , "/tmp/named_entities.txt" );

View File

@ -356,11 +356,11 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest )
SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7 SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7
*/ */
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX), ConcordiaIndex index(TestResourcesManager::getTempPath()+"/"+HASHED_INDEX_FILE_NAME,
TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); TestResourcesManager::getTempPath()+"/"+MARKERS_FILE_NAME);
boost::shared_ptr<ConcordiaConfig> config( boost::shared_ptr<ConcordiaConfig> config(
new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"))); new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
boost::shared_ptr<HashGenerator> hashGenerator(new HashGenerator(config)); boost::shared_ptr<HashGenerator> hashGenerator(new HashGenerator(TestResourcesManager::getTempPath(), config));
boost::shared_ptr<std::vector<sauchar_t> > T(new std::vector<sauchar_t>()); boost::shared_ptr<std::vector<sauchar_t> > T(new std::vector<sauchar_t>());
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers(new std::vector<SUFFIX_MARKER_TYPE>()); boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers(new std::vector<SUFFIX_MARKER_TYPE>());
@ -428,9 +428,9 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest )
BOOST_CHECK_EQUAL(patternIntervals123[1].getEnd(), 4); BOOST_CHECK_EQUAL(patternIntervals123[1].getEnd(), 4);
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+HASHED_INDEX_FILE_NAME);
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX)); boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+MARKERS_FILE_NAME);
} }

View File

@ -17,11 +17,11 @@ BOOST_AUTO_TEST_CASE( SimpleHashTest )
{ {
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"))); boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
if (boost::filesystem::exists(config->getWordMapFilePath())) { if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
boost::filesystem::remove(config->getWordMapFilePath()); boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
} }
HashGenerator hashGenerator = HashGenerator(config); HashGenerator hashGenerator = HashGenerator(TestResourcesManager::getTempPath(), config);
std::vector<INDEX_CHARACTER_TYPE> hash = hashGenerator.generateHash("Ala posiada kota").getCodes(); std::vector<INDEX_CHARACTER_TYPE> hash = hashGenerator.generateHash("Ala posiada kota").getCodes();
std::vector<INDEX_CHARACTER_TYPE> expected; std::vector<INDEX_CHARACTER_TYPE> expected;
@ -38,11 +38,11 @@ BOOST_AUTO_TEST_CASE( TooLongHashTest )
{ {
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"))); boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
if (boost::filesystem::exists(config->getWordMapFilePath())) { if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
boost::filesystem::remove(config->getWordMapFilePath()); boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
} }
HashGenerator hashGenerator = HashGenerator(config); HashGenerator hashGenerator = HashGenerator(TestResourcesManager::getTempPath(), config);
std::stringstream ss; std::stringstream ss;
for (int i=0;i<65537;i++) { for (int i=0;i<65537;i++) {
@ -70,11 +70,11 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
{ {
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"))); boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
if (boost::filesystem::exists(config->getWordMapFilePath())) { if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
boost::filesystem::remove(config->getWordMapFilePath()); boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
} }
HashGenerator hashGenerator1 = HashGenerator(config); HashGenerator hashGenerator1 = HashGenerator(TestResourcesManager::getTempPath(), config);
std::vector<INDEX_CHARACTER_TYPE> hash1 = hashGenerator1.generateHash("Ala posiada kota").getCodes(); std::vector<INDEX_CHARACTER_TYPE> hash1 = hashGenerator1.generateHash("Ala posiada kota").getCodes();
std::vector<INDEX_CHARACTER_TYPE> expected1; std::vector<INDEX_CHARACTER_TYPE> expected1;
@ -85,7 +85,7 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
hashGenerator1.serializeWordMap(); hashGenerator1.serializeWordMap();
HashGenerator hashGenerator2 = HashGenerator(config); HashGenerator hashGenerator2 = HashGenerator(TestResourcesManager::getTempPath(), config);
std::vector<INDEX_CHARACTER_TYPE> hash2 = hashGenerator2.generateHash("Ala posiada psa").getCodes(); std::vector<INDEX_CHARACTER_TYPE> hash2 = hashGenerator2.generateHash("Ala posiada psa").getCodes();
std::vector<INDEX_CHARACTER_TYPE> expected2; std::vector<INDEX_CHARACTER_TYPE> expected2;
expected2.push_back(0); expected2.push_back(0);
@ -100,11 +100,11 @@ BOOST_AUTO_TEST_CASE( TokenVectorTest )
{ {
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"))); boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
if (boost::filesystem::exists(config->getWordMapFilePath())) { if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
boost::filesystem::remove(config->getWordMapFilePath()); boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
} }
HashGenerator hashGenerator = HashGenerator(config); HashGenerator hashGenerator = HashGenerator(TestResourcesManager::getTempPath(), config);
TokenizedSentence tokenizedSentence = hashGenerator.generateHash("12.02.2014 o godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że <b>kierowca</b> zaparkował samochód."); TokenizedSentence tokenizedSentence = hashGenerator.generateHash("12.02.2014 o godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że <b>kierowca</b> zaparkował samochód.");

View File

@ -29,7 +29,7 @@ File first.cpp:
using namespace std; using namespace std;
int main() { int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg"); Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
cout << concordia.getVersion() << endl; cout << concordia.getVersion() << endl;
} }
\endverbatim \endverbatim
@ -52,7 +52,7 @@ File simple_search.cpp:
using namespace std; using namespace std;
int main() { int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg"); Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
// adding sentences to index // adding sentences to index
concordia.addExample(Example("Alice has a cat", 56)); concordia.addExample(Example("Alice has a cat", 56));
@ -116,7 +116,7 @@ File concordia_searching.cpp:
using namespace std; using namespace std;
int main() { int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg"); Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
TokenizedSentence ts = concordia.addExample(Example("Alice has a cat", 56)); TokenizedSentence ts = concordia.addExample(Example("Alice has a cat", 56));
cout << "Added the following tokens: " << endl; cout << "Added the following tokens: " << endl;
@ -209,17 +209,6 @@ Every option is documented in comments within the configuration file.
#--------------------------- #---------------------------
# #
#-------------------------------------------------------------------------------
# The below set the paths for hashed index, markers array and word map files.
# If all the files pointed by these paths exist, Concordia reads them to its
# RAM index. When none of these files exist, a new empty index is created.
# However, if any of these files exist and any other is missing, the index
# is considered corrupt and Concordia does not start.
hashed_index_path = "<CONCORDIA_HOME>/tests/resources/temp/temp_hashed_index.bin"
markers_path = "<CONCORDIA_HOME>/tests/resources/temp/temp_markers.bin"
word_map_path = "<CONCORDIA_HOME>/tests/resources/temp/temp_word_map.bin"
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
# The following settings control the sentence tokenizer mechanism. Tokenizer # The following settings control the sentence tokenizer mechanism. Tokenizer
# takes into account html tags, substitutes predefined symbols # takes into account html tags, substitutes predefined symbols
@ -260,6 +249,7 @@ The full list of program options is given below:
\verbatim \verbatim
-h [ --help ] Display this message -h [ --help ] Display this message
-c [ --config ] arg Concordia configuration file (required) -c [ --config ] arg Concordia configuration file (required)
-i [ --index ] arg Index directory (required)
-s [ --simple-search ] arg Pattern to be searched in the index -s [ --simple-search ] arg Pattern to be searched in the index
-n [ --silent ] While searching, do not -n [ --silent ] While searching, do not
output search results output search results
@ -277,12 +267,12 @@ From <CONCORDIA_HOME> directory:
Read sentences from file sentences.txt Read sentences from file sentences.txt
\verbatim \verbatim
./build/concordia-console/concordia-console -c tests/resources/concordia-config/concordia.cfg -r ~/sentences.txt ./build/concordia-console/concordia-console -i /tmp -c tests/resources/concordia-config/concordia.cfg -r ~/sentences.txt
\endverbatim \endverbatim
Run concordia search on the index Run concordia search on the index
\verbatim \verbatim
./build/concordia-console/concordia-console -c tests/resources/concordia-config/concordia.cfg -x "some pattern" ./build/concordia-console/concordia-console -i /tmp -c tests/resources/concordia-config/concordia.cfg -x "some pattern"
\endverbatim \endverbatim
*/ */

View File

@ -12,7 +12,7 @@
using namespace std; using namespace std;
int main() { int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg"); Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
TokenizedSentence ts = concordia.addExample(Example("Alice has a cat", 56)); TokenizedSentence ts = concordia.addExample(Example("Alice has a cat", 56));
cout << "Added the following tokens: " << endl; cout << "Added the following tokens: " << endl;

View File

@ -7,6 +7,6 @@
using namespace std; using namespace std;
int main() { int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg"); Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
cout << concordia.getVersion() << endl; cout << concordia.getVersion() << endl;
} }

View File

@ -10,7 +10,7 @@
using namespace std; using namespace std;
int main() { int main() {
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg"); Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
// adding sentences to index // adding sentences to index
concordia.addExample(Example("Alice has a cat", 56)); concordia.addExample(Example("Alice has a cat", 56));

View File

@ -3,17 +3,6 @@
#--------------------------- #---------------------------
# #
#-------------------------------------------------------------------------------
# The below set the paths for hashed index, markers array and word map files.
# If all the files pointed by these paths exist, Concordia reads them to its
# RAM index. When none of these files exist, a new empty index is created.
# However, if any of these files exist and any other is missing, the index
# is considered corrupt and Concordia does not start.
hashed_index_path = "@PROD_RESOURCES_DIRECTORY@/temp/@TEMP_HASHED_INDEX@"
markers_path = "@PROD_RESOURCES_DIRECTORY@/temp/@TEMP_MARKERS@"
word_map_path = "@PROD_RESOURCES_DIRECTORY@/temp/@TEMP_WORD_MAP@"
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
# The following settings control the sentence anonymizer mechanism. It is used to # The following settings control the sentence anonymizer mechanism. It is used to
# remove unnecessary symbols and possibly words from sentences added to index # remove unnecessary symbols and possibly words from sentences added to index

View File

@ -1,5 +1,5 @@
#!/bin/sh #!/bin/sh
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -a "$1" ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -a "$1"

View File

@ -1,5 +1,5 @@
#!/bin/sh #!/bin/sh
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "$1" ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "$1"

View File

@ -9,6 +9,6 @@ echo "CONCORDIA INDEXER: Running Concordia"
rm ../prod/resources/temp/* rm ../prod/resources/temp/*
echo "CONCORDIA INDEXER: reading from file" echo "CONCORDIA INDEXER: reading from file"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt

View File

@ -9,22 +9,22 @@ echo "CONCORDIA RUNNER: Running Concordia"
rm ../prod/resources/temp/* rm ../prod/resources/temp/*
echo "CONCORDIA RUNNER: reading from file" echo "CONCORDIA RUNNER: reading from file"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt
echo "CONCORDIA RUNNER: concordia searching for pattern: \"Współpraca Państw Członkowskich i Komisji Europejskiej\"" echo "CONCORDIA RUNNER: concordia searching for pattern: \"Współpraca Państw Członkowskich i Komisji Europejskiej\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "Współpraca Państw Członkowskich i Komisji Europejskiej" ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "Współpraca Państw Członkowskich i Komisji Europejskiej"
echo "CONCORDIA RUNNER: concordia searching for pattern: \"8. W odniesieniu do artykułu 45 ustęp 12\"" echo "CONCORDIA RUNNER: concordia searching for pattern: \"8. W odniesieniu do artykułu 45 ustęp 12\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "8. W odniesieniu do artykułu 45 ustęp 12" ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "8. W odniesieniu do artykułu 45 ustęp 12"
echo "CONCORDIA RUNNER: concordia searching for pattern: \"Prawo europejskie umożliwia handel zagraniczny\"" echo "CONCORDIA RUNNER: concordia searching for pattern: \"Prawo europejskie umożliwia handel zagraniczny\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "Prawo europejskie umożliwia handel zagraniczny" ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "Prawo europejskie umożliwia handel zagraniczny"
echo "CONCORDIA RUNNER: searching for pattern: \"Parlamentu Europejskiego\"" echo "CONCORDIA RUNNER: searching for pattern: \"Parlamentu Europejskiego\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Parlamentu Europejskiego" -n ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "Parlamentu Europejskiego" -n
echo "CONCORDIA RUNNER: searching for pattern: \"Dostęp do zatrudnienia\"" echo "CONCORDIA RUNNER: searching for pattern: \"Dostęp do zatrudnienia\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Dostęp do zatrudnienia" -n ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "Dostęp do zatrudnienia" -n
echo "CONCORDIA RUNNER: searching for pattern: \"Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem\"" echo "CONCORDIA RUNNER: searching for pattern: \"Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem" -n ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem" -n
rm ../prod/resources/text-files/jrc_smaller.txt rm ../prod/resources/text-files/jrc_smaller.txt

View File

@ -9,10 +9,10 @@ echo "CONCORDIA RUNNER: Running Concordia"
rm ../prod/resources/temp/* rm ../prod/resources/temp/*
echo "CONCORDIA RUNNER: reading from file" echo "CONCORDIA RUNNER: reading from file"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/large.txt ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/large.txt
echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\"" echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n
echo "CONCORDIA RUNNER: searching for pattern: \"it is\"" echo "CONCORDIA RUNNER: searching for pattern: \"it is\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n
rm ../prod/resources/text-files/large.txt rm ../prod/resources/text-files/large.txt

View File

@ -4,8 +4,8 @@ echo "CONCORDIA RUNNER: Running Concordia"
rm ../prod/resources/temp/* rm ../prod/resources/temp/*
echo "CONCORDIA RUNNER: reading from file" echo "CONCORDIA RUNNER: reading from file"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/medium.txt ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/medium.txt
echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\"" echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own" ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own"
echo "CONCORDIA RUNNER: searching for pattern: \"it is\"" echo "CONCORDIA RUNNER: searching for pattern: \"it is\""
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n

View File

@ -1,5 +1,5 @@
#!/bin/sh #!/bin/sh
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "$1" -n ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "$1" -n

View File

@ -9,6 +9,6 @@ echo "CONCORDIA RUNNER: Running Concordia"
rm ../prod/resources/temp/* rm ../prod/resources/temp/*
echo "CONCORDIA RUNNER: testing" echo "CONCORDIA RUNNER: testing"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/jrc_smaller.txt ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/jrc_smaller.txt
rm ../prod/resources/text-files/jrc_smaller.txt rm ../prod/resources/text-files/jrc_smaller.txt

View File

@ -5,5 +5,5 @@ echo "CONCORDIA RUNNER: Running Concordia"
rm ../prod/resources/temp/* rm ../prod/resources/temp/*
echo "CONCORDIA RUNNER: testing" echo "CONCORDIA RUNNER: testing"
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/medium.txt ../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/medium.txt

View File

@ -25,3 +25,8 @@ std::string TestResourcesManager::getTestFilePath(const std::string & module, co
return result + "/" + module + "/" + filename; return result + "/" + module + "/" + filename;
} }
// Builds the path of the temporary directory used by tests: the "temp"
// subdirectory located directly under TEST_RESOURCES_DIRECTORY.
std::string TestResourcesManager::getTempPath() {
    return std::string(TEST_RESOURCES_DIRECTORY) + "/temp";
}

View File

@ -16,6 +16,7 @@ public:
static std::string getTestFilePath(const std::string & module, const std::string & filename); static std::string getTestFilePath(const std::string & module, const std::string & filename);
static std::string getTempPath();
}; };
#endif #endif

View File

@ -6,12 +6,6 @@
# Anubis score threshold # Anubis score threshold
anubis_threshold = "0.3" anubis_threshold = "0.3"
word_map_path = "/tmp/wm.bin"
hashed_index_path = "/tmp/hi.bin"
markers_path = "/tmp/ma.bin"
html_tags_path = "/tmp/html_tags.txt" html_tags_path = "/tmp/html_tags.txt"
stop_words_enabled = "true" stop_words_enabled = "true"

View File

@ -3,17 +3,6 @@
#--------------------------- #---------------------------
# #
#-------------------------------------------------------------------------------
# The below set the paths for hashed index, markers array and word map files.
# If all the files pointed by these paths exist, Concordia reads them to its
# RAM index. When none of these files exist, a new empty index is created.
# However, if any of these files exist and any other is missing, the index
# is considered corrupt and Concordia does not start.
hashed_index_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_HASHED_INDEX@"
markers_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_MARKERS@"
word_map_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_WORD_MAP@"
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
# The following settings control the sentence anonymizer mechanism. It is used to # The following settings control the sentence anonymizer mechanism. It is used to
# remove unnecessary symbols and possibly words from sentences added to index # remove unnecessary symbols and possibly words from sentences added to index