add index path as required argument to concordia constructor
This commit is contained in:
parent
f585ff9e01
commit
1adabf4833
@ -57,10 +57,6 @@ set (PROD_RESOURCES_DIRECTORY "${concordia_SOURCE_DIR}/prod/resources")
|
||||
# ============================== #
|
||||
|
||||
set (TEST_RESOURCES_DIRECTORY "${concordia_SOURCE_DIR}/tests/resources")
|
||||
set (TEMP_WORD_MAP "temp_word_map.bin")
|
||||
set (TEMP_HASHED_INDEX "temp_hashed_index.bin")
|
||||
set (TEMP_MARKERS "temp_markers.bin")
|
||||
set (TEMP_SUFFIX_ARRAY "temp_suffix_array.bin")
|
||||
|
||||
file(MAKE_DIRECTORY ${TEST_RESOURCES_DIRECTORY}/temp)
|
||||
file(MAKE_DIRECTORY ${PROD_RESOURCES_DIRECTORY}/temp)
|
||||
|
@ -108,6 +108,8 @@ int main(int argc, char** argv) {
|
||||
|
||||
desc.add_options()
|
||||
("help,h", "Display this message")
|
||||
("index,i", boost::program_options::value<std::string>(),
|
||||
"Index directory (required)")
|
||||
("config,c", boost::program_options::value<std::string>(),
|
||||
"Concordia configuration file (required)")
|
||||
("simple-search,s", boost::program_options::value<std::string>(),
|
||||
@ -144,12 +146,20 @@ int main(int argc, char** argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::string indexDirectory;
|
||||
if (cli.count("index")) {
|
||||
indexDirectory = cli["index"].as<std::string>();
|
||||
} else {
|
||||
std::cerr << "No index directory path given. Terminating."
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
try {
|
||||
std::cout << "\tInitializing concordia..." << std::endl;
|
||||
boost::posix_time::ptime time_start =
|
||||
boost::posix_time::microsec_clock::local_time();
|
||||
Concordia concordia(configFile);
|
||||
Concordia concordia(indexDirectory, configFile);
|
||||
boost::posix_time::ptime time_end =
|
||||
boost::posix_time::microsec_clock::local_time();
|
||||
boost::posix_time::time_duration msdiff = time_end - time_start;
|
||||
|
@ -2,10 +2,6 @@
|
||||
#define CONCORDIA_VERSION_MINOR @CONCORDIA_VERSION_MINOR@
|
||||
|
||||
#define TEST_RESOURCES_DIRECTORY "@TEST_RESOURCES_DIRECTORY@"
|
||||
#define TEMP_WORD_MAP "@TEMP_WORD_MAP@"
|
||||
#define TEMP_HASHED_INDEX "@TEMP_HASHED_INDEX@"
|
||||
#define TEMP_MARKERS "@TEMP_MARKERS@"
|
||||
#define TEMP_SUFFIX_ARRAY "@TEMP_SUFFIX_ARRAY@"
|
||||
|
||||
#define PROD_RESOURCES_DIRECTORY "@PROD_RESOURCES_DIRECTORY@"
|
||||
|
||||
@ -30,3 +26,7 @@ typedef @SUFFIX_MARKER_TYPE@ SUFFIX_MARKER_TYPE;
|
||||
// and the last SUFFIX_MARKER_SENTENCE_BYTES store the sentence length.
|
||||
|
||||
#define CONCORDIA_SEARCH_MAX_RESULTS 3
|
||||
|
||||
#define WORD_MAP_FILE_NAME "word_map.bin"
|
||||
#define MARKERS_FILE_NAME "markers.bin"
|
||||
#define HASHED_INDEX_FILE_NAME "hashed_index.bin"
|
||||
|
@ -15,13 +15,15 @@ std::string Concordia::_libraryVersion = _createLibraryVersion();
|
||||
|
||||
// ===========================================
|
||||
|
||||
Concordia::Concordia(const std::string & configFilePath)
|
||||
throw(ConcordiaException) {
|
||||
Concordia::Concordia(const std::string & indexPath,
|
||||
const std::string & configFilePath)
|
||||
throw(ConcordiaException) :
|
||||
_indexPath(indexPath) {
|
||||
_config = boost::shared_ptr<ConcordiaConfig> (
|
||||
new ConcordiaConfig(configFilePath));
|
||||
_index = boost::shared_ptr<ConcordiaIndex>(
|
||||
new ConcordiaIndex(_config->getHashedIndexFilePath(),
|
||||
_config->getMarkersFilePath()));
|
||||
new ConcordiaIndex(_getHashedIndexFilePath(),
|
||||
_getMarkersFilePath()));
|
||||
_searcher = boost::shared_ptr<IndexSearcher>(new IndexSearcher());
|
||||
_initializeIndex();
|
||||
}
|
||||
@ -101,14 +103,14 @@ std::vector<TokenizedSentence> Concordia::addAllExamples(
|
||||
}
|
||||
|
||||
void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) {
|
||||
if (boost::filesystem::exists(_config->getWordMapFilePath())
|
||||
&& boost::filesystem::exists(_config->getHashedIndexFilePath())
|
||||
&& boost::filesystem::exists(_config->getMarkersFilePath())) {
|
||||
if (boost::filesystem::exists(_getWordMapFilePath())
|
||||
&& boost::filesystem::exists(_getHashedIndexFilePath())
|
||||
&& boost::filesystem::exists(_getMarkersFilePath())) {
|
||||
// reading index from file
|
||||
_T->clear();
|
||||
std::ifstream hashedIndexFile;
|
||||
hashedIndexFile.open(
|
||||
_config->getHashedIndexFilePath().c_str(), std::ios::in
|
||||
_getHashedIndexFilePath().c_str(), std::ios::in
|
||||
| std::ios::ate | std::ios::binary);
|
||||
saidx_t hiFileSize = hashedIndexFile.tellg();
|
||||
if (hiFileSize > 0) {
|
||||
@ -128,7 +130,7 @@ void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) {
|
||||
// reading markers from file
|
||||
_markers->clear();
|
||||
std::ifstream markersFile;
|
||||
markersFile.open(_config->getMarkersFilePath().c_str(), std::ios::in
|
||||
markersFile.open(_getMarkersFilePath().c_str(), std::ios::in
|
||||
| std::ios::ate | std::ios::binary);
|
||||
saidx_t maFileSize = markersFile.tellg();
|
||||
if (maFileSize > 0) {
|
||||
@ -158,16 +160,17 @@ void Concordia::refreshSAfromRAM() throw(ConcordiaException) {
|
||||
|
||||
void Concordia::_initializeIndex() throw(ConcordiaException) {
|
||||
_hashGenerator = boost::shared_ptr<HashGenerator>(
|
||||
new HashGenerator(_config));
|
||||
new HashGenerator(_indexPath,
|
||||
_config));
|
||||
_T = boost::shared_ptr<std::vector<sauchar_t> >(
|
||||
new std::vector<sauchar_t>);
|
||||
_markers = boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> >(
|
||||
new std::vector<SUFFIX_MARKER_TYPE>);
|
||||
if (boost::filesystem::exists(_config->getWordMapFilePath())
|
||||
&& boost::filesystem::exists(_config->getHashedIndexFilePath())) {
|
||||
if (boost::filesystem::exists(_getWordMapFilePath())
|
||||
&& boost::filesystem::exists(_getHashedIndexFilePath())) {
|
||||
loadRAMIndexFromDisk();
|
||||
} else if (!boost::filesystem::exists(_config->getWordMapFilePath())
|
||||
&& !boost::filesystem::exists(_config->getHashedIndexFilePath())) {
|
||||
} else if (!boost::filesystem::exists(_getWordMapFilePath())
|
||||
&& !boost::filesystem::exists(_getHashedIndexFilePath())) {
|
||||
// empty index
|
||||
_SA = boost::shared_ptr<std::vector<saidx_t> >(
|
||||
new std::vector<saidx_t>);
|
||||
@ -233,7 +236,19 @@ void Concordia::clearIndex() throw(ConcordiaException) {
|
||||
_SA = boost::shared_ptr<std::vector<saidx_t> >(
|
||||
new std::vector<saidx_t>);
|
||||
|
||||
boost::filesystem::remove(_config->getHashedIndexFilePath());
|
||||
boost::filesystem::remove(_config->getMarkersFilePath());
|
||||
boost::filesystem::remove(_getHashedIndexFilePath());
|
||||
boost::filesystem::remove(_getMarkersFilePath());
|
||||
}
|
||||
|
||||
// Returns the location of the word map file: the index directory path held
// in _indexPath, a "/" separator, and WORD_MAP_FILE_NAME.
// The pieces are joined by plain string concatenation; _indexPath is used
// exactly as stored, with no normalization of trailing separators.
std::string Concordia::_getWordMapFilePath() {
|
||||
return _indexPath+"/"+WORD_MAP_FILE_NAME;
|
||||
}
|
||||
|
||||
// Returns the location of the hashed index file: the index directory path
// held in _indexPath, a "/" separator, and HASHED_INDEX_FILE_NAME.
// The pieces are joined by plain string concatenation; _indexPath is used
// exactly as stored, with no normalization of trailing separators.
std::string Concordia::_getHashedIndexFilePath() {
|
||||
return _indexPath+"/"+HASHED_INDEX_FILE_NAME;
|
||||
}
|
||||
|
||||
// Returns the location of the markers file: the index directory path held
// in _indexPath, a "/" separator, and MARKERS_FILE_NAME.
// The pieces are joined by plain string concatenation; _indexPath is used
// exactly as stored, with no normalization of trailing separators.
std::string Concordia::_getMarkersFilePath() {
|
||||
return _indexPath+"/"+MARKERS_FILE_NAME;
|
||||
}
|
||||
|
||||
|
@ -38,10 +38,12 @@
|
||||
class Concordia {
|
||||
public:
|
||||
/*! Constructor.
|
||||
\param indexPath path to the index directory
|
||||
\param configFilePath path to the Concordia configuration file
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
explicit Concordia(const std::string & configFilePath)
|
||||
explicit Concordia(const std::string & indexPath,
|
||||
const std::string & configFilePath)
|
||||
throw(ConcordiaException);
|
||||
/*! Destructor.
|
||||
*/
|
||||
@ -163,10 +165,18 @@ public:
|
||||
void clearIndex() throw(ConcordiaException);
|
||||
|
||||
private:
|
||||
std::string _getWordMapFilePath();
|
||||
|
||||
std::string _getHashedIndexFilePath();
|
||||
|
||||
std::string _getMarkersFilePath();
|
||||
|
||||
void _initializeIndex() throw(ConcordiaException);
|
||||
|
||||
static std::string _libraryVersion;
|
||||
|
||||
std::string _indexPath;
|
||||
|
||||
boost::shared_ptr<ConcordiaConfig> _config;
|
||||
|
||||
boost::shared_ptr<ConcordiaIndex> _index;
|
||||
|
@ -4,9 +4,6 @@
|
||||
#include "concordia/common/logging.hpp"
|
||||
|
||||
#define PUDDLE_TAGSET_PARAM "puddle_tagset_path"
|
||||
#define WORD_MAP_PARAM "word_map_path"
|
||||
#define HASHED_INDEX_PARAM "hashed_index_path"
|
||||
#define MARKERS_PARAM "markers_path"
|
||||
#define SUFFIX_ARRAY_PARAM "suffix_array_path"
|
||||
#define HTML_TAGS_PARAM "html_tags_path"
|
||||
#define STOP_WORDS_ENABLED_PARAM "stop_words_enabled"
|
||||
@ -25,12 +22,6 @@ ConcordiaConfig::ConcordiaConfig(const std::string & configFilePath)
|
||||
+configFilePath);
|
||||
}
|
||||
|
||||
_wordMapFilePath =
|
||||
ConcordiaConfig::_readConfigParameterStr(WORD_MAP_PARAM);
|
||||
_hashedIndexFilePath =
|
||||
ConcordiaConfig::_readConfigParameterStr(HASHED_INDEX_PARAM);
|
||||
_markersFilePath =
|
||||
ConcordiaConfig::_readConfigParameterStr(MARKERS_PARAM);
|
||||
_htmlTagsFilePath =
|
||||
ConcordiaConfig::_readConfigParameterStr(HTML_TAGS_PARAM);
|
||||
_stopWordsEnabled =
|
||||
|
@ -24,30 +24,6 @@ public:
|
||||
*/
|
||||
virtual ~ConcordiaConfig();
|
||||
|
||||
/*! Getter for word map file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns word map file path
|
||||
*/
|
||||
std::string & getWordMapFilePath() {
|
||||
return _wordMapFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for hashed index file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns hashed index file path
|
||||
*/
|
||||
std::string & getHashedIndexFilePath() {
|
||||
return _hashedIndexFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for markers file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns markers file path
|
||||
*/
|
||||
std::string & getMarkersFilePath() {
|
||||
return _markersFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for html tags file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns html tags file path
|
||||
@ -91,12 +67,6 @@ public:
|
||||
private:
|
||||
libconfig::Config _config;
|
||||
|
||||
std::string _wordMapFilePath;
|
||||
|
||||
std::string _hashedIndexFilePath;
|
||||
|
||||
std::string _markersFilePath;
|
||||
|
||||
std::string _htmlTagsFilePath;
|
||||
|
||||
bool _stopWordsEnabled;
|
||||
|
@ -10,9 +10,10 @@
|
||||
|
||||
#include <fstream>
|
||||
|
||||
HashGenerator::HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
|
||||
HashGenerator::HashGenerator(std::string indexPath,
|
||||
boost::shared_ptr<ConcordiaConfig> config)
|
||||
throw(ConcordiaException) :
|
||||
_wordMapFilePath(config->getWordMapFilePath()),
|
||||
_wordMapFilePath(indexPath+"/"+WORD_MAP_FILE_NAME),
|
||||
_wordMap(boost::shared_ptr<WordMap>(new WordMap)),
|
||||
_sentenceTokenizer(boost::shared_ptr<SentenceTokenizer>(
|
||||
new SentenceTokenizer(config))) {
|
||||
|
@ -30,9 +30,11 @@ class HashGenerator {
|
||||
public:
|
||||
/*!
|
||||
Constructor.
|
||||
\param indexPath path to the index directory
|
||||
\param config pointer to current config object
|
||||
*/
|
||||
explicit HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
|
||||
explicit HashGenerator(std::string indexPath,
|
||||
boost::shared_ptr<ConcordiaConfig> config)
|
||||
throw(ConcordiaException);
|
||||
|
||||
/*! Destructor.
|
||||
|
@ -62,16 +62,17 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
|
||||
std::vector<INDEX_CHARACTER_TYPE> hash =
|
||||
hashGenerator->generateHash(pattern).getCodes();
|
||||
|
||||
// append sentence boundary marker, as we are looking only for exact sentence matches
|
||||
// append sentence boundary marker,
|
||||
// as we are looking only for exact sentence matches
|
||||
hash.push_back(INDEX_CHARACTER_TYPE_MAX_VALUE);
|
||||
|
||||
|
||||
saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE);
|
||||
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
|
||||
|
||||
int size = sa_search(T->data(), (saidx_t) T->size(),
|
||||
(const sauchar_t *) patternArray, patternLength,
|
||||
SA->data(), (saidx_t) SA->size(), &left);
|
||||
|
||||
|
||||
SUFFIX_MARKER_TYPE occurencesCount = 0;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
saidx_t resultPos = SA->at(left + i);
|
||||
@ -86,7 +87,7 @@ SUFFIX_MARKER_TYPE IndexSearcher::countOccurences(
|
||||
}
|
||||
|
||||
delete[] patternArray;
|
||||
|
||||
|
||||
return occurencesCount;
|
||||
}
|
||||
|
||||
|
@ -18,7 +18,8 @@ BOOST_AUTO_TEST_SUITE(concordia_main)
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaVersion )
|
||||
{
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
std::string version = concordia.getVersion();
|
||||
BOOST_CHECK_EQUAL( version , "1.0");
|
||||
}
|
||||
@ -26,7 +27,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaVersion )
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
|
||||
{
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
TokenizedSentence ts = concordia.addExample(Example("Ala posiada kota",14));
|
||||
/*
|
||||
0,3 type: 1 value: ala
|
||||
@ -85,7 +87,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
||||
{
|
||||
// modified stop words to avoid anonymization
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
std::vector<Example> testExamples;
|
||||
testExamples.push_back(Example("xto xjest okno",312));
|
||||
testExamples.push_back(Example("czy xjest okno otwarte",202));
|
||||
@ -128,7 +131,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
||||
|
||||
*/
|
||||
|
||||
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
std::vector<MatchedPatternFragment> searchResult1 = concordia2.simpleSearch("xto xjest");
|
||||
std::vector<MatchedPatternFragment> searchResult2 = concordia2.simpleSearch("xjest okno");
|
||||
|
||||
@ -156,13 +160,15 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
|
||||
{
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
std::vector<Example> testExamples;
|
||||
testExamples.push_back(Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312));
|
||||
testExamples.push_back(Example("czy xjest żółte otwarte",202));
|
||||
concordia.addAllExamples(testExamples);
|
||||
|
||||
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia2 = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
std::vector<MatchedPatternFragment> searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia");
|
||||
|
||||
concordia2.clearIndex();
|
||||
@ -175,7 +181,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
|
||||
{
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
concordia.addExample(Example("Ala posiada kota",14));
|
||||
concordia.addExample(Example("Ala posiada rysia",51));
|
||||
concordia.addExample(Example("Marysia posiada rysia",123));
|
||||
@ -220,7 +227,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
|
||||
{
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
concordia.addExample(Example("Ala posiada kota",14));
|
||||
concordia.addExample(Example("Ala posiada rysia",51));
|
||||
concordia.addExample(Example("Marysia posiada rysia",123));
|
||||
@ -292,7 +300,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
|
||||
{
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
/*
|
||||
concordia.addExample(Example("Alice has a cat", 56));
|
||||
concordia.addExample(Example("Alice has a dog", 23));
|
||||
@ -349,7 +358,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
|
||||
|
||||
BOOST_AUTO_TEST_CASE( Tokenize )
|
||||
{
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
TokenizedSentence ts = concordia.tokenize(" Ala posiada kota");
|
||||
/*
|
||||
0,3 type: 1 value: ala
|
||||
@ -382,7 +392,8 @@ BOOST_AUTO_TEST_CASE( Tokenize )
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaCountOccurences )
|
||||
{
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTempPath(),
|
||||
TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
concordia.addExample(Example("Ala posiada kota",14));
|
||||
concordia.addExample(Example("Ala posiada rysia",51));
|
||||
concordia.addExample(Example("Ala posiada kota",16));
|
||||
|
@ -12,9 +12,6 @@ BOOST_AUTO_TEST_SUITE(concordia_config)
|
||||
BOOST_AUTO_TEST_CASE( ConfigParameters )
|
||||
{
|
||||
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("concordia-mock.cfg"));
|
||||
BOOST_CHECK_EQUAL( config.getWordMapFilePath() , "/tmp/wm.bin" );
|
||||
BOOST_CHECK_EQUAL( config.getHashedIndexFilePath() , "/tmp/hi.bin" );
|
||||
BOOST_CHECK_EQUAL( config.getMarkersFilePath() , "/tmp/ma.bin" );
|
||||
BOOST_CHECK_EQUAL( config.getHtmlTagsFilePath() , "/tmp/html_tags.txt" );
|
||||
BOOST_CHECK_EQUAL( config.getStopWordsFilePath() , "/tmp/stop_words.txt" );
|
||||
BOOST_CHECK_EQUAL( config.getNamedEntitiesFilePath() , "/tmp/named_entities.txt" );
|
||||
|
@ -356,11 +356,11 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest )
|
||||
SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7
|
||||
*/
|
||||
|
||||
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX),
|
||||
TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
|
||||
ConcordiaIndex index(TestResourcesManager::getTempPath()+"/"+HASHED_INDEX_FILE_NAME,
|
||||
TestResourcesManager::getTempPath()+"/"+MARKERS_FILE_NAME);
|
||||
boost::shared_ptr<ConcordiaConfig> config(
|
||||
new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
|
||||
boost::shared_ptr<HashGenerator> hashGenerator(new HashGenerator(config));
|
||||
boost::shared_ptr<HashGenerator> hashGenerator(new HashGenerator(TestResourcesManager::getTempPath(), config));
|
||||
|
||||
boost::shared_ptr<std::vector<sauchar_t> > T(new std::vector<sauchar_t>());
|
||||
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers(new std::vector<SUFFIX_MARKER_TYPE>());
|
||||
@ -428,9 +428,9 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest )
|
||||
BOOST_CHECK_EQUAL(patternIntervals123[1].getEnd(), 4);
|
||||
|
||||
|
||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
|
||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
||||
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
|
||||
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+HASHED_INDEX_FILE_NAME);
|
||||
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+MARKERS_FILE_NAME);
|
||||
|
||||
}
|
||||
|
||||
|
@ -17,11 +17,11 @@ BOOST_AUTO_TEST_CASE( SimpleHashTest )
|
||||
{
|
||||
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
|
||||
|
||||
if (boost::filesystem::exists(config->getWordMapFilePath())) {
|
||||
boost::filesystem::remove(config->getWordMapFilePath());
|
||||
if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
|
||||
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
|
||||
}
|
||||
|
||||
HashGenerator hashGenerator = HashGenerator(config);
|
||||
HashGenerator hashGenerator = HashGenerator(TestResourcesManager::getTempPath(), config);
|
||||
|
||||
std::vector<INDEX_CHARACTER_TYPE> hash = hashGenerator.generateHash("Ala posiada kota").getCodes();
|
||||
std::vector<INDEX_CHARACTER_TYPE> expected;
|
||||
@ -38,11 +38,11 @@ BOOST_AUTO_TEST_CASE( TooLongHashTest )
|
||||
{
|
||||
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
|
||||
|
||||
if (boost::filesystem::exists(config->getWordMapFilePath())) {
|
||||
boost::filesystem::remove(config->getWordMapFilePath());
|
||||
if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
|
||||
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
|
||||
}
|
||||
|
||||
HashGenerator hashGenerator = HashGenerator(config);
|
||||
HashGenerator hashGenerator = HashGenerator(TestResourcesManager::getTempPath(), config);
|
||||
|
||||
std::stringstream ss;
|
||||
for (int i=0;i<65537;i++) {
|
||||
@ -70,11 +70,11 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
|
||||
{
|
||||
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
|
||||
|
||||
if (boost::filesystem::exists(config->getWordMapFilePath())) {
|
||||
boost::filesystem::remove(config->getWordMapFilePath());
|
||||
if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
|
||||
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
|
||||
}
|
||||
|
||||
HashGenerator hashGenerator1 = HashGenerator(config);
|
||||
HashGenerator hashGenerator1 = HashGenerator(TestResourcesManager::getTempPath(), config);
|
||||
|
||||
std::vector<INDEX_CHARACTER_TYPE> hash1 = hashGenerator1.generateHash("Ala posiada kota").getCodes();
|
||||
std::vector<INDEX_CHARACTER_TYPE> expected1;
|
||||
@ -85,7 +85,7 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
|
||||
|
||||
hashGenerator1.serializeWordMap();
|
||||
|
||||
HashGenerator hashGenerator2 = HashGenerator(config);
|
||||
HashGenerator hashGenerator2 = HashGenerator(TestResourcesManager::getTempPath(), config);
|
||||
std::vector<INDEX_CHARACTER_TYPE> hash2 = hashGenerator2.generateHash("Ala posiada psa").getCodes();
|
||||
std::vector<INDEX_CHARACTER_TYPE> expected2;
|
||||
expected2.push_back(0);
|
||||
@ -100,11 +100,11 @@ BOOST_AUTO_TEST_CASE( TokenVectorTest )
|
||||
{
|
||||
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
|
||||
|
||||
if (boost::filesystem::exists(config->getWordMapFilePath())) {
|
||||
boost::filesystem::remove(config->getWordMapFilePath());
|
||||
if (boost::filesystem::exists(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME)) {
|
||||
boost::filesystem::remove(TestResourcesManager::getTempPath()+"/"+WORD_MAP_FILE_NAME);
|
||||
}
|
||||
|
||||
HashGenerator hashGenerator = HashGenerator(config);
|
||||
HashGenerator hashGenerator = HashGenerator(TestResourcesManager::getTempPath(), config);
|
||||
|
||||
TokenizedSentence tokenizedSentence = hashGenerator.generateHash("12.02.2014 o godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że <b>kierowca</b> zaparkował samochód.");
|
||||
|
||||
|
@ -29,7 +29,7 @@ File first.cpp:
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
cout << concordia.getVersion() << endl;
|
||||
}
|
||||
\endverbatim
|
||||
@ -52,7 +52,7 @@ File simple_search.cpp:
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
|
||||
// adding sentences to index
|
||||
concordia.addExample(Example("Alice has a cat", 56));
|
||||
@ -116,7 +116,7 @@ File concordia_searching.cpp:
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
|
||||
TokenizedSentence ts = concordia.addExample(Example("Alice has a cat", 56));
|
||||
cout << "Added the following tokens: " << endl;
|
||||
@ -209,17 +209,6 @@ Every option is documented in comments within the configuration file.
|
||||
#---------------------------
|
||||
#
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# The settings below set the paths for hashed index, markers array and word map files.
|
||||
# If all the files pointed to by these paths exist, Concordia reads them into its
|
||||
# RAM index. When none of these files exist, a new empty index is created.
|
||||
# However, if any of these files exist and any other is missing, the index
|
||||
# is considered corrupt and Concordia does not start.
|
||||
|
||||
hashed_index_path = "<CONCORDIA_HOME>/tests/resources/temp/temp_hashed_index.bin"
|
||||
markers_path = "<CONCORDIA_HOME>/tests/resources/temp/temp_markers.bin"
|
||||
word_map_path = "<CONCORDIA_HOME>/tests/resources/temp/temp_word_map.bin"
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# The following settings control the sentence tokenizer mechanism. Tokenizer
|
||||
# takes into account html tags, substitutes predefined symbols
|
||||
@ -260,6 +249,7 @@ The full list of program options is given below:
|
||||
\verbatim
|
||||
-h [ --help ] Display this message
|
||||
-c [ --config ] arg Concordia configuration file (required)
|
||||
-i [ --index ] arg Index directory (required)
|
||||
-s [ --simple-search ] arg Pattern to be searched in the index
|
||||
-n [ --silent ] While searching, do not
|
||||
output search results
|
||||
@ -277,12 +267,12 @@ From <CONCORDIA_HOME> directory:
|
||||
|
||||
Read sentences from file sentences.txt
|
||||
\verbatim
|
||||
./build/concordia-console/concordia-console -c tests/resources/concordia-config/concordia.cfg -r ~/sentences.txt
|
||||
./build/concordia-console/concordia-console -i /tmp -c tests/resources/concordia-config/concordia.cfg -r ~/sentences.txt
|
||||
\endverbatim
|
||||
|
||||
Run concordia search on the index
|
||||
\verbatim
|
||||
./build/concordia-console/concordia-console -c tests/resources/concordia-config/concordia.cfg -x "some pattern"
|
||||
./build/concordia-console/concordia-console -i /tmp -c tests/resources/concordia-config/concordia.cfg -x "some pattern"
|
||||
\endverbatim
|
||||
|
||||
*/
|
||||
|
@ -12,7 +12,7 @@
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
|
||||
TokenizedSentence ts = concordia.addExample(Example("Alice has a cat", 56));
|
||||
cout << "Added the following tokens: " << endl;
|
||||
|
@ -7,6 +7,6 @@
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
cout << concordia.getVersion() << endl;
|
||||
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
Concordia concordia("/tmp", EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||
|
||||
// adding sentences to index
|
||||
concordia.addExample(Example("Alice has a cat", 56));
|
||||
|
@ -3,17 +3,6 @@
|
||||
#---------------------------
|
||||
#
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# The settings below set the paths for hashed index, markers array and word map files.
|
||||
# If all the files pointed to by these paths exist, Concordia reads them into its
|
||||
# RAM index. When none of these files exist, a new empty index is created.
|
||||
# However, if any of these files exist and any other is missing, the index
|
||||
# is considered corrupt and Concordia does not start.
|
||||
|
||||
hashed_index_path = "@PROD_RESOURCES_DIRECTORY@/temp/@TEMP_HASHED_INDEX@"
|
||||
markers_path = "@PROD_RESOURCES_DIRECTORY@/temp/@TEMP_MARKERS@"
|
||||
word_map_path = "@PROD_RESOURCES_DIRECTORY@/temp/@TEMP_WORD_MAP@"
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# The following settings control the sentence anonymizer mechanism. It is used to
|
||||
# remove unnecessary symbols and possibly words from sentences added to index
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
|
||||
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -a "$1"
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -a "$1"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
|
||||
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "$1"
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "$1"
|
||||
|
||||
|
@ -9,6 +9,6 @@ echo "CONCORDIA INDEXER: Running Concordia"
|
||||
rm ../prod/resources/temp/*
|
||||
|
||||
echo "CONCORDIA INDEXER: reading from file"
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt
|
||||
|
||||
|
||||
|
@ -9,22 +9,22 @@ echo "CONCORDIA RUNNER: Running Concordia"
|
||||
rm ../prod/resources/temp/*
|
||||
|
||||
echo "CONCORDIA RUNNER: reading from file"
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/jrc_smaller.txt
|
||||
|
||||
|
||||
echo "CONCORDIA RUNNER: concordia searching for pattern: \"Współpraca Państw Członkowskich i Komisji Europejskiej\""
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "Współpraca Państw Członkowskich i Komisji Europejskiej"
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "Współpraca Państw Członkowskich i Komisji Europejskiej"
|
||||
echo "CONCORDIA RUNNER: concordia searching for pattern: \"8. W odniesieniu do artykułu 45 ustęp 12\""
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "8. W odniesieniu do artykułu 45 ustęp 12"
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "8. W odniesieniu do artykułu 45 ustęp 12"
|
||||
echo "CONCORDIA RUNNER: concordia searching for pattern: \"Prawo europejskie umożliwia handel zagraniczny\""
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -x "Prawo europejskie umożliwia handel zagraniczny"
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -x "Prawo europejskie umożliwia handel zagraniczny"
|
||||
|
||||
|
||||
echo "CONCORDIA RUNNER: searching for pattern: \"Parlamentu Europejskiego\""
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Parlamentu Europejskiego" -n
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "Parlamentu Europejskiego" -n
|
||||
echo "CONCORDIA RUNNER: searching for pattern: \"Dostęp do zatrudnienia\""
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Dostęp do zatrudnienia" -n
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "Dostęp do zatrudnienia" -n
|
||||
echo "CONCORDIA RUNNER: searching for pattern: \"Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem\""
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem" -n
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem" -n
|
||||
|
||||
rm ../prod/resources/text-files/jrc_smaller.txt
|
||||
|
@ -9,10 +9,10 @@ echo "CONCORDIA RUNNER: Running Concordia"
|
||||
rm ../prod/resources/temp/*
|
||||
|
||||
echo "CONCORDIA RUNNER: reading from file"
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/large.txt
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/large.txt
|
||||
echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\""
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own" -n
|
||||
echo "CONCORDIA RUNNER: searching for pattern: \"it is\""
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n
|
||||
|
||||
rm ../prod/resources/text-files/large.txt
|
||||
|
@ -4,8 +4,8 @@ echo "CONCORDIA RUNNER: Running Concordia"
|
||||
|
||||
rm ../prod/resources/temp/*
|
||||
echo "CONCORDIA RUNNER: reading from file"
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/medium.txt
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -r ../prod/resources/text-files/medium.txt
|
||||
echo "CONCORDIA RUNNER: searching for pattern: \"drawn from his own\""
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own"
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "drawn from his own"
|
||||
echo "CONCORDIA RUNNER: searching for pattern: \"it is\""
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "it is" -n
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
|
||||
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -s "$1" -n
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -s "$1" -n
|
||||
|
||||
|
@ -9,6 +9,6 @@ echo "CONCORDIA RUNNER: Running Concordia"
|
||||
rm ../prod/resources/temp/*
|
||||
|
||||
echo "CONCORDIA RUNNER: testing"
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/jrc_smaller.txt
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/jrc_smaller.txt
|
||||
|
||||
rm ../prod/resources/text-files/jrc_smaller.txt
|
||||
|
@ -5,5 +5,5 @@ echo "CONCORDIA RUNNER: Running Concordia"
|
||||
rm ../prod/resources/temp/*
|
||||
|
||||
echo "CONCORDIA RUNNER: testing"
|
||||
../build/concordia-console/concordia-console -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/medium.txt
|
||||
../build/concordia-console/concordia-console -i ../prod/resources/temp/ -c ../prod/resources/concordia-config/concordia.cfg -t ../prod/resources/text-files/medium.txt
|
||||
|
||||
|
@ -25,3 +25,8 @@ std::string TestResourcesManager::getTestFilePath(const std::string & module, co
|
||||
return result + "/" + module + "/" + filename;
|
||||
}
|
||||
|
||||
// Builds the temp directory path by appending "/temp" to the
// TEST_RESOURCES_DIRECTORY value. The returned string has no trailing slash.
std::string TestResourcesManager::getTempPath() {
|
||||
std::string result = std::string(TEST_RESOURCES_DIRECTORY);
|
||||
return result + "/temp";
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@ public:
|
||||
|
||||
static std::string getTestFilePath(const std::string & module, const std::string & filename);
|
||||
|
||||
static std::string getTempPath();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -6,12 +6,6 @@
|
||||
# Anubis score threshold
|
||||
anubis_threshold = "0.3"
|
||||
|
||||
word_map_path = "/tmp/wm.bin"
|
||||
|
||||
hashed_index_path = "/tmp/hi.bin"
|
||||
|
||||
markers_path = "/tmp/ma.bin"
|
||||
|
||||
html_tags_path = "/tmp/html_tags.txt"
|
||||
|
||||
stop_words_enabled = "true"
|
||||
|
@ -3,17 +3,6 @@
|
||||
#---------------------------
|
||||
#
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# The settings below set the paths for the hashed index, markers array and word map files.
|
||||
# If all the files pointed to by these paths exist, Concordia reads them into its
|
||||
# RAM index. When none of these files exist, a new empty index is created.
|
||||
# However, if any of these files exist and any other is missing, the index
|
||||
# is considered corrupt and Concordia does not start.
|
||||
|
||||
hashed_index_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_HASHED_INDEX@"
|
||||
markers_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_MARKERS@"
|
||||
word_map_path = "@TEST_RESOURCES_DIRECTORY@/temp/@TEMP_WORD_MAP@"
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# The following settings control the sentence anonymizer mechanism. It is used to
|
||||
# remove unnecessary symbols and possibly words from sentences added to index
|
||||
|
Loading…
Reference in New Issue
Block a user