Add a ConcordiaIndex skeleton class, a unit test for it and its test resources;
rename HashGenerator's word-map member/parameter from *Filename to *FilePath.

Summary of what the hunks below do:
  * concordia/concordia_index.{cpp,hpp}: new class ConcordiaIndex. The
    constructor throws ConcordiaException (E01/E02) when exactly one of the
    word map file and the hashed index file exists, then creates a
    HashGenerator for the word map path. serializeWordMap() forwards to the
    HashGenerator; generateSuffixArray() and addSentence() are empty stubs.
  * concordia/hash_generator.{cpp,hpp}: rename only, no logic change.
  * tests/common/test_resources_manager.{cpp,hpp}: three new path helpers that
    resolve files under the "concordia-index" test resources directory.
  * tests/resources/concordia-index/: two new mock binary resources.

NOTE(review): this patch was re-flowed from a copy whose newlines had been
collapsed and whose angle-bracketed tokens had been stripped. Line structure
was rebuilt so that every hunk matches the line counts in its @@ header.
Stripped tokens were restored only where the surrounding code determines them
(boost::shared_ptr template arguments, the Boost headers of the new files).
Still unrecovered, confirm against the repository before applying:
  * the two bare "#include" context lines at the top of the first
    hash_generator.cpp hunk (their header names are unknown);
  * the element type of "vector" in the second hash_generator.cpp hunk heading
    (informational only, ignored by patch tools);
  * original indentation and the exact placement of blank context lines, so
    apply with whitespace tolerance (e.g. git apply --ignore-whitespace).

diff --git a/concordia/CMakeLists.txt b/concordia/CMakeLists.txt
index 68e6e3d..384eeff 100644
--- a/concordia/CMakeLists.txt
+++ b/concordia/CMakeLists.txt
@@ -6,6 +6,7 @@ foreach(dir ${ALL_DIRECTORIES})
 endforeach(dir)
 
 add_library(concordia SHARED
+  concordia_index.cpp
   word_map.cpp
   hash_generator.cpp
   concordia.cpp
diff --git a/concordia/concordia_index.cpp b/concordia/concordia_index.cpp
new file mode 100644
index 0000000..62dfd28
--- /dev/null
+++ b/concordia/concordia_index.cpp
@@ -0,0 +1,40 @@
+#include "concordia/concordia_index.hpp"
+
+#include <boost/filesystem.hpp>
+
+ConcordiaIndex::ConcordiaIndex(const string & wordMapFilepath,
+                               const string & hashedIndexFilepath,
+                               const string & suffixArrayFilepath)
+                                         throw(ConcordiaException) {
+    if (boost::filesystem::exists(wordMapFilepath)) {
+        if (boost::filesystem::exists(hashedIndexFilepath)) {
+            // create hashed index file for appending
+        } else {
+            throw ConcordiaException("E01: Word map file exists "
+                                     "but hashed index file absent.");
+        }
+    } else {  // WordMap file does not exist
+        if (boost::filesystem::exists(hashedIndexFilepath)) {
+            throw ConcordiaException("E02: Hashed index file exists "
+                                     "but word map file absent.");
+        } else {
+            // create hashed index file for writing
+        }
+    }
+    _hashGenerator = boost::shared_ptr<HashGenerator>(
+                            new HashGenerator(wordMapFilepath));
+}
+
+ConcordiaIndex::~ConcordiaIndex() {
+}
+
+void ConcordiaIndex::serializeWordMap() {
+    _hashGenerator->serializeWordMap();
+}
+
+void ConcordiaIndex::generateSuffixArray() {
+}
+
+void ConcordiaIndex::addSentence(const string & sentence) {
+}
+
diff --git a/concordia/concordia_index.hpp b/concordia/concordia_index.hpp
new file mode 100644
index 0000000..c55f473
--- /dev/null
+++ b/concordia/concordia_index.hpp
@@ -0,0 +1,36 @@
+#ifndef CONCORDIA_INDEX_HDR
+#define CONCORDIA_INDEX_HDR
+
+#include <boost/shared_ptr.hpp>
+#include "concordia/hash_generator.hpp"
+#include "concordia/concordia_exception.hpp"
+
+/*!
+  Class for creating and maintaining the index.
+
+*/
+
+using namespace std;
+
+class ConcordiaIndex {
+public:
+    explicit ConcordiaIndex(const string & wordMapFilepath,
+                            const string & hashedIndexFilepath,
+                            const string & suffixArrayFilepath)
+                                         throw(ConcordiaException);
+
+    /*! Destructor.
+    */
+    virtual ~ConcordiaIndex();
+
+    void addSentence(const string & sentence);
+
+    void serializeWordMap();
+
+    void generateSuffixArray();
+
+private:
+    boost::shared_ptr<HashGenerator> _hashGenerator;
+};
+
+#endif
diff --git a/concordia/hash_generator.cpp b/concordia/hash_generator.cpp
index 79f734a..0a0c2fd 100644
--- a/concordia/hash_generator.cpp
+++ b/concordia/hash_generator.cpp
@@ -5,12 +5,12 @@
 #include
 #include
 
-HashGenerator::HashGenerator(const string & wordMapFilename)
+HashGenerator::HashGenerator(const string & wordMapFilePath)
                                          throw(ConcordiaException) :
-    _wordMapFilename(wordMapFilename),
+    _wordMapFilePath(wordMapFilePath),
     _wordMap(boost::shared_ptr<WordMap>(new WordMap)) {
-    if (boost::filesystem::exists(_wordMapFilename)) {
-        ifstream ifs(_wordMapFilename.c_str(), std::ios::binary);
+    if (boost::filesystem::exists(_wordMapFilePath)) {
+        ifstream ifs(_wordMapFilePath.c_str(), std::ios::binary);
         boost::archive::binary_iarchive ia(ifs);
         boost::shared_ptr<WordMap> restoredWordMap(new WordMap);
         ia >> *_wordMap;
@@ -36,7 +36,7 @@ vector HashGenerator::generateHash(const string & sentence) {
 }
 
 void HashGenerator::serializeWordMap() {
-    ofstream ofs(_wordMapFilename.c_str(), std::ios::binary);
+    ofstream ofs(_wordMapFilePath.c_str(), std::ios::binary);
     boost::archive::binary_oarchive oa(ofs);
     oa << *_wordMap;
 }
diff --git a/concordia/hash_generator.hpp b/concordia/hash_generator.hpp
index 15da5dd..5c1f101 100644
--- a/concordia/hash_generator.hpp
+++ b/concordia/hash_generator.hpp
@@ -17,7 +17,7 @@ using namespace std;
 
 class HashGenerator {
 public:
-    explicit HashGenerator(const string & wordMapFilename)
+    explicit HashGenerator(const string & wordMapFilePath)
                                          throw(ConcordiaException);
 
     /*! Destructor.
@@ -31,7 +31,7 @@ public:
 private:
     boost::shared_ptr<WordMap> _wordMap;
 
-    string _wordMapFilename;
+    string _wordMapFilePath;
 };
 
 #endif
diff --git a/concordia/t/CMakeLists.txt b/concordia/t/CMakeLists.txt
index 74176fd..04da115 100644
--- a/concordia/t/CMakeLists.txt
+++ b/concordia/t/CMakeLists.txt
@@ -3,6 +3,7 @@ add_library(concordia-tests
   test_concordia_config.cpp
   test_word_map.cpp
   test_hash_generator.cpp
+  test_concordia_index.cpp
   )
 
 target_link_libraries(concordia-tests concordia ${LIBCONFIG_LIB} concordia-tests-common)
diff --git a/concordia/t/test_concordia_index.cpp b/concordia/t/test_concordia_index.cpp
new file mode 100644
index 0000000..fa2355a
--- /dev/null
+++ b/concordia/t/test_concordia_index.cpp
@@ -0,0 +1,18 @@
+#include "tests/unit-tests/unit_tests_globals.hpp"
+
+#include "concordia/concordia_index.hpp"
+#include "tests/common/test_resources_manager.hpp"
+
+using namespace std;
+
+BOOST_AUTO_TEST_SUITE(concordia_index)
+
+BOOST_AUTO_TEST_CASE( ResourcesExistenceTest1 )
+{
+    ConcordiaIndex index(TestResourcesManager::getTestWordMapFilePath("mock_word_map.bin"),
+                         TestResourcesManager::getTestHashIndexFilePath("mock_hash_index.bin"),
+                         TestResourcesManager::getTestSuffixArrayFilePath());
+
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/common/test_resources_manager.cpp b/tests/common/test_resources_manager.cpp
index 46c5c4e..c6b6f32 100644
--- a/tests/common/test_resources_manager.cpp
+++ b/tests/common/test_resources_manager.cpp
@@ -3,6 +3,7 @@
 #define PUDDLE_TEST_DIRECTORY "puddle"
 #define CONCORDIA_TAGSET_DIRECTORY "concordia-tagset"
 #define CONCORDIA_CONFIG_DIRECTORY "concordia-config"
+#define CONCORDIA_INDEX_DIRECTORY "concordia-index"
 
 string TestResourcesManager::getPuddleFilePath(const string & filename) {
     string result = string(TEST_RESOURCES_DIRECTORY);
@@ -15,6 +16,21 @@ string TestResourcesManager::getTestConcordiaConfigFilePath(const string & filen
     return result + "/" + CONCORDIA_CONFIG_DIRECTORY + "/" + filename;
 }
 
+string TestResourcesManager::getTestWordMapFilePath(const string & filename) {
+    string result = string(TEST_RESOURCES_DIRECTORY);
+    return result + "/" + CONCORDIA_INDEX_DIRECTORY + "/" + filename;
+}
+
+string TestResourcesManager::getTestHashIndexFilePath(const string & filename) {
+    string result = string(TEST_RESOURCES_DIRECTORY);
+    return result + "/" + CONCORDIA_INDEX_DIRECTORY + "/" + filename;
+}
+
+string TestResourcesManager::getTestSuffixArrayFilePath() {
+    string result = string(TEST_RESOURCES_DIRECTORY);
+    return result + "/" + CONCORDIA_INDEX_DIRECTORY + "/test_SA.bin";
+}
+
 string TestResourcesManager::getProdConcordiaConfigFilePath(const string & filename) {
     string result = string(PROD_RESOURCES_DIRECTORY);
     return result + "/" + CONCORDIA_CONFIG_DIRECTORY + "/" + filename;
diff --git a/tests/common/test_resources_manager.hpp b/tests/common/test_resources_manager.hpp
index 607f966..846647d 100644
--- a/tests/common/test_resources_manager.hpp
+++ b/tests/common/test_resources_manager.hpp
@@ -14,6 +14,12 @@ public:
 
     static string getTestConcordiaConfigFilePath(const string & filename);
 
+    static string getTestWordMapFilePath(const string & filename);
+
+    static string getTestHashIndexFilePath(const string & filename);
+
+    static string getTestSuffixArrayFilePath();
+
     static string getProdConcordiaConfigFilePath(const string & filename);
 };
 
diff --git a/tests/resources/concordia-index/mock_hash_index.bin b/tests/resources/concordia-index/mock_hash_index.bin
new file mode 100644
index 0000000..e69de29
diff --git a/tests/resources/concordia-index/mock_word_map.bin b/tests/resources/concordia-index/mock_word_map.bin
new file mode 100644
index 0000000..3da4c04
Binary files /dev/null and b/tests/resources/concordia-index/mock_word_map.bin differ