#ifndef CONCORDIA_INDEX_HDR
#define CONCORDIA_INDEX_HDR

// NOTE(review): in this copy of the header the targets of several #include
// directives and the template arguments of boost::shared_ptr / std::vector
// are missing (the text between angle brackets appears to have been
// stripped). Restore them from the original source before compiling.
#include
#include
#include
#include
#include

#include "concordia/common/config.hpp"
#include "concordia/example.hpp"
#include "concordia/hash_generator.hpp"
#include "concordia/concordia_exception.hpp"
#include "concordia/tokenized_sentence.hpp"
#include

/*!
  Class for creating and maintaining the index.

  This class does not own the index data structures; it only operates
  on them when they are passed to ConcordiaIndex methods through smart
  pointers. The only state it keeps is the paths to two files, the
  hashed index and the markers array, which are the on-disk (HDD)
  backups of the respective in-memory data structures.
*/
class ConcordiaIndex {
public:
    /*! Constructor.
      \param hashedIndexFilePath path to the hashed index file
      \param markersFilePath path to the markers array file
      \throws ConcordiaException

      NOTE(review): the dynamic exception specification used below was
      deprecated in C++11 and removed in C++17. Dropping it requires
      changing the out-of-line definition at the same time.
    */
    explicit ConcordiaIndex(const std::string & hashedIndexFilePath,
                            const std::string & markersFilePath)
                                         throw(ConcordiaException);

    /*! Destructor. Virtual, so the class may be deleted through a
        base-class pointer.
    */
    virtual ~ConcordiaIndex();

    /*! Adds a single Example to the index.

      The example is first hashed using the hash generator passed to
      this method. The hashed example is then appended to the hashed
      index and to the markers array (both also passed to this method).
      At the same time, the HDD versions of these two data structures
      are appended with the same example.

      \param hashGenerator hash generator used to prepare the hash
                           of the example
      \param T RAM-based hashed index to be appended to
      \param markers RAM-based markers array to be appended to
      \param example example to be added to the index
      \returns a shared pointer; NOTE(review): the pointee type is not
               visible in this copy of the header and the meaning of
               the result is undocumented - confirm against the
               implementation
      \throws ConcordiaException
    */
    boost::shared_ptr addExample(
                boost::shared_ptr hashGenerator,
                boost::shared_ptr > T,
                boost::shared_ptr > markers,
                const Example & example);

    /*! Adds multiple examples to the index.

      The examples are first hashed using the hash generator passed to
      this method. The hashed examples are then appended to the hashed
      index and to the markers array (both also passed to this method).
      At the same time, the HDD versions of these two data structures
      are appended with the same examples.

      \param hashGenerator hash generator used to prepare the hashes
                           of the examples
      \param T RAM-based hashed index to be appended to
      \param markers RAM-based markers array to be appended to
      \param examples vector of examples to be added to the index
      \returns a vector; NOTE(review): the element type is not visible
               in this copy of the header - presumably one entry per
               input example, confirm against the implementation
      \throws ConcordiaException
    */
    std::vector addAllExamples(
                boost::shared_ptr hashGenerator,
                boost::shared_ptr > T,
                boost::shared_ptr > markers,
                const std::vector & examples);

    /*! Generates a suffix array based on the passed hashed index.
      \param T hashed index from which the suffix array is generated
      \returns the generated suffix array
      \throws ConcordiaException
    */
    boost::shared_ptr > generateSuffixArray(
                boost::shared_ptr > T);

private:
    // Private helper that takes the two output file streams explicitly,
    // in addition to the same arguments as addExample().
    // NOTE(review): presumably the shared worker behind addExample() and
    // addAllExamples(), writing to already-opened hashed-index and
    // markers files - confirm against the implementation.
    boost::shared_ptr _addSingleExample(std::ofstream & hashedIndexFile,
                          std::ofstream & markersFile,
                          boost::shared_ptr hashGenerator,
                          boost::shared_ptr > T,
                          boost::shared_ptr > markers,
                          const Example & example);

    // Path to the on-disk hashed index file.
    std::string _hashedIndexFilePath;

    // Path to the on-disk markers array file.
    std::string _markersFilePath;
};

#endif