#ifndef HASH_GENERATOR_HDR #define HASH_GENERATOR_HDR #include #include #include #include #include #include "concordia/word_map.hpp" #include "concordia/common/config.hpp" #include "concordia/sentence_tokenizer.hpp" #include "concordia/concordia_config.hpp" #include "concordia/concordia_exception.hpp" /*! Class for generating a sentence hash. The hash is generated from a sentence given in raw string. String is first tokenized by SentenceTokenizer and then each token is coded as an integer, according to WordMap. Resulting hash is an instance of TokenizedSentence. Hashed sentence is used when adding a sentence to index and during searching. HashGenerator holds an instance of WordMap, used to code tokens as integers and SentenceTokenizer, used to tokenize the sentence string. */ class HashGenerator { public: /*! Constructor. \param indexPath path to the index directory \param config pointer to current config object */ explicit HashGenerator(std::string indexPath, boost::shared_ptr config) throw(ConcordiaException); /*! Destructor. */ virtual ~HashGenerator(); /*! Generates hash of a sentence. \param sentence sentence to generate hash from \returns tokenized sentence, containing the hash */ TokenizedSentence generateHash(const std::string & sentence) throw(ConcordiaException); /*! Saves the contents of current WordMap to HDD. */ void serializeWordMap(); /*! Clears word map. */ void clearWordMap(); private: boost::shared_ptr _wordMap; boost::shared_ptr _sentenceTokenizer; std::string _wordMapFilePath; }; #endif