#ifndef HASH_GENERATOR_HDR
#define HASH_GENERATOR_HDR

// NOTE(review): the six directives below carry no header name in this view
// of the file (the <...> targets appear to have been stripped). Restore them
// from version control before compiling; they are left untouched here.
#include
#include
#include
#include
#include
#include
#include "concordia/word_map.hpp"
#include "concordia/common/config.hpp"
#include "concordia/sentence_tokenizer.hpp"
#include "concordia/concordia_config.hpp"
#include "concordia/concordia_exception.hpp"

/*!
  Class for generating a sentence hash.

  The hash is generated from a sentence given as a raw string.
  The string is first anonymized and tokenized. After these
  operations, each token is coded as an integer, according to
  WordMap. The resulting hash is a vector of integers.

  The sentence hash is used when adding a sentence to the index
  and during searching.

  HashGenerator holds an instance of WordMap (_wordMap), used to
  code tokens as integers, and a sentence preprocessor
  (_sentenceTokenizer), used to preprocess the sentence string.
*/
// NOTE(review): earlier wording of the comment above named the preprocessor
// "SentenceAnonymizer", while the member is _sentenceTokenizer and the header
// included is sentence_tokenizer.hpp - confirm which name is current.
//
// NOTE(review): every boost::shared_ptr in this class appears without its
// template argument in this view, so the pointee types are not visible here.
//
// NOTE(review): the throw(ConcordiaException) clauses are dynamic exception
// specifications, which were deprecated in C++11 and removed in C++17; this
// header therefore needs a pre-C++17 language mode. The specification on each
// out-of-class definition has to match the one declared here.
class HashGenerator {
public:
    /*! Constructor.
      \param config pointer to current config object
                    (pointee type not visible here; presumably the
                    configuration class from concordia_config.hpp - TODO confirm)
      \throws ConcordiaException as declared by the exception specification
    */
    explicit HashGenerator(boost::shared_ptr config) throw(ConcordiaException);

    /*! Destructor.
      Declared virtual, so deleting through a base-class pointer or
      deriving from this class is well defined.
    */
    virtual ~HashGenerator();

    /*! Generates hash of a sentence.
      \param sentence sentence to generate hash from
      \returns shared pointer to a vector of integers
               (element type not visible in this view)
      \throws ConcordiaException as declared by the exception specification
    */
    boost::shared_ptr generateHash(const std::string & sentence) throw(ConcordiaException);

    /*! Saves the contents of current WordMap to HDD.
      Declared without an exception specification.
    */
    void serializeWordMap();

    /*! Clears word map. */
    void clearWordMap();

private:
    // Word map used to code tokens as integers (see class description).
    boost::shared_ptr _wordMap;

    // Preprocessor applied to the raw sentence string before coding.
    boost::shared_ptr _sentenceTokenizer;

    // Presumably the file location used by serializeWordMap() -
    // TODO confirm against the implementation file.
    std::string _wordMapFilePath;
};

#endif