#include "concordia/hash_generator.hpp"

#include "concordia/common/utils.hpp"
#include "concordia/token_annotation.hpp"

// NOTE(review): the original include targets were stripped in extraction;
// reconstructed from the names used below — confirm against the repository.
#include <boost/archive/binary_iarchive.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/filesystem.hpp>
#include <fstream>
#include <string>

/// Constructs the hash generator for a given index directory.
///
/// Restores a previously serialized word map from
/// `indexPath/WORD_MAP_FILE_NAME` when that file exists, and builds the
/// sentence tokenizer from the supplied configuration.
///
/// @param indexPath  directory holding the word map file
/// @param config     tokenizer configuration
// NOTE(review): template arguments were stripped in extraction; the
// shared_ptr element types below are reconstructed from the `new`
// expressions — confirm the config type name against the header.
HashGenerator::HashGenerator(std::string indexPath,
                             boost::shared_ptr<ConcordiaConfig> config):
    _wordMapFilePath(indexPath + "/" + WORD_MAP_FILE_NAME),
    _wordMap(boost::shared_ptr<WordMap>(new WordMap)),
    _sentenceTokenizer(boost::shared_ptr<SentenceTokenizer>(
                           new SentenceTokenizer(config))) {
    if (boost::filesystem::exists(_wordMapFilePath)) {
        std::ifstream ifs(_wordMapFilePath.c_str(), std::ios::binary);
        boost::archive::binary_iarchive ia(ifs);
        // Deserialize directly into the member word map.
        // (The original allocated an unused local `restoredWordMap`
        // here and never read it — dead code, removed.)
        ia >> *_wordMap;
    }
}

HashGenerator::~HashGenerator() {
}

/// Tokenizes the sentence and annotates the tokens with hash codes
/// drawn from the word map.
///
/// @param sentence      input sentence
/// @param byWhitespace  when true, split on whitespace only
/// @return the tokenized, hashed sentence
/// @throws ConcordiaException when the sentence exceeds
///         Utils::maxSentenceSize tokens
TokenizedSentence HashGenerator::generateHash(
        const std::string & sentence,
        bool byWhitespace) {
    TokenizedSentence ts = _sentenceTokenizer->tokenize(sentence, byWhitespace);
    ts.generateHash(_wordMap);
    if (ts.getTokens().size() > Utils::maxSentenceSize) {
        throw ConcordiaException("Trying to add too long sentence.");
    }
    return ts;
}

/// Tokenizes the sentence without hashing — produces token annotations
/// only.
///
/// @param sentence      input sentence
/// @param byWhitespace  when true, split on whitespace only
/// @return the tokenized sentence
/// @throws ConcordiaException when the sentence exceeds
///         Utils::maxSentenceSize tokens
TokenizedSentence HashGenerator::generateTokens(
        const std::string & sentence,
        bool byWhitespace) {
    TokenizedSentence ts = _sentenceTokenizer->tokenize(sentence, byWhitespace);
    ts.generateTokens();
    if (ts.getTokens().size() > Utils::maxSentenceSize) {
        throw ConcordiaException("Trying to add too long sentence.");
    }
    return ts;
}

/// Persists the current word map to the index directory so that
/// subsequent constructions of HashGenerator can restore it.
void HashGenerator::serializeWordMap() {
    std::ofstream ofs(_wordMapFilePath.c_str(), std::ios::binary);
    boost::archive::binary_oarchive oa(ofs);
    oa << *_wordMap;
}

/// Resets the in-memory word map and deletes its on-disk serialization.
void HashGenerator::clearWordMap() {
    _wordMap = boost::shared_ptr<WordMap>(new WordMap);
    boost::filesystem::remove(_wordMapFilePath);
}