#ifndef SENTENCE_ANONYMIZER_HDR
#define SENTENCE_ANONYMIZER_HDR

// NOTE(review): the two #include directives below carry no header name in
// this copy of the file. It looks like angle-bracket includes were lost
// (possibly <string> and <vector>, given the types used further down).
// Restore the real targets from version control; do not guess.
#include
#include
#include "concordia/common/config.hpp"
#include "concordia/regex_replacement.hpp"
#include "concordia/concordia_config.hpp"
#include "concordia/concordia_exception.hpp"
// NOTE(review): same problem for the two directives below. At least one is
// presumably the Boost header providing boost::shared_ptr -- TODO confirm.
#include
#include

// NOTE(review): a using-directive at header scope is visible in every file
// that includes this header. Includers may depend on it by now, so check
// them before removing it.
using namespace std;

/*! Class for anonymizing sentence before adding to index.

  Only declarations are given here; the member functions are defined
  elsewhere.

  NOTE(review): every boost::shared_ptr and vector in this declaration
  appears without template arguments, which looks like the same loss of
  angle-bracket text as in the #include lines. The element types are
  presumably ConcordiaConfig (for the constructor argument) and
  RegexReplacement (for the rules), going by the headers included above.
  TODO confirm against the original file.
*/
class SentenceAnonymizer {
public:
    /*! Constructor.
      \param config shared pointer to the configuration object
      \throws ConcordiaException as stated by the exception specification

      NOTE(review): dynamic exception specifications are deprecated since
      C++11 and removed in C++17. The specification must stay in sync with
      the out-of-line definition, so change both together or neither.
    */
    explicit SentenceAnonymizer(boost::shared_ptr config)
                                            throw(ConcordiaException);

    /*! Destructor. Declared virtual.
    */
    virtual ~SentenceAnonymizer();

    /*! Anonymizes a sentence.
      \param sentence input sentence; taken by const reference
      \returns a string, presumably the anonymized form of the input
               (implementation not visible here -- verify)
    */
    string anonymize(const string & sentence);

private:
    /*! Helper taking the path of a named-entities file. Presumably fills
      _namedEntities from that file -- verify against the definition.
      \param namedEntitiesPath file path, passed by non-const reference
    */
    void _createNeRules(string & namedEntitiesPath);

    /*! Helper taking the path of an HTML-tags file. Presumably sets up
      _htmlTags from that file -- verify against the definition.
      \param htmlTagsPath file path, passed by non-const reference
    */
    void _createHtmlTagsRule(string & htmlTagsPath);

    /*! Helper returning a shared pointer to a replacement rule.
      \param filePath file path, passed by non-const reference
      \param replacement replacement text, passed by value
      \param wholeWord flag defaulting to false; judging by the name it
                       restricts matching to whole words -- TODO confirm
      \returns shared pointer to the rule that was built
    */
    boost::shared_ptr _getMultipleReplacementRule(
        string & filePath, string replacement, bool wholeWord = false);

    // Collection of rules for named entities (element type missing, see the
    // class-level note).
    vector _namedEntities;

    // Rule objects held by shared pointer. Going by the names alone they
    // cover HTML tags, stop words, stop symbols and space symbols; their
    // exact use is defined in the implementation file.
    boost::shared_ptr _htmlTags;

    boost::shared_ptr _stopWords;

    boost::shared_ptr _stopSymbols;

    boost::shared_ptr _spaceSymbols;
};

#endif