concordia-library/concordia/regex_rule.hpp
2015-06-25 10:12:51 +02:00

54 lines
1.3 KiB
C++

#ifndef REGEX_ANNOTATION_HDR
#define REGEX_ANNOTATION_HDR
#include <string>
#include "concordia/common/config.hpp"
#include "concordia/tokenized_sentence.hpp"
#include "concordia/concordia_exception.hpp"
#include <boost/shared_ptr.hpp>
#include <boost/regex.hpp>
#include <boost/regex/icu.hpp>
#include <unicode/unistr.h>
/*!
Boost.Exception error_info type tagged with my_tag and carrying a
std::string payload.
NOTE(review): not referenced anywhere else in the visible part of this
header, and no Boost.Exception header is included directly here --
presumably it arrives via concordia_exception.hpp; confirm.
*/
typedef boost::error_info<struct my_tag, std::string> my_tag_error_info;
/*!
Class for representing a regular expression annotation rule.
Stores a Unicode-aware regex object (boost::u32regex), a one-character
annotation type and a string value. The rule is applied to a
TokenizedSentence through apply().
NOTE(review): how the annotation type and value are used on a match is
defined in the implementation file, which is not visible from this header.
*/
class RegexRule {
public:
/*!
Constructor.
\param patternString regex pattern to match
\param annotationType type of annotation
\param value string value stored with the rule (presumably associated
with the annotations produced for matches -- confirm in the
implementation)
\param caseSensitive case sensitivity of the pattern (defaults to true)
\throws ConcordiaException the only exception type permitted by the
exception specification; presumably raised when the pattern cannot
be compiled -- confirm in the implementation
NOTE(review): dynamic exception specifications are deprecated since C++11
and removed in C++17. Dropping this one requires changing the out-of-line
definition at the same time.
*/
RegexRule(std::string patternString,
char annotationType,
std::string value,
bool caseSensitive = true)
throw(ConcordiaException);
/*! Destructor.
Declared virtual, so deleting a derived object through a RegexRule
pointer is well-defined.
*/
virtual ~RegexRule();
/*! Applies this rule to the given sentence.
\param sentence shared pointer to the input sentence; presumably
annotated in place, since the method returns nothing -- confirm
in the implementation
*/
void apply(boost::shared_ptr<TokenizedSentence> sentence);
private:
/*! Annotation type given to the constructor (single character code). */
char _annotationType;
/*! String value given to the constructor. */
std::string _value;
/*! Unicode-aware regex object (Boost.Regex with ICU support);
presumably compiled from patternString in the constructor. */
boost::u32regex _pattern;
};
#endif