concordia-library/concordia/regex_rule.hpp

56 lines
1.4 KiB
C++
Raw Normal View History

2015-06-22 13:52:56 +02:00
#ifndef REGEX_ANNOTATION_HDR
#define REGEX_ANNOTATION_HDR
#include <string>
#include "concordia/common/config.hpp"
2015-06-25 10:12:51 +02:00
#include "concordia/tokenized_sentence.hpp"
#include "concordia/concordia_exception.hpp"
#include <boost/shared_ptr.hpp>
#include <boost/regex.hpp>
#include <boost/regex/icu.hpp>
2015-06-22 13:52:56 +02:00
#include <unicode/unistr.h>
/*!
  Boost.Exception error_info type carrying a std::string payload,
  distinguished from other error_info types by the tag struct my_tag.
*/
typedef boost::error_info<struct my_tag, std::string> my_tag_error_info;
2015-05-01 14:52:53 +02:00
/*!
2015-06-22 13:52:56 +02:00
Class for representing a regular expression annotation rule.
2015-06-27 12:40:24 +02:00
Holds regex pattern string for matching and default value to assign
to the annotations. Rule also has a type, given to all annotations
produced by it.
2015-05-01 14:52:53 +02:00
*/
2015-06-22 13:52:56 +02:00
class RegexRule {
public:
2015-05-01 14:52:53 +02:00
/*!
Constructor.
\param patternString regex pattern to match
2015-06-25 10:12:51 +02:00
\param annoationType type of annotation
2015-06-27 12:40:24 +02:00
\param value value to be assigned to the annotation
2015-05-01 14:52:53 +02:00
\param caseSensitive case sensitivity of the pattern
*/
2015-06-25 10:12:51 +02:00
RegexRule(std::string patternString,
2015-06-25 20:49:22 +02:00
int annotationType,
2015-06-25 10:12:51 +02:00
std::string value,
bool caseSensitive = true)
throw(ConcordiaException);
/*! Destructor.
*/
2015-06-22 13:52:56 +02:00
virtual ~RegexRule();
2015-06-27 12:40:24 +02:00
/*! Applies regex annotation on tokenized sentence.
2015-06-22 13:52:56 +02:00
\param sentence the input sentence
2015-05-01 14:52:53 +02:00
*/
2015-08-19 20:49:26 +02:00
void apply(TokenizedSentence & sentence);
private:
2015-06-25 20:49:22 +02:00
int _annotationType;
2015-06-22 13:52:56 +02:00
std::string _value;
2015-06-27 12:40:24 +02:00
2015-06-25 10:12:51 +02:00
boost::u32regex _pattern;
};
#endif