2015-06-22 13:52:56 +02:00
|
|
|
#ifndef REGEX_ANNOTATION_HDR
|
|
|
|
#define REGEX_ANNOTATION_HDR
|
2014-04-13 12:21:30 +02:00
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include "concordia/common/config.hpp"
|
2015-06-25 10:12:51 +02:00
|
|
|
#include "concordia/tokenized_sentence.hpp"
|
2014-04-13 12:21:30 +02:00
|
|
|
#include "concordia/concordia_exception.hpp"
|
|
|
|
#include <boost/shared_ptr.hpp>
|
|
|
|
#include <boost/regex.hpp>
|
2014-04-24 08:36:48 +02:00
|
|
|
#include <boost/regex/icu.hpp>
|
2015-06-22 13:52:56 +02:00
|
|
|
#include <unicode/unistr.h>
|
2014-04-13 12:21:30 +02:00
|
|
|
|
|
|
|
|
2014-04-29 14:46:04 +02:00
|
|
|
/*!
  Boost.Exception error_info type carrying a std::string payload,
  distinguished by the tag type my_tag.
  NOTE(review): not referenced anywhere else in the visible part of
  this header - confirm it is still used before relying on it.
*/
typedef boost::error_info<struct my_tag, std::string> my_tag_error_info;
|
2014-04-13 12:21:30 +02:00
|
|
|
|
2015-05-01 14:52:53 +02:00
|
|
|
/*!
|
2015-06-22 13:52:56 +02:00
|
|
|
Class for representing a regular expression annotation rule.
|
2015-06-27 12:40:24 +02:00
|
|
|
Holds regex pattern string for matching and default value to assign
|
|
|
|
to the annotations. Rule also has a type, given to all annotations
|
|
|
|
produced by it.
|
2015-05-01 14:52:53 +02:00
|
|
|
|
|
|
|
*/
|
2015-06-22 13:52:56 +02:00
|
|
|
class RegexRule {
|
2014-04-13 12:21:30 +02:00
|
|
|
public:
|
2015-05-01 14:52:53 +02:00
|
|
|
/*!
|
|
|
|
Constructor.
|
|
|
|
\param patternString regex pattern to match
|
2015-06-25 10:12:51 +02:00
|
|
|
\param annoationType type of annotation
|
2015-06-27 12:40:24 +02:00
|
|
|
\param value value to be assigned to the annotation
|
2015-05-01 14:52:53 +02:00
|
|
|
\param caseSensitive case sensitivity of the pattern
|
|
|
|
*/
|
2015-06-25 10:12:51 +02:00
|
|
|
RegexRule(std::string patternString,
|
2015-06-25 20:49:22 +02:00
|
|
|
int annotationType,
|
2015-06-25 10:12:51 +02:00
|
|
|
std::string value,
|
2019-01-18 13:30:51 +01:00
|
|
|
bool caseSensitive = true);
|
2014-04-13 12:21:30 +02:00
|
|
|
|
|
|
|
/*! Destructor.
|
|
|
|
*/
|
2015-06-22 13:52:56 +02:00
|
|
|
virtual ~RegexRule();
|
2014-04-29 14:46:04 +02:00
|
|
|
|
2015-06-27 12:40:24 +02:00
|
|
|
/*! Applies regex annotation on tokenized sentence.
|
2015-06-22 13:52:56 +02:00
|
|
|
\param sentence the input sentence
|
2015-05-01 14:52:53 +02:00
|
|
|
*/
|
2015-08-19 20:49:26 +02:00
|
|
|
void apply(TokenizedSentence & sentence);
|
2014-04-13 12:21:30 +02:00
|
|
|
|
|
|
|
private:
|
2015-06-25 20:49:22 +02:00
|
|
|
int _annotationType;
|
2014-04-29 14:46:04 +02:00
|
|
|
|
2015-06-22 13:52:56 +02:00
|
|
|
std::string _value;
|
2015-06-27 12:40:24 +02:00
|
|
|
|
2015-06-25 10:12:51 +02:00
|
|
|
boost::u32regex _pattern;
|
2014-04-13 12:21:30 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|