concordia-library/concordia/token_annotation.hpp

70 lines
1.4 KiB
C++
Raw Normal View History

2015-06-22 13:52:56 +02:00
#ifndef TOKEN_ANNOTATION_HDR
#define TOKEN_ANNOTATION_HDR
#include "concordia/common/config.hpp"
#include "concordia/interval.hpp"
#include <string>
/*!
    Class representing annotation of char sequence as a token.
    It is a type of interval that also stores information
    about the annotation type and value.
*/
class TokenAnnotation : public Interval {
public:
    /*! Constructor.
      \param start start index of the annotation (char-level, 0-based)
      \param end end index of the annotation (char-level, 0-based)
      \param annotationType annotation type; presumably one of the type
             constants declared in this class (NE, WORD, HTML_TAG,
             STOP_WORD) -- TODO confirm against callers
      \param value annotation value
    */
    TokenAnnotation(const SUFFIX_MARKER_TYPE start,
                    const SUFFIX_MARKER_TYPE end,
                    const int annotationType,
                    const std::string & value);

    /*! Destructor.
    */
    virtual ~TokenAnnotation();

    /*! Getter for annotation type.
      \returns annotation type
    */
    int getType() const {
        return _annotationType;
    }

    /*! Getter for annotation value.
      \returns annotation value (a copy of the stored string)
    */
    std::string getValue() const {
        return _value;
    }

    // The annotation type identifiers below are only declared here; being
    // non-const static data members, their values are defined outside of
    // this header.

    /*! Named entity annotation type
    */
    static int NE;

    /*! Word annotation type
    */
    static int WORD;

    /*! Html tag annotation type
    */
    static int HTML_TAG;

    /*! Stop word annotation type
    */
    static int STOP_WORD;

protected:
    /*! Type of this annotation, as returned by getType(). */
    int _annotationType;

    /*! Value of this annotation, as returned by getValue(). */
    std::string _value;
};
#endif