62 lines
1.3 KiB
C++
62 lines
1.3 KiB
C++
#ifndef TOKEN_ANNOTATION_HDR
|
|
#define TOKEN_ANNOTATION_HDR
|
|
|
|
#include "concordia/common/config.hpp"
|
|
#include "concordia/interval.hpp"
|
|
|
|
#include <string>
|
|
|
|
/*!
  Class representing the annotation of a char sequence as a token.
  It is a type of interval that also stores information
  about the annotation type and value.
*/
|
|
|
|
class TokenAnnotation : public Interval {
|
|
public:
|
|
/*! Constructor.
|
|
\param start start index of the annotation (char-level, 0-based)
|
|
\param end end index of the annotation (char-level, 0-based)
|
|
\param type annotation type
|
|
\param value annotation value
|
|
*/
|
|
TokenAnnotation(const SUFFIX_MARKER_TYPE start,
|
|
const SUFFIX_MARKER_TYPE end,
|
|
const char annotationType,
|
|
const std::string & value);
|
|
|
|
/*! Destructor.
|
|
*/
|
|
virtual ~TokenAnnotation();
|
|
|
|
/*! Getter for annotation type.
|
|
\returns annotation type
|
|
*/
|
|
char getType() const {
|
|
return _annotationType;
|
|
}
|
|
|
|
/*! Getter for annotation value.
|
|
\returns annotation value
|
|
*/
|
|
std::string getValue() const {
|
|
return _value;
|
|
}
|
|
|
|
static char NE_TYPE;
|
|
|
|
static char WORD_TYPE;
|
|
|
|
static char HTML_TAG_TYPE;
|
|
|
|
static char STOP_WORD_TYPE;
|
|
|
|
protected:
|
|
char _annotationType;
|
|
|
|
std::string _value;
|
|
};
|
|
|
|
#endif
|