concordia-library/concordia/tokenized_sentence.hpp

65 lines
1.5 KiB
C++
Raw Normal View History

2015-06-25 10:12:51 +02:00
#ifndef TOKENIZED_SENTENCE_HDR
#define TOKENIZED_SENTENCE_HDR
2015-06-22 13:52:56 +02:00
#include "concordia/common/config.hpp"
#include "concordia/token_annotation.hpp"
#include <string>
#include <vector>
#include <list>
/*!
  A sentence after anonymization operations. The class
  holds the current string representation of the sentence
  along with the list of its token annotations.
*/
2015-06-25 10:12:51 +02:00
class TokenizedSentence {
public:
    /*!
      Constructor.
      \param sentence text of the sentence (presumably used to initialize
                      the stored sentence; the definition lives out of
                      line and is not visible in this header)
    */
    TokenizedSentence(std::string sentence);

    /*! Destructor. Declared virtual; defined out of line.
    */
    virtual ~TokenizedSentence();

    /*! Getter for sentence
      \returns a copy of the current sentence string
    */
    std::string getSentence() const {
        return _sentence;
    }

    /*! Getter for annotations list
      \returns a copy of the current annotations list
    */
    std::list<TokenAnnotation> getAnnotations() const {
        return _tokenAnnotations;
    }

    /*!
      Transform the sentence to lower case.
    */
    void toLowerCase();

    /*!
      Add new annotations to the existing annotations list. Assumptions:
      1. the existing _tokenAnnotations list contains disjoint, sorted
         intervals;
      2. the vector of annotations to be added also has the above
         properties.
      Only the annotations that do not intersect with any of the
      existing ones are added.
      \param annotations annotations to be added (passed by value)
    */
    void addAnnotations(std::vector<TokenAnnotation> annotations);

private:
    // Current string representation of the sentence.
    std::string _sentence;

    // Annotations attached to the sentence; addAnnotations() assumes
    // they are kept as disjoint, sorted intervals.
    std::list<TokenAnnotation> _tokenAnnotations;
};
#endif