original sentence in tokenized sentence

This commit is contained in:
rjawor 2017-04-28 13:48:32 +02:00
parent 4faae4e91a
commit 96a5bc3108
2 changed files with 14 additions and 2 deletions

View File

@ -7,7 +7,8 @@
#include <boost/algorithm/string.hpp>
/*! Constructor. Initializes both the working sentence (_sentence) and
    the original sentence (_originalSentence) from the same input string.
    NOTE(review): this view shows the previous initializer line together
    with its replacement; only the two-member initializer list is current.
    \param sentence the sentence text to store
*/
TokenizedSentence::TokenizedSentence(std::string sentence):
_sentence(sentence) {
_sentence(sentence),
_originalSentence(sentence) {
}
TokenizedSentence::~TokenizedSentence() {

View File

@ -35,13 +35,22 @@ public:
*/
virtual ~TokenizedSentence();
/*! Getter for the string sentence,
which might have been modified during tokenization.
The string is returned by value, so the caller receives a copy.
\returns sentence
*/
std::string getSentence() const {
return _sentence;
}
/*! Getter for the original string sentence,
which was used for extracting tokens.
The string is returned by value, so the caller receives a copy.
\returns originalSentence
*/
std::string getOriginalSentence() const {
return _originalSentence;
}
/*! Method for getting tokenized sentence in a string format
(tokens separated by single spaces).
\returns tokenized sentence
@ -126,6 +135,8 @@ public:
private:
/*! Sentence text, which might have been modified during tokenization
    (exposed through getSentence()). */
std::string _sentence;
/*! Sentence text as originally passed to the constructor
    (exposed through getOriginalSentence()). */
std::string _originalSentence;
std::list<TokenAnnotation> _tokenAnnotations;
std::vector<INDEX_CHARACTER_TYPE> _codes;