original sentence in tokenized sentence
This commit is contained in:
parent
4faae4e91a
commit
96a5bc3108
@ -7,7 +7,8 @@
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
TokenizedSentence::TokenizedSentence(std::string sentence):
|
||||
_sentence(sentence) {
|
||||
_sentence(sentence),
|
||||
_originalSentence(sentence) {
|
||||
}
|
||||
|
||||
TokenizedSentence::~TokenizedSentence() {
|
||||
|
@ -35,13 +35,22 @@ public:
|
||||
*/
|
||||
virtual ~TokenizedSentence();
|
||||
|
||||
/*! Getter for the string sentence, which is used for extracting tokens.
|
||||
/*! Getter for the string sentence,
|
||||
which might have been modified during tokenization.
|
||||
\returns sentence
|
||||
*/
|
||||
std::string getSentence() const {
|
||||
return _sentence;
|
||||
}
|
||||
|
||||
/*! Getter for the original string sentence,
|
||||
which was used for extracting tokens.
|
||||
\returns originalSentence
|
||||
*/
|
||||
std::string getOriginalSentence() const {
|
||||
return _originalSentence;
|
||||
}
|
||||
|
||||
/*! Method for getting tokenized sentence in a string format (
|
||||
tokens separated by single spaces).
|
||||
\returns tokenized sentence
|
||||
@ -126,6 +135,8 @@ public:
|
||||
private:
|
||||
std::string _sentence;
|
||||
|
||||
std::string _originalSentence;
|
||||
|
||||
std::list<TokenAnnotation> _tokenAnnotations;
|
||||
|
||||
std::vector<INDEX_CHARACTER_TYPE> _codes;
|
||||
|
Loading…
Reference in New Issue
Block a user