From 96a5bc31089dadca466327794626991231609340 Mon Sep 17 00:00:00 2001 From: rjawor Date: Fri, 28 Apr 2017 13:48:32 +0200 Subject: [PATCH] original sentence in tokenized sentence --- concordia/tokenized_sentence.cpp | 3 ++- concordia/tokenized_sentence.hpp | 13 ++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/concordia/tokenized_sentence.cpp b/concordia/tokenized_sentence.cpp index e32b813..bd2a189 100644 --- a/concordia/tokenized_sentence.cpp +++ b/concordia/tokenized_sentence.cpp @@ -7,7 +7,8 @@ #include TokenizedSentence::TokenizedSentence(std::string sentence): - _sentence(sentence) { + _sentence(sentence), + _originalSentence(sentence) { } TokenizedSentence::~TokenizedSentence() { diff --git a/concordia/tokenized_sentence.hpp b/concordia/tokenized_sentence.hpp index e2a3eab..fd7648d 100644 --- a/concordia/tokenized_sentence.hpp +++ b/concordia/tokenized_sentence.hpp @@ -35,13 +35,22 @@ public: */ virtual ~TokenizedSentence(); - /*! Getter for the string sentence, which is used for extracting tokens. + /*! Getter for the string sentence, + which might have been modified during tokenization. \returns sentence */ std::string getSentence() const { return _sentence; } + /*! Getter for the original string sentence, + which was used for extracting tokens. + \returns originalSentence + */ + std::string getOriginalSentence() const { + return _originalSentence; + } + /*! Method for getting tokenized sentence in a string format ( tokens separated by single spaces. \returns tokenized sentence @@ -126,6 +135,8 @@ public: private: std::string _sentence; + std::string _originalSentence; + std::list _tokenAnnotations; std::vector _codes;