original sentence in tokenized sentence

2017-04-28 13:48:32 +02:00 · 2017-04-28 13:48:32 +02:00 · 96a5bc3108
commit 96a5bc3108
parent 4faae4e91a
2 changed files with 14 additions and 2 deletions
--- a/concordia/tokenized_sentence.cpp
+++ b/concordia/tokenized_sentence.cpp
@ -7,7 +7,8 @@
 #include <boost/algorithm/string.hpp>

 TokenizedSentence::TokenizedSentence(std::string sentence):
-                                         _sentence(sentence) {
+                                         _sentence(sentence),
+                                         _originalSentence(sentence) {
 }

 TokenizedSentence::~TokenizedSentence() {
--- a/concordia/tokenized_sentence.hpp
+++ b/concordia/tokenized_sentence.hpp
@ -35,13 +35,22 @@ public:
    */
    virtual ~TokenizedSentence();

-    /*! Getter for the string sentence, which is used for extracting tokens.
+    /*! Getter for the string sentence,
+        which might have been modified during tokenization.
      \returns sentence
    */
    std::string getSentence() const {
        return _sentence;
    }

+    /*! Getter for the original string sentence,
+        which was used for extracting tokens.
+      \returns originalSentence
+    */
+    std::string getOriginalSentence() const {
+        return _originalSentence;
+    }
+
    /*! Method for getting the tokenized sentence in a string format
    (tokens separated by single spaces).
      \returns tokenized sentence
@ -126,6 +135,8 @@ public:
 private:
    std::string _sentence;

+    std::string _originalSentence;
+
    std::list<TokenAnnotation> _tokenAnnotations;

    std::vector<INDEX_CHARACTER_TYPE> _codes;