From 96a5bc31089dadca466327794626991231609340 Mon Sep 17 00:00:00 2001
From: rjawor <rjawor@amu.edu.pl>
Date: Fri, 28 Apr 2017 13:48:32 +0200
Subject: [PATCH] Keep the original sentence in TokenizedSentence

---
 concordia/tokenized_sentence.cpp |  3 ++-
 concordia/tokenized_sentence.hpp | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/concordia/tokenized_sentence.cpp b/concordia/tokenized_sentence.cpp
index e32b813..bd2a189 100644
--- a/concordia/tokenized_sentence.cpp
+++ b/concordia/tokenized_sentence.cpp
@@ -7,7 +7,8 @@
 #include <boost/algorithm/string.hpp>
 
 TokenizedSentence::TokenizedSentence(std::string sentence):
-                                         _sentence(sentence) {
+                                         _sentence(sentence),
+                                         _originalSentence(sentence) {
 }
 
 TokenizedSentence::~TokenizedSentence() {
diff --git a/concordia/tokenized_sentence.hpp b/concordia/tokenized_sentence.hpp
index e2a3eab..fd7648d 100644
--- a/concordia/tokenized_sentence.hpp
+++ b/concordia/tokenized_sentence.hpp
@@ -35,13 +35,22 @@ public:
     */
     virtual ~TokenizedSentence();
 
-    /*! Getter for the string sentence, which is used for extracting tokens.
+    /*! Getter for the string sentence,
+        which might have been modified during tokenization.
       \returns sentence
     */
     std::string getSentence() const {
         return _sentence;
     }
 
+    /*! Getter for the original string sentence (a copy of the constructor
+        argument), which was used for extracting tokens.
+      \returns originalSentence, by value
+    */
+    std::string getOriginalSentence() const {
+        return _originalSentence;
+    }
+
     /*! Method for getting tokenized sentence in a string format (
     tokens separated by single spaces.
       \returns tokenized sentence
@@ -126,6 +135,8 @@ public:
 private:
     std::string _sentence;
 
+    std::string _originalSentence;  // set from the constructor argument
+
     std::list<TokenAnnotation> _tokenAnnotations;
 
     std::vector<INDEX_CHARACTER_TYPE> _codes;