From 68fecaddf8374b29810ebcf0ac32bcc4d0232d85 Mon Sep 17 00:00:00 2001
From: rjawor <rjawor@amu.edu.pl>
Date: Wed, 19 Aug 2015 20:49:26 +0200
Subject: [PATCH] adding all tokenized examples

---
 TODO.txt                                |  1 +
 concordia-console/concordia-console.cpp |  4 +-
 concordia/concordia.cpp                 | 34 ++++++++++---
 concordia/concordia.hpp                 | 31 +++++++++---
 concordia/concordia_index.cpp           | 47 +++++++++++++-----
 concordia/concordia_index.hpp           | 33 ++++++++++---
 concordia/concordia_search_result.cpp   |  8 ++--
 concordia/concordia_search_result.hpp   |  7 ++-
 concordia/hash_generator.cpp            |  9 ++--
 concordia/hash_generator.hpp            |  5 +-
 concordia/index_searcher.cpp            |  9 ++--
 concordia/regex_rule.cpp                |  8 ++--
 concordia/regex_rule.hpp                |  2 +-
 concordia/sentence_tokenizer.cpp        |  8 ++--
 concordia/sentence_tokenizer.hpp        |  3 +-
 concordia/t/test_concordia.cpp          | 64 +++++++++++++++++--------
 concordia/t/test_concordia_searcher.cpp |  2 +-
 concordia/t/test_hash_generator.cpp     | 10 ++--
 concordia/t/test_regex_rule.cpp         | 36 +++++++-------
 concordia/t/test_sentence_tokenizer.cpp | 18 +++----
 20 files changed, 220 insertions(+), 119 deletions(-)
diff --git a/TODO.txt b/TODO.txt
index 810d6cd..14b9f38 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,4 +1,5 @@
 ---------------------------- Developer's private notes (language may vary, bo tak czasem wygodniej) -----------------------------
+DONE - change the arguments of addExample* functions to const reference to TokenizedSentence (not boost::shared_ptr<TokenizedSentence>)
 - multiple indexes based on different hashes. One can be word-net base forms, other - pos-tags and so on. Develop a method of combining results.
 IN PROGRESS - document the code (classes, cfg files) and update tutorial
 - wiele pamięci tłumaczeń: można je przechowywać w jednym indeksie, ale trzeba dodać tm_id jako metadane zdania (np. zamiast example length). Przy wyszukiwaniu należy filtrować wyniki, aby pochodziły z odpowiedniej pamięci tłumaczeń.
diff --git a/concordia-console/concordia-console.cpp b/concordia-console/concordia-console.cpp
index 8dcadd4..1702262 100644
--- a/concordia-console/concordia-console.cpp
+++ b/concordia-console/concordia-console.cpp
@@ -29,7 +29,7 @@ void checkConcordiaResults(
     long lineIndex = 1;
     BOOST_FOREACH(ConcordiaSearchResult result, results) {
         SUFFIX_MARKER_TYPE patternSize =
-                    result.getTokenizedPattern()->getTokens().size();
+                    result.getTokenizedPattern().getTokens().size();
         if (patternSize > 0) {
             if (result.getBestOverlay().size() != 1) {
                 reportError(baseLineCount + lineIndex,
@@ -203,7 +203,7 @@ int main(int argc, char** argv) {
 
             std::cout << "\tPattern used: " << std::endl << "\t\t";
             BOOST_FOREACH(TokenAnnotation annotation,
-                                  result->getTokenizedPattern()->getTokens()) {
+                                  result->getTokenizedPattern().getTokens()) {
                 std::cout << annotation.getValue() << " ";
             }
             std::cout << std::endl;
diff --git a/concordia/concordia.cpp b/concordia/concordia.cpp
index af1a3c3..0972333 100644
--- a/concordia/concordia.cpp
+++ b/concordia/concordia.cpp
@@ -1,4 +1,5 @@
 #include <sstream>
+#include <boost/foreach.hpp>
 
 #include "concordia/concordia.hpp"
 #include "concordia/common/config.hpp"
@@ -42,19 +43,31 @@ std::string _createLibraryVersion() {
     return version.str();
 }
 
-boost::shared_ptr<TokenizedSentence>
+TokenizedSentence
             Concordia::tokenize(const std::string & sentence)
                                   throw(ConcordiaException) {
-    boost::shared_ptr<TokenizedSentence> result =
+    TokenizedSentence result =
                 _hashGenerator->generateHash(sentence);
     _hashGenerator->serializeWordMap();
     return result;
 }
 
+std::vector<TokenizedSentence> Concordia::tokenizeAll(
+                     const std::vector<std::string> & sentences)
+                                     throw(ConcordiaException) {
+    std::vector<TokenizedSentence> result;
+    BOOST_FOREACH(std::string sentence, sentences) {
+        result.push_back(_hashGenerator->generateHash(sentence));
+    }
+
+    _hashGenerator->serializeWordMap();
+    return result;
+}
+
 
 // Sentences are written to disk and added to T.
 // SA is generated on command by other methods.
-boost::shared_ptr<TokenizedSentence> Concordia::addExample(
+TokenizedSentence Concordia::addExample(
                                       const Example & example)
                                       throw(ConcordiaException) {
     return _index->addExample(_hashGenerator, _T, _markers, example);
@@ -63,13 +76,21 @@ boost::shared_ptr<TokenizedSentence> Concordia::addExample(
 // Sentences are written to disk and added to T.
 // SA is generated on command by other methods.
 void Concordia::addTokenizedExample(
-                    boost::shared_ptr<TokenizedSentence> tokenizedSentence,
-                    SUFFIX_MARKER_TYPE id)
+                    const TokenizedSentence & tokenizedSentence,
+                    const SUFFIX_MARKER_TYPE id)
                                               throw(ConcordiaException) {
     _index->addTokenizedExample(_hashGenerator, _T,
                                 _markers, tokenizedSentence, id);
 }
 
+void Concordia::addAllTokenizedExamples(
+                const std::vector<TokenizedSentence> & tokenizedSentences,
+                const std::vector<SUFFIX_MARKER_TYPE> & ids)
+                                              throw(ConcordiaException) {
+    _index->addAllTokenizedExamples(_hashGenerator, _T,
+                                _markers, tokenizedSentences, ids);
+}
+
 
 // Sentences are written to disk and added to T.
 // SA is generated on command by other methods.
@@ -188,8 +209,7 @@ boost::shared_ptr<ConcordiaSearchResult> Concordia::concordiaSearch(
     } else {
         std::string empty;
         return boost::shared_ptr<ConcordiaSearchResult>(
-            new ConcordiaSearchResult(boost::shared_ptr<TokenizedSentence>(
-                                            new TokenizedSentence(empty))));
+            new ConcordiaSearchResult(TokenizedSentence(empty)));
     }
 }
 
diff --git a/concordia/concordia.hpp b/concordia/concordia.hpp
index 33628e8..3deb2b1 100644
--- a/concordia/concordia.hpp
+++ b/concordia/concordia.hpp
@@ -58,7 +58,16 @@ public:
                containing information about original word positions
       \throws ConcordiaException
     */
-    boost::shared_ptr<TokenizedSentence> tokenize(const std::string & sentence)
+    TokenizedSentence tokenize(const std::string & sentence)
+                                                     throw(ConcordiaException);
+
+    /*! Tokenizes all the given sentences.
+      \param sentences vector of sentences to be tokenized
+      \returns vector of tokenized sentence objects
+      \throws ConcordiaException
+    */
+    std::vector<TokenizedSentence> tokenizeAll(
+                                   const std::vector<std::string> & sentences)
                                                      throw(ConcordiaException);
 
     /*! Adds an Example to the index.
@@ -67,17 +76,27 @@ public:
                containing information about original word positions
       \throws ConcordiaException
     */
-    boost::shared_ptr<TokenizedSentence> addExample(const Example & example)
-                                                   throw(ConcordiaException);
+    TokenizedSentence addExample(const Example & example)
+                                          throw(ConcordiaException);
 
     /*! Adds a tokenized example to the index.
       \param tokenizedSentence tokenized sentence to be added
-      \param id of the sentence to be added
+      \param id id of the sentence to be added
       \throws ConcordiaException
     */
     void addTokenizedExample(
-                    boost::shared_ptr<TokenizedSentence> tokenizedSentence,
-                    SUFFIX_MARKER_TYPE id)
+                    const TokenizedSentence & tokenizedSentence,
+                    const SUFFIX_MARKER_TYPE id)
+                                                  throw(ConcordiaException);
+
+    /*! Adds multiple tokenized examples to the index.
+      \param tokenizedSentences vector of tokenized sentences to be added
+      \param ids vector of ids of the sentences to be added
+      \throws ConcordiaException
+    */
+    void addAllTokenizedExamples(
+                    const std::vector<TokenizedSentence> & tokenizedSentences,
+                    const std::vector<SUFFIX_MARKER_TYPE> & ids)
                                                   throw(ConcordiaException);
 
     /*! Adds multiple examples to the index.
diff --git a/concordia/concordia_index.cpp b/concordia/concordia_index.cpp
index fc7493e..3eb98d7 100644
--- a/concordia/concordia_index.cpp
+++ b/concordia/concordia_index.cpp
@@ -4,6 +4,8 @@
 #include "concordia/common/config.hpp"
 #include <boost/filesystem.hpp>
 #include <boost/foreach.hpp>
+#include <boost/make_shared.hpp>
+
 #include <iostream>
 #include <climits>
 
@@ -48,10 +50,10 @@ std::vector<TokenizedSentence> ConcordiaIndex::addAllExamples(
 
     std::vector<TokenizedSentence> hashedPatterns;
     BOOST_FOREACH(Example example, examples) {
-        boost::shared_ptr<TokenizedSentence> hashedPattern =
+        TokenizedSentence hashedPattern =
              _addSingleExample(hashedIndexFile, markersFile, hashGenerator,
                                                       T, markers, example);
-        hashedPatterns.push_back(*hashedPattern);
+        hashedPatterns.push_back(hashedPattern);
     }
 
     hashedIndexFile.close();
@@ -61,7 +63,7 @@ std::vector<TokenizedSentence> ConcordiaIndex::addAllExamples(
     return hashedPatterns;
 }
 
-boost::shared_ptr<TokenizedSentence> ConcordiaIndex::addExample(
+TokenizedSentence ConcordiaIndex::addExample(
                 boost::shared_ptr<HashGenerator> hashGenerator,
                 boost::shared_ptr<std::vector<sauchar_t> > T,
                 boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
@@ -72,7 +74,7 @@ boost::shared_ptr<TokenizedSentence> ConcordiaIndex::addExample(
     std::ofstream markersFile;
     markersFile.open(_markersFilePath.c_str(), std::ios::out|
                                              std::ios::app|std::ios::binary);
-    boost::shared_ptr<TokenizedSentence> hashedPattern =
+    TokenizedSentence hashedPattern =
              _addSingleExample(hashedIndexFile, markersFile, hashGenerator,
                                                       T, markers, example);
     hashedIndexFile.close();
@@ -86,8 +88,8 @@ void ConcordiaIndex::addTokenizedExample(
             boost::shared_ptr<HashGenerator> hashGenerator,
             boost::shared_ptr<std::vector<sauchar_t> > T,
             boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
-            boost::shared_ptr<TokenizedSentence> tokenizedSentence,
-            SUFFIX_MARKER_TYPE id) {
+            const TokenizedSentence & tokenizedSentence,
+            const SUFFIX_MARKER_TYPE id) {
     std::ofstream hashedIndexFile;
     hashedIndexFile.open(_hashedIndexFilePath.c_str(), std::ios::out|
                                              std::ios::app|std::ios::binary);
@@ -100,15 +102,38 @@ void ConcordiaIndex::addTokenizedExample(
     markersFile.close();
 }
 
+void ConcordiaIndex::addAllTokenizedExamples(
+            boost::shared_ptr<HashGenerator> hashGenerator,
+            boost::shared_ptr<std::vector<sauchar_t> > T,
+            boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
+            const std::vector<TokenizedSentence> & tokenizedSentences,
+            const std::vector<SUFFIX_MARKER_TYPE> & ids) {
+    std::ofstream hashedIndexFile;
+    hashedIndexFile.open(_hashedIndexFilePath.c_str(), std::ios::out|
+                                             std::ios::app|std::ios::binary);
+    std::ofstream markersFile;
+    markersFile.open(_markersFilePath.c_str(), std::ios::out|
+                                             std::ios::app|std::ios::binary);
+
+    int index = 0;
+    BOOST_FOREACH(TokenizedSentence tokenizedSentence, tokenizedSentences) {
+        _addSingleTokenizedExample(hashedIndexFile, markersFile, hashGenerator,
+                                  T, markers, tokenizedSentence, ids.at(index));
+        index++;
+    }
+    hashedIndexFile.close();
+    markersFile.close();
+}
+
 void ConcordiaIndex::_addSingleTokenizedExample(
                    std::ofstream & hashedIndexFile,
                    std::ofstream & markersFile,
                    boost::shared_ptr<HashGenerator> hashGenerator,
                    boost::shared_ptr<std::vector<sauchar_t> > T,
                    boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
-                   boost::shared_ptr<TokenizedSentence> tokenizedSentence,
-                   SUFFIX_MARKER_TYPE id) {
-    std::vector<INDEX_CHARACTER_TYPE> hash = tokenizedSentence->getCodes();
+                   const TokenizedSentence & tokenizedSentence,
+                   const SUFFIX_MARKER_TYPE id) {
+    std::vector<INDEX_CHARACTER_TYPE> hash = tokenizedSentence.getCodes();
 
     int offset = 0;
     for (std::vector<INDEX_CHARACTER_TYPE>::iterator it = hash.begin();
@@ -139,14 +164,14 @@ void ConcordiaIndex::_addSingleTokenizedExample(
     markers->push_back(sentenceBoundaryMA);
 }
 
-boost::shared_ptr<TokenizedSentence> ConcordiaIndex::_addSingleExample(
+TokenizedSentence ConcordiaIndex::_addSingleExample(
                    std::ofstream & hashedIndexFile,
                    std::ofstream & markersFile,
                    boost::shared_ptr<HashGenerator> hashGenerator,
                    boost::shared_ptr<std::vector<sauchar_t> > T,
                    boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
                    const Example & example) {
-    boost::shared_ptr<TokenizedSentence> hashedPattern =
+    TokenizedSentence hashedPattern =
                     hashGenerator->generateHash(example.getSentence());
     _addSingleTokenizedExample(hashedIndexFile, markersFile, hashGenerator,
                                T, markers, hashedPattern, example.getId());
diff --git a/concordia/concordia_index.hpp b/concordia/concordia_index.hpp
index f59469b..c3dd27a 100644
--- a/concordia/concordia_index.hpp
+++ b/concordia/concordia_index.hpp
@@ -53,7 +53,7 @@ public:
       \returns tokenized example
       \throws ConcordiaException
     */
-    boost::shared_ptr<TokenizedSentence> addExample(
+    TokenizedSentence addExample(
                 boost::shared_ptr<HashGenerator> hashGenerator,
                 boost::shared_ptr<std::vector<sauchar_t> > T,
                 boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
@@ -63,7 +63,6 @@ public:
         and markers array are appended with the example.
         At the same time, HDD versions of these
         two data structures are also appended with the same example.
-        The method returns a tokenized version of the example.
       \param hashGenerator hash generator to be used to prepare the hash
              of the example
       \param T RAM-based hash index to be appended to
@@ -77,8 +76,28 @@ public:
                 boost::shared_ptr<HashGenerator> hashGenerator,
                 boost::shared_ptr<std::vector<sauchar_t> > T,
                 boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
-                boost::shared_ptr<TokenizedSentence> tokenizedSentence,
-                SUFFIX_MARKER_TYPE id);
+                const TokenizedSentence & tokenizedSentence,
+                const SUFFIX_MARKER_TYPE id);
+
+    /*! Adds multiple tokenized examples to the index. Hashed index
+        and markers array are appended with the examples.
+        At the same time, HDD versions of these
+        two data structures are also appended with the same examples.
+      \param hashGenerator hash generator to be used to prepare the hash
+             of the example
+      \param T RAM-based hash index to be appended to
+      \param markers RAM-based markers array to be appended to
+      \param tokenizedSentences vector of tokenized sentences to be added
+      \param ids vector of ids of the sentences to be added;
+             ids.at(i) is used as the id of tokenizedSentences.at(i)
+      \throws ConcordiaException
+    */
+    void addAllTokenizedExamples(
+                boost::shared_ptr<HashGenerator> hashGenerator,
+                boost::shared_ptr<std::vector<sauchar_t> > T,
+                boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
+                const std::vector<TokenizedSentence> & tokenizedSentences,
+                const std::vector<SUFFIX_MARKER_TYPE> & ids);
 
     /*! Adds multiple examples to the index. Examples are first hashed using
         the hash generator passed to this method. Then, hashed index
@@ -114,10 +133,10 @@ private:
                 boost::shared_ptr<HashGenerator> hashGenerator,
                 boost::shared_ptr<std::vector<sauchar_t> > T,
                 boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
-                boost::shared_ptr<TokenizedSentence> tokenizedSentence,
-                SUFFIX_MARKER_TYPE id);
+                const TokenizedSentence & tokenizedSentence,
+                const SUFFIX_MARKER_TYPE id);
 
-    boost::shared_ptr<TokenizedSentence> _addSingleExample(
+    TokenizedSentence _addSingleExample(
                 std::ofstream & hashedIndexFile,
                 std::ofstream & markersFile,
                 boost::shared_ptr<HashGenerator> hashGenerator,
diff --git a/concordia/concordia_search_result.cpp b/concordia/concordia_search_result.cpp
index 410ba7c..dbd3bc3 100644
--- a/concordia/concordia_search_result.cpp
+++ b/concordia/concordia_search_result.cpp
@@ -4,9 +4,9 @@
 #include <algorithm>
 
 ConcordiaSearchResult::ConcordiaSearchResult(
-                boost::shared_ptr<TokenizedSentence> tokenizedPattern):
-                                   _tokenizedPattern(tokenizedPattern),
-                                   _bestOverlayScore(0) {
+                TokenizedSentence tokenizedPattern):
+                _tokenizedPattern(tokenizedPattern),
+                _bestOverlayScore(0) {
 }
 
 ConcordiaSearchResult::~ConcordiaSearchResult() {
@@ -27,7 +27,7 @@ void ConcordiaSearchResult::computeBestOverlay() {
     // the fragments are already sorted by their ends, ascending
     _checkPossibleOverlays(std::vector<MatchedPatternFragment>(),
                            -1,
-                           _tokenizedPattern->getTokens().size());
+                           _tokenizedPattern.getTokens().size());
 }
 
 void ConcordiaSearchResult::_checkPossibleOverlays(
diff --git a/concordia/concordia_search_result.hpp b/concordia/concordia_search_result.hpp
index 41fa7e4..da4c751 100644
--- a/concordia/concordia_search_result.hpp
+++ b/concordia/concordia_search_result.hpp
@@ -26,8 +26,7 @@ public:
     /*! Constructor.
       \param tokenVector tokenized pattern which was used for searching
     */
-    explicit ConcordiaSearchResult(
-                boost::shared_ptr<TokenizedSentence> tokenizedPattern);
+    explicit ConcordiaSearchResult(TokenizedSentence tokenizedPattern);
 
     /*! Destructor.
     */
@@ -51,7 +50,7 @@ public:
     /*! Getter for tokenized pattern.
         \returns tokenized search pattern
     */
-    boost::shared_ptr<TokenizedSentence> getTokenizedPattern() const {
+    TokenizedSentence getTokenizedPattern() const {
         return _tokenizedPattern;
     }
 
@@ -82,7 +81,7 @@ private:
                 SUFFIX_MARKER_TYPE lastAddedPos,
                 SUFFIX_MARKER_TYPE patternSize);
 
-    boost::shared_ptr<TokenizedSentence> _tokenizedPattern;
+    TokenizedSentence _tokenizedPattern;
 
     std::vector<MatchedPatternFragment> _matchedPatternFragments;
 
diff --git a/concordia/hash_generator.cpp b/concordia/hash_generator.cpp
index 8b93ce4..89d5997 100644
--- a/concordia/hash_generator.cpp
+++ b/concordia/hash_generator.cpp
@@ -27,13 +27,12 @@ HashGenerator::HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
 HashGenerator::~HashGenerator() {
 }
 
-boost::shared_ptr<TokenizedSentence> HashGenerator::generateHash(
+TokenizedSentence HashGenerator::generateHash(
                      const std::string & sentence) throw(ConcordiaException) {
-    boost::shared_ptr<TokenizedSentence> ts =
-                                    _sentenceTokenizer->tokenize(sentence);
-    ts->generateHash(_wordMap);
+    TokenizedSentence ts = _sentenceTokenizer->tokenize(sentence);
+    ts.generateHash(_wordMap);
 
-    if (ts->getTokens().size() > Utils::maxSentenceSize) {
+    if (ts.getTokens().size() > Utils::maxSentenceSize) {
         throw ConcordiaException("Trying to add too long sentence.");
     }
 
diff --git a/concordia/hash_generator.hpp b/concordia/hash_generator.hpp
index 6528dcf..e94f8d6 100644
--- a/concordia/hash_generator.hpp
+++ b/concordia/hash_generator.hpp
@@ -44,9 +44,8 @@ public:
       \param sentence sentence to generate hash from
       \returns tokenized sentence, containing the hash
     */
-    boost::shared_ptr<TokenizedSentence> generateHash(
-                                const std::string & sentence)
-                                throw(ConcordiaException);
+    TokenizedSentence generateHash(const std::string & sentence)
+                                       throw(ConcordiaException);
 
     /*!
         Saves the contents of current WordMap to HDD.
diff --git a/concordia/index_searcher.cpp b/concordia/index_searcher.cpp
index 6012ba1..79d5b48 100644
--- a/concordia/index_searcher.cpp
+++ b/concordia/index_searcher.cpp
@@ -23,7 +23,7 @@ std::vector<MatchedPatternFragment> IndexSearcher::simpleSearch(
 
     int left;
     std::vector<INDEX_CHARACTER_TYPE> hash =
-                            hashGenerator->generateHash(pattern)->getCodes();
+                            hashGenerator->generateHash(pattern).getCodes();
     saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE);
     sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
 
@@ -60,7 +60,7 @@ std::vector<AnubisSearchResult> IndexSearcher::anubisSearch(
                   boost::shared_ptr<std::vector<saidx_t> > SA,
                   const std::string & pattern) throw(ConcordiaException) {
     std::vector<INDEX_CHARACTER_TYPE> hash =
-                         hashGenerator->generateHash(pattern)->getCodes();
+                         hashGenerator->generateHash(pattern).getCodes();
     return _concordiaSearcher->anubisSearch(config, T, markers, SA, hash);
 }
 
@@ -70,13 +70,12 @@ boost::shared_ptr<ConcordiaSearchResult> IndexSearcher::concordiaSearch(
                   boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
                   boost::shared_ptr<std::vector<saidx_t> > SA,
                   const std::string & pattern) throw(ConcordiaException) {
-    boost::shared_ptr<TokenizedSentence> hashedPattern =
-                                 hashGenerator->generateHash(pattern);
+    TokenizedSentence hashedPattern = hashGenerator->generateHash(pattern);
     boost::shared_ptr<ConcordiaSearchResult> result =
      boost::shared_ptr<ConcordiaSearchResult>(
        new ConcordiaSearchResult(hashedPattern));
 
     _concordiaSearcher->concordiaSearch(result, T, markers,
-                                        SA, hashedPattern->getCodes());
+                                        SA, hashedPattern.getCodes());
     return result;
 }
diff --git a/concordia/regex_rule.cpp b/concordia/regex_rule.cpp
index 04bb825..aad5e84 100644
--- a/concordia/regex_rule.cpp
+++ b/concordia/regex_rule.cpp
@@ -36,9 +36,9 @@ RegexRule::RegexRule(std::string patternString,
 RegexRule::~RegexRule() {
 }
 
-void RegexRule::apply(boost::shared_ptr<TokenizedSentence> sentence) {
+void RegexRule::apply(TokenizedSentence & sentence) {
     try {
-        UnicodeString s(sentence->getSentence().c_str());
+        UnicodeString s(sentence.getSentence().c_str());
         boost::u32regex_iterator<const UChar*> begin(
                              boost::make_u32regex_iterator(s, _pattern));
         boost::u32regex_iterator<const UChar*> end;
@@ -58,12 +58,12 @@ void RegexRule::apply(boost::shared_ptr<TokenizedSentence> sentence) {
                                       _annotationType, value);
             annotations.push_back(annotation);
         }
-        sentence->addAnnotations(annotations);
+        sentence.addAnnotations(annotations);
     } catch(const std::exception & e) {
         std::stringstream ss;
         ss << "Exception while applying regex rule: "
                           << _annotationType << " to text: "
-                          << sentence->getSentence();
+                          << sentence.getSentence();
         ss << ", message: " << e.what();
         throw ConcordiaException(ss.str());
     }
diff --git a/concordia/regex_rule.hpp b/concordia/regex_rule.hpp
index ce62fd1..878b088 100644
--- a/concordia/regex_rule.hpp
+++ b/concordia/regex_rule.hpp
@@ -42,7 +42,7 @@ public:
     /*! Applies regex annotation on tokenized sentence.
       \param sentence the input sentence
     */
-    void apply(boost::shared_ptr<TokenizedSentence> sentence);
+    void apply(TokenizedSentence & sentence);
 
 private:
     int _annotationType;
diff --git a/concordia/sentence_tokenizer.cpp b/concordia/sentence_tokenizer.cpp
index 9ffe173..0666a5d 100644
--- a/concordia/sentence_tokenizer.cpp
+++ b/concordia/sentence_tokenizer.cpp
@@ -24,10 +24,8 @@ SentenceTokenizer::SentenceTokenizer(
 SentenceTokenizer::~SentenceTokenizer() {
 }
 
-boost::shared_ptr<TokenizedSentence>
-              SentenceTokenizer::tokenize(const std::string & sentence) {
-    boost::shared_ptr<TokenizedSentence>
-                    result(new TokenizedSentence(sentence));
+TokenizedSentence SentenceTokenizer::tokenize(const std::string & sentence) {
+    TokenizedSentence result(sentence);
 
     _htmlTags->apply(result);
 
@@ -35,7 +33,7 @@ boost::shared_ptr<TokenizedSentence>
         neRule.apply(result);
     }
 
-    result->toLowerCase();
+    result.toLowerCase();
 
     if (_stopWordsEnabled) {
         _stopWords->apply(result);
diff --git a/concordia/sentence_tokenizer.hpp b/concordia/sentence_tokenizer.hpp
index 7e354eb..6d92f1c 100644
--- a/concordia/sentence_tokenizer.hpp
+++ b/concordia/sentence_tokenizer.hpp
@@ -36,8 +36,7 @@ public:
       \param sentence input sentence
       \returns tokenized sentence object build on the input sentence
     */
-    boost::shared_ptr<TokenizedSentence>
-                                   tokenize(const std::string & sentence);
+    TokenizedSentence tokenize(const std::string & sentence);
 
 private:
     void _createNeRules(std::string & namedEntitiesPath);
diff --git a/concordia/t/test_concordia.cpp b/concordia/t/test_concordia.cpp
index d5e5907..21548a0 100644
--- a/concordia/t/test_concordia.cpp
+++ b/concordia/t/test_concordia.cpp
@@ -27,17 +27,17 @@ BOOST_AUTO_TEST_CASE( ConcordiaVersion )
 BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
 {
     Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
-    boost::shared_ptr<TokenizedSentence> ts = concordia.addExample(Example("Ala posiada kota",14));
+    TokenizedSentence ts = concordia.addExample(Example("Ala posiada kota",14));
     /*
     0,3 type: 1 value: ala
     4,11 type: 1 value: posiada
     12,16 type: 1 value: kota
     */
-    BOOST_CHECK_EQUAL(ts->getTokens().size(), 3);
-    BOOST_CHECK_EQUAL(ts->getTokens().at(1).getStart(), 4);
-    BOOST_CHECK_EQUAL(ts->getTokens().at(1).getEnd(), 11);
-    BOOST_CHECK_EQUAL(ts->getTokens().at(1).getType(), 1);
-    BOOST_CHECK_EQUAL(ts->getTokens().at(1).getValue(), "posiada");
+    BOOST_CHECK_EQUAL(ts.getTokens().size(), 3);
+    BOOST_CHECK_EQUAL(ts.getTokens().at(1).getStart(), 4);
+    BOOST_CHECK_EQUAL(ts.getTokens().at(1).getEnd(), 11);
+    BOOST_CHECK_EQUAL(ts.getTokens().at(1).getType(), 1);
+    BOOST_CHECK_EQUAL(ts.getTokens().at(1).getValue(), "posiada");
     
     concordia.addExample(Example("Ala posiada rysia",51));
     concordia.addExample(Example("Marysia posiada rysia",123));
@@ -293,24 +293,36 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
 BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
 {
     Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
+    /*
     concordia.addExample(Example("Alice has a cat", 56));
     concordia.addExample(Example("Alice has a dog", 23));
     concordia.addExample(Example("New test product has a mistake", 321));
-    boost::shared_ptr<TokenizedSentence> ts = concordia.tokenize("This is just testing and it has nothing to do with the above");
+    */
+    std::vector<std::string> sentences;
+    std::vector<SUFFIX_MARKER_TYPE> ids;
+    sentences.push_back("Alice has a cat");
+    ids.push_back(56);
+    sentences.push_back("Alice has a dog");
+    ids.push_back(23);
+    sentences.push_back("New test product has a mistake");
+    ids.push_back(321);
+    std::vector<TokenizedSentence> tokenizedSentences = concordia.tokenizeAll(sentences);
+    concordia.addAllTokenizedExamples(tokenizedSentences, ids);
+
+    TokenizedSentence ts = concordia.tokenize("This is just testing and it has nothing to do with the above");
     concordia.addTokenizedExample(ts, 14);
+
     concordia.refreshSAfromRAM();
         
     boost::shared_ptr<ConcordiaSearchResult> searchResult1 = concordia.concordiaSearch("Our new test product has nothing to do with computers");
     // best overlay: 
 
-    /*
     BOOST_CHECK_EQUAL(searchResult1->getBestOverlay().size(), 2);
-    BOOST_CHECK_CLOSE(searchResult1->getBestOverlayScore(), 0.695, 0.1);
-    BOOST_CHECK_EQUAL(searchResult1->getBestOverlay().at(0).getStart(), 0);
-    BOOST_CHECK_EQUAL(searchResult1->getBestOverlay().at(0).getEnd(), 2);
-    BOOST_CHECK_EQUAL(searchResult1->getBestOverlay().at(1).getStart(), 2);
-    BOOST_CHECK_EQUAL(searchResult1->getBestOverlay().at(1).getEnd(), 3);
-    */
+    BOOST_CHECK_CLOSE(searchResult1->getBestOverlayScore(), 0.537, 0.1);
+    BOOST_CHECK_EQUAL(searchResult1->getBestOverlay().at(0).getStart(), 1);
+    BOOST_CHECK_EQUAL(searchResult1->getBestOverlay().at(0).getEnd(), 5);
+    BOOST_CHECK_EQUAL(searchResult1->getBestOverlay().at(1).getStart(), 5);
+    BOOST_CHECK_EQUAL(searchResult1->getBestOverlay().at(1).getEnd(), 9);
 
     BOOST_CHECK_EQUAL(searchResult1->getFragments().size(), 8);
 
@@ -338,7 +350,7 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
 BOOST_AUTO_TEST_CASE( Tokenize )
 {
     Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
-    boost::shared_ptr<TokenizedSentence> ts = concordia.tokenize("  Ala    posiada kota");
+    TokenizedSentence ts = concordia.tokenize("  Ala    posiada kota");
     /*
     0,3 type: 1 value: ala
     4,11 type: 1 value: posiada
@@ -347,10 +359,22 @@ BOOST_AUTO_TEST_CASE( Tokenize )
 
     concordia.clearIndex();
 
-    BOOST_CHECK_EQUAL(ts->getTokens().size(), 3);
-    BOOST_CHECK_EQUAL(ts->getTokens().at(1).getStart(), 9);
-    BOOST_CHECK_EQUAL(ts->getTokens().at(1).getEnd(), 16);
-    BOOST_CHECK_EQUAL(ts->getTokens().at(1).getType(), 1);
-    BOOST_CHECK_EQUAL(ts->getTokens().at(1).getValue(), "posiada");
+    BOOST_CHECK_EQUAL(ts.getTokens().size(), 3);
+    BOOST_CHECK_EQUAL(ts.getTokens().at(1).getStart(), 9);
+    BOOST_CHECK_EQUAL(ts.getTokens().at(1).getEnd(), 16);
+    BOOST_CHECK_EQUAL(ts.getTokens().at(1).getType(), 1);
+    BOOST_CHECK_EQUAL(ts.getTokens().at(1).getValue(), "posiada");
+    
+    std::vector<std::string> sentences;
+    sentences.push_back("Marysia, ma rysia;");
+    sentences.push_back("Testing complete;");
+    sentences.push_back("This, is (a) weird;! sentence <>");
+    std::vector<TokenizedSentence> tokenizedSentences = concordia.tokenizeAll(sentences);
+    
+    BOOST_CHECK_EQUAL(tokenizedSentences.size(), 3);
+    BOOST_CHECK_EQUAL(tokenizedSentences.at(0).getTokens().size(), 3);
+    BOOST_CHECK_EQUAL(tokenizedSentences.at(1).getTokens().size(), 2);
+    BOOST_CHECK_EQUAL(tokenizedSentences.at(2).getTokens().size(), 5);
+    
 }
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/concordia/t/test_concordia_searcher.cpp b/concordia/t/test_concordia_searcher.cpp
index 5e2d0b2..f8ab3fa 100644
--- a/concordia/t/test_concordia_searcher.cpp
+++ b/concordia/t/test_concordia_searcher.cpp
@@ -373,7 +373,7 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest )
         
     // searching for pattern "Ola posiada rysia Marysia" (5 1 3 4)
     
-    std::vector<INDEX_CHARACTER_TYPE> pattern = hashGenerator->generateHash("Ola posiada rysia Marysia")->getCodes();
+    std::vector<INDEX_CHARACTER_TYPE> pattern = hashGenerator->generateHash("Ola posiada rysia Marysia").getCodes();
     
     boost::shared_ptr<TmMatchesMap> tmMatchesMap = searcher.getTmMatches(T, markers, SA, pattern);
 
diff --git a/concordia/t/test_hash_generator.cpp b/concordia/t/test_hash_generator.cpp
index 61e1cfe..c1fd782 100644
--- a/concordia/t/test_hash_generator.cpp
+++ b/concordia/t/test_hash_generator.cpp
@@ -23,7 +23,7 @@ BOOST_AUTO_TEST_CASE( SimpleHashTest )
     
     HashGenerator hashGenerator = HashGenerator(config);
 
-    std::vector<INDEX_CHARACTER_TYPE> hash = hashGenerator.generateHash("Ala posiada kota")->getCodes();
+    std::vector<INDEX_CHARACTER_TYPE> hash = hashGenerator.generateHash("Ala posiada kota").getCodes();
     std::vector<INDEX_CHARACTER_TYPE> expected;
     expected.push_back(0);
     expected.push_back(1);
@@ -76,7 +76,7 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
     
     HashGenerator hashGenerator1 = HashGenerator(config);
 
-    std::vector<INDEX_CHARACTER_TYPE> hash1 = hashGenerator1.generateHash("Ala posiada kota")->getCodes();
+    std::vector<INDEX_CHARACTER_TYPE> hash1 = hashGenerator1.generateHash("Ala posiada kota").getCodes();
     std::vector<INDEX_CHARACTER_TYPE> expected1;
     expected1.push_back(0);
     expected1.push_back(1);
@@ -86,7 +86,7 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
     hashGenerator1.serializeWordMap();
    
     HashGenerator hashGenerator2 = HashGenerator(config);
-    std::vector<INDEX_CHARACTER_TYPE> hash2 = hashGenerator2.generateHash("Ala posiada psa")->getCodes();
+    std::vector<INDEX_CHARACTER_TYPE> hash2 = hashGenerator2.generateHash("Ala posiada psa").getCodes();
     std::vector<INDEX_CHARACTER_TYPE> expected2;
     expected2.push_back(0);
     expected2.push_back(1);
@@ -106,9 +106,9 @@ BOOST_AUTO_TEST_CASE( TokenVectorTest )
     
     HashGenerator hashGenerator = HashGenerator(config);
 
-    boost::shared_ptr<TokenizedSentence> tokenizedSentence = hashGenerator.generateHash("12.02.2014   o  godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że <b>kierowca</b> zaparkował  samochód.");
+    TokenizedSentence tokenizedSentence = hashGenerator.generateHash("12.02.2014   o  godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że <b>kierowca</b> zaparkował  samochód.");
 
-    std::vector<TokenAnnotation> tokens = tokenizedSentence->getTokens();
+    std::vector<TokenAnnotation> tokens = tokenizedSentence.getTokens();
 
     /*    
     BOOST_FOREACH(TokenAnnotation annotation, tokens) {
diff --git a/concordia/t/test_regex_rule.cpp b/concordia/t/test_regex_rule.cpp
index ada81eb..4634f74 100644
--- a/concordia/t/test_regex_rule.cpp
+++ b/concordia/t/test_regex_rule.cpp
@@ -13,10 +13,10 @@ BOOST_AUTO_TEST_SUITE(regex_rule)
 BOOST_AUTO_TEST_CASE( SimpleAnnotation )
 {
     RegexRule rr("a", TokenAnnotation::WORD, "b");
-    boost::shared_ptr<TokenizedSentence> ts(new TokenizedSentence("xxxxxxxaxxxaxxaxaxa"));
+    TokenizedSentence ts("xxxxxxxaxxxaxxaxaxa");
     rr.apply(ts);    
-    BOOST_CHECK_EQUAL(ts->getAnnotations().size(),5);
-    std::list<TokenAnnotation> annotations = ts->getAnnotations();
+    BOOST_CHECK_EQUAL(ts.getAnnotations().size(),5);
+    std::list<TokenAnnotation> annotations = ts.getAnnotations();
     std::list<TokenAnnotation>::iterator iter = annotations.begin();
 
     BOOST_CHECK_EQUAL(iter->getStart(),7);
@@ -56,10 +56,10 @@ BOOST_AUTO_TEST_CASE( BadRegex )
 BOOST_AUTO_TEST_CASE( WeirdSymbolsAnnotation )
 {
     RegexRule rr("['\"\\\\.]", TokenAnnotation::WORD, "");
-    boost::shared_ptr<TokenizedSentence> ts(new TokenizedSentence("Don't stop believin' \\ Hold on to the feelin'."));
+    TokenizedSentence ts("Don't stop believin' \\ Hold on to the feelin'.");
     rr.apply(ts);
-    BOOST_CHECK_EQUAL(ts->getAnnotations().size(),5);
-    std::list<TokenAnnotation> annotations = ts->getAnnotations();
+    BOOST_CHECK_EQUAL(ts.getAnnotations().size(),5);
+    std::list<TokenAnnotation> annotations = ts.getAnnotations();
     std::list<TokenAnnotation>::iterator iter = annotations.begin();
 
     BOOST_CHECK_EQUAL(iter->getStart(),3);
@@ -86,10 +86,10 @@ BOOST_AUTO_TEST_CASE( WeirdSymbolsAnnotation )
 BOOST_AUTO_TEST_CASE( CaseInsensitiveAnnotation )
 {
     RegexRule rr("abc", TokenAnnotation::WORD, "xxx", false);
-    boost::shared_ptr<TokenizedSentence> ts(new TokenizedSentence("This is AbC and ABC and abc and aBC."));
+    TokenizedSentence ts("This is AbC and ABC and abc and aBC.");
     rr.apply(ts);
-    BOOST_CHECK_EQUAL(ts->getAnnotations().size(),4);
-    std::list<TokenAnnotation> annotations = ts->getAnnotations();
+    BOOST_CHECK_EQUAL(ts.getAnnotations().size(),4);
+    std::list<TokenAnnotation> annotations = ts.getAnnotations();
     std::list<TokenAnnotation>::iterator iter = annotations.begin();
 
     BOOST_CHECK_EQUAL(iter->getStart(),8);
@@ -111,10 +111,10 @@ BOOST_AUTO_TEST_CASE( CaseInsensitiveAnnotation )
 BOOST_AUTO_TEST_CASE( UnicodeAnnotation )
 {
     RegexRule rr("ą", TokenAnnotation::WORD, "x");
-    boost::shared_ptr<TokenizedSentence> ts(new TokenizedSentence("zażółć gęślą jaźń"));
+    TokenizedSentence ts("zażółć gęślą jaźń");
     rr.apply(ts);
-    BOOST_CHECK_EQUAL(ts->getAnnotations().size(),1);
-    std::list<TokenAnnotation> annotations = ts->getAnnotations();
+    BOOST_CHECK_EQUAL(ts.getAnnotations().size(),1);
+    std::list<TokenAnnotation> annotations = ts.getAnnotations();
     std::list<TokenAnnotation>::iterator iter = annotations.begin();
 
     BOOST_CHECK_EQUAL(iter->getStart(),11);
@@ -124,10 +124,10 @@ BOOST_AUTO_TEST_CASE( UnicodeAnnotation )
 BOOST_AUTO_TEST_CASE( CaseInsensitiveUnicodeAnnotation )
 {
     RegexRule rr("ą", TokenAnnotation::WORD, "x", false);
-    boost::shared_ptr<TokenizedSentence> ts(new TokenizedSentence("zażółć gęślą jaźń ZAŻÓŁĆ GĘŚLĄ JAŹŃ"));
+    TokenizedSentence ts("zażółć gęślą jaźń ZAŻÓŁĆ GĘŚLĄ JAŹŃ");
     rr.apply(ts);
-    BOOST_CHECK_EQUAL(ts->getAnnotations().size(),2);
-    std::list<TokenAnnotation> annotations = ts->getAnnotations();
+    BOOST_CHECK_EQUAL(ts.getAnnotations().size(),2);
+    std::list<TokenAnnotation> annotations = ts.getAnnotations();
     std::list<TokenAnnotation>::iterator iter = annotations.begin();
 
     BOOST_CHECK_EQUAL(iter->getStart(),11);
@@ -141,10 +141,10 @@ BOOST_AUTO_TEST_CASE( CaseInsensitiveUnicodeAnnotation )
 BOOST_AUTO_TEST_CASE( CaseInsensitiveUnicodeClassReplacement )
 {
     RegexRule rr("[ąćęłńóśżź]", TokenAnnotation::WORD, "x", false);
-    boost::shared_ptr<TokenizedSentence> ts(new TokenizedSentence("zażółć gęślą jaźń ZAŻÓŁĆ GĘŚLĄ JAŹŃ"));
+    TokenizedSentence ts("zażółć gęślą jaźń ZAŻÓŁĆ GĘŚLĄ JAŹŃ");
     rr.apply(ts);
-    BOOST_CHECK_EQUAL(ts->getAnnotations().size(),18);
-    std::list<TokenAnnotation> annotations = ts->getAnnotations();
+    BOOST_CHECK_EQUAL(ts.getAnnotations().size(),18);
+    std::list<TokenAnnotation> annotations = ts.getAnnotations();
     std::list<TokenAnnotation>::iterator iter = annotations.begin();
 
     BOOST_CHECK_EQUAL(iter->getStart(),2);
diff --git a/concordia/t/test_sentence_tokenizer.cpp b/concordia/t/test_sentence_tokenizer.cpp
index 626fdc9..8c5580f 100644
--- a/concordia/t/test_sentence_tokenizer.cpp
+++ b/concordia/t/test_sentence_tokenizer.cpp
@@ -20,8 +20,8 @@ BOOST_AUTO_TEST_CASE( NETest )
     
     
     std::string sentence = "Date: 12.04.2012, mail: test@example.com, number: 5.34, hello3 zażółć gęślą jaźń, ZAŻÓŁĆ GĘŚLĄ JAŹŃ";
-    boost::shared_ptr<TokenizedSentence> ts = tokenizer.tokenize(sentence);
-    std::list<TokenAnnotation> annotations = ts->getAnnotations();
+    TokenizedSentence ts = tokenizer.tokenize(sentence);
+    std::list<TokenAnnotation> annotations = ts.getAnnotations();
     std::list<TokenAnnotation>::iterator iter = annotations.begin();
     
     BOOST_CHECK_EQUAL(14,annotations.size());
@@ -134,8 +134,8 @@ BOOST_AUTO_TEST_CASE( HtmlTagsTest )
     SentenceTokenizer tokenizer(config);
 
     std::string sentence = "<a href='http://wp.pl'>link</a> and <b>bold</b> and newline <br/>";
-    boost::shared_ptr<TokenizedSentence> ts = tokenizer.tokenize(sentence);
-    std::list<TokenAnnotation> annotations = ts->getAnnotations();
+    TokenizedSentence ts = tokenizer.tokenize(sentence);
+    std::list<TokenAnnotation> annotations = ts.getAnnotations();
     std::list<TokenAnnotation>::iterator iter = annotations.begin();
         
     /*
@@ -214,8 +214,8 @@ BOOST_AUTO_TEST_CASE( InWordSymbolsTest )
     SentenceTokenizer tokenizer(config);
 
     std::string sentence = "This is a sentence, don't over-analyze it. zażółć' gęś'lą -jaźń ZAŻ-ÓŁĆ GĘŚLĄ JAŹ'Ń";
-    boost::shared_ptr<TokenizedSentence> ts = tokenizer.tokenize(sentence);
-    std::list<TokenAnnotation> annotations = ts->getAnnotations();
+    TokenizedSentence ts = tokenizer.tokenize(sentence);
+    std::list<TokenAnnotation> annotations = ts.getAnnotations();
     std::list<TokenAnnotation>::iterator iter = annotations.begin();
     
     /*
@@ -322,7 +322,7 @@ BOOST_AUTO_TEST_CASE( StopWordsTest )
     if (config->isStopWordsEnabled()) {
         SentenceTokenizer tokenizer(config);
         std::string sentence = "Aczkolwiek nie wiem, czy to konieczne";
-        BOOST_CHECK_EQUAL(tokenizer.tokenize(sentence)->getSentence(),"  wiem   konieczne");
+        BOOST_CHECK_EQUAL(tokenizer.tokenize(sentence).getSentence(),"  wiem   konieczne");
     }
 }
 
@@ -332,8 +332,8 @@ BOOST_AUTO_TEST_CASE( WeirdSentenceTest )
     SentenceTokenizer tokenizer(config);
     
     std::string sentence = "Sony | DXC-M7PKDXC-M7PDXC-M7PHDXC-M7PK/1DXC-M7P/1DXC-M7PH/1DXC-327PKDXC-327PLDXC-327PHDXC-327APKDXC-327APLDXC-327AHDXC-537PKDXC-537PLDXC-537PHDXC-537APKDXC-537APLDXC-537APHEVW-537PKEVW-327PKDXC-637PDXC-637PKDXC-637PLDXC-637PHPVW-637PKPVW-637PLDXC-D30PFDXC-D30PKDXC-D30PLDXC-D30PHDSR-130PFDSR-130PKDSR-130PLPVW-D30PFPVW-D30PKPVW-D30PLDXC-327BPFDXC-327BPKDXC-327BPLDXC-327BPHDXC-D30WSPDXC-D35PHDXC-D35PLDXC-D35PKDXC-D35WSPLDSR-135PL | DXF-3000CEDXF-325CEDXF-501CEDXF-M3CEDXF-M7CEDXF-40CEDXF-40ACEDXF-50CEDXF-601CEDXF-40BCEDXF-50BCEDXF-701CEDXF-WSCEDXF-801CEHDVF-C30W | CCU-M3PCCU-M5PCCU-M7PCUU-M5AP | RM-M7GRM-M7E | — | CA-325PCA-325APCA-325BCA-327PCA-537PCA-511CA-512PCA-513VCT-U14 |";
-    boost::shared_ptr<TokenizedSentence> ts = tokenizer.tokenize(sentence);
-    std::list<TokenAnnotation> annotations = ts->getAnnotations();
+    TokenizedSentence ts = tokenizer.tokenize(sentence);
+    std::list<TokenAnnotation> annotations = ts.getAnnotations();
     std::list<TokenAnnotation>::iterator iter = annotations.begin();
     
     BOOST_CHECK_EQUAL(161, annotations.size());