diff --git a/concordia-server/bool_param.cpp b/concordia-server/bool_param.cpp
new file mode 100644
index 0000000..8029c8f
--- /dev/null
+++ b/concordia-server/bool_param.cpp
@@ -0,0 +1,24 @@
+#include "bool_param.hpp"
+
+
+BoolParam::BoolParam(bool value):_value(value) {
+}
+
+BoolParam::~BoolParam() {
+}
+
+const char * BoolParam::getValue() {
+    if (_value) {
+        return "t";
+    } else {
+        return "f";
+    }
+}
+
+const int BoolParam::getLength() {
+    return 1;
+}
+
+const int BoolParam::isBinary() {
+    return 0;
+}
diff --git a/concordia-server/bool_param.hpp b/concordia-server/bool_param.hpp
new file mode 100644
index 0000000..ddb08f5
--- /dev/null
+++ b/concordia-server/bool_param.hpp
@@ -0,0 +1,24 @@
+#ifndef BOOL_PARAM_HDR
+#define BOOL_PARAM_HDR
+
+#include "query_param.hpp"
+
+class BoolParam : public QueryParam {
+public:
+    /*! Constructor.
+    */
+    BoolParam(bool value);
+    /*! Destructor.
+    */
+    virtual ~BoolParam();
+
+    const char * getValue();
+
+    const int getLength();
+
+    const int isBinary();
+private:
+    bool _value;
+};
+
+#endif
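Note: BoolParam follows the existing QueryParam convention of handing every value to libpq in text format; PostgreSQL's text representation of bool is the single character "t" or "f", hence getValue()/getLength() above. A minimal usage sketch follows (the function name and the UPDATE statement are illustrative only; DBconnection, IntParam and the cleanup idiom are the ones used throughout this patch):

    #include <string>
    #include <vector>
    #include <boost/foreach.hpp>
    #include "db_connection.hpp"
    #include "bool_param.hpp"
    #include "int_param.hpp"

    // Illustrative sketch: flip the lemmatized flag of an existing TM,
    // passing the bool in text format ("t"/"f") via BoolParam.
    void setTmLemmatized(int tmId, bool lemmatized) {
        DBconnection connection;
        connection.startTransaction();
        std::string query = "UPDATE tm SET lemmatized = $1::bool WHERE id = $2::integer";
        std::vector<QueryParam*> params;
        params.push_back(new BoolParam(lemmatized));
        params.push_back(new IntParam(tmId));
        PGresult * result = connection.execute(query, params);
        connection.clearResult(result);
        connection.endTransaction();
        BOOST_FOREACH (QueryParam * param, params) {  // same cleanup idiom as tm_dao.cpp
            delete param;
        }
    }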
diff --git a/concordia-server/concordia_server.cpp b/concordia-server/concordia_server.cpp
index d33fba2..a5689d7 100644
--- a/concordia-server/concordia_server.cpp
+++ b/concordia-server/concordia_server.cpp
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -19,16 +20,18 @@
 ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
                                   throw(ConcordiaException) :
                                   _configFilePath(configFilePath) {
+    std::vector<int> tmIds = _tmDAO.getTmIds();
     _concordiasMap = boost::shared_ptr<boost::ptr_map<int, Concordia> >(new boost::ptr_map<int, Concordia>());
     BOOST_FOREACH(int & tmId, tmIds) {
         _addTm(tmId);
     }
-    _indexController = boost::shared_ptr<IndexController> (new IndexController(_concordiasMap));
-    _searcherController = boost::shared_ptr<SearcherController> (new SearcherController(_concordiasMap));
-    _lemmatizerFacade = boost::shared_ptr<LemmatizerFacade> (new LemmatizerFacade());
+    _lemmatizerFacade = boost::shared_ptr<LemmatizerFacade> (new LemmatizerFacade());
+
+    _indexController = boost::shared_ptr<IndexController> (new IndexController(_concordiasMap, _lemmatizerFacade));
+    _searcherController = boost::shared_ptr<SearcherController> (new SearcherController(_concordiasMap, _lemmatizerFacade));
 }
 
 ConcordiaServer::~ConcordiaServer() {
@@ -95,6 +97,27 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
                 }
             }
             _indexController->addAlignedSentences(jsonWriter, sourceSentences, targetSentences, tmId);
+        } else if (operation == ADD_ALIGNED_LEMMATIZED_SENTENCES_OP) {
+            std::vector<std::string> sourceSentences;
+            std::vector<std::string> targetSentences;
+            std::vector<std::string> alignmentStrings;
+            int tmId = d[TM_ID_PARAM].GetInt();
+            // loading data from json
+            const rapidjson::Value & sentencesArray = d[EXAMPLES_PARAM];
+            Logger::log("addAlignedLemmatizedSentences");
+            Logger::logInt("lemmatized sentences to add", sentencesArray.Size());
+            Logger::logInt("tm id", tmId);
+            for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
+                if (sentencesArray[i].Size() != 3) {
+                    JsonGenerator::signalError(jsonWriter, "sentence should be an array of 3 elements");
+                    break;
+                } else {
+                    sourceSentences.push_back(sentencesArray[i][0].GetString());
+                    targetSentences.push_back(sentencesArray[i][1].GetString());
+                    alignmentStrings.push_back(sentencesArray[i][2].GetString());
+                }
+            }
+            _indexController->addAlignedLemmatizedSentences(jsonWriter, sourceSentences, targetSentences, alignmentStrings, tmId);
         } else if (operation == "lemmatize") {
             std::string sentence = _getStringParameter(d, "sentence");
             std::string languageCode = _getStringParameter(d, "languageCode");
@@ -130,7 +153,8 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
             int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM);
             int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM);
             std::string name = _getStringParameter(d, NAME_PARAM);
-            int newId = _tmDAO.addTm(sourceLangId, targetLangId, name);
+            bool lemmatized = _getBoolParameter(d, TM_LEMMATIZED_PARAM);
+            int newId = _tmDAO.addTm(sourceLangId, targetLangId, name, lemmatized);
             _addTm(newId);
 
             jsonWriter.StartObject();
@@ -179,6 +203,17 @@ int ConcordiaServer::_getIntParameter(rapidjson::Document & d, const char * name)
     }
 }
 
+bool ConcordiaServer::_getBoolParameter(rapidjson::Document & d, const char * name)
+                                  throw (ConcordiaException) {
+    rapidjson::Value::ConstMemberIterator itr = d.FindMember(name);
+    if (itr != d.MemberEnd()) {
+        bool value = itr->value.GetBool();
+        return value;
+    } else {
+        throw ConcordiaException("missing parameter: " + std::string(name));
+    }
+}
+
 void ConcordiaServer::_addTm(int tmId) {
     std::stringstream indexPath;
     indexPath << INDEX_DIRECTORY << "/tm_" << tmId;
diff --git a/concordia-server/concordia_server.hpp b/concordia-server/concordia_server.hpp
index c0e11c2..2822a9e 100644
--- a/concordia-server/concordia_server.hpp
+++ b/concordia-server/concordia_server.hpp
@@ -38,6 +38,8 @@ private:
     int _getIntParameter(rapidjson::Document & d, const char * name)
         throw (ConcordiaException);
 
+    bool _getBoolParameter(rapidjson::Document & d, const char * name)
+        throw (ConcordiaException);
+
     void _addTm(int tmId);
 
     std::string _configFilePath;
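For reference, a request routed to the new ADD_ALIGNED_LEMMATIZED_SENTENCES_OP branch looks roughly like this (parameter names are the ones defined in config.hpp.in below; the sentences and the alignment string are illustrative). Each element of "examples" is a three-element array: raw source sentence, raw target sentence, and a GIZA-style alignment line over the lemmatized source:

    {
        "operation": "addAlignedLemmatizedSentences",
        "tmId": 1,
        "examples": [
            ["Wczoraj kupiłem książkę.",
             "Yesterday I bought a book.",
             "NULL ({ }) wczoraj ({ 1 }) kupić ({ 3 4 }) książka ({ 6 })"]
        ]
    }

The addTm operation likewise gains the boolean "tmLemmatized" parameter read by _getBoolParameter().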
diff --git a/concordia-server/config.hpp.in b/concordia-server/config.hpp.in
index dac7ae6..571d18a 100644
--- a/concordia-server/config.hpp.in
+++ b/concordia-server/config.hpp.in
@@ -16,7 +16,9 @@
 #define SOURCE_SENTENCE_PARAM "sourceSentence"
 #define TARGET_SENTENCE_PARAM "targetSentence"
 #define TM_ID_PARAM "tmId"
+#define TM_LEMMATIZED_PARAM "tmLemmatized"
 #define SENTENCES_PARAM "sentences"
+#define EXAMPLES_PARAM "examples"
 #define SOURCE_LANG_PARAM "sourceLangId"
 #define TARGET_LANG_PARAM "targetLangId"
 #define NAME_PARAM "name"
@@ -25,6 +27,7 @@
 #define ADD_SENTENCE_OP "addSentence"
 #define ADD_SENTENCES_OP "addSentences"
 #define ADD_ALIGNED_SENTENCES_OP "addAlignedSentences"
+#define ADD_ALIGNED_LEMMATIZED_SENTENCES_OP "addAlignedLemmatizedSentences"
 #define REFRESH_INDEX_OP "refreshIndex"
 #define SIMPLE_SEARCH_OP "simpleSearch"
 #define CONCORDIA_SEARCH_OP "concordiaSearch"
diff --git a/concordia-server/db_connection.cpp b/concordia-server/db_connection.cpp
index c46516c..8b26eeb 100644
--- a/concordia-server/db_connection.cpp
+++ b/concordia-server/db_connection.cpp
@@ -17,7 +17,7 @@ DBconnection::DBconnection() throw(ConcordiaException) {
         ss << "Connection string: " << connectionInfo;
         throw ConcordiaException(ss.str());
     }
-    
+
 }
 
 DBconnection::~DBconnection() {
@@ -90,8 +90,8 @@ PGresult * DBconnection::execute(std::string query,
         paramFormats[index] = param->isBinary();
         index++;
     }
-    
-    
+
+
     PGresult * result = PQexecParams(_connection,
                            query.c_str(),
                            params.size(),
@@ -129,7 +129,18 @@ int DBconnection::getIntValue(PGresult * result, int row, int col) throw (ConcordiaException) {
     } catch (std::exception & e) {
         std::stringstream ss;
         ss << "Error getting int value. Message: " << e.what();
-        throw ConcordiaException(ss.str());        
+        throw ConcordiaException(ss.str());
+    }
+}
+
+bool DBconnection::getBoolValue(PGresult * result, int row, int col) throw (ConcordiaException) {
+    try {
+        char * valueStr = PQgetvalue(result,row,col);
+        return std::string(valueStr) == "t";
+    } catch (std::exception & e) {
+        std::stringstream ss;
+        ss << "Error getting bool value. Message: " << e.what();
+        throw ConcordiaException(ss.str());
     }
 }
 
@@ -150,7 +161,6 @@ int DBconnection::getRowCount(PGresult * result) throw (ConcordiaException) {
     } catch (std::exception & e) {
         std::stringstream ss;
         ss << "Error getting int value. Message: " << e.what();
-        throw ConcordiaException(ss.str());        
+        throw ConcordiaException(ss.str());
     }
 }
-
diff --git a/concordia-server/db_connection.hpp b/concordia-server/db_connection.hpp
index c65fb35..9542fb8 100644
--- a/concordia-server/db_connection.hpp
+++ b/concordia-server/db_connection.hpp
@@ -31,6 +31,8 @@ public:
     int getIntValue(PGresult * result, int row, int col) throw (ConcordiaException);
 
+    bool getBoolValue(PGresult * result, int row, int col) throw (ConcordiaException);
+
     std::string getStringValue(PGresult * result, int row, int col) throw (ConcordiaException);
 
     int getRowCount(PGresult * result) throw (ConcordiaException);
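One caveat about getBoolValue(): PQgetvalue() does not throw (the catch block is defensive) and it returns an empty string, not a null pointer, for SQL NULL, so a NULL column silently reads as false. Since tm.lemmatized gets DEFAULT false in db/concordia_server.sql below, false is the right answer here, but a NULL-aware variant would look like this (sketch; the function name is illustrative):

    #include <libpq-fe.h>
    #include <string>

    // Sketch: read a bool column, but report SQL NULL as a caller-chosen fallback.
    bool getBoolValueOrDefault(PGresult * result, int row, int col, bool fallback) {
        if (PQgetisnull(result, row, col)) {
            return fallback;
        }
        return std::string(PQgetvalue(result, row, col)) == "t";
    }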
diff --git a/concordia-server/index_controller.cpp b/concordia-server/index_controller.cpp
index 60d65f0..37de410 100644
--- a/concordia-server/index_controller.cpp
+++ b/concordia-server/index_controller.cpp
@@ -14,9 +14,11 @@
 #include "json_generator.hpp"
 #include "logger.hpp"
 
-IndexController::IndexController(boost::shared_ptr<boost::ptr_map<int, Concordia> > concordiasMap)
+IndexController::IndexController(boost::shared_ptr<boost::ptr_map<int, Concordia> > concordiasMap,
+                                 boost::shared_ptr<LemmatizerFacade> lemmatizerFacade)
                                                throw(ConcordiaException):
-                            _concordiasMap(concordiasMap) {
+                            _concordiasMap(concordiasMap),
+                            _lemmatizerFacade(lemmatizerFacade) {
 }
 
 IndexController::~IndexController() {
@@ -32,9 +34,10 @@ void IndexController::addSentence(
     try {
         boost::ptr_map<int, Concordia>::iterator it = _concordiasMap->find(tmId);
         if (it != _concordiasMap->end()) {
+            TokenizedSentence tokenizedLemmatizedSentence = it->second->tokenize(_lemmatizerFacade->lemmatizeIfNeeded(sourceSentence, tmId));
             TokenizedSentence tokenizedSentence = it->second->tokenize(sourceSentence);
-            int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId);            
-            it->second->addTokenizedExample(tokenizedSentence, sentenceId);
+            int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId);
+            it->second->addTokenizedExample(tokenizedLemmatizedSentence, sentenceId);
             it->second->refreshSAfromRAM();
 
             jsonWriter.StartObject();
@@ -42,20 +45,20 @@ void IndexController::addSentence(
             jsonWriter.String("success");
             jsonWriter.EndObject();
         } else {
-            JsonGenerator::signalError(jsonWriter, "no such tm!");            
+            JsonGenerator::signalError(jsonWriter, "no such tm!");
         }
     } catch (ConcordiaException & e) {
         std::stringstream errorstream;
         errorstream << "concordia error: " << e.what();
-        JsonGenerator::signalError(jsonWriter, errorstream.str());        
+        JsonGenerator::signalError(jsonWriter, errorstream.str());
     } catch (std::exception & e) {
         std::stringstream errorstream;
         errorstream << "general error: " << e.what();
-        JsonGenerator::signalError(jsonWriter, errorstream.str());        
+        JsonGenerator::signalError(jsonWriter, errorstream.str());
     } catch (...) {
         std::stringstream errorstream;
         errorstream << "unexpected error occurred";
-        JsonGenerator::signalError(jsonWriter, errorstream.str());        
+        JsonGenerator::signalError(jsonWriter, errorstream.str());
     }
 }
 
@@ -67,21 +70,22 @@ void IndexController::addSentences(
     try {
         boost::ptr_map<int, Concordia>::iterator it = _concordiasMap->find(tmId);
         if (it != _concordiasMap->end()) {
+            std::vector<TokenizedSentence> tokenizedLemmatizedSentences = it->second->tokenizeAll(_lemmatizerFacade->lemmatizeSentencesIfNeeded(sourceSentences, tmId));
             std::vector<TokenizedSentence> tokenizedSentences = it->second->tokenizeAll(sourceSentences);
             std::vector<int> sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmId);
-            it->second->addAllTokenizedExamples(tokenizedSentences, sentenceIds);
+            it->second->addAllTokenizedExamples(tokenizedLemmatizedSentences, sentenceIds);
 
             jsonWriter.StartObject();
             jsonWriter.String("status");
             jsonWriter.String("success");
             jsonWriter.EndObject();
         } else {
-            JsonGenerator::signalError(jsonWriter, "no such tm!");            
+            JsonGenerator::signalError(jsonWriter, "no such tm!");
         }
     } catch (ConcordiaException & e) {
         std::stringstream errorstream;
         errorstream << "concordia error: " << e.what();
-        JsonGenerator::signalError(jsonWriter, errorstream.str());        
+        JsonGenerator::signalError(jsonWriter, errorstream.str());
     }
 }
 
@@ -96,28 +100,64 @@ void IndexController::addAlignedSentences(
             std::vector<std::string> sourceSentences;
             std::vector<std::vector<std::vector<int> > > allAlignments;
             _getSourceSentencesAndAlignments(sourceSentences, allAlignments, rawSourceSentences);
-            
+
             std::vector<TokenizedSentence> tokenizedSourceSentences = it->second->tokenizeAll(sourceSentences, true, true);
             std::vector<TokenizedSentence> tokenizedTargetSentences = it->second->tokenizeAll(targetSentences, true, false);
 
             std::vector<int> sentenceIds = _unitDAO.addAlignedSentences(tokenizedSourceSentences, tokenizedTargetSentences, allAlignments, tmId);
             for(int index = 0; index < tokenizedSourceSentences.size(); index++) {
                 it->second->addTokenizedExample(tokenizedSourceSentences.at(index), sentenceIds.at(index));
-            }            
+            }
             jsonWriter.StartObject();
             jsonWriter.String("status");
             jsonWriter.String("success");
             jsonWriter.EndObject();
         } else {
-            JsonGenerator::signalError(jsonWriter, "no such tm!");            
+            JsonGenerator::signalError(jsonWriter, "no such tm!");
         }
     } catch (ConcordiaException & e) {
         std::stringstream errorstream;
         errorstream << "concordia error: " << e.what();
-        JsonGenerator::signalError(jsonWriter, errorstream.str());        
+        JsonGenerator::signalError(jsonWriter, errorstream.str());
     }
 }
 
+void IndexController::addAlignedLemmatizedSentences(
+                rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
+                const std::vector<std::string> & sourceSentences,
+                const std::vector<std::string> & targetSentences,
+                const std::vector<std::string> & alignmentStrings,
+                const int tmId) {
+    try {
+        boost::ptr_map<int, Concordia>::iterator it = _concordiasMap->find(tmId);
+        if (it != _concordiasMap->end()) {
+            std::vector<std::string> lemmatizedSourceSentences;
+            std::vector<std::vector<std::vector<int> > > allAlignments;
+            _getSourceSentencesAndAlignments(lemmatizedSourceSentences, allAlignments, alignmentStrings);
+
+            std::vector<TokenizedSentence> tokenizedLemmatizedSourceSentences = it->second->tokenizeAll(lemmatizedSourceSentences, true, true);
+            std::vector<TokenizedSentence> tokenizedSourceSentences = it->second->tokenizeAll(sourceSentences, true, false);
+            std::vector<TokenizedSentence> tokenizedTargetSentences = it->second->tokenizeAll(targetSentences, true, false);
+
+            std::vector<int> sentenceIds =
+                _unitDAO.addAlignedSentences(tokenizedSourceSentences, tokenizedTargetSentences, allAlignments, tmId);
+            for(int index = 0; index < tokenizedLemmatizedSourceSentences.size(); index++) {
+                it->second->addTokenizedExample(tokenizedLemmatizedSourceSentences.at(index), sentenceIds.at(index));
+            }
+            jsonWriter.StartObject();
+            jsonWriter.String("status");
+            jsonWriter.String("success");
+            jsonWriter.EndObject();
+        } else {
+            JsonGenerator::signalError(jsonWriter, "no such tm!");
+        }
+    } catch (ConcordiaException & e) {
+        std::stringstream errorstream;
+        errorstream << "concordia error: " << e.what();
+        JsonGenerator::signalError(jsonWriter, errorstream.str());
+    }
+}
+
@@ -159,21 +201,21 @@ void IndexController::_getSourceSentencesAndAlignments(
     std::vector<std::vector<int> > alignments;
-    
+
     UnicodeString s(rawSourceSentence.c_str());
     boost::u32regex_iterator begin(
         boost::make_u32regex_iterator(
             s,
         )
     );
     boost::u32regex_iterator end;
-    
+
     for (; begin != end; ++begin) {
         UnicodeString tokenUTF8((*begin)[1].first, (*begin).length(1));
         std::string token;
         tokenUTF8.toUTF8String(token);
         if (token != "NULL") {
-            std::string numbers((*begin)[2].first, (*begin)[2].second);            
+            std::string numbers((*begin)[2].first, (*begin)[2].second);
             std::istringstream iss(numbers);
             std::vector<std::string> numberStrings;
             std::copy(std::istream_iterator<std::string>(iss),
                       std::istream_iterator<std::string>(),
                       std::back_inserter(numberStrings));
 
-            std::vector<int> tokenAlignments;            
+            std::vector<int> tokenAlignments;
             for (int j=0;j<numberStrings.size();j++) {
diff --git a/concordia-server/index_controller.hpp b/concordia-server/index_controller.hpp
--- a/concordia-server/index_controller.hpp
+++ b/concordia-server/index_controller.hpp
@@ -20,7 +20,8 @@ public:
     /*! Constructor.
     */
-    explicit IndexController(boost::shared_ptr<boost::ptr_map<int, Concordia> > concordiasMap)
+    explicit IndexController(boost::shared_ptr<boost::ptr_map<int, Concordia> > concordiasMap,
+                             boost::shared_ptr<LemmatizerFacade> lemmatizerFacade)
                              throw(ConcordiaException);
     /*! Destructor.
     */
@@ -38,9 +41,16 @@ public:
         const std::vector<std::string> & targetSentences,
         const int tmId);
 
+    void addAlignedLemmatizedSentences(
+        rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
+        const std::vector<std::string> & sourceSentences,
+        const std::vector<std::string> & targetSentences,
+        const std::vector<std::string> & alignmentStrings,
+        const int tmId);
+
     void refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                              const int tmId);
-    
+
 private:
     void _getSourceSentencesAndAlignments(
         std::vector<std::string> & sourceSentences,
         std::vector<std::vector<std::vector<int> > > & allAlignments,
@@ -48,7 +58,9 @@ private:
         const std::vector<std::string> & rawSourceSentences);
 
     boost::shared_ptr<boost::ptr_map<int, Concordia> > _concordiasMap;
-    
+
+    boost::shared_ptr<LemmatizerFacade> _lemmatizerFacade;
+
     UnitDAO _unitDAO;
 };
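To make _getSourceSentencesAndAlignments concrete: each raw line is a sequence of token ({ numbers }) groups as produced by GIZA/mgiza. NULL groups are dropped, the surviving tokens are concatenated into the recovered (here: lemmatized) source sentence, and the numbers in the braces become that token's list of aligned target positions. An illustrative line:

    raw line:            NULL ({ }) tomorrow ({ 1 }) be ({ }) holiday ({ 3 4 })
    recovered sentence:  tomorrow be holiday
    per-token targets:   tomorrow -> { 1 }, be -> { }, holiday -> { 3 4 }

Note that _addAlignedUnit in unit_dao.cpp (further below) rejects a unit whose source token count differs from the number of alignment groups, so in addAlignedLemmatizedSentences the raw source text must tokenize to exactly as many tokens as the lemmatized alignment line has groups.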
diff --git a/concordia-server/lemmatizer_facade.cpp b/concordia-server/lemmatizer_facade.cpp
index f6adc31..43b0aae 100644
--- a/concordia-server/lemmatizer_facade.cpp
+++ b/concordia-server/lemmatizer_facade.cpp
@@ -1,5 +1,7 @@
 #include "lemmatizer_facade.hpp"
 
+#include <boost/foreach.hpp>
+
 LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
     _lemmatizersMap = boost::ptr_map<std::string, SocketLemmatizer>();
@@ -28,3 +30,26 @@ std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) {
     }
 
 }
+
+std::string LemmatizerFacade::lemmatizeIfNeeded(std::string pattern, int tmId) {
+    std::pair<bool, std::string> tmInfo = _tmDAO.getTmInfo(tmId);
+    if (tmInfo.first) {
+        return lemmatizeSentence(tmInfo.second, pattern);
+    } else {
+        return pattern;
+    }
+}
+
+std::vector<std::string> LemmatizerFacade::lemmatizeSentencesIfNeeded(std::vector<std::string> patterns, int tmId) {
+    std::pair<bool, std::string> tmInfo = _tmDAO.getTmInfo(tmId);
+    if (tmInfo.first) {
+        std::vector<std::string> result;
+        BOOST_FOREACH(std::string & pattern, patterns) {
+            result.push_back(lemmatizeSentence(tmInfo.second, pattern));
+        }
+        return result;
+    } else {
+        return patterns;
+    }
+}
diff --git a/concordia-server/lemmatizer_facade.hpp b/concordia-server/lemmatizer_facade.hpp
index 7eea156..e9f5c3e 100644
--- a/concordia-server/lemmatizer_facade.hpp
+++ b/concordia-server/lemmatizer_facade.hpp
@@ -2,6 +2,7 @@
 #define LEMMATIZER_FACADE_HDR
 
 #include "socket_lemmatizer.hpp"
+#include "tm_dao.hpp"
 
 #include 
 #include 
@@ -18,8 +19,15 @@ public:
     virtual ~LemmatizerFacade();
 
     std::string lemmatizeSentence(std::string languageCode, std::string sentence);
+
+    std::string lemmatizeIfNeeded(std::string pattern, int tmId);
+
+    std::vector<std::string> lemmatizeSentencesIfNeeded(std::vector<std::string> patterns, int tmId);
+
 private:
     boost::ptr_map<std::string, SocketLemmatizer> _lemmatizersMap;
+
+    TmDAO _tmDAO;
 };
 
 #endif
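lemmatizeIfNeeded() and lemmatizeSentencesIfNeeded() call TmDAO::getTmInfo() on every invocation, which means one extra database round-trip per search request and per indexing batch. The lemmatized flag and source-language code do not change for an existing TM, so they are natural candidates for memoization if that query ever shows up in profiles. A sketch (the cache member and helper are hypothetical, not part of this patch; the member declaration would live in lemmatizer_facade.hpp):

    #include <map>
    #include <utility>

    // Hypothetical addition to LemmatizerFacade: cache getTmInfo() results per tmId.
    std::map<int, std::pair<bool, std::string> > _tmInfoCache;

    std::pair<bool, std::string> LemmatizerFacade::_getTmInfoCached(int tmId) {
        std::map<int, std::pair<bool, std::string> >::iterator it = _tmInfoCache.find(tmId);
        if (it == _tmInfoCache.end()) {
            // First lookup for this TM: fetch from the database and remember it.
            it = _tmInfoCache.insert(std::make_pair(tmId, _tmDAO.getTmInfo(tmId))).first;
        }
        return it->second;
    }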
diff --git a/concordia-server/searcher_controller.cpp b/concordia-server/searcher_controller.cpp
index 11d36ac..dd7eb03 100644
--- a/concordia-server/searcher_controller.cpp
+++ b/concordia-server/searcher_controller.cpp
@@ -8,9 +8,11 @@
 #include "logger.hpp"
 
-SearcherController::SearcherController(boost::shared_ptr<boost::ptr_map<int, Concordia> > concordiasMap)
+SearcherController::SearcherController(boost::shared_ptr<boost::ptr_map<int, Concordia> > concordiasMap,
+                                       boost::shared_ptr<LemmatizerFacade> lemmatizerFacade)
                                                throw(ConcordiaException):
-                            _concordiasMap(concordiasMap) {
+                            _concordiasMap(concordiasMap),
+                            _lemmatizerFacade(lemmatizerFacade) {
 }
 
 SearcherController::~SearcherController() {
@@ -22,6 +24,7 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                                       const int tmId) {
     boost::ptr_map<int, Concordia>::iterator it = _concordiasMap->find(tmId);
     if (it != _concordiasMap->end()) {
+        pattern = _lemmatizerFacade->lemmatizeIfNeeded(pattern, tmId);
         std::vector<SimpleSearchResult> results = _unitDAO.getSearchResults(it->second->simpleSearch(pattern));
 
         jsonWriter.StartObject();
@@ -30,48 +33,49 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
         jsonWriter.String("results");
         jsonWriter.StartArray();
         BOOST_FOREACH(SimpleSearchResult & result, results) {
-            JsonGenerator::writeSearchResult(jsonWriter, result);            
-        }        
+            JsonGenerator::writeSearchResult(jsonWriter, result);
+        }
         jsonWriter.EndArray();
         jsonWriter.EndObject();
     } else {
-        JsonGenerator::signalError(jsonWriter, "no such tm!");        
+        JsonGenerator::signalError(jsonWriter, "no such tm!");
     }
 }
 
 void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                                                std::string & pattern,
                                                const std::vector<Interval> & intervals,
-                                               const int tmId) {                                                   
+                                               const int tmId) {
     boost::ptr_map<int, Concordia>::iterator it = _concordiasMap->find(tmId);
     if (it != _concordiasMap->end()) {
         if (intervals.size() > 0) {
             // std::string shortPattern = pattern.substr(intervals[0].getStart(), intervals[0].getEnd() - intervals[0].getStart());
+            pattern = _lemmatizerFacade->lemmatizeIfNeeded(pattern, tmId);
             std::string shortPattern = _substrUTF8(pattern, intervals[0].getStart(), intervals[0].getEnd() - intervals[0].getStart());
-            
+
             Logger::log("concordiaPhraseSearch");
             Logger::logString("short pattern", shortPattern);
             std::vector<SimpleSearchResult> shortPatternResults = _unitDAO.getSearchResults(it->second->simpleSearch(shortPattern));
-            
-            
-            
+
+
+
             jsonWriter.StartObject();
             jsonWriter.String("status");
             jsonWriter.String("success");
             jsonWriter.String("found");
             if (shortPatternResults.size() > 0) {
                 jsonWriter.Bool(true);
-                
-                
+
+
                 std::vector<SimpleSearchResult> bestOverlay;
-                
+
                 int currStart = 0;
                 BOOST_FOREACH(const Interval & interval, intervals) {
                     CompleteConcordiaSearchResult restResult = _unitDAO.getConcordiaResult(
                              it->second->concordiaSearch(pattern.substr(currStart, interval.getStart()-currStart)));
                     restResult.offsetPattern(currStart);
                     bestOverlay.insert(bestOverlay.end(), restResult.getBestOverlay().begin(), restResult.getBestOverlay().end());
-                    
+
                     SimpleSearchResult shortPatternresult = shortPatternResults[0];
                     shortPatternresult.setMatchedPatternStart(interval.getStart());
                     shortPatternresult.setMatchedPatternEnd(interval.getEnd());
@@ -82,26 +86,26 @@ void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                 CompleteConcordiaSearchResult lastRestResult = _unitDAO.getConcordiaResult(
                          it->second->concordiaSearch(_substrUTF8(pattern,currStart,INT_MAX)));
                 lastRestResult.offsetPattern(currStart);
                 bestOverlay.insert(bestOverlay.end(), lastRestResult.getBestOverlay().begin(), lastRestResult.getBestOverlay().end());
-                
+
                 jsonWriter.String("result");
                 jsonWriter.StartObject();
                 jsonWriter.String("bestOverlay");
                 jsonWriter.StartArray();
                 BOOST_FOREACH(SimpleSearchResult & simpleResult, bestOverlay) {
-                    JsonGenerator::writeSearchResult(jsonWriter, simpleResult);                    
-                }                
+                    JsonGenerator::writeSearchResult(jsonWriter, simpleResult);
+                }
                 jsonWriter.EndArray();
                 jsonWriter.EndObject();
             } else {
-                jsonWriter.Bool(false);                
+                jsonWriter.Bool(false);
             }
             jsonWriter.EndObject();
         } else {
             JsonGenerator::signalError(jsonWriter, "no intervals for phrase search");
         }
     } else {
-        JsonGenerator::signalError(jsonWriter, "no such tm!");        
-    }    
+        JsonGenerator::signalError(jsonWriter, "no such tm!");
+    }
 }
 
@@ -111,8 +115,9 @@ void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                                          const int tmId) {
     boost::ptr_map<int, Concordia>::iterator it = _concordiasMap->find(tmId);
     if (it != _concordiasMap->end()) {
+        pattern = _lemmatizerFacade->lemmatizeIfNeeded(pattern, tmId);
         CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult(it->second->concordiaSearch(pattern));
-        
+
         jsonWriter.StartObject();
         jsonWriter.String("status");
         jsonWriter.String("success");
diff --git a/concordia-server/searcher_controller.hpp b/concordia-server/searcher_controller.hpp
--- a/concordia-server/searcher_controller.hpp
+++ b/concordia-server/searcher_controller.hpp
@@ -20,8 +21,9 @@ public:
     /*! Constructor.
     */
-    explicit SearcherController(boost::shared_ptr<boost::ptr_map<int, Concordia> > concordiasMap)
-                               throw(ConcordiaException);
+    explicit SearcherController(boost::shared_ptr<boost::ptr_map<int, Concordia> > concordiasMap,
+                                boost::shared_ptr<LemmatizerFacade> lemmatizerFacade)
+                               throw(ConcordiaException);
     /*! Destructor.
     */
     virtual ~SearcherController();
@@ -40,7 +42,9 @@ private:
     std::string _substrUTF8(std::string source, int start, int length);
 
     boost::shared_ptr<boost::ptr_map<int, Concordia> > _concordiasMap;
-    
+
+    boost::shared_ptr<LemmatizerFacade> _lemmatizerFacade;
+
     UnitDAO _unitDAO;
 };
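A subtlety in concordiaPhraseSearch(): the pattern is swapped for its lemmatized form before _substrUTF8() is applied, but the intervals were computed by the caller against the original pattern. Whenever lemmatization changes a token's length, the character offsets drift. A hypothetical English illustration:

    original pattern:    "these books are heavy"    interval [0,11) = "these books"
    lemmatized pattern:  "this book be heavy"       interval [0,11) = "this book b"

If the phrase boundaries are meant to be exact, the intervals would have to be recomputed on the lemmatized text, or the short-pattern extraction performed before lemmatization.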
diff --git a/concordia-server/tm_dao.cpp b/concordia-server/tm_dao.cpp
index 1319907..4b2e2da 100644
--- a/concordia-server/tm_dao.cpp
+++ b/concordia-server/tm_dao.cpp
@@ -3,6 +3,7 @@
 #include "query_param.hpp"
 #include "string_param.hpp"
 #include "int_param.hpp"
+#include "bool_param.hpp"
 #include "int_array_param.hpp"
 
 #include "logger.hpp"
@@ -27,20 +28,25 @@ std::vector<int> TmDAO::getTmIds() {
     }
     connection.clearResult(dbResult);
     connection.endTransaction();
-    
+
     return result;
 }
 
 int TmDAO::addTm(const int sourceLangId, const int targetLangId, const std::string name) {
+    return addTm(sourceLangId, targetLangId, name, false);
+}
+
+int TmDAO::addTm(const int sourceLangId, const int targetLangId, const std::string name, bool lemmatized) {
     DBconnection connection;
     connection.startTransaction();
-    std::string query = "INSERT INTO tm(source_lang_id, target_lang_id, name) values($1::integer,$2::integer,$3::text) RETURNING id";
+    std::string query = "INSERT INTO tm(source_lang_id, target_lang_id, name, lemmatized) values($1::integer,$2::integer,$3::text,$4::bool) RETURNING id";
     std::vector<QueryParam*> params;
     params.push_back(new IntParam(sourceLangId));
     params.push_back(new IntParam(targetLangId));
     params.push_back(new StringParam(name));
-    
+    params.push_back(new BoolParam(lemmatized));
+
     PGresult * result = connection.execute(query, params);
     int newId = connection.getIntValue(result, 0, 0);
     connection.clearResult(result);
@@ -48,8 +54,25 @@ int TmDAO::addTm(const int sourceLangId, const int targetLangId, const std::string name, bool lemmatized) {
     BOOST_FOREACH (QueryParam * param, params) {
         delete param;
     }
-    
+
     return newId;
 }
 
+std::pair<bool, std::string> TmDAO::getTmInfo(int tmId) {
+    DBconnection connection;
+    connection.startTransaction();
+    std::string query = "select tm.id, tm.lemmatized, language.code from tm inner join language on language.id = tm.source_lang_id where tm.id = $1::integer;";
+    std::vector<QueryParam*> params;
+    params.push_back(new IntParam(tmId));
+    PGresult * dbResult = connection.execute(query, params);
+    bool lemmatized = connection.getBoolValue(dbResult, 0, 1);
+    std::string languageCode = connection.getStringValue(dbResult, 0, 2);
+    connection.clearResult(dbResult);
+    connection.endTransaction();
+    BOOST_FOREACH (QueryParam * param, params) {
+        delete param;
+    }
+
+    return std::pair<bool, std::string>(lemmatized, languageCode);
+}
diff --git a/concordia-server/tm_dao.hpp b/concordia-server/tm_dao.hpp
index e43822a..4db8097 100644
--- a/concordia-server/tm_dao.hpp
+++ b/concordia-server/tm_dao.hpp
@@ -3,6 +3,7 @@
 #include 
 #include 
+#include <utility>
 #include 
 
 #include "db_connection.hpp"
@@ -18,8 +19,12 @@ public:
     int addTm(const int sourceLangId, const int targetLangId, const std::string name);
 
+    int addTm(const int sourceLangId, const int targetLangId, const std::string name, bool lemmatized);
+
     std::vector<int> getTmIds();
 
+    std::pair<bool, std::string> getTmInfo(int tmId);
+
 private:
 
 };
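TmDAO::getTmInfo() assumes the tm row exists: for an unknown tmId the result set is empty and row 0 is read anyway. A guarded variant could bail out early (sketch; the method name is illustrative, the helpers are the DBconnection ones from this patch):

    // Sketch: refuse to read row 0 of an empty result.
    std::pair<bool, std::string> TmDAO::getTmInfoChecked(int tmId) {
        DBconnection connection;
        connection.startTransaction();
        std::string query = "SELECT tm.lemmatized, language.code FROM tm "
                            "INNER JOIN language ON language.id = tm.source_lang_id "
                            "WHERE tm.id = $1::integer;";
        std::vector<QueryParam*> params;
        params.push_back(new IntParam(tmId));
        PGresult * dbResult = connection.execute(query, params);
        bool hasRow = connection.getRowCount(dbResult) > 0;
        bool lemmatized = hasRow ? connection.getBoolValue(dbResult, 0, 0) : false;
        std::string languageCode = hasRow ? connection.getStringValue(dbResult, 0, 1) : "";
        connection.clearResult(dbResult);
        connection.endTransaction();
        BOOST_FOREACH (QueryParam * param, params) {
            delete param;
        }
        if (!hasRow) {
            throw ConcordiaException("getTmInfo: no such tm");
        }
        return std::pair<bool, std::string>(lemmatized, languageCode);
    }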
diff --git a/concordia-server/unit_dao.cpp b/concordia-server/unit_dao.cpp
index 16a1e92..6a1a68c 100644
--- a/concordia-server/unit_dao.cpp
+++ b/concordia-server/unit_dao.cpp
@@ -22,7 +22,7 @@ int UnitDAO::addSentence(
     const TokenizedSentence & sourceSentence,
     const std::string & targetSentence,
     const int tmId) {
-    
+
     DBconnection connection;
     connection.startTransaction();
     int newId = _addSingleSentence(connection, sourceSentence, targetSentence, tmId);
@@ -38,7 +38,7 @@ std::vector<int> UnitDAO::addSentences(
     std::vector<int> newIds;
     connection.startTransaction();
     int index = 0;
-    BOOST_FOREACH(const TokenizedSentence & sourceSentence, sourceSentences) {        
+    BOOST_FOREACH(const TokenizedSentence & sourceSentence, sourceSentences) {
         newIds.push_back(_addSingleSentence(connection, sourceSentence, targetSentences.at(index), tmId));
         index++;
     }
@@ -50,7 +50,7 @@ std::vector<int> UnitDAO::addAlignedSentences(
     const std::vector<TokenizedSentence> & sourceSentences,
     const std::vector<TokenizedSentence> & targetSentences,
     const std::vector<std::vector<std::vector<int> > > & allAlignments,
-    const int tmId) {
+    const int tmId) throw (ConcordiaException) {
 
     DBconnection connection;
     std::vector<int> newIds;
@@ -59,9 +59,9 @@ std::vector<int> UnitDAO::addAlignedSentences(
     for (int i=0; i< sourceSentences.size(); i++) {
         newIds.push_back(_addAlignedUnit(connection, sourceSentences.at(i), targetSentences.at(i), allAlignments.at(i), tmId));
     }
-    
+
     connection.endTransaction();
-    return newIds;    
+    return newIds;
 }
 
 std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & fragments) {
@@ -83,7 +83,7 @@ void UnitDAO::_getResultsFromFragments(
     std::vector<SimpleSearchResult> & results,
     const std::vector<MatchedPatternFragment> & fragments,
     const TokenizedSentence & tokenizedPattern) {
-    
+
     DBconnection connection;
     connection.startTransaction();
 
@@ -95,9 +95,9 @@ void UnitDAO::_getResultsFromFragments(
         matchedPatternStart = tokenizedPattern.getTokens().at(fragment.getStart()).getStart();
         matchedPatternEnd = tokenizedPattern.getTokens().at(fragment.getStart()+fragment.getMatchedLength() - 1).getEnd();
     }
-    
-    
-    
+
+
+
     std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
     std::vector<QueryParam*> params;
     params.push_back(new IntParam(2*fragment.getExampleOffset()+1));
@@ -116,7 +116,7 @@ void UnitDAO::_getResultsFromFragments(
         delete param;
     }
 
-    // now add all target fragments matched with this fragment    
+    // now add all target fragments matched with this fragment
     std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
     std::vector<QueryParam*> targetParams;
     targetParams.push_back(new IntParam(fragment.getExampleId()));
@@ -127,12 +127,12 @@ void UnitDAO::_getResultsFromFragments(
     int prevPos = -2;
     int currStart = -1;
     int currEnd = -1;
-    
+
     for (int i=0;i<connection.getRowCount(targetResult);i++) {
@@ -141,7 +141,7 @@ void UnitDAO::_getResultsFromFragments(
             currStart = targetStart;
         }
 
-        currEnd = targetEnd;        
+        currEnd = targetEnd;
         prevPos = targetPos;
     }
@@ -154,9 +154,9 @@ void UnitDAO::_getResultsFromFragments(
     BOOST_FOREACH (QueryParam * param, targetParams) {
         delete param;
     }
-    
+
     results.push_back(ssResult);
-    }    
+    }
 
     connection.endTransaction();
 }
@@ -181,25 +181,29 @@ int UnitDAO::_addSingleSentence(
     params.push_back(new StringParam(targetSentence));
     params.push_back(new IntParam(tmId));
     params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));
-    
+
     PGresult * result = connection.execute(query, params);
     int newId = connection.getIntValue(result, 0, 0);
     connection.clearResult(result);
     BOOST_FOREACH (QueryParam * param, params) {
         delete param;
     }
-    
+
     return newId;
 }
 
-int UnitDAO::_addAlignedUnit(
+int UnitDAO::_addAlignedUnit (
     DBconnection & connection,
     const TokenizedSentence & sourceSentence,
     const TokenizedSentence & targetSentence,
     const std::vector<std::vector<int> > & alignments,
-    const int tmId) {
-    
+    const int tmId) throw(ConcordiaException) {
+
+    if (sourceSentence.getTokens().size() != alignments.size()) {
+        throw ConcordiaException("The size of source sentence does not match the size of alignments array.");
+    }
+
     std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens, target_tokens) values($1::text,$2::text,$3::integer,$4,$5) RETURNING id";
     std::vector<QueryParam*> params;
     params.push_back(new StringParam(sourceSentence.getSentence()));
@@ -207,14 +211,14 @@ int UnitDAO::_addAlignedUnit (
     params.push_back(new IntParam(tmId));
     params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));
     params.push_back(new IntArrayParam(_getTokenPositions(targetSentence)));
-    
+
     PGresult * result = connection.execute(query, params);
     int newId = connection.getIntValue(result, 0, 0);
     connection.clearResult(result);
     BOOST_FOREACH (QueryParam * param, params) {
         delete param;
     }
-    
+
     // add alignments
     bool nonEmpty = false;
     std::stringstream alignmentsQuery;
@@ -230,10 +234,8 @@ int UnitDAO::_addAlignedUnit (
         query = alignmentsQuery.str();
         query = query.substr(0, query.length()-1);
         PGresult * result = connection.execute(query);
-        connection.clearResult(result);        
+        connection.clearResult(result);
     }
     return newId;
 }
-
-
diff --git a/concordia-server/unit_dao.hpp b/concordia-server/unit_dao.hpp
index 875fa0a..7159320 100644
--- a/concordia-server/unit_dao.hpp
+++ b/concordia-server/unit_dao.hpp
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "simple_search_result.hpp"
@@ -33,13 +34,13 @@ public:
         const std::vector<std::string> & sourceSentences,
         const std::vector<std::string> & targetSentences,
         const int tmId);
-    
+
     std::vector<int> addAlignedSentences(
         const std::vector<TokenizedSentence> & sourceSentences,
         const std::vector<TokenizedSentence> & targetSentences,
         const std::vector<std::vector<std::vector<int> > > & allAlignments,
-        const int tmId);
-    
+        const int tmId) throw (ConcordiaException);
+
     std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & fragments);
 
     CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
@@ -50,7 +51,7 @@ private:
         const TokenizedSentence & tokenizedPattern);
 
     std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
-    
+
     int _addSingleSentence(
         DBconnection & connection,
         const TokenizedSentence & sourceSentence,
@@ -62,7 +63,7 @@ private:
         const TokenizedSentence & sourceSentence,
         const TokenizedSentence & targetSentence,
         const std::vector<std::vector<int> > & alignments,
-        const int tmId);
+        const int tmId) throw(ConcordiaException);
 };
 
 #endif
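The source_tokens / target_tokens columns store token boundaries as one flat integer array of (start, end) pairs, which is why the query above reads source_tokens[2*offset+1] and source_tokens[2*offset+2]: PostgreSQL arrays are 1-based, so the pair for token number "offset" (0-based) sits at those two slots. An illustrative example:

    sentence:          "ala ma kota"
    source_tokens:     {0,3,4,6,7,11}
    token #1 ("ma"):   start = source_tokens[3] = 4, end = source_tokens[4] = 6

The same layout is what _getTokenPositions() produces when units are inserted.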
diff --git a/db/concordia_server.sql b/db/concordia_server.sql
index 125df22..c8a8a21 100644
--- a/db/concordia_server.sql
+++ b/db/concordia_server.sql
@@ -3,7 +3,8 @@ CREATE TABLE tm (
     id SERIAL PRIMARY KEY,
     source_lang_id integer,
     target_lang_id integer,
-    name varchar(40)
+    name varchar(40),
+    lemmatized bool DEFAULT false
 );
 
 DROP TABLE IF EXISTS language;
diff --git a/mgiza-aligner/LemmaGenSentenceLemmatizer/.vs/LemmaGenSentenceLemmatizer/v14/.suo b/mgiza-aligner/LemmaGenSentenceLemmatizer/.vs/LemmaGenSentenceLemmatizer/v14/.suo
index ef1ddee..b0abdc9 100644
Binary files a/mgiza-aligner/LemmaGenSentenceLemmatizer/.vs/LemmaGenSentenceLemmatizer/v14/.suo and b/mgiza-aligner/LemmaGenSentenceLemmatizer/.vs/LemmaGenSentenceLemmatizer/v14/.suo differ
diff --git a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/Program.cs b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/Program.cs
index 4c18358..f53b436 100644
--- a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/Program.cs
+++ b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/Program.cs
@@ -12,12 +12,20 @@ namespace LemmaGenSentenceLemmatizer
         {
             if (args.Length == 1)
             {
-                SentenceLemmatizer lemmatizer = new SentenceLemmatizer(args[0]);
-                string line = Console.ReadLine();
-                while (!string.IsNullOrEmpty(line))
+                try
                 {
-                    Console.WriteLine(lemmatizer.lemmatizeSentence(line));
-                    line = Console.ReadLine();
+                    SentenceLemmatizer lemmatizer = new SentenceLemmatizer(args[0]);
+                    string line = Console.ReadLine();
+                    while (line != null)
+                    {
+                        Console.WriteLine(lemmatizer.lemmatizeSentence(line));
+                        line = Console.ReadLine();
+                    }
+                }
+                catch (Exception ex)
+                {
+                    Console.WriteLine("Exception occurred: " + ex.Message);
                 }
diff --git a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/bin/Debug/LemmaGenSentenceLemmatizer.exe b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/bin/Debug/LemmaGenSentenceLemmatizer.exe
index 3f36045..d33ee58 100644
Binary files a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/bin/Debug/LemmaGenSentenceLemmatizer.exe and b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/bin/Debug/LemmaGenSentenceLemmatizer.exe differ
diff --git a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/bin/Debug/LemmaGenSentenceLemmatizer.pdb b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/bin/Debug/LemmaGenSentenceLemmatizer.pdb
index 7fafbe7..92ae405 100644
Binary files a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/bin/Debug/LemmaGenSentenceLemmatizer.pdb and b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/bin/Debug/LemmaGenSentenceLemmatizer.pdb differ
diff --git a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/DesignTimeResolveAssemblyReferences.cache b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/DesignTimeResolveAssemblyReferences.cache
new file mode 100644
index 0000000..5767a4e
Binary files /dev/null and b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/DesignTimeResolveAssemblyReferences.cache differ
diff --git a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.csprojResolveAssemblyReference.cache b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.csprojResolveAssemblyReference.cache
index b26246f..70c7d05 100644
Binary files a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.csprojResolveAssemblyReference.cache and b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.csprojResolveAssemblyReference.cache differ
diff --git a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.exe b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.exe
index 3f36045..d33ee58 100644
Binary files a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.exe and b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.exe differ
diff --git a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.pdb b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.pdb
index 7fafbe7..92ae405 100644
Binary files a/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.pdb and b/mgiza-aligner/LemmaGenSentenceLemmatizer/LemmaGenSentenceLemmatizer/obj/Debug/LemmaGenSentenceLemmatizer.pdb differ
diff --git a/mgiza-aligner/Makefile b/mgiza-aligner/Makefile
index 7e384c0..aebcaeb 100644
--- a/mgiza-aligner/Makefile
+++ b/mgiza-aligner/Makefile
@@ -1,10 +1,22 @@
 SRC_LANG=en
 TRG_LANG=pl
-CORPUS_NAME=europarl
+CORPUS_NAME=europarljrc
 
 all: corpora/$(CORPUS_NAME)/giza.cfg corpora/$(CORPUS_NAME)/src.low_trg.low.cooc corpora/$(CORPUS_NAME)/src.low_trg.low.snt corpora/$(CORPUS_NAME)/src.low.vcb corpora/$(CORPUS_NAME)/trg.low.vcb
 	mgiza/mgizapp/bin/mgiza corpora/$(CORPUS_NAME)/giza.cfg
-	cat corpora/$(CORPUS_NAME)/aligned*part* > corpora/$(CORPUS_NAME)/aligned.txt
+	cat corpora/$(CORPUS_NAME)/aligned*part* | ./sortGizaAlignments.py > corpora/$(CORPUS_NAME)/aligned.txt
+
+clean-intermediate-files:
+	rm -f corpora/$(CORPUS_NAME)/*.lem
+	rm -f corpora/$(CORPUS_NAME)/*.low
+	rm -f corpora/$(CORPUS_NAME)/*.classes
+	rm -f corpora/$(CORPUS_NAME)/*.classes.cats
+	rm -f corpora/$(CORPUS_NAME)/*.vcb
+	rm -f corpora/$(CORPUS_NAME)/*.snt
+	rm -f corpora/$(CORPUS_NAME)/*.cooc
+	rm -f corpora/$(CORPUS_NAME)/aligned*part*
+	rm -f corpora/$(CORPUS_NAME)/giza.cfg
+
 clean:
 	rm -f corpora/$(CORPUS_NAME)/*.tok
diff --git a/mgiza-aligner/clean-corpus-n.perl b/mgiza-aligner/clean-corpus-n.perl
new file mode 100755
index 0000000..76a09e5
--- /dev/null
+++ b/mgiza-aligner/clean-corpus-n.perl
@@ -0,0 +1,168 @@
+#!/usr/bin/env perl
+#
+# This file is part of moses. Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+
+# $Id: clean-corpus-n.perl 3633 2010-10-21 09:49:27Z phkoehn $
+use warnings;
+use strict;
+use Getopt::Long;
+my $help;
+my $lc = 0; # lowercase the corpus?
+my $ignore_ratio = 0;
+my $ignore_xml = 0;
+my $enc = "utf8"; # encoding of the input and output files
+    # set to anything else you wish, but I have not tested it yet
+my $max_word_length = 1000; # any segment with a word (or factor) exceeding this length in chars
+    # is discarded; motivated by symal.cpp, which has its own such parameter (hardcoded to 1000)
+    # and crashes if it encounters a word that exceeds it
+my $ratio = 9;
+
+GetOptions(
+  "help" => \$help,
+  "lowercase|lc" => \$lc,
+  "encoding=s" => \$enc,
+  "ratio=f" => \$ratio,
+  "ignore-ratio" => \$ignore_ratio,
+  "ignore-xml" => \$ignore_xml,
+  "max-word-length|mwl=s" => \$max_word_length
+) or exit(1);
+
+if (scalar(@ARGV) < 6 || $help) {
+    print "syntax: clean-corpus-n.perl [-ratio n] corpus l1 l2 clean-corpus min max [lines retained file]\n";
+    exit;
+}
+
+my $corpus = $ARGV[0];
+my $l1 = $ARGV[1];
+my $l2 = $ARGV[2];
+my $out = $ARGV[3];
+my $min = $ARGV[4];
+my $max = $ARGV[5];
+
+my $linesRetainedFile = "";
+if (scalar(@ARGV) > 6) {
+  $linesRetainedFile = $ARGV[6];
+  open(LINES_RETAINED,">$linesRetainedFile") or die "Can't write $linesRetainedFile";
+}
+
+print STDERR "clean-corpus.perl: processing $corpus.$l1 & .$l2 to $out, cutoff $min-$max, ratio $ratio\n";
+
+my $opn = undef;
+my $l1input = "$corpus.$l1";
+if (-e $l1input) {
+  $opn = $l1input;
+} elsif (-e $l1input.".gz") {
+  $opn = "gunzip -c $l1input.gz |";
+} else {
+  die "Error: $l1input does not exist";
+}
+open(F,$opn) or die "Can't open '$opn'";
+$opn = undef;
+my $l2input = "$corpus.$l2";
+if (-e $l2input) {
+  $opn = $l2input;
+} elsif (-e $l2input.".gz") {
+  $opn = "gunzip -c $l2input.gz |";
+} else {
+  die "Error: $l2input does not exist";
+}
+
+open(E,$opn) or die "Can't open '$opn'";
+
+open(FO,">$out.$l1") or die "Can't write $out.$l1";
+open(EO,">$out.$l2") or die "Can't write $out.$l2";
+
+# necessary for proper lowercasing
+my $binmode;
+if ($enc eq "utf8") {
+  $binmode = ":utf8";
+} else {
+  $binmode = ":encoding($enc)";
+}
+binmode(F, $binmode);
+binmode(E, $binmode);
+binmode(FO, $binmode);
+binmode(EO, $binmode);
+
+my $innr = 0;
+my $outnr = 0;
+my $factored_flag;
+while(my $f = <F>) {
+  $innr++;
+  print STDERR "." if $innr % 10000 == 0;
+  print STDERR "($innr)" if $innr % 100000 == 0;
+  my $e = <E>;
+  die "$corpus.$l2 is too short!" if !defined $e;
+  chomp($e);
+  chomp($f);
+  if ($innr == 1) {
+    $factored_flag = ($e =~ /\|/ || $f =~ /\|/);
+  }
+
+  #if lowercasing, lowercase
+  if ($lc) {
+    $e = lc($e);
+    $f = lc($f);
+  }
+
+  $e =~ s/\|//g unless $factored_flag;
+  $e =~ s/\s+/ /g;
+  $e =~ s/^ //;
+  $e =~ s/ $//;
+  $f =~ s/\|//g unless $factored_flag;
+  $f =~ s/\s+/ /g;
+  $f =~ s/^ //;
+  $f =~ s/ $//;
+  next if $f eq '';
+  next if $e eq '';
+
+  my $ec = &word_count($e);
+  my $fc = &word_count($f);
+  next if $ec > $max;
+  next if $fc > $max;
+  next if $ec < $min;
+  next if $fc < $min;
+  next if !$ignore_ratio && $ec/$fc > $ratio;
+  next if !$ignore_ratio && $fc/$ec > $ratio;
+  # Skip this segment if any factor is longer than $max_word_length
+  my $max_word_length_plus_one = $max_word_length + 1;
+  next if $e =~ /[^\s\|]{$max_word_length_plus_one}/;
+  next if $f =~ /[^\s\|]{$max_word_length_plus_one}/;
+ die "There is a blank factor in $corpus.$l1 on line $innr: $f" + if $f =~ /[ \|]\|/; + die "There is a blank factor in $corpus.$l2 on line $innr: $e" + if $e =~ /[ \|]\|/; + + $outnr++; + print FO $f."\n"; + print EO $e."\n"; + + if ($linesRetainedFile ne "") { + print LINES_RETAINED $innr."\n"; + } +} + +if ($linesRetainedFile ne "") { + close LINES_RETAINED; +} + +print STDERR "\n"; +my $e = ; +die "$corpus.$l2 is too long!" if defined $e; + +print STDERR "Input sentences: $innr Output sentences: $outnr\n"; + +sub word_count { + my ($line) = @_; + if ($ignore_xml) { + $line =~ s/<\S[^>]*\S>/ /g; + $line =~ s/\s+/ /g; + $line =~ s/^ //g; + $line =~ s/ $//g; + } + my @w = split(/ /,$line); + return scalar @w; +} diff --git a/mgiza-aligner/sortGizaAlignments.py b/mgiza-aligner/sortGizaAlignments.py new file mode 100755 index 0000000..e6762ca --- /dev/null +++ b/mgiza-aligner/sortGizaAlignments.py @@ -0,0 +1,26 @@ +#!/usr/bin/python3 + +import sys, re + +examples_dict = {} +p = re.compile("# Sentence pair \((\d+)\)") + +i = 0 +for line in sys.stdin: + line = line.strip() + if i % 3 == 0: + current_example = [line] + m = p.match(line) + if m: + current_key = int(m.group(1)) + else: + raise Exception("Wrong line: "+line) + elif i % 3 == 1: + current_example.append(line) + else: + current_example.append(line) + examples_dict[current_key] = current_example + i+=1 + +for key in sorted(examples_dict.keys()): + print ('\n'.join(examples_dict[key])) diff --git a/tests/addAlignedLemmatizedTM.py b/tests/addAlignedLemmatizedTM.py old mode 100644 new mode 100755 index ee8a246..aecc7dd --- a/tests/addAlignedLemmatizedTM.py +++ b/tests/addAlignedLemmatizedTM.py @@ -21,10 +21,15 @@ def file_len(fname): pass return i + 1 -def add_data(data): +def add_examples(examplesData): req = urllib2.Request(address) req.add_header('Content-Type', 'application/json') - json.loads(urllib2.urlopen(req, json.dumps(data)).read()) + response = json.loads(urllib2.urlopen(req, json.dumps(examplesData)).read()) + if response['status'] == 'error': + raise Exception(response['message']) + +if len(sys.argv) != 7: + raise Exception("wrong number of arguments") name = sys.argv[1] sourceFile = sys.argv[2] @@ -40,13 +45,14 @@ if (file_len(alignmentsFile) != 3*file_len(sourceFile)): raise Exception("alignments file is not exactly 3 times longer than source and target") -totalLines = file_len(sourceFile) +totalExamples = file_len(sourceFile) data = { 'operation': 'addTm', 'sourceLangId':sourceLangId, 'targetLangId':targetLangId, - 'name':name + 'name':name, + 'tmLemmatized':True } req = urllib2.Request(address) @@ -60,35 +66,35 @@ data = { 'tmId':tmId } -sentences = [] +examples = [] start = time.time() -with open(sourceFile) as sourceLines, open(targetFile) as targetLines, open(alignmentsFile) as alignmentsLines: +with open(sourceFile) as sf, open(targetFile) as tf, open(alignmentsFile) as af: + for lineNumber in range(totalExamples): + sourceSentence = sf.readline().strip() + targetSentence = tf.readline().strip() - lineNumber = 0 - for line in sourceLines: - line = line.strip() - if lineNumber % 3 == 1: - currSentence.append(line) - elif lineNumber % 3 == 2: - currSentence.append(line) - currSentence.reverse() - sentences.append(currSentence) - currSentence = [] - if len(sentences) >= BUFFER_SIZE: - data['sentences'] = sentences - add_data(data) - mark = time.time() - print "Added %d of %d sentences. 
diff --git a/tests/addAlignedLemmatizedTM.py b/tests/addAlignedLemmatizedTM.py
old mode 100644
new mode 100755
index ee8a246..aecc7dd
--- a/tests/addAlignedLemmatizedTM.py
+++ b/tests/addAlignedLemmatizedTM.py
@@ -21,10 +21,15 @@ def file_len(fname):
             pass
     return i + 1
 
-def add_data(data):
+def add_examples(examplesData):
     req = urllib2.Request(address)
     req.add_header('Content-Type', 'application/json')
-    json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+    response = json.loads(urllib2.urlopen(req, json.dumps(examplesData)).read())
+    if response['status'] == 'error':
+        raise Exception(response['message'])
+
+if len(sys.argv) != 7:
+    raise Exception("wrong number of arguments")
 
 name = sys.argv[1]
 sourceFile = sys.argv[2]
@@ -40,13 +45,14 @@ if (file_len(alignmentsFile) != 3*file_len(sourceFile)):
     raise Exception("alignments file is not exactly 3 times longer than source and target")
 
-totalLines = file_len(sourceFile)
+totalExamples = file_len(sourceFile)
 
 data = {
     'operation': 'addTm',
     'sourceLangId':sourceLangId,
     'targetLangId':targetLangId,
-    'name':name
+    'name':name,
+    'tmLemmatized':True
 }
 
 req = urllib2.Request(address)
@@ -60,35 +66,35 @@ data = {
     'tmId':tmId
 }
 
-sentences = []
+examples = []
 start = time.time()
-with open(sourceFile) as sourceLines, open(targetFile) as targetLines, open(alignmentsFile) as alignmentsLines:
+with open(sourceFile) as sf, open(targetFile) as tf, open(alignmentsFile) as af:
+    for lineNumber in range(totalExamples):
+        sourceSentence = sf.readline().strip()
+        targetSentence = tf.readline().strip()
 
-    lineNumber = 0
-    for line in sourceLines:
-        line = line.strip()
-        if lineNumber % 3 == 1:
-            currSentence.append(line)
-        elif lineNumber % 3 == 2:
-            currSentence.append(line)
-            currSentence.reverse()
-            sentences.append(currSentence)
-            currSentence = []
-            if len(sentences) >= BUFFER_SIZE:
-                data['sentences'] = sentences
-                add_data(data)
-                mark = time.time()
-                print "Added %d of %d sentences. Time elapsed: %.4f s, current speed: %.4f sentences/second" % ( (lineNumber+1)/3, totalLines/3, mark-start, (lineNumber+1)/(3*(mark-start)))
-                sentences = []
-        lineNumber += 1
+        # skip two lines of the alignments file; these are lemmatized and we need the raw sentences from the source and target files.
+        af.readline()
+        af.readline()
+
+        alignmentString = af.readline().strip()
+
+        examples.append([sourceSentence, targetSentence, alignmentString])
+
+        if len(examples) >= BUFFER_SIZE:
+            data['examples'] = examples
+            add_examples(data)
+            mark = time.time()
+            print "Added %d of %d lemmatized examples. Time elapsed: %.4f s, current speed: %.4f examples/second" % ( (lineNumber+1), totalExamples, mark-start, (lineNumber+1)/(mark-start))
+            examples = []
 
-if len(sentences) > 0:
-    data['sentences'] = sentences
-    add_data(data)
+if len(examples) > 0:
+    data['examples'] = examples
+    add_examples(data)
 
 end = time.time()
-print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1)/3, end-start, (lineNumber+1)/(3*(end-start)))
+print "Added all %d lemmatized sentences. Time elapsed: %.4f s, overall speed: %.4f sentences/second" % ((lineNumber+1), end-start, (lineNumber+1)/(end-start))
 
 print "Generating index..."
 start = time.time()
diff --git a/tests/addLemmatizedTM.sh b/tests/addLemmatizedTM.sh
new file mode 100755
index 0000000..ba53f7f
--- /dev/null
+++ b/tests/addLemmatizedTM.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+CORPUS_NAME="europarl_sample"
+SRC_LANG_ID=2
+TRG_LANG_ID=1
+
+./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src.tok $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg.tok $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned.txt
diff --git a/tests/addTm.py b/tests/addTm.py
index e3bfaa3..c36e791 100755
--- a/tests/addTm.py
+++ b/tests/addTm.py
@@ -16,7 +16,8 @@ data = {
     'operation': 'addTm',
     'sourceLangId':int(sys.argv[1]),
     'targetLangId':int(sys.argv[2]),
-    'name':sys.argv[3]
+    'name':sys.argv[3],
+    'tmLemmatized':bool(int(sys.argv[4]))
 }
 
 req = urllib2.Request(address)