diff --git a/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs b/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs index 9eee9bf..9e525d8 100644 --- a/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs +++ b/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs @@ -20,6 +20,7 @@ namespace LemmaGenSockets { lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish)); lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English)); + lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian)); } public LemmatizerListener() @@ -29,15 +30,24 @@ namespace LemmaGenSockets private string lemmatizeSentence(string languageCode, string sentence) { - string[] tokens = sentence.Split(null); - - string result = ""; - foreach (string token in tokens) + if (lemmatizersDict.ContainsKey(languageCode)) { - result += lemmatizeWord(languageCode, token) + " "; - } + string[] tokens = sentence.Split(null); - return result.Trim(); + string result = ""; + foreach (string token in tokens) + { + result += lemmatizeWord(languageCode, token) + " "; + } + + return result.Trim(); + } + else + { + //if we can not lemmatize, let's not do it at all + //primum non nocere + return sentence; + } } private string lemmatizeWord(string languageCode, string word) diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe index 8cb06db..aee4031 100644 Binary files a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe and b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe differ diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb index 1ab947c..b047314 100644 Binary files a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb and b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb differ diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache index 18ff92c..196d272 100644 Binary files a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache and b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache differ diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe index 8cb06db..aee4031 100644 Binary files a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe and b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe differ diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb index 1ab947c..b047314 100644 Binary files a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb and b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb differ diff --git a/concordia-server/concordia_server.cpp b/concordia-server/concordia_server.cpp index 9d45eaf..d33fba2 100644 --- a/concordia-server/concordia_server.cpp +++ b/concordia-server/concordia_server.cpp @@ -11,7 +11,6 @@ #include "json_generator.hpp" #include "config.hpp" #include "logger.hpp" -#include "socket_lemmatizer.hpp" #include "rapidjson/rapidjson.h" #include #include @@ -28,6 +27,8 @@ ConcordiaServer::ConcordiaServer(const std::string & configFilePath) } _indexController = boost::shared_ptr (new IndexController(_concordiasMap)); _searcherController = boost::shared_ptr (new SearcherController(_concordiasMap)); + + _lemmatizerFacade = boost::shared_ptr (new LemmatizerFacade()); } ConcordiaServer::~ConcordiaServer() { @@ -97,8 +98,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) { } else if (operation == "lemmatize") { std::string sentence = _getStringParameter(d, "sentence"); std::string languageCode = _getStringParameter(d, "languageCode"); - SocketLemmatizer lemmatizer; - std::string lemmatizedSentence = lemmatizer.lemmatizeSentence(languageCode, sentence); + std::string lemmatizedSentence = _lemmatizerFacade->lemmatizeSentence(languageCode, sentence); jsonWriter.StartObject(); jsonWriter.String("lemmatizedSentence"); jsonWriter.String(lemmatizedSentence.c_str()); diff --git a/concordia-server/concordia_server.hpp b/concordia-server/concordia_server.hpp index 4214694..c0e11c2 100644 --- a/concordia-server/concordia_server.hpp +++ b/concordia-server/concordia_server.hpp @@ -14,6 +14,8 @@ #include "tm_dao.hpp" #include "index_controller.hpp" #include "searcher_controller.hpp" +#include "lemmatizer_facade.hpp" + class ConcordiaServer { public: @@ -48,6 +50,8 @@ private: boost::shared_ptr _searcherController; + boost::shared_ptr _lemmatizerFacade; + }; #endif diff --git a/concordia-server/config.hpp.in b/concordia-server/config.hpp.in index 093b494..dac7ae6 100644 --- a/concordia-server/config.hpp.in +++ b/concordia-server/config.hpp.in @@ -31,3 +31,4 @@ #define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch" #define ADD_TM_OP "addTm" +#define LEMMATIZER_DELIMITER "@#@" diff --git a/concordia-server/lemmatizer_facade.cpp b/concordia-server/lemmatizer_facade.cpp new file mode 100644 index 0000000..f6adc31 --- /dev/null +++ b/concordia-server/lemmatizer_facade.cpp @@ -0,0 +1,30 @@ +#include "lemmatizer_facade.hpp" + + +LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) { + _lemmatizersMap = boost::ptr_map(); + + // todo: extract this to configuration, especially when new lemmatizers ConstMemberIterator + SocketLemmatizer * socketLemmatizer1 = new SocketLemmatizer(11000); + std::string plCode = "pl"; + std::string enCode = "en"; + std::string hrCode = "hr"; + + _lemmatizersMap.insert(plCode, socketLemmatizer1); + _lemmatizersMap.insert(enCode, socketLemmatizer1); + _lemmatizersMap.insert(hrCode, socketLemmatizer1); +} + +LemmatizerFacade::~LemmatizerFacade() { +} + +std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) { + + boost::ptr_map::iterator it = _lemmatizersMap.find(languageCode); + if (it != _lemmatizersMap.end()) { + return it->second->lemmatizeSentence(languageCode, sentence); + } else { + throw ConcordiaException("lemmatizer for language: "+languageCode+" not found."); + } + +} diff --git a/concordia-server/lemmatizer_facade.hpp b/concordia-server/lemmatizer_facade.hpp new file mode 100644 index 0000000..7eea156 --- /dev/null +++ b/concordia-server/lemmatizer_facade.hpp @@ -0,0 +1,25 @@ +#ifndef LEMMATIZER_FACADE_HDR +#define LEMMATIZER_FACADE_HDR + +#include "socket_lemmatizer.hpp" + +#include +#include +#include + + +class LemmatizerFacade { +public: + /*! Constructor. + */ + LemmatizerFacade() throw(ConcordiaException); + /*! Destructor. + */ + virtual ~LemmatizerFacade(); + + std::string lemmatizeSentence(std::string languageCode, std::string sentence); +private: + boost::ptr_map _lemmatizersMap; +}; + +#endif diff --git a/concordia-server/socket_lemmatizer.cpp b/concordia-server/socket_lemmatizer.cpp index f6170a8..0cd6aee 100644 --- a/concordia-server/socket_lemmatizer.cpp +++ b/concordia-server/socket_lemmatizer.cpp @@ -1,8 +1,10 @@ #include "socket_lemmatizer.hpp" -SocketLemmatizer::SocketLemmatizer() throw(ConcordiaException) : - _sock(-1) { - _connect("127.0.0.1" , 11000); +#include "config.hpp" +#include + +SocketLemmatizer::SocketLemmatizer(int port) throw(ConcordiaException) : + _port(port) { } SocketLemmatizer::~SocketLemmatizer() { @@ -11,17 +13,16 @@ SocketLemmatizer::~SocketLemmatizer() { /** Connect to a host on a certain port number */ -bool SocketLemmatizer::_connect(std::string address , int port) -{ - //create socket if it is not already created - if(_sock == -1) { - //Create socket - _sock = socket(AF_INET , SOCK_STREAM , 0); - if (_sock == -1) { - throw ConcordiaException("Could not create socket for the lemmatizer."); - } +bool SocketLemmatizer::_connect() { + + //Create socket + _sock = socket(AF_INET , SOCK_STREAM , 0); + if (_sock == -1) { + throw ConcordiaException("Could not create socket for the lemmatizer."); } + std::string address = "127.0.0.1"; + //setup address structure if(inet_addr(address.c_str()) == -1) { struct hostent *he; @@ -45,16 +46,21 @@ bool SocketLemmatizer::_connect(std::string address , int port) } _server.sin_family = AF_INET; - _server.sin_port = htons(port); + _server.sin_port = htons(_port); //Connect to remote server if (connect(_sock , (struct sockaddr *) & _server , sizeof(_server)) < 0) { - throw ConcordiaException("connect failed. Error"); + throw ConcordiaException("Connect failed. Error on address: "+address+":"+boost::lexical_cast(_port)); } return true; } +bool SocketLemmatizer::_disconnect() { + close(_sock); + _sock = -1; +} + /** Send data to the connected host */ @@ -84,7 +90,9 @@ std::string SocketLemmatizer::_receive(int size=512) } std::string SocketLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) { - _send_data(languageCode+sentence+"@#@"); + _connect(); + _send_data(languageCode+sentence+LEMMATIZER_DELIMITER); std::string reply = _receive(512); - return reply.substr(0,reply.find("@#@")); + _disconnect(); + return reply.substr(0,reply.find(LEMMATIZER_DELIMITER)); } diff --git a/concordia-server/socket_lemmatizer.hpp b/concordia-server/socket_lemmatizer.hpp index 7f20255..4f5e9e9 100644 --- a/concordia-server/socket_lemmatizer.hpp +++ b/concordia-server/socket_lemmatizer.hpp @@ -2,9 +2,10 @@ #define SOCKET_LEMMATIZER_HDR #include -#include //socket -#include //inet_addr -#include //hostent +#include //socket +#include //inet_addr +#include //hostent +#include #include @@ -13,23 +14,26 @@ class SocketLemmatizer { public: /*! Constructor. */ - SocketLemmatizer() throw(ConcordiaException); + explicit SocketLemmatizer(int port) throw(ConcordiaException); /*! Destructor. */ virtual ~SocketLemmatizer(); std::string lemmatizeSentence(std::string languageCode, std::string sentence); private: - bool _connect(std::string, int); + bool _connect(); + + bool _disconnect(); bool _send_data(std::string data); - std::string _receive(int); + std::string _receive(int size); + + int _port; int _sock; struct sockaddr_in _server; - }; #endif