lemmatizer facade

This commit is contained in:
Rafał Jaworski 2017-03-05 22:45:11 +01:00
parent 803ea2660f
commit e558cb05d8
13 changed files with 115 additions and 33 deletions

View File

@ -20,6 +20,7 @@ namespace LemmaGenSockets
{ {
lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish)); lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English)); lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian));
} }
public LemmatizerListener() public LemmatizerListener()
@ -29,15 +30,24 @@ namespace LemmaGenSockets
private string lemmatizeSentence(string languageCode, string sentence) private string lemmatizeSentence(string languageCode, string sentence)
{ {
string[] tokens = sentence.Split(null); if (lemmatizersDict.ContainsKey(languageCode))
string result = "";
foreach (string token in tokens)
{ {
result += lemmatizeWord(languageCode, token) + " "; string[] tokens = sentence.Split(null);
}
return result.Trim(); string result = "";
foreach (string token in tokens)
{
result += lemmatizeWord(languageCode, token) + " ";
}
return result.Trim();
}
else
{
//if we cannot lemmatize this language, return the sentence unchanged
//primum non nocere
return sentence;
}
} }
private string lemmatizeWord(string languageCode, string word) private string lemmatizeWord(string languageCode, string word)

View File

@ -11,7 +11,6 @@
#include "json_generator.hpp" #include "json_generator.hpp"
#include "config.hpp" #include "config.hpp"
#include "logger.hpp" #include "logger.hpp"
#include "socket_lemmatizer.hpp"
#include "rapidjson/rapidjson.h" #include "rapidjson/rapidjson.h"
#include <boost/foreach.hpp> #include <boost/foreach.hpp>
#include <boost/ptr_container/ptr_map.hpp> #include <boost/ptr_container/ptr_map.hpp>
@ -28,6 +27,8 @@ ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
} }
_indexController = boost::shared_ptr<IndexController> (new IndexController(_concordiasMap)); _indexController = boost::shared_ptr<IndexController> (new IndexController(_concordiasMap));
_searcherController = boost::shared_ptr<SearcherController> (new SearcherController(_concordiasMap)); _searcherController = boost::shared_ptr<SearcherController> (new SearcherController(_concordiasMap));
_lemmatizerFacade = boost::shared_ptr<LemmatizerFacade> (new LemmatizerFacade());
} }
ConcordiaServer::~ConcordiaServer() { ConcordiaServer::~ConcordiaServer() {
@ -97,8 +98,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
} else if (operation == "lemmatize") { } else if (operation == "lemmatize") {
std::string sentence = _getStringParameter(d, "sentence"); std::string sentence = _getStringParameter(d, "sentence");
std::string languageCode = _getStringParameter(d, "languageCode"); std::string languageCode = _getStringParameter(d, "languageCode");
SocketLemmatizer lemmatizer; std::string lemmatizedSentence = _lemmatizerFacade->lemmatizeSentence(languageCode, sentence);
std::string lemmatizedSentence = lemmatizer.lemmatizeSentence(languageCode, sentence);
jsonWriter.StartObject(); jsonWriter.StartObject();
jsonWriter.String("lemmatizedSentence"); jsonWriter.String("lemmatizedSentence");
jsonWriter.String(lemmatizedSentence.c_str()); jsonWriter.String(lemmatizedSentence.c_str());

View File

@ -14,6 +14,8 @@
#include "tm_dao.hpp" #include "tm_dao.hpp"
#include "index_controller.hpp" #include "index_controller.hpp"
#include "searcher_controller.hpp" #include "searcher_controller.hpp"
#include "lemmatizer_facade.hpp"
class ConcordiaServer { class ConcordiaServer {
public: public:
@ -48,6 +50,8 @@ private:
boost::shared_ptr<SearcherController> _searcherController; boost::shared_ptr<SearcherController> _searcherController;
boost::shared_ptr<LemmatizerFacade> _lemmatizerFacade;
}; };
#endif #endif

View File

@ -31,3 +31,4 @@
#define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch" #define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"
#define ADD_TM_OP "addTm" #define ADD_TM_OP "addTm"
#define LEMMATIZER_DELIMITER "@#@"

View File

@ -0,0 +1,30 @@
#include "lemmatizer_facade.hpp"
/**
    Builds the facade and registers one socket lemmatizer per supported
    language code ("pl", "en", "hr"), all talking to port 11000.

    Each language gets its OWN SocketLemmatizer instance: boost::ptr_map
    owns (and deletes) every pointer inserted into it, so inserting the same
    raw pointer under several keys would delete it several times when the
    map is destroyed.

    Throws ConcordiaException if a SocketLemmatizer cannot be constructed.
*/
LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
    // TODO: extract the port and the language list to configuration,
    // especially when new lemmatizers are added.
    const int lemmatizerPort = 11000;
    const char * const languageCodes[] = {"pl", "en", "hr"};
    const std::size_t languageCount = sizeof(languageCodes) / sizeof(languageCodes[0]);

    for (std::size_t i = 0; i < languageCount; ++i) {
        // ptr_map::insert takes the key by non-const reference,
        // hence the named (non-temporary) std::string.
        std::string languageCode(languageCodes[i]);
        _lemmatizersMap.insert(languageCode, new SocketLemmatizer(lemmatizerPort));
    }
}
// Destructor: performs no explicit cleanup; the _lemmatizersMap member
// is destroyed automatically together with the facade.
LemmatizerFacade::~LemmatizerFacade() {
}
std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) {
boost::ptr_map<std::string,SocketLemmatizer>::iterator it = _lemmatizersMap.find(languageCode);
if (it != _lemmatizersMap.end()) {
return it->second->lemmatizeSentence(languageCode, sentence);
} else {
throw ConcordiaException("lemmatizer for language: "+languageCode+" not found.");
}
}

View File

@ -0,0 +1,25 @@
#ifndef LEMMATIZER_FACADE_HDR
#define LEMMATIZER_FACADE_HDR
#include "socket_lemmatizer.hpp"
#include <string>
#include <concordia/concordia_exception.hpp>
#include <boost/ptr_container/ptr_map.hpp>
/*! Single entry point for sentence lemmatization. Keeps a map from
language code to SocketLemmatizer and forwards each request to the
lemmatizer stored under the requested code.
*/
class LemmatizerFacade {
public:
/*! Constructor. Populates the language-code -> lemmatizer map.
\throws ConcordiaException (per the exception specification)
*/
LemmatizerFacade() throw(ConcordiaException);
/*! Destructor.
*/
virtual ~LemmatizerFacade();
/*! Lemmatizes a sentence using the lemmatizer registered for a language.
\param languageCode key selecting the lemmatizer
\param sentence text to lemmatize
\returns the result produced by the selected lemmatizer
\throws ConcordiaException if no lemmatizer is registered for languageCode
*/
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
private:
// Lemmatizers keyed by language code; boost::ptr_map owns the stored pointers.
boost::ptr_map<std::string,SocketLemmatizer> _lemmatizersMap;
};
#endif

View File

@ -1,8 +1,10 @@
#include "socket_lemmatizer.hpp" #include "socket_lemmatizer.hpp"
SocketLemmatizer::SocketLemmatizer() throw(ConcordiaException) : #include "config.hpp"
_sock(-1) { #include <boost/lexical_cast.hpp>
_connect("127.0.0.1" , 11000);
SocketLemmatizer::SocketLemmatizer(int port) throw(ConcordiaException) :
_port(port) {
} }
SocketLemmatizer::~SocketLemmatizer() { SocketLemmatizer::~SocketLemmatizer() {
@ -11,17 +13,16 @@ SocketLemmatizer::~SocketLemmatizer() {
/** /**
Connect to a host on a certain port number Connect to a host on a certain port number
*/ */
bool SocketLemmatizer::_connect(std::string address , int port) bool SocketLemmatizer::_connect() {
{
//create socket if it is not already created //Create socket
if(_sock == -1) { _sock = socket(AF_INET , SOCK_STREAM , 0);
//Create socket if (_sock == -1) {
_sock = socket(AF_INET , SOCK_STREAM , 0); throw ConcordiaException("Could not create socket for the lemmatizer.");
if (_sock == -1) {
throw ConcordiaException("Could not create socket for the lemmatizer.");
}
} }
std::string address = "127.0.0.1";
//setup address structure //setup address structure
if(inet_addr(address.c_str()) == -1) { if(inet_addr(address.c_str()) == -1) {
struct hostent *he; struct hostent *he;
@ -45,16 +46,21 @@ bool SocketLemmatizer::_connect(std::string address , int port)
} }
_server.sin_family = AF_INET; _server.sin_family = AF_INET;
_server.sin_port = htons(port); _server.sin_port = htons(_port);
//Connect to remote server //Connect to remote server
if (connect(_sock , (struct sockaddr *) & _server , sizeof(_server)) < 0) { if (connect(_sock , (struct sockaddr *) & _server , sizeof(_server)) < 0) {
throw ConcordiaException("connect failed. Error"); throw ConcordiaException("Connect failed. Error on address: "+address+":"+boost::lexical_cast<std::string>(_port));
} }
return true; return true;
} }
/**
    Close the socket and mark it as not connected.

    Returns true when close() succeeded, false otherwise. The descriptor is
    reset to -1 in both cases so a stale descriptor is never reused.
    (Previously the function fell off the end without returning a value,
    which is undefined behaviour for a function declared to return bool.)
*/
bool SocketLemmatizer::_disconnect() {
    const bool closed = (close(_sock) == 0);
    _sock = -1;
    return closed;
}
/** /**
Send data to the connected host Send data to the connected host
*/ */
@ -84,7 +90,9 @@ std::string SocketLemmatizer::_receive(int size=512)
} }
std::string SocketLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) { std::string SocketLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) {
_send_data(languageCode+sentence+"@#@"); _connect();
_send_data(languageCode+sentence+LEMMATIZER_DELIMITER);
std::string reply = _receive(512); std::string reply = _receive(512);
return reply.substr(0,reply.find("@#@")); _disconnect();
return reply.substr(0,reply.find(LEMMATIZER_DELIMITER));
} }

View File

@ -2,9 +2,10 @@
#define SOCKET_LEMMATIZER_HDR #define SOCKET_LEMMATIZER_HDR
#include <string> #include <string>
#include<sys/socket.h> //socket #include <sys/socket.h> //socket
#include<arpa/inet.h> //inet_addr #include <arpa/inet.h> //inet_addr
#include<netdb.h> //hostent #include <netdb.h> //hostent
#include <unistd.h>
#include <concordia/concordia_exception.hpp> #include <concordia/concordia_exception.hpp>
@ -13,23 +14,26 @@ class SocketLemmatizer {
public: public:
/*! Constructor. /*! Constructor.
*/ */
SocketLemmatizer() throw(ConcordiaException); explicit SocketLemmatizer(int port) throw(ConcordiaException);
/*! Destructor. /*! Destructor.
*/ */
virtual ~SocketLemmatizer(); virtual ~SocketLemmatizer();
std::string lemmatizeSentence(std::string languageCode, std::string sentence); std::string lemmatizeSentence(std::string languageCode, std::string sentence);
private: private:
bool _connect(std::string, int); bool _connect();
bool _disconnect();
bool _send_data(std::string data); bool _send_data(std::string data);
std::string _receive(int); std::string _receive(int size);
int _port;
int _sock; int _sock;
struct sockaddr_in _server; struct sockaddr_in _server;
}; };
#endif #endif