lemmatizer facade

This commit is contained in:
Rafał Jaworski 2017-03-05 22:45:11 +01:00
parent 803ea2660f
commit e558cb05d8
13 changed files with 115 additions and 33 deletions

View File

@ -20,6 +20,7 @@ namespace LemmaGenSockets
{
lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian));
}
public LemmatizerListener()
@ -29,15 +30,24 @@ namespace LemmaGenSockets
/// <summary>
/// Lemmatizes a sentence token by token.
/// </summary>
/// <param name="languageCode">Key into lemmatizersDict selecting the lemmatizer.</param>
/// <param name="sentence">Sentence to lemmatize; it is split on whitespace.</param>
/// <returns>
/// The lemmatized tokens joined with single spaces, or the sentence unchanged
/// when no lemmatizer is registered for the language code.
/// </returns>
private string lemmatizeSentence(string languageCode, string sentence)
{
    if (!lemmatizersDict.ContainsKey(languageCode))
    {
        //if we can not lemmatize, let's not do it at all
        //primum non nocere
        return sentence;
    }

    // Split(null) splits on any whitespace character.
    string[] tokens = sentence.Split(null);
    string result = "";
    foreach (string token in tokens)
    {
        result += lemmatizeWord(languageCode, token) + " ";
    }
    // Drop the trailing separator appended after the last token.
    return result.Trim();
}
private string lemmatizeWord(string languageCode, string word)

View File

@ -11,7 +11,6 @@
#include "json_generator.hpp"
#include "config.hpp"
#include "logger.hpp"
#include "socket_lemmatizer.hpp"
#include "rapidjson/rapidjson.h"
#include <boost/foreach.hpp>
#include <boost/ptr_container/ptr_map.hpp>
@ -28,6 +27,8 @@ ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
}
_indexController = boost::shared_ptr<IndexController> (new IndexController(_concordiasMap));
_searcherController = boost::shared_ptr<SearcherController> (new SearcherController(_concordiasMap));
_lemmatizerFacade = boost::shared_ptr<LemmatizerFacade> (new LemmatizerFacade());
}
ConcordiaServer::~ConcordiaServer() {
@ -97,8 +98,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
} else if (operation == "lemmatize") {
std::string sentence = _getStringParameter(d, "sentence");
std::string languageCode = _getStringParameter(d, "languageCode");
SocketLemmatizer lemmatizer;
std::string lemmatizedSentence = lemmatizer.lemmatizeSentence(languageCode, sentence);
std::string lemmatizedSentence = _lemmatizerFacade->lemmatizeSentence(languageCode, sentence);
jsonWriter.StartObject();
jsonWriter.String("lemmatizedSentence");
jsonWriter.String(lemmatizedSentence.c_str());

View File

@ -14,6 +14,8 @@
#include "tm_dao.hpp"
#include "index_controller.hpp"
#include "searcher_controller.hpp"
#include "lemmatizer_facade.hpp"
class ConcordiaServer {
public:
@ -48,6 +50,8 @@ private:
boost::shared_ptr<SearcherController> _searcherController;
boost::shared_ptr<LemmatizerFacade> _lemmatizerFacade;
};
#endif

View File

@ -31,3 +31,4 @@
#define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"
#define ADD_TM_OP "addTm"
#define LEMMATIZER_DELIMITER "@#@"

View File

@ -0,0 +1,30 @@
#include "lemmatizer_facade.hpp"
/**
    Registers a socket lemmatizer for every supported language code
    ("pl", "en" and "hr"), all of them talking to port 11000.

    Each language gets its OWN SocketLemmatizer instance: boost::ptr_map takes
    ownership of every pointer inserted into it, so inserting one object under
    several keys would make the map delete that object several times.
*/
LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
    // todo: extract this to configuration, especially when new lemmatizers are added
    const int lemmatizerPort = 11000;
    const char * const languageCodes[] = {"pl", "en", "hr"};
    const std::size_t codesCount = sizeof(languageCodes) / sizeof(languageCodes[0]);

    for (std::size_t i = 0; i < codesCount; ++i) {
        // ptr_map::insert takes the key by non-const reference, hence the named local.
        std::string languageCode(languageCodes[i]);
        _lemmatizersMap.insert(languageCode, new SocketLemmatizer(lemmatizerPort));
    }
}
// Nothing to release by hand: the ptr_map member deletes the lemmatizers it owns.
LemmatizerFacade::~LemmatizerFacade() {}
std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) {
boost::ptr_map<std::string,SocketLemmatizer>::iterator it = _lemmatizersMap.find(languageCode);
if (it != _lemmatizersMap.end()) {
return it->second->lemmatizeSentence(languageCode, sentence);
} else {
throw ConcordiaException("lemmatizer for language: "+languageCode+" not found.");
}
}

View File

@ -0,0 +1,25 @@
#ifndef LEMMATIZER_FACADE_HDR
#define LEMMATIZER_FACADE_HDR
#include "socket_lemmatizer.hpp"
#include <string>
#include <concordia/concordia_exception.hpp>
#include <boost/ptr_container/ptr_map.hpp>
/*! Single entry point for lemmatization: keeps one SocketLemmatizer per
language code and forwards each request to the matching one.
*/
class LemmatizerFacade {
public:
/*! Constructor. Registers the lemmatizers for the supported language codes.
*/
LemmatizerFacade() throw(ConcordiaException);
/*! Destructor.
*/
virtual ~LemmatizerFacade();
/*! Lemmatizes a sentence using the lemmatizer registered for the language.
\param languageCode code of the language the sentence is written in
\param sentence sentence to lemmatize
\returns the result produced by the selected lemmatizer
\throws ConcordiaException if no lemmatizer is registered for languageCode
*/
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
private:
// Language code -> lemmatizer; boost::ptr_map owns the stored objects.
boost::ptr_map<std::string,SocketLemmatizer> _lemmatizersMap;
};
#endif

View File

@ -1,8 +1,10 @@
#include "socket_lemmatizer.hpp"
SocketLemmatizer::SocketLemmatizer() throw(ConcordiaException) :
_sock(-1) {
_connect("127.0.0.1" , 11000);
#include "config.hpp"
#include <boost/lexical_cast.hpp>
/**
    Remembers the port of the lemmatizer server. No connection is opened here.

    _sock starts at -1 so that it never holds an indeterminate value before
    the first connection is made.
*/
SocketLemmatizer::SocketLemmatizer(int port) throw(ConcordiaException) :
    _port(port),
    _sock(-1) {
}
SocketLemmatizer::~SocketLemmatizer() {
@ -11,17 +13,16 @@ SocketLemmatizer::~SocketLemmatizer() {
/**
Open a TCP connection to the lemmatizer server at 127.0.0.1 on the port given at construction
*/
bool SocketLemmatizer::_connect(std::string address , int port)
{
//create socket if it is not already created
if(_sock == -1) {
//Create socket
_sock = socket(AF_INET , SOCK_STREAM , 0);
if (_sock == -1) {
throw ConcordiaException("Could not create socket for the lemmatizer.");
}
bool SocketLemmatizer::_connect() {
//Create socket
_sock = socket(AF_INET , SOCK_STREAM , 0);
if (_sock == -1) {
throw ConcordiaException("Could not create socket for the lemmatizer.");
}
std::string address = "127.0.0.1";
//setup address structure
if(inet_addr(address.c_str()) == -1) {
struct hostent *he;
@ -45,16 +46,21 @@ bool SocketLemmatizer::_connect(std::string address , int port)
}
_server.sin_family = AF_INET;
_server.sin_port = htons(port);
_server.sin_port = htons(_port);
//Connect to remote server
if (connect(_sock , (struct sockaddr *) & _server , sizeof(_server)) < 0) {
throw ConcordiaException("connect failed. Error");
throw ConcordiaException("Connect failed. Error on address: "+address+":"+boost::lexical_cast<std::string>(_port));
}
return true;
}
/**
    Close the socket, if one is open, and mark the lemmatizer as disconnected.

    \returns true when there was nothing to close or close() succeeded,
             false when close() reported an error
*/
bool SocketLemmatizer::_disconnect() {
    if (_sock == -1) {
        return true;  // not connected, nothing to close
    }
    const bool closed = (close(_sock) == 0);
    // The descriptor must not be reused even if close() failed.
    _sock = -1;
    return closed;
}
/**
Send data to the connected host
*/
@ -84,7 +90,9 @@ std::string SocketLemmatizer::_receive(int size=512)
}
/**
    Lemmatize a sentence over a fresh connection to the lemmatizer server.

    The request is languageCode + sentence + LEMMATIZER_DELIMITER. The reply is
    cut at the first LEMMATIZER_DELIMITER; if the delimiter is missing, the
    whole reply is returned.

    The socket is closed on every path, including when connecting, sending or
    receiving throws, so a failed request does not leak the descriptor.

    NOTE(review): only 512 bytes are requested from _receive; presumably longer
    replies get truncated - confirm against _receive.
*/
std::string SocketLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) {
    std::string reply;
    try {
        _connect();
        _send_data(languageCode+sentence+LEMMATIZER_DELIMITER);
        reply = _receive(512);
    } catch (...) {
        _disconnect();
        throw;
    }
    _disconnect();
    return reply.substr(0,reply.find(LEMMATIZER_DELIMITER));
}

View File

@ -2,9 +2,10 @@
#define SOCKET_LEMMATIZER_HDR
#include <string>
#include<sys/socket.h> //socket
#include<arpa/inet.h> //inet_addr
#include<netdb.h> //hostent
#include <sys/socket.h> //socket
#include <arpa/inet.h> //inet_addr
#include <netdb.h> //hostent
#include <unistd.h>
#include <concordia/concordia_exception.hpp>
@ -13,23 +14,26 @@ class SocketLemmatizer {
public:
/*! Constructor.
*/
SocketLemmatizer() throw(ConcordiaException);
explicit SocketLemmatizer(int port) throw(ConcordiaException);
/*! Destructor.
*/
virtual ~SocketLemmatizer();
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
private:
bool _connect(std::string, int);
bool _connect();
bool _disconnect();
bool _send_data(std::string data);
std::string _receive(int);
std::string _receive(int size);
int _port;
int _sock;
struct sockaddr_in _server;
};
#endif