lemmatizer facade
This commit is contained in:
parent
803ea2660f
commit
e558cb05d8
@ -20,6 +20,7 @@ namespace LemmaGenSockets
|
|||||||
{
|
{
|
||||||
lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
|
lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
|
||||||
lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
|
lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
|
||||||
|
lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian));
|
||||||
}
|
}
|
||||||
|
|
||||||
public LemmatizerListener()
|
public LemmatizerListener()
|
||||||
@ -29,15 +30,24 @@ namespace LemmaGenSockets
|
|||||||
|
|
||||||
private string lemmatizeSentence(string languageCode, string sentence)
|
private string lemmatizeSentence(string languageCode, string sentence)
|
||||||
{
|
{
|
||||||
string[] tokens = sentence.Split(null);
|
if (lemmatizersDict.ContainsKey(languageCode))
|
||||||
|
|
||||||
string result = "";
|
|
||||||
foreach (string token in tokens)
|
|
||||||
{
|
{
|
||||||
result += lemmatizeWord(languageCode, token) + " ";
|
string[] tokens = sentence.Split(null);
|
||||||
}
|
|
||||||
|
|
||||||
return result.Trim();
|
string result = "";
|
||||||
|
foreach (string token in tokens)
|
||||||
|
{
|
||||||
|
result += lemmatizeWord(languageCode, token) + " ";
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.Trim();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//if we can not lemmatize, let's not do it at all
|
||||||
|
//primum non nocere
|
||||||
|
return sentence;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private string lemmatizeWord(string languageCode, string word)
|
private string lemmatizeWord(string languageCode, string word)
|
||||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -11,7 +11,6 @@
|
|||||||
#include "json_generator.hpp"
|
#include "json_generator.hpp"
|
||||||
#include "config.hpp"
|
#include "config.hpp"
|
||||||
#include "logger.hpp"
|
#include "logger.hpp"
|
||||||
#include "socket_lemmatizer.hpp"
|
|
||||||
#include "rapidjson/rapidjson.h"
|
#include "rapidjson/rapidjson.h"
|
||||||
#include <boost/foreach.hpp>
|
#include <boost/foreach.hpp>
|
||||||
#include <boost/ptr_container/ptr_map.hpp>
|
#include <boost/ptr_container/ptr_map.hpp>
|
||||||
@ -28,6 +27,8 @@ ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
|
|||||||
}
|
}
|
||||||
_indexController = boost::shared_ptr<IndexController> (new IndexController(_concordiasMap));
|
_indexController = boost::shared_ptr<IndexController> (new IndexController(_concordiasMap));
|
||||||
_searcherController = boost::shared_ptr<SearcherController> (new SearcherController(_concordiasMap));
|
_searcherController = boost::shared_ptr<SearcherController> (new SearcherController(_concordiasMap));
|
||||||
|
|
||||||
|
_lemmatizerFacade = boost::shared_ptr<LemmatizerFacade> (new LemmatizerFacade());
|
||||||
}
|
}
|
||||||
|
|
||||||
ConcordiaServer::~ConcordiaServer() {
|
ConcordiaServer::~ConcordiaServer() {
|
||||||
@ -97,8 +98,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
|||||||
} else if (operation == "lemmatize") {
|
} else if (operation == "lemmatize") {
|
||||||
std::string sentence = _getStringParameter(d, "sentence");
|
std::string sentence = _getStringParameter(d, "sentence");
|
||||||
std::string languageCode = _getStringParameter(d, "languageCode");
|
std::string languageCode = _getStringParameter(d, "languageCode");
|
||||||
SocketLemmatizer lemmatizer;
|
std::string lemmatizedSentence = _lemmatizerFacade->lemmatizeSentence(languageCode, sentence);
|
||||||
std::string lemmatizedSentence = lemmatizer.lemmatizeSentence(languageCode, sentence);
|
|
||||||
jsonWriter.StartObject();
|
jsonWriter.StartObject();
|
||||||
jsonWriter.String("lemmatizedSentence");
|
jsonWriter.String("lemmatizedSentence");
|
||||||
jsonWriter.String(lemmatizedSentence.c_str());
|
jsonWriter.String(lemmatizedSentence.c_str());
|
||||||
|
@ -14,6 +14,8 @@
|
|||||||
#include "tm_dao.hpp"
|
#include "tm_dao.hpp"
|
||||||
#include "index_controller.hpp"
|
#include "index_controller.hpp"
|
||||||
#include "searcher_controller.hpp"
|
#include "searcher_controller.hpp"
|
||||||
|
#include "lemmatizer_facade.hpp"
|
||||||
|
|
||||||
|
|
||||||
class ConcordiaServer {
|
class ConcordiaServer {
|
||||||
public:
|
public:
|
||||||
@ -48,6 +50,8 @@ private:
|
|||||||
|
|
||||||
boost::shared_ptr<SearcherController> _searcherController;
|
boost::shared_ptr<SearcherController> _searcherController;
|
||||||
|
|
||||||
|
boost::shared_ptr<LemmatizerFacade> _lemmatizerFacade;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -31,3 +31,4 @@
|
|||||||
#define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"
|
#define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"
|
||||||
#define ADD_TM_OP "addTm"
|
#define ADD_TM_OP "addTm"
|
||||||
|
|
||||||
|
#define LEMMATIZER_DELIMITER "@#@"
|
||||||
|
30
concordia-server/lemmatizer_facade.cpp
Normal file
30
concordia-server/lemmatizer_facade.cpp
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#include "lemmatizer_facade.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Registers a socket-based lemmatizer for each supported language code
 * ("pl", "en" and "hr"). All of them are configured with port 11000.
 */
LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
    // todo: extract this to configuration, especially when new lemmatizers are added
    const int lemmatizerPort = 11000;

    // boost::ptr_map::insert takes the key by non-const reference, so the
    // language codes must be named variables rather than string literals.
    std::string plCode = "pl";
    std::string enCode = "en";
    std::string hrCode = "hr";

    // _lemmatizersMap owns every pointer inserted into it and deletes it on
    // destruction, so each key gets its own SocketLemmatizer. Registering a
    // single instance under all three keys would delete the same object
    // three times when the facade is destroyed.
    _lemmatizersMap.insert(plCode, new SocketLemmatizer(lemmatizerPort));
    _lemmatizersMap.insert(enCode, new SocketLemmatizer(lemmatizerPort));
    _lemmatizersMap.insert(hrCode, new SocketLemmatizer(lemmatizerPort));
}
|
||||||
|
|
||||||
|
/**
 * Destructor. Performs no explicit work of its own.
 */
LemmatizerFacade::~LemmatizerFacade() {}
|
||||||
|
|
||||||
|
std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) {
|
||||||
|
|
||||||
|
boost::ptr_map<std::string,SocketLemmatizer>::iterator it = _lemmatizersMap.find(languageCode);
|
||||||
|
if (it != _lemmatizersMap.end()) {
|
||||||
|
return it->second->lemmatizeSentence(languageCode, sentence);
|
||||||
|
} else {
|
||||||
|
throw ConcordiaException("lemmatizer for language: "+languageCode+" not found.");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
25
concordia-server/lemmatizer_facade.hpp
Normal file
25
concordia-server/lemmatizer_facade.hpp
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#ifndef LEMMATIZER_FACADE_HDR
|
||||||
|
#define LEMMATIZER_FACADE_HDR
|
||||||
|
|
||||||
|
#include "socket_lemmatizer.hpp"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <concordia/concordia_exception.hpp>
|
||||||
|
#include <boost/ptr_container/ptr_map.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
class LemmatizerFacade {
|
||||||
|
public:
|
||||||
|
/*! Constructor.
|
||||||
|
*/
|
||||||
|
LemmatizerFacade() throw(ConcordiaException);
|
||||||
|
/*! Destructor.
|
||||||
|
*/
|
||||||
|
virtual ~LemmatizerFacade();
|
||||||
|
|
||||||
|
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
|
||||||
|
private:
|
||||||
|
boost::ptr_map<std::string,SocketLemmatizer> _lemmatizersMap;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -1,8 +1,10 @@
|
|||||||
#include "socket_lemmatizer.hpp"
|
#include "socket_lemmatizer.hpp"
|
||||||
|
|
||||||
SocketLemmatizer::SocketLemmatizer() throw(ConcordiaException) :
|
#include "config.hpp"
|
||||||
_sock(-1) {
|
#include <boost/lexical_cast.hpp>
|
||||||
_connect("127.0.0.1" , 11000);
|
|
||||||
|
SocketLemmatizer::SocketLemmatizer(int port) throw(ConcordiaException) :
|
||||||
|
_port(port) {
|
||||||
}
|
}
|
||||||
|
|
||||||
SocketLemmatizer::~SocketLemmatizer() {
|
SocketLemmatizer::~SocketLemmatizer() {
|
||||||
@ -11,17 +13,16 @@ SocketLemmatizer::~SocketLemmatizer() {
|
|||||||
/**
|
/**
|
||||||
Connect to a host on a certain port number
|
Connect to a host on a certain port number
|
||||||
*/
|
*/
|
||||||
bool SocketLemmatizer::_connect(std::string address , int port)
|
bool SocketLemmatizer::_connect() {
|
||||||
{
|
|
||||||
//create socket if it is not already created
|
//Create socket
|
||||||
if(_sock == -1) {
|
_sock = socket(AF_INET , SOCK_STREAM , 0);
|
||||||
//Create socket
|
if (_sock == -1) {
|
||||||
_sock = socket(AF_INET , SOCK_STREAM , 0);
|
throw ConcordiaException("Could not create socket for the lemmatizer.");
|
||||||
if (_sock == -1) {
|
|
||||||
throw ConcordiaException("Could not create socket for the lemmatizer.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string address = "127.0.0.1";
|
||||||
|
|
||||||
//setup address structure
|
//setup address structure
|
||||||
if(inet_addr(address.c_str()) == -1) {
|
if(inet_addr(address.c_str()) == -1) {
|
||||||
struct hostent *he;
|
struct hostent *he;
|
||||||
@ -45,16 +46,21 @@ bool SocketLemmatizer::_connect(std::string address , int port)
|
|||||||
}
|
}
|
||||||
|
|
||||||
_server.sin_family = AF_INET;
|
_server.sin_family = AF_INET;
|
||||||
_server.sin_port = htons(port);
|
_server.sin_port = htons(_port);
|
||||||
|
|
||||||
//Connect to remote server
|
//Connect to remote server
|
||||||
if (connect(_sock , (struct sockaddr *) & _server , sizeof(_server)) < 0) {
|
if (connect(_sock , (struct sockaddr *) & _server , sizeof(_server)) < 0) {
|
||||||
throw ConcordiaException("connect failed. Error");
|
throw ConcordiaException("Connect failed. Error on address: "+address+":"+boost::lexical_cast<std::string>(_port));
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
    Close the socket and mark it as not created.

    \returns true if close() reported success, false otherwise
*/
bool SocketLemmatizer::_disconnect() {
    // A bool function must return a value on every path; flowing off the
    // end without one is undefined behaviour.
    const bool closed = (close(_sock) == 0);

    // Reset the descriptor regardless of the outcome so a stale value is
    // never reused.
    _sock = -1;

    return closed;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Send data to the connected host
|
Send data to the connected host
|
||||||
*/
|
*/
|
||||||
@ -84,7 +90,9 @@ std::string SocketLemmatizer::_receive(int size=512)
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string SocketLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) {
|
std::string SocketLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) {
|
||||||
_send_data(languageCode+sentence+"@#@");
|
_connect();
|
||||||
|
_send_data(languageCode+sentence+LEMMATIZER_DELIMITER);
|
||||||
std::string reply = _receive(512);
|
std::string reply = _receive(512);
|
||||||
return reply.substr(0,reply.find("@#@"));
|
_disconnect();
|
||||||
|
return reply.substr(0,reply.find(LEMMATIZER_DELIMITER));
|
||||||
}
|
}
|
||||||
|
@ -2,9 +2,10 @@
|
|||||||
#define SOCKET_LEMMATIZER_HDR
|
#define SOCKET_LEMMATIZER_HDR
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include<sys/socket.h> //socket
|
#include <sys/socket.h> //socket
|
||||||
#include<arpa/inet.h> //inet_addr
|
#include <arpa/inet.h> //inet_addr
|
||||||
#include<netdb.h> //hostent
|
#include <netdb.h> //hostent
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
#include <concordia/concordia_exception.hpp>
|
#include <concordia/concordia_exception.hpp>
|
||||||
|
|
||||||
@ -13,23 +14,26 @@ class SocketLemmatizer {
|
|||||||
public:
|
public:
|
||||||
/*! Constructor.
|
/*! Constructor.
|
||||||
*/
|
*/
|
||||||
SocketLemmatizer() throw(ConcordiaException);
|
explicit SocketLemmatizer(int port) throw(ConcordiaException);
|
||||||
/*! Destructor.
|
/*! Destructor.
|
||||||
*/
|
*/
|
||||||
virtual ~SocketLemmatizer();
|
virtual ~SocketLemmatizer();
|
||||||
|
|
||||||
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
|
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
|
||||||
private:
|
private:
|
||||||
bool _connect(std::string, int);
|
bool _connect();
|
||||||
|
|
||||||
|
bool _disconnect();
|
||||||
|
|
||||||
bool _send_data(std::string data);
|
bool _send_data(std::string data);
|
||||||
|
|
||||||
std::string _receive(int);
|
std::string _receive(int size);
|
||||||
|
|
||||||
|
int _port;
|
||||||
|
|
||||||
int _sock;
|
int _sock;
|
||||||
|
|
||||||
struct sockaddr_in _server;
|
struct sockaddr_in _server;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user