lemmatizer facade
This commit is contained in:
parent
803ea2660f
commit
e558cb05d8
@ -20,6 +20,7 @@ namespace LemmaGenSockets
|
||||
{
|
||||
lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
|
||||
lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
|
||||
lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian));
|
||||
}
|
||||
|
||||
public LemmatizerListener()
|
||||
@ -29,15 +30,24 @@ namespace LemmaGenSockets
|
||||
|
||||
// Lemmatizes a sentence token by token.
// The sentence is split on whitespace, every token is passed through
// lemmatizeWord for the given language, and the lemmas are joined back
// with single spaces (leading/trailing whitespace trimmed).
// When no lemmatizer is registered for languageCode the sentence is
// returned unchanged.
private string lemmatizeSentence(string languageCode, string sentence)
{
    if (!lemmatizersDict.ContainsKey(languageCode))
    {
        //if we can not lemmatize, let's not do it at all
        //primum non nocere
        return sentence;
    }

    string[] tokens = sentence.Split(null);

    // Replace each token with its lemma in place; joining once at the end
    // avoids building the result through repeated string concatenation.
    for (int i = 0; i < tokens.Length; i++)
    {
        tokens[i] = lemmatizeWord(languageCode, tokens[i]);
    }

    return string.Join(" ", tokens).Trim();
}
|
||||
|
||||
private string lemmatizeWord(string languageCode, string word)
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -11,7 +11,6 @@
|
||||
#include "json_generator.hpp"
|
||||
#include "config.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "socket_lemmatizer.hpp"
|
||||
#include "rapidjson/rapidjson.h"
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/ptr_container/ptr_map.hpp>
|
||||
@ -28,6 +27,8 @@ ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
|
||||
}
|
||||
_indexController = boost::shared_ptr<IndexController> (new IndexController(_concordiasMap));
|
||||
_searcherController = boost::shared_ptr<SearcherController> (new SearcherController(_concordiasMap));
|
||||
|
||||
_lemmatizerFacade = boost::shared_ptr<LemmatizerFacade> (new LemmatizerFacade());
|
||||
}
|
||||
|
||||
ConcordiaServer::~ConcordiaServer() {
|
||||
@ -97,8 +98,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
||||
} else if (operation == "lemmatize") {
|
||||
std::string sentence = _getStringParameter(d, "sentence");
|
||||
std::string languageCode = _getStringParameter(d, "languageCode");
|
||||
SocketLemmatizer lemmatizer;
|
||||
std::string lemmatizedSentence = lemmatizer.lemmatizeSentence(languageCode, sentence);
|
||||
std::string lemmatizedSentence = _lemmatizerFacade->lemmatizeSentence(languageCode, sentence);
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("lemmatizedSentence");
|
||||
jsonWriter.String(lemmatizedSentence.c_str());
|
||||
|
@ -14,6 +14,8 @@
|
||||
#include "tm_dao.hpp"
|
||||
#include "index_controller.hpp"
|
||||
#include "searcher_controller.hpp"
|
||||
#include "lemmatizer_facade.hpp"
|
||||
|
||||
|
||||
class ConcordiaServer {
|
||||
public:
|
||||
@ -48,6 +50,8 @@ private:
|
||||
|
||||
boost::shared_ptr<SearcherController> _searcherController;
|
||||
|
||||
boost::shared_ptr<LemmatizerFacade> _lemmatizerFacade;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -31,3 +31,4 @@
|
||||
#define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"
|
||||
#define ADD_TM_OP "addTm"
|
||||
|
||||
#define LEMMATIZER_DELIMITER "@#@"
|
||||
|
30
concordia-server/lemmatizer_facade.cpp
Normal file
30
concordia-server/lemmatizer_facade.cpp
Normal file
@ -0,0 +1,30 @@
|
||||
#include "lemmatizer_facade.hpp"
|
||||
|
||||
|
||||
/*! Constructor.
    Registers a lemmatizer client for each supported language code
    ("pl", "en", "hr"), all of them configured with port 11000.

    Each language gets its own SocketLemmatizer instance. The map is a
    boost::ptr_map, which owns the pointers stored in it and deletes every
    mapped object on destruction, so sharing one raw pointer between
    several keys would delete the same object more than once.
*/
LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
    // todo: extract this to configuration, especially when new lemmatizers are added
    const int lemmatizerPort = 11000;

    // ptr_map::insert takes the key by non-const reference,
    // hence the named (non-temporary) strings.
    std::string plCode = "pl";
    std::string enCode = "en";
    std::string hrCode = "hr";

    _lemmatizersMap.insert(plCode, new SocketLemmatizer(lemmatizerPort));
    _lemmatizersMap.insert(enCode, new SocketLemmatizer(lemmatizerPort));
    _lemmatizersMap.insert(hrCode, new SocketLemmatizer(lemmatizerPort));
}
|
||||
|
||||
/*! Destructor.
    Performs no explicit cleanup of its own.
*/
LemmatizerFacade::~LemmatizerFacade() {}
|
||||
|
||||
std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) {
|
||||
|
||||
boost::ptr_map<std::string,SocketLemmatizer>::iterator it = _lemmatizersMap.find(languageCode);
|
||||
if (it != _lemmatizersMap.end()) {
|
||||
return it->second->lemmatizeSentence(languageCode, sentence);
|
||||
} else {
|
||||
throw ConcordiaException("lemmatizer for language: "+languageCode+" not found.");
|
||||
}
|
||||
|
||||
}
|
25
concordia-server/lemmatizer_facade.hpp
Normal file
25
concordia-server/lemmatizer_facade.hpp
Normal file
@ -0,0 +1,25 @@
|
||||
#ifndef LEMMATIZER_FACADE_HDR
|
||||
#define LEMMATIZER_FACADE_HDR
|
||||
|
||||
#include "socket_lemmatizer.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <concordia/concordia_exception.hpp>
|
||||
#include <boost/ptr_container/ptr_map.hpp>
|
||||
|
||||
|
||||
class LemmatizerFacade {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
LemmatizerFacade() throw(ConcordiaException);
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~LemmatizerFacade();
|
||||
|
||||
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
|
||||
private:
|
||||
boost::ptr_map<std::string,SocketLemmatizer> _lemmatizersMap;
|
||||
};
|
||||
|
||||
#endif
|
@ -1,8 +1,10 @@
|
||||
#include "socket_lemmatizer.hpp"
|
||||
|
||||
SocketLemmatizer::SocketLemmatizer() throw(ConcordiaException) :
|
||||
_sock(-1) {
|
||||
_connect("127.0.0.1" , 11000);
|
||||
#include "config.hpp"
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
/*! Constructor.
    Only records the port; no connection is opened here.
  \param port port of the lemmatizer service to connect to later
*/
SocketLemmatizer::SocketLemmatizer(int port) throw(ConcordiaException) :
                                         _port(port),
                                         // -1 marks "no socket open", so the descriptor is never
                                         // left holding an indeterminate value
                                         _sock(-1),
                                         // value-initialize the address structure (all zeros)
                                         _server() {
}
|
||||
|
||||
SocketLemmatizer::~SocketLemmatizer() {
|
||||
@ -11,17 +13,16 @@ SocketLemmatizer::~SocketLemmatizer() {
|
||||
/**
|
||||
Connect to a host on a certain port number
|
||||
*/
|
||||
bool SocketLemmatizer::_connect(std::string address , int port)
|
||||
{
|
||||
//create socket if it is not already created
|
||||
if(_sock == -1) {
|
||||
//Create socket
|
||||
_sock = socket(AF_INET , SOCK_STREAM , 0);
|
||||
if (_sock == -1) {
|
||||
throw ConcordiaException("Could not create socket for the lemmatizer.");
|
||||
}
|
||||
bool SocketLemmatizer::_connect() {
|
||||
|
||||
//Create socket
|
||||
_sock = socket(AF_INET , SOCK_STREAM , 0);
|
||||
if (_sock == -1) {
|
||||
throw ConcordiaException("Could not create socket for the lemmatizer.");
|
||||
}
|
||||
|
||||
std::string address = "127.0.0.1";
|
||||
|
||||
//setup address structure
|
||||
if(inet_addr(address.c_str()) == -1) {
|
||||
struct hostent *he;
|
||||
@ -45,16 +46,21 @@ bool SocketLemmatizer::_connect(std::string address , int port)
|
||||
}
|
||||
|
||||
_server.sin_family = AF_INET;
|
||||
_server.sin_port = htons(port);
|
||||
_server.sin_port = htons(_port);
|
||||
|
||||
//Connect to remote server
|
||||
if (connect(_sock , (struct sockaddr *) & _server , sizeof(_server)) < 0) {
|
||||
throw ConcordiaException("connect failed. Error");
|
||||
throw ConcordiaException("Connect failed. Error on address: "+address+":"+boost::lexical_cast<std::string>(_port));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
    Close the socket and mark this object as not connected.
    Returns true when there was nothing to close or close() succeeded,
    false when close() reported an error. The descriptor is reset to -1
    in either case.
*/
bool SocketLemmatizer::_disconnect() {
    if (_sock == -1) {
        // no socket open, nothing to do
        return true;
    }

    const bool closedCleanly = (close(_sock) == 0);
    _sock = -1;
    return closedCleanly;
}
|
||||
|
||||
/**
|
||||
Send data to the connected host
|
||||
*/
|
||||
@ -84,7 +90,9 @@ std::string SocketLemmatizer::_receive(int size=512)
|
||||
}
|
||||
|
||||
std::string SocketLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) {
|
||||
_send_data(languageCode+sentence+"@#@");
|
||||
_connect();
|
||||
_send_data(languageCode+sentence+LEMMATIZER_DELIMITER);
|
||||
std::string reply = _receive(512);
|
||||
return reply.substr(0,reply.find("@#@"));
|
||||
_disconnect();
|
||||
return reply.substr(0,reply.find(LEMMATIZER_DELIMITER));
|
||||
}
|
||||
|
@ -2,9 +2,10 @@
|
||||
#define SOCKET_LEMMATIZER_HDR
|
||||
|
||||
#include <string>
|
||||
#include<sys/socket.h> //socket
|
||||
#include<arpa/inet.h> //inet_addr
|
||||
#include<netdb.h> //hostent
|
||||
#include <sys/socket.h> //socket
|
||||
#include <arpa/inet.h> //inet_addr
|
||||
#include <netdb.h> //hostent
|
||||
#include <unistd.h>
|
||||
|
||||
#include <concordia/concordia_exception.hpp>
|
||||
|
||||
@ -13,23 +14,26 @@ class SocketLemmatizer {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
SocketLemmatizer() throw(ConcordiaException);
|
||||
explicit SocketLemmatizer(int port) throw(ConcordiaException);
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~SocketLemmatizer();
|
||||
|
||||
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
|
||||
private:
|
||||
bool _connect(std::string, int);
|
||||
bool _connect();
|
||||
|
||||
bool _disconnect();
|
||||
|
||||
bool _send_data(std::string data);
|
||||
|
||||
std::string _receive(int);
|
||||
std::string _receive(int size);
|
||||
|
||||
int _port;
|
||||
|
||||
int _sock;
|
||||
|
||||
struct sockaddr_in _server;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user