json lemmatizer

This commit is contained in:
Rafał Jaworski 2019-06-27 12:54:16 +02:00
parent 1e6d9dfa89
commit 2fb17e2bed
8 changed files with 94 additions and 12 deletions

View File

@ -150,6 +150,31 @@ if(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
endif(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
# ----------------------------------------------------
# restclient-cpp
# ----------------------------------------------------
# Locate the restclient-cpp library and headers and, when both are present,
# expose them to every target defined below.
find_library(RESTCLIENT_CPP_LIB NAMES restclient-cpp REQUIRED)
find_path(RESTCLIENT_CPP_INCLUDE restclient-cpp)
if(EXISTS ${RESTCLIENT_CPP_LIB} AND EXISTS ${RESTCLIENT_CPP_INCLUDE})
    message(STATUS "Found restclient-cpp")
    include_directories(${RESTCLIENT_CPP_INCLUDE})
    # find_library() stores the full path of the library *file*, whereas
    # link_directories() expects a directory to search. Pass the directory
    # that contains the library so that linking by name (-lrestclient-cpp)
    # also works for installations outside the default search path.
    get_filename_component(RESTCLIENT_CPP_LIB_DIR ${RESTCLIENT_CPP_LIB} DIRECTORY)
    link_directories(${RESTCLIENT_CPP_LIB_DIR})
endif(EXISTS ${RESTCLIENT_CPP_LIB} AND EXISTS ${RESTCLIENT_CPP_INCLUDE})
# ----------------------------------------------------
# curl
# ----------------------------------------------------
# Locate libcurl and its headers and, when both are present, expose them to
# every target defined below.
find_library(CURL_LIB NAMES curl REQUIRED)
find_path(CURL_INCLUDE curl)
if(EXISTS ${CURL_LIB} AND EXISTS ${CURL_INCLUDE})
    message(STATUS "Found curl")
    include_directories(${CURL_INCLUDE})
    # find_library() stores the full path of the library *file*, whereas
    # link_directories() expects a directory to search. Pass the directory
    # that contains the library so that linking by name (-lcurl) also works
    # for installations outside the default search path.
    get_filename_component(CURL_LIB_DIR ${CURL_LIB} DIRECTORY)
    link_directories(${CURL_LIB_DIR})
endif(EXISTS ${CURL_LIB} AND EXISTS ${CURL_INCLUDE})
# ----------------------------------------------------
# Logging
# ----------------------------------------------------

View File

@ -3,4 +3,4 @@ file(GLOB main_sources "*.cpp")
add_executable(concordia_server_process
${main_sources}
)
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case icuuc)
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case icuuc restclient-cpp curl)

View File

@ -0,0 +1,36 @@
#include "json_lemmatizer.hpp"
#include "config.hpp"
#include "restclient-cpp/restclient.h"
#include "rapidjson/rapidjson.h"
#include "rapidjson/document.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"
#include "rapidjson/error/en.h"
#include <string>
// Default constructor. The body is empty: no explicit initialisation is
// performed here. The exception specification has to stay in sync with the
// declaration in json_lemmatizer.hpp.
JsonLemmatizer::JsonLemmatizer() throw(ConcordiaException) {
}
// Destructor. The body is empty: there is no explicit cleanup to perform.
JsonLemmatizer::~JsonLemmatizer() {
}
std::string JsonLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) {
rapidjson::StringBuffer paramsJson;
rapidjson::Writer<rapidjson::StringBuffer> jsonWriter(paramsJson);
jsonWriter.StartObject();
jsonWriter.String("language");
jsonWriter.String(languageCode.c_str());
jsonWriter.String("sentences");
jsonWriter.StartArray();
jsonWriter.String(sentence.c_str());
jsonWriter.EndArray();
jsonWriter.EndObject();
RestClient::Response r = RestClient::post("http://concordia-preprocessor:9001/lemmatize", "application/json", paramsJson.GetString());
rapidjson::Document d;
d.Parse(r.body.c_str());
std::string lemmatized = d["processed_sentences"][0]["tokens"].GetString();
return lemmatized;
}

View File

@ -0,0 +1,24 @@
#ifndef JSON_LEMMATIZER_HDR
#define JSON_LEMMATIZER_HDR
#include <string>
#include <concordia/concordia_exception.hpp>
#include "logger.hpp"
/*! Lemmatizer exposing a single operation, lemmatizeSentence(), which turns
    a sentence in a given language into its lemmatized form.
*/
class JsonLemmatizer {
public:
/*! Constructor.
*/
explicit JsonLemmatizer() throw(ConcordiaException);
/*! Destructor.
*/
virtual ~JsonLemmatizer();
/*! Lemmatizes a sentence.
    \param languageCode code of the language the sentence is written in
    \param sentence sentence to lemmatize
    \returns the lemmatized sentence
*/
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
private:
// Logger instance (type declared in logger.hpp).
Logger _logger;
};
#endif

View File

@ -4,19 +4,15 @@
// Constructor. Builds the map from language code to the lemmatizer that
// serves that language.
LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
_lemmatizersMap = boost::ptr_map<std::string,SocketLemmatizer>();
_lemmatizersMap = boost::ptr_map<std::string,JsonLemmatizer>();
// TODO: extract this to configuration, especially once more lemmatizers are added
SocketLemmatizer * socketLemmatizer1 = new SocketLemmatizer(11000);
JsonLemmatizer * jsonLemmatizer1 = new JsonLemmatizer();
// Language codes are kept in named variables and passed to insert() below.
std::string plCode = "pl";
std::string enCode = "en";
std::string hrCode = "hr";
std::string frCode = "fr";
// NOTE(review): the same heap-allocated lemmatizer is inserted under several
// keys. boost::ptr_map is documented as taking ownership of inserted pointers
// and deleting each mapped object on destruction, so this looks like a double
// delete when the facade is destroyed -- confirm, and consider one instance
// per language code.
_lemmatizersMap.insert(plCode, socketLemmatizer1);
_lemmatizersMap.insert(enCode, socketLemmatizer1);
_lemmatizersMap.insert(hrCode, socketLemmatizer1);
_lemmatizersMap.insert(frCode, socketLemmatizer1);
_lemmatizersMap.insert(plCode, jsonLemmatizer1);
_lemmatizersMap.insert(enCode, jsonLemmatizer1);
}
LemmatizerFacade::~LemmatizerFacade() {
@ -24,7 +20,7 @@ LemmatizerFacade::~LemmatizerFacade() {
std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) {
boost::ptr_map<std::string,SocketLemmatizer>::iterator it = _lemmatizersMap.find(languageCode);
boost::ptr_map<std::string,JsonLemmatizer>::iterator it = _lemmatizersMap.find(languageCode);
if (it != _lemmatizersMap.end()) {
return it->second->lemmatizeSentence(languageCode, sentence);
} else {

View File

@ -2,6 +2,7 @@
#define LEMMATIZER_FACADE_HDR
#include "socket_lemmatizer.hpp"
#include "json_lemmatizer.hpp"
#include "tm_dao.hpp"
#include <string>
@ -27,7 +28,7 @@ public:
std::vector<std::string> lemmatizeSentencesIfNeeded(std::vector<std::string> patterns, int tmId);
private:
boost::ptr_map<std::string,SocketLemmatizer> _lemmatizersMap;
boost::ptr_map<std::string,JsonLemmatizer> _lemmatizersMap;
TmDAO _tmDAO;
};

1
scripts/cmake_stubs/simplestart.sh.in Normal file → Executable file
View File

@ -1,5 +1,4 @@
#!/bin/sh
# Startup script template; the @...@ placeholders are presumably substituted
# with real paths when the build system configures this file -- TODO confirm.

# Start the LemmaGen socket server under mono, in the background.
mono "@LEMMAGEN_BINARIES_PATH@"/LemmaGenSockets.exe &
# Launch concordia_server_process through spawn-fcgi, bound to port 8000.
spawn-fcgi -p 8000 -n "@COMPILED_BINARIES_PATH@"/concordia_server_process

View File

@ -27,3 +27,4 @@ end = time.time()
print "Execution time: %.4f seconds." % (end-start)
print "Result: "
print response
print response['lemmatizedSentence']