json lemmatizer
This commit is contained in:
parent
1e6d9dfa89
commit
2fb17e2bed
@ -150,6 +150,31 @@ if(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
|
|||||||
endif(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
|
endif(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------
# restclient-cpp
# ----------------------------------------------------
# Locate the restclient-cpp library file and the directory that contains
# its public header (included in the sources as "restclient-cpp/restclient.h").
find_library(RESTCLIENT_CPP_LIB NAMES restclient-cpp REQUIRED)
find_path(RESTCLIENT_CPP_INCLUDE restclient-cpp/restclient.h)

if(EXISTS "${RESTCLIENT_CPP_LIB}" AND EXISTS "${RESTCLIENT_CPP_INCLUDE}")
    message(STATUS "Found restclient-cpp")
    include_directories(${RESTCLIENT_CPP_INCLUDE})
    # find_library() yields the full path of the library file, while
    # link_directories() expects a directory, so strip the file name first.
    get_filename_component(RESTCLIENT_CPP_LIB_DIR "${RESTCLIENT_CPP_LIB}" PATH)
    link_directories(${RESTCLIENT_CPP_LIB_DIR})
else()
    # Do not skip silently: the server target links against this library.
    message(WARNING "restclient-cpp library or headers not found")
endif()
|
||||||
|
|
||||||
|
# ----------------------------------------------------
# curl
# ----------------------------------------------------
# Locate the curl library file and the directory that contains curl/curl.h.
find_library(CURL_LIB NAMES curl REQUIRED)
find_path(CURL_INCLUDE curl/curl.h)

if(EXISTS "${CURL_LIB}" AND EXISTS "${CURL_INCLUDE}")
    message(STATUS "Found curl")
    include_directories(${CURL_INCLUDE})
    # find_library() yields the full path of the library file, while
    # link_directories() expects a directory, so strip the file name first.
    get_filename_component(CURL_LIB_DIR "${CURL_LIB}" PATH)
    link_directories(${CURL_LIB_DIR})
else()
    # Do not skip silently: the server target links against this library.
    message(WARNING "curl library or headers not found")
endif()
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
# Logging
|
# Logging
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
@ -3,4 +3,4 @@ file(GLOB main_sources "*.cpp")
|
|||||||
add_executable(concordia_server_process
|
add_executable(concordia_server_process
|
||||||
${main_sources}
|
${main_sources}
|
||||||
)
|
)
|
||||||
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case icuuc)
|
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case icuuc restclient-cpp curl)
|
||||||
|
36
concordia-server/json_lemmatizer.cpp
Normal file
36
concordia-server/json_lemmatizer.cpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#include "json_lemmatizer.hpp"
|
||||||
|
#include "config.hpp"
|
||||||
|
#include "restclient-cpp/restclient.h"
|
||||||
|
#include "rapidjson/rapidjson.h"
|
||||||
|
#include "rapidjson/document.h"
|
||||||
|
#include "rapidjson/stringbuffer.h"
|
||||||
|
#include "rapidjson/writer.h"
|
||||||
|
#include "rapidjson/error/en.h"
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
/*! Constructor. The body is intentionally empty. */
JsonLemmatizer::JsonLemmatizer() throw(ConcordiaException) {}
|
||||||
|
|
||||||
|
/*! Destructor. The body is intentionally empty. */
JsonLemmatizer::~JsonLemmatizer() {}
|
||||||
|
|
||||||
|
|
||||||
|
std::string JsonLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) {
|
||||||
|
rapidjson::StringBuffer paramsJson;
|
||||||
|
rapidjson::Writer<rapidjson::StringBuffer> jsonWriter(paramsJson);
|
||||||
|
jsonWriter.StartObject();
|
||||||
|
jsonWriter.String("language");
|
||||||
|
jsonWriter.String(languageCode.c_str());
|
||||||
|
jsonWriter.String("sentences");
|
||||||
|
jsonWriter.StartArray();
|
||||||
|
jsonWriter.String(sentence.c_str());
|
||||||
|
jsonWriter.EndArray();
|
||||||
|
jsonWriter.EndObject();
|
||||||
|
|
||||||
|
|
||||||
|
RestClient::Response r = RestClient::post("http://concordia-preprocessor:9001/lemmatize", "application/json", paramsJson.GetString());
|
||||||
|
rapidjson::Document d;
|
||||||
|
d.Parse(r.body.c_str());
|
||||||
|
std::string lemmatized = d["processed_sentences"][0]["tokens"].GetString();
|
||||||
|
return lemmatized;
|
||||||
|
}
|
24
concordia-server/json_lemmatizer.hpp
Normal file
24
concordia-server/json_lemmatizer.hpp
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#ifndef JSON_LEMMATIZER_HDR
|
||||||
|
#define JSON_LEMMATIZER_HDR
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <concordia/concordia_exception.hpp>
|
||||||
|
|
||||||
|
#include "logger.hpp"
|
||||||
|
|
||||||
|
class JsonLemmatizer {
|
||||||
|
public:
|
||||||
|
/*! Constructor.
|
||||||
|
*/
|
||||||
|
explicit JsonLemmatizer() throw(ConcordiaException);
|
||||||
|
/*! Destructor.
|
||||||
|
*/
|
||||||
|
virtual ~JsonLemmatizer();
|
||||||
|
|
||||||
|
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
|
||||||
|
private:
|
||||||
|
Logger _logger;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -4,19 +4,15 @@
|
|||||||
|
|
||||||
|
|
||||||
/*! Constructor. Registers one lemmatizer per supported language code
    ("pl" and "en").
  \throws ConcordiaException (per the exception specification)
*/
LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
    // _lemmatizersMap is already default-constructed (empty) at this point,
    // so no explicit re-initialization is needed.

    // todo: extract this to configuration, especially when new lemmatizers
    // are added

    // boost::ptr_map::insert takes the key by non-const reference, hence the
    // named key variables.
    std::string plCode = "pl";
    std::string enCode = "en";

    // The map takes ownership of every inserted pointer and deletes it on
    // destruction, so each key must get its own instance; sharing a single
    // pointer between keys would lead to a double delete.
    _lemmatizersMap.insert(plCode, new JsonLemmatizer());
    _lemmatizersMap.insert(enCode, new JsonLemmatizer());
}
|
||||||
|
|
||||||
LemmatizerFacade::~LemmatizerFacade() {
|
LemmatizerFacade::~LemmatizerFacade() {
|
||||||
@ -24,7 +20,7 @@ LemmatizerFacade::~LemmatizerFacade() {
|
|||||||
|
|
||||||
std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) {
|
std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) {
|
||||||
|
|
||||||
boost::ptr_map<std::string,SocketLemmatizer>::iterator it = _lemmatizersMap.find(languageCode);
|
boost::ptr_map<std::string,JsonLemmatizer>::iterator it = _lemmatizersMap.find(languageCode);
|
||||||
if (it != _lemmatizersMap.end()) {
|
if (it != _lemmatizersMap.end()) {
|
||||||
return it->second->lemmatizeSentence(languageCode, sentence);
|
return it->second->lemmatizeSentence(languageCode, sentence);
|
||||||
} else {
|
} else {
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
#define LEMMATIZER_FACADE_HDR
|
#define LEMMATIZER_FACADE_HDR
|
||||||
|
|
||||||
#include "socket_lemmatizer.hpp"
|
#include "socket_lemmatizer.hpp"
|
||||||
|
#include "json_lemmatizer.hpp"
|
||||||
#include "tm_dao.hpp"
|
#include "tm_dao.hpp"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -27,7 +28,7 @@ public:
|
|||||||
std::vector<std::string> lemmatizeSentencesIfNeeded(std::vector<std::string> patterns, int tmId);
|
std::vector<std::string> lemmatizeSentencesIfNeeded(std::vector<std::string> patterns, int tmId);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
boost::ptr_map<std::string,SocketLemmatizer> _lemmatizersMap;
|
boost::ptr_map<std::string,JsonLemmatizer> _lemmatizersMap;
|
||||||
|
|
||||||
TmDAO _tmDAO;
|
TmDAO _tmDAO;
|
||||||
};
|
};
|
||||||
|
1
scripts/cmake_stubs/simplestart.sh.in
Normal file → Executable file
1
scripts/cmake_stubs/simplestart.sh.in
Normal file → Executable file
@ -1,5 +1,4 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
mono "@LEMMAGEN_BINARIES_PATH@"/LemmaGenSockets.exe &
|
|
||||||
spawn-fcgi -p 8000 -n "@COMPILED_BINARIES_PATH@"/concordia_server_process
|
spawn-fcgi -p 8000 -n "@COMPILED_BINARIES_PATH@"/concordia_server_process
|
||||||
|
|
||||||
|
@ -27,3 +27,4 @@ end = time.time()
|
|||||||
print "Execution time: %.4f seconds." % (end-start)
|
print "Execution time: %.4f seconds." % (end-start)
|
||||||
print "Result: "
|
print "Result: "
|
||||||
print response
|
print response
|
||||||
|
print response['lemmatizedSentence']
|
||||||
|
Loading…
Reference in New Issue
Block a user