json lemmatizer
This commit is contained in:
parent
1e6d9dfa89
commit
2fb17e2bed
@ -150,6 +150,31 @@ if(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
|
||||
endif(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
|
||||
|
||||
|
||||
# ----------------------------------------------------
|
||||
# restclient-cpp
|
||||
# ----------------------------------------------------
|
||||
# Locate the restclient-cpp library and the directory that contains its
# headers (the sources include "restclient-cpp/restclient.h").
find_library(RESTCLIENT_CPP_LIB NAMES restclient-cpp REQUIRED)
find_path(RESTCLIENT_CPP_INCLUDE NAMES restclient-cpp/restclient.h)

# Quote the expansions so that an empty or *-NOTFOUND value is still a
# single, well-formed argument to if(EXISTS ...).
if(EXISTS "${RESTCLIENT_CPP_LIB}" AND EXISTS "${RESTCLIENT_CPP_INCLUDE}")
    message(STATUS "Found restclient-cpp")
    include_directories(${RESTCLIENT_CPP_INCLUDE})
    # find_library() yields the full path of the library file, while
    # link_directories() expects a directory: pass the containing directory.
    get_filename_component(RESTCLIENT_CPP_LIB_DIR "${RESTCLIENT_CPP_LIB}" DIRECTORY)
    link_directories(${RESTCLIENT_CPP_LIB_DIR})
else()
    message(WARNING "restclient-cpp library or headers not found")
endif()
|
||||
|
||||
# ----------------------------------------------------
|
||||
# curl
|
||||
# ----------------------------------------------------
|
||||
# Locate libcurl and the directory that contains its headers.
find_library(CURL_LIB NAMES curl REQUIRED)
find_path(CURL_INCLUDE NAMES curl/curl.h)

# Quote the expansions so that an empty or *-NOTFOUND value is still a
# single, well-formed argument to if(EXISTS ...).
if(EXISTS "${CURL_LIB}" AND EXISTS "${CURL_INCLUDE}")
    message(STATUS "Found curl")
    include_directories(${CURL_INCLUDE})
    # find_library() yields the full path of the library file, while
    # link_directories() expects a directory: pass the containing directory.
    get_filename_component(CURL_LIB_DIR "${CURL_LIB}" DIRECTORY)
    link_directories(${CURL_LIB_DIR})
else()
    message(WARNING "curl library or headers not found")
endif()
|
||||
|
||||
|
||||
# ----------------------------------------------------
|
||||
# Logging
|
||||
# ----------------------------------------------------
|
||||
|
@ -3,4 +3,4 @@ file(GLOB main_sources "*.cpp")
|
||||
add_executable(concordia_server_process
|
||||
${main_sources}
|
||||
)
|
||||
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case icuuc)
|
||||
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case icuuc restclient-cpp curl)
|
||||
|
36
concordia-server/json_lemmatizer.cpp
Normal file
36
concordia-server/json_lemmatizer.cpp
Normal file
@ -0,0 +1,36 @@
|
||||
#include "json_lemmatizer.hpp"
|
||||
#include "config.hpp"
|
||||
#include "restclient-cpp/restclient.h"
|
||||
#include "rapidjson/rapidjson.h"
|
||||
#include "rapidjson/document.h"
|
||||
#include "rapidjson/stringbuffer.h"
|
||||
#include "rapidjson/writer.h"
|
||||
#include "rapidjson/error/en.h"
|
||||
#include <string>
|
||||
|
||||
// Nothing to set up explicitly: the body is empty and the only data member
// (_logger) is default-constructed.
JsonLemmatizer::JsonLemmatizer() throw(ConcordiaException) {}
|
||||
|
||||
// Nothing to release explicitly: the body is empty.
JsonLemmatizer::~JsonLemmatizer() {}
|
||||
|
||||
|
||||
std::string JsonLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) {
|
||||
rapidjson::StringBuffer paramsJson;
|
||||
rapidjson::Writer<rapidjson::StringBuffer> jsonWriter(paramsJson);
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("language");
|
||||
jsonWriter.String(languageCode.c_str());
|
||||
jsonWriter.String("sentences");
|
||||
jsonWriter.StartArray();
|
||||
jsonWriter.String(sentence.c_str());
|
||||
jsonWriter.EndArray();
|
||||
jsonWriter.EndObject();
|
||||
|
||||
|
||||
RestClient::Response r = RestClient::post("http://concordia-preprocessor:9001/lemmatize", "application/json", paramsJson.GetString());
|
||||
rapidjson::Document d;
|
||||
d.Parse(r.body.c_str());
|
||||
std::string lemmatized = d["processed_sentences"][0]["tokens"].GetString();
|
||||
return lemmatized;
|
||||
}
|
24
concordia-server/json_lemmatizer.hpp
Normal file
24
concordia-server/json_lemmatizer.hpp
Normal file
@ -0,0 +1,24 @@
|
||||
#ifndef JSON_LEMMATIZER_HDR
|
||||
#define JSON_LEMMATIZER_HDR
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <concordia/concordia_exception.hpp>
|
||||
|
||||
#include "logger.hpp"
|
||||
|
||||
class JsonLemmatizer {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
explicit JsonLemmatizer() throw(ConcordiaException);
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~JsonLemmatizer();
|
||||
|
||||
std::string lemmatizeSentence(std::string languageCode, std::string sentence);
|
||||
private:
|
||||
Logger _logger;
|
||||
};
|
||||
|
||||
#endif
|
@ -4,19 +4,15 @@
|
||||
|
||||
|
||||
/*! Constructor. Registers a JSON lemmatizer for every supported language
    code ("pl" and "en").
*/
LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
    // _lemmatizersMap is default-constructed empty, so no explicit
    // initialisation is needed before filling it.

    // TODO: extract the language list to configuration, especially when new
    // lemmatizers are added.
    std::string plCode = "pl";
    std::string enCode = "en";

    // boost::ptr_map takes ownership of every inserted pointer and deletes
    // it on destruction. Each key therefore needs its own instance; sharing
    // one pointer between keys would delete the same object twice.
    // (insert() takes the key by non-const reference, hence the named
    // std::string variables.)
    _lemmatizersMap.insert(plCode, new JsonLemmatizer());
    _lemmatizersMap.insert(enCode, new JsonLemmatizer());
}
|
||||
|
||||
LemmatizerFacade::~LemmatizerFacade() {
|
||||
@ -24,7 +20,7 @@ LemmatizerFacade::~LemmatizerFacade() {
|
||||
|
||||
std::string LemmatizerFacade::lemmatizeSentence(std::string languageCode, std::string sentence) {
|
||||
|
||||
boost::ptr_map<std::string,SocketLemmatizer>::iterator it = _lemmatizersMap.find(languageCode);
|
||||
boost::ptr_map<std::string,JsonLemmatizer>::iterator it = _lemmatizersMap.find(languageCode);
|
||||
if (it != _lemmatizersMap.end()) {
|
||||
return it->second->lemmatizeSentence(languageCode, sentence);
|
||||
} else {
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define LEMMATIZER_FACADE_HDR
|
||||
|
||||
#include "socket_lemmatizer.hpp"
|
||||
#include "json_lemmatizer.hpp"
|
||||
#include "tm_dao.hpp"
|
||||
|
||||
#include <string>
|
||||
@ -27,7 +28,7 @@ public:
|
||||
std::vector<std::string> lemmatizeSentencesIfNeeded(std::vector<std::string> patterns, int tmId);
|
||||
|
||||
private:
|
||||
boost::ptr_map<std::string,SocketLemmatizer> _lemmatizersMap;
|
||||
boost::ptr_map<std::string,JsonLemmatizer> _lemmatizersMap;
|
||||
|
||||
TmDAO _tmDAO;
|
||||
};
|
||||
|
1
scripts/cmake_stubs/simplestart.sh.in
Normal file → Executable file
1
scripts/cmake_stubs/simplestart.sh.in
Normal file → Executable file
@ -1,5 +1,4 @@
|
||||
#!/bin/sh
|
||||
|
||||
mono "@LEMMAGEN_BINARIES_PATH@"/LemmaGenSockets.exe &
|
||||
spawn-fcgi -p 8000 -n "@COMPILED_BINARIES_PATH@"/concordia_server_process
|
||||
|
||||
|
@ -27,3 +27,4 @@ end = time.time()
|
||||
print "Execution time: %.4f seconds." % (end-start)
|
||||
print "Result: "
|
||||
print response
|
||||
print response['lemmatizedSentence']
|
||||
|
Loading…
Reference in New Issue
Block a user