diff --git a/.gitignore b/.gitignore
index 70a29fc..b3ee04f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ db/pgbouncer.pid
db/pgbouncer.ini
upstart/concordia-server.conf
upstart/pgbouncer.conf
+upstart/lemmagen.conf
cat/host.cfg
mgiza-aligner/mgiza/mgizapp/CMakeCache.txt
mgiza-aligner/mgiza/mgizapp/CMakeFiles/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1403d70..f4b6818 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,6 +27,7 @@ configure_file (
)
set(COMPILED_BINARIES_PATH "${concordia-server_SOURCE_DIR}/build/concordia-server")
+# Directory holding the prebuilt LemmaGenSockets.exe that the generated
+# upstart/lemmagen.conf launches through mono. Stored as a cache entry so a
+# different build output (e.g. bin/Release) can be selected at configure time
+# with -DLEMMAGEN_BINARIES_PATH=<dir>; the default is the committed Debug build.
+set(LEMMAGEN_BINARIES_PATH
+    "${concordia-server_SOURCE_DIR}/LemmaGenSockets/LemmaGenSockets/bin/Debug"
+    CACHE PATH "Directory containing LemmaGenSockets.exe")
set(SCRIPTS_PATH "${concordia-server_SOURCE_DIR}/scripts")
configure_file (
"${concordia-server_SOURCE_DIR}/scripts/cmake_stubs/start.sh.in"
@@ -55,6 +56,12 @@ configure_file (
"${concordia-server_SOURCE_DIR}/upstart/pgbouncer.conf"
)
+# Generate the upstart job for the LemmaGen lemmatizer from its template.
+# @ONLY limits substitution to @VAR@ references, so any shell-style ${...}
+# text in the upstart script is copied through untouched.
+configure_file (
+    "${concordia-server_SOURCE_DIR}/upstart/cmake_stubs/lemmagen.conf.in"
+    "${concordia-server_SOURCE_DIR}/upstart/lemmagen.conf"
+    @ONLY
+    )
+
+
configure_file (
"${concordia-server_SOURCE_DIR}/db/pgbouncer.ini.in"
"${concordia-server_SOURCE_DIR}/db/pgbouncer.ini"
@@ -119,7 +126,7 @@ endif(EXISTS ${ICU_LIB} AND EXISTS ${ICU_INCLUDE})
# ----------------------------------------------------
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_STATIC_RUNTIME OFF)
-find_package(Boost COMPONENTS
+find_package(Boost COMPONENTS
serialization unit_test_framework system filesystem program_options iostreams regex locale REQUIRED)
# ----------------------------------------------------
@@ -214,8 +221,3 @@ if(EXISTS ${UTF8CASE_LIB})
endif(EXISTS ${UTF8CASE_LIB})
add_subdirectory(concordia-server)
-
-
-
-
-
diff --git a/LemmaGenSockets/LemmaGen/LemmaSharp.dll b/LemmaGenSockets/LemmaGen/LemmaSharp.dll
new file mode 100644
index 0000000..5d8380d
Binary files /dev/null and b/LemmaGenSockets/LemmaGen/LemmaSharp.dll differ
diff --git a/LemmaGenSockets/LemmaGen/LemmaSharpPrebuilt.dll b/LemmaGenSockets/LemmaGen/LemmaSharpPrebuilt.dll
new file mode 100644
index 0000000..dde85f7
Binary files /dev/null and b/LemmaGenSockets/LemmaGen/LemmaSharpPrebuilt.dll differ
diff --git a/LemmaGenSockets/LemmaGen/LemmaSharpPrebuiltCompact.dll b/LemmaGenSockets/LemmaGen/LemmaSharpPrebuiltCompact.dll
new file mode 100644
index 0000000..c1b2a38
Binary files /dev/null and b/LemmaGenSockets/LemmaGen/LemmaSharpPrebuiltCompact.dll differ
diff --git a/LemmaGenSockets/LemmaGen/Lzma#.dll b/LemmaGenSockets/LemmaGen/Lzma#.dll
new file mode 100644
index 0000000..2bb9990
Binary files /dev/null and b/LemmaGenSockets/LemmaGen/Lzma#.dll differ
diff --git a/LemmaGenSockets/LemmaGenSockets.sln b/LemmaGenSockets/LemmaGenSockets.sln
new file mode 100644
index 0000000..970942e
--- /dev/null
+++ b/LemmaGenSockets/LemmaGenSockets.sln
@@ -0,0 +1,22 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.25420.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaGenSockets", "LemmaGenSockets\LemmaGenSockets.csproj", "{3098BC55-2CC9-4612-9F79-8C812B3BE539}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {3098BC55-2CC9-4612-9F79-8C812B3BE539}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {3098BC55-2CC9-4612-9F79-8C812B3BE539}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {3098BC55-2CC9-4612-9F79-8C812B3BE539}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {3098BC55-2CC9-4612-9F79-8C812B3BE539}.Release|Any CPU.Build.0 = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/LemmaGenSockets/LemmaGenSockets/App.config b/LemmaGenSockets/LemmaGenSockets/App.config
new file mode 100644
index 0000000..d740e88
--- /dev/null
+++ b/LemmaGenSockets/LemmaGenSockets/App.config
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/LemmaGenSockets/LemmaGenSockets/LemmaGenSockets.csproj b/LemmaGenSockets/LemmaGenSockets/LemmaGenSockets.csproj
new file mode 100644
index 0000000..eaae3e2
--- /dev/null
+++ b/LemmaGenSockets/LemmaGenSockets/LemmaGenSockets.csproj
@@ -0,0 +1,73 @@
+
+
+
+
+ Debug
+ AnyCPU
+ {3098BC55-2CC9-4612-9F79-8C812B3BE539}
+ Exe
+ Properties
+ LemmaGenSockets
+ LemmaGenSockets
+ v4.5.2
+ 512
+ true
+
+
+ AnyCPU
+ true
+ full
+ false
+ bin\Debug\
+ DEBUG;TRACE
+ prompt
+ 4
+
+
+ AnyCPU
+ pdbonly
+ true
+ bin\Release\
+ TRACE
+ prompt
+ 4
+
+
+
+ ..\LemmaGen\LemmaSharp.dll
+
+
+ ..\LemmaGen\LemmaSharpPrebuilt.dll
+
+
+ ..\LemmaGen\LemmaSharpPrebuiltCompact.dll
+
+
+ ..\LemmaGen\Lzma#.dll
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs b/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs
new file mode 100644
index 0000000..9eee9bf
--- /dev/null
+++ b/LemmaGenSockets/LemmaGenSockets/LemmatizerListener.cs
@@ -0,0 +1,130 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Net;
+using System.Net.Sockets;
+using System.Threading.Tasks;
+using LemmaSharp;
+
+namespace LemmaGenSockets
+{
+ /// <summary>
+ /// Blocking TCP server that lemmatizes sentences with LemmaSharp.
+ ///
+ /// Protocol (one request per connection): the client sends a two-character
+ /// language code immediately followed by the sentence and the terminator
+ /// "@#@"; the server answers with the lemmatized sentence followed by "@#@"
+ /// and then closes the connection.
+ /// </summary>
+ class LemmatizerListener
+ {
+     // Marks the end of both the request and the response.
+     private const string MessageTerminator = "@#@";
+
+     // Number of leading characters of a request that hold the language code.
+     private const int LanguageCodeLength = 2;
+
+     // Lemmatizers keyed by language code ("pl", "en").
+     private Dictionary<string, LemmatizerPrebuiltCompact> lemmatizersDict =
+         new Dictionary<string, LemmatizerPrebuiltCompact>();
+
+     private char[] wordInnerSeparator = { '-' };
+
+
+     private void initializeLemmatizers()
+     {
+         lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
+         lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
+     }
+
+     public LemmatizerListener()
+     {
+         initializeLemmatizers();
+     }
+
+     /// <summary>
+     /// Returns the lemmatizer registered for the language code.
+     /// </summary>
+     /// <exception cref="ArgumentException">No lemmatizer is registered for the code.</exception>
+     private LemmatizerPrebuiltCompact getLemmatizer(string languageCode)
+     {
+         LemmatizerPrebuiltCompact lemmatizer;
+         if (!lemmatizersDict.TryGetValue(languageCode, out lemmatizer))
+         {
+             throw new ArgumentException("Unsupported language code: " + languageCode);
+         }
+         return lemmatizer;
+     }
+
+     /// <summary>
+     /// Splits the sentence on whitespace, lemmatizes every token and joins
+     /// the results with single spaces (leading/trailing whitespace trimmed).
+     /// </summary>
+     private string lemmatizeSentence(string languageCode, string sentence)
+     {
+         string[] tokens = sentence.Split((char[])null);
+         return string.Join(" ", tokens.Select(token => lemmatizeWord(languageCode, token))).Trim();
+     }
+
+     /// <summary>
+     /// Lemmatizes a single token. A token consisting of exactly two parts
+     /// joined by '-' is lemmatized part by part; a first part ending in "o"
+     /// is kept verbatim (presumably a combining form that must not be
+     /// altered -- confirm with the author).
+     /// </summary>
+     private string lemmatizeWord(string languageCode, string word)
+     {
+         LemmatizerPrebuiltCompact lemmatizer = getLemmatizer(languageCode);
+
+         string[] parts = word.Split(wordInnerSeparator);
+         if (parts.Length != 2)
+         {
+             return lemmatizer.Lemmatize(word);
+         }
+
+         string firstPart = parts[0];
+         if (!firstPart.EndsWith("o"))
+         {
+             firstPart = lemmatizer.Lemmatize(firstPart);
+         }
+         string secondPart = lemmatizer.Lemmatize(parts[1]);
+         return firstPart + "-" + secondPart;
+     }
+
+     /// <summary>
+     /// Reads from the connection until the message terminator arrives and
+     /// returns the text preceding it.
+     /// </summary>
+     /// <exception cref="System.IO.EndOfStreamException">
+     /// The peer closed the connection before sending the terminator.
+     /// </exception>
+     private static string receiveMessage(Socket handler)
+     {
+         // A stateful decoder keeps the trailing bytes of a UTF-8 sequence
+         // that is split across two reads instead of corrupting the character.
+         Decoder decoder = Encoding.UTF8.GetDecoder();
+         byte[] bytes = new byte[1024];
+         char[] chars = new char[Encoding.UTF8.GetMaxCharCount(bytes.Length)];
+         StringBuilder received = new StringBuilder();
+
+         while (true)
+         {
+             int bytesRec = handler.Receive(bytes);
+             if (bytesRec == 0)
+             {
+                 // Receive returns 0 once the peer has shut down; without this
+                 // check the loop would spin forever.
+                 throw new System.IO.EndOfStreamException(
+                     "Connection closed before the message terminator was received.");
+             }
+
+             int charCount = decoder.GetChars(bytes, 0, bytesRec, chars, 0);
+             received.Append(chars, 0, charCount);
+
+             int terminatorIndex = received.ToString().IndexOf(MessageTerminator, StringComparison.Ordinal);
+             if (terminatorIndex > -1)
+             {
+                 return received.ToString(0, terminatorIndex);
+             }
+         }
+     }
+
+     /// <summary>
+     /// Serves one request on an accepted connection: reads the request,
+     /// lemmatizes the sentence and sends the terminated reply.
+     /// </summary>
+     private void handleConnection(Socket handler)
+     {
+         string request = receiveMessage(handler);
+         if (request.Length < LanguageCodeLength)
+         {
+             throw new FormatException("Request is too short to contain a language code.");
+         }
+
+         string languageCode = request.Substring(0, LanguageCodeLength);
+         string sentence = request.Substring(LanguageCodeLength);
+
+         // Send lemmatized data back to client.
+         byte[] msg = Encoding.UTF8.GetBytes(lemmatizeSentence(languageCode, sentence) + MessageTerminator);
+
+         handler.Send(msg);
+         handler.Shutdown(SocketShutdown.Both);
+     }
+
+     /// <summary>
+     /// Binds to 127.0.0.1:11000 and serves connections one at a time, forever.
+     /// A failure while serving a single connection is logged to the console
+     /// and does not stop the listener; a failure of the listening socket
+     /// itself is logged and ends the method.
+     /// </summary>
+     public void DoListening()
+     {
+         // Establish the local endpoint for the socket.
+         IPAddress ipAddress = IPAddress.Parse("127.0.0.1");
+         IPEndPoint localEndPoint = new IPEndPoint(ipAddress, 11000);
+
+         try
+         {
+             // Create a TCP/IP socket; disposed when listening ends.
+             using (Socket listener = new Socket(AddressFamily.InterNetwork,
+                 SocketType.Stream, ProtocolType.Tcp))
+             {
+                 // Bind the socket to the local endpoint and
+                 // listen for incoming connections.
+                 listener.Bind(localEndPoint);
+                 listener.Listen(10);
+
+                 while (true)
+                 {
+                     // Program is suspended while waiting for an incoming connection.
+                     Socket handler = listener.Accept();
+                     try
+                     {
+                         handleConnection(handler);
+                     }
+                     catch (Exception e)
+                     {
+                         // A bad request must not take the whole service down.
+                         Console.WriteLine(e.ToString());
+                     }
+                     finally
+                     {
+                         handler.Close();
+                     }
+                 }
+             }
+         }
+         catch (Exception e)
+         {
+             Console.WriteLine(e.ToString());
+         }
+     }
+ }
+}
diff --git a/LemmaGenSockets/LemmaGenSockets/Program.cs b/LemmaGenSockets/LemmaGenSockets/Program.cs
new file mode 100644
index 0000000..b7ce8a5
--- /dev/null
+++ b/LemmaGenSockets/LemmaGenSockets/Program.cs
@@ -0,0 +1,26 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Net;
+using System.Net.Sockets;
+using System.Threading.Tasks;
+using LemmaSharp;
+
+namespace LemmaGenSockets
+{
+ /// <summary>
+ /// Entry point of the lemmatizer service.
+ /// </summary>
+ class Program
+ {
+     // Incoming data from the client.
+     // NOTE(review): nothing in this file reads or writes this field.
+     public static string data = null;
+
+     /// <summary>
+     /// Creates the listener and blocks in its accept loop.
+     /// </summary>
+     static void Main(string[] args)
+     {
+         new LemmatizerListener().DoListening();
+     }
+ }
+}
diff --git a/LemmaGenSockets/LemmaGenSockets/Properties/AssemblyInfo.cs b/LemmaGenSockets/LemmaGenSockets/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..a865875
--- /dev/null
+++ b/LemmaGenSockets/LemmaGenSockets/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("LemmaGenSockets")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("LemmaGenSockets")]
+[assembly: AssemblyCopyright("Copyright © 2017")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("3098bc55-2cc9-4612-9f79-8c812b3be539")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe
new file mode 100644
index 0000000..8cb06db
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe differ
diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe.config b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe.config
new file mode 100644
index 0000000..d740e88
--- /dev/null
+++ b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.exe.config
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb
new file mode 100644
index 0000000..1ab947c
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb differ
diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe
new file mode 100644
index 0000000..681ab77
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe differ
diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe.config b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe.config
new file mode 100644
index 0000000..d740e88
--- /dev/null
+++ b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe.config
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe.manifest b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe.manifest
new file mode 100644
index 0000000..f96b1d6
--- /dev/null
+++ b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe.manifest
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaSharp.dll b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaSharp.dll
new file mode 100644
index 0000000..5d8380d
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaSharp.dll differ
diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaSharpPrebuilt.dll b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaSharpPrebuilt.dll
new file mode 100644
index 0000000..dde85f7
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaSharpPrebuilt.dll differ
diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaSharpPrebuiltCompact.dll b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaSharpPrebuiltCompact.dll
new file mode 100644
index 0000000..c1b2a38
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/bin/Debug/LemmaSharpPrebuiltCompact.dll differ
diff --git a/LemmaGenSockets/LemmaGenSockets/bin/Debug/Lzma#.dll b/LemmaGenSockets/LemmaGenSockets/bin/Debug/Lzma#.dll
new file mode 100644
index 0000000..2bb9990
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/bin/Debug/Lzma#.dll differ
diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache b/LemmaGenSockets/LemmaGenSockets/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache
new file mode 100644
index 0000000..5a81c4d
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache differ
diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csproj.FileListAbsolute.txt b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csproj.FileListAbsolute.txt
new file mode 100644
index 0000000..3df4681
--- /dev/null
+++ b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csproj.FileListAbsolute.txt
@@ -0,0 +1,10 @@
+j:\documents\visual studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe.config
+j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe
+j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.pdb
+j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharp.dll
+j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuilt.dll
+j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuiltCompact.dll
+j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\Lzma#.dll
+j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache
+j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe
+j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb
diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache
new file mode 100644
index 0000000..18ff92c
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.csprojResolveAssemblyReference.cache differ
diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe
new file mode 100644
index 0000000..8cb06db
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.exe differ
diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb
new file mode 100644
index 0000000..1ab947c
Binary files /dev/null and b/LemmaGenSockets/LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb differ
diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/TemporaryGeneratedFile_036C0B5B-1481-4323-8D20-8F5ADCB23D92.cs b/LemmaGenSockets/LemmaGenSockets/obj/Debug/TemporaryGeneratedFile_036C0B5B-1481-4323-8D20-8F5ADCB23D92.cs
new file mode 100644
index 0000000..e69de29
diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/TemporaryGeneratedFile_5937a670-0e60-4077-877b-f7221da3dda1.cs b/LemmaGenSockets/LemmaGenSockets/obj/Debug/TemporaryGeneratedFile_5937a670-0e60-4077-877b-f7221da3dda1.cs
new file mode 100644
index 0000000..e69de29
diff --git a/LemmaGenSockets/LemmaGenSockets/obj/Debug/TemporaryGeneratedFile_E7A71F73-0F8D-4B9B-B56E-8E70B10BC5D3.cs b/LemmaGenSockets/LemmaGenSockets/obj/Debug/TemporaryGeneratedFile_E7A71F73-0F8D-4B9B-B56E-8E70B10BC5D3.cs
new file mode 100644
index 0000000..e69de29
diff --git a/concordia-server/CMakeLists.txt b/concordia-server/CMakeLists.txt
index 82c95d8..ef392b8 100644
--- a/concordia-server/CMakeLists.txt
+++ b/concordia-server/CMakeLists.txt
@@ -1,19 +1,6 @@
-add_executable(concordia_server_process
- concordia_server_process.cpp
- concordia_server.cpp
- index_controller.cpp
- searcher_controller.cpp
- json_generator.cpp
- unit_dao.cpp
- db_connection.cpp
- query_param.cpp
- string_param.cpp
- int_param.cpp
- logger.cpp
- int_array_param.cpp
- simple_search_result.cpp
- complete_concordia_search_result.cpp
- tm_dao.cpp
- )
-target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case icuuc)
+# Sources are listed explicitly rather than globbed: a glob is evaluated only
+# when CMake re-configures, so newly added files would be silently missed by
+# incremental builds, and additions would not show up in this file's history.
+add_executable(concordia_server_process
+    concordia_server_process.cpp
+    concordia_server.cpp
+    index_controller.cpp
+    searcher_controller.cpp
+    json_generator.cpp
+    unit_dao.cpp
+    db_connection.cpp
+    query_param.cpp
+    string_param.cpp
+    int_param.cpp
+    logger.cpp
+    int_array_param.cpp
+    simple_search_result.cpp
+    complete_concordia_search_result.cpp
+    tm_dao.cpp
+    socket_lemmatizer.cpp
+    )
+target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case icuuc)
diff --git a/concordia-server/concordia_server.cpp b/concordia-server/concordia_server.cpp
index 8bdf5ee..9d45eaf 100644
--- a/concordia-server/concordia_server.cpp
+++ b/concordia-server/concordia_server.cpp
@@ -11,6 +11,7 @@
#include "json_generator.hpp"
#include "config.hpp"
#include "logger.hpp"
+#include "socket_lemmatizer.hpp"
#include "rapidjson/rapidjson.h"
#include
#include
@@ -26,7 +27,7 @@ ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
_addTm(tmId);
}
_indexController = boost::shared_ptr (new IndexController(_concordiasMap));
- _searcherController = boost::shared_ptr (new SearcherController(_concordiasMap));
+ _searcherController = boost::shared_ptr (new SearcherController(_concordiasMap));
}
ConcordiaServer::~ConcordiaServer() {
@@ -50,7 +51,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
JsonGenerator::signalError(jsonWriter, errorstream.str());
} else { // json parsed
std::string operation = _getStringParameter(d, OPERATION_PARAM);
- if (operation == ADD_SENTENCE_OP) {
+ if (operation == ADD_SENTENCE_OP) {
std::string sourceSentence = _getStringParameter(d, SOURCE_SENTENCE_PARAM);
std::string targetSentence = _getStringParameter(d, TARGET_SENTENCE_PARAM);
int tmId = _getIntParameter(d, TM_ID_PARAM);
@@ -93,6 +94,15 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
}
}
_indexController->addAlignedSentences(jsonWriter, sourceSentences, targetSentences, tmId);
+ } else if (operation == "lemmatize") {
+ std::string sentence = _getStringParameter(d, "sentence");
+ std::string languageCode = _getStringParameter(d, "languageCode");
+ SocketLemmatizer lemmatizer;
+ std::string lemmatizedSentence = lemmatizer.lemmatizeSentence(languageCode, sentence);
+ jsonWriter.StartObject();
+ jsonWriter.String("lemmatizedSentence");
+ jsonWriter.String(lemmatizedSentence.c_str());
+ jsonWriter.EndObject();
} else if (operation == REFRESH_INDEX_OP) {
int tmId = _getIntParameter(d, TM_ID_PARAM);
_indexController->refreshIndexFromRAM(jsonWriter, tmId);
@@ -104,7 +114,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
std::string pattern = _getStringParameter(d, PATTERN_PARAM);
int tmId = _getIntParameter(d, TM_ID_PARAM);
Logger::logString("concordia search pattern", pattern);
- _searcherController->concordiaSearch(jsonWriter, pattern, tmId);
+ _searcherController->concordiaSearch(jsonWriter, pattern, tmId);
} else if (operation == CONCORDIA_PHRASE_SEARCH_OP) {
std::string pattern = _getStringParameter(d, PATTERN_PARAM);
int tmId = _getIntParameter(d, TM_ID_PARAM);
@@ -114,31 +124,31 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
const rapidjson::Value & intervalsArray = d[INTERVALS_PARAM];
for (rapidjson::SizeType i = 0; i < intervalsArray.Size(); i++) {
intervals.push_back(Interval(intervalsArray[i][0].GetInt(), intervalsArray[i][1].GetInt()));
- }
- _searcherController->concordiaPhraseSearch(jsonWriter, pattern, intervals, tmId);
+ }
+ _searcherController->concordiaPhraseSearch(jsonWriter, pattern, intervals, tmId);
} else if (operation == ADD_TM_OP) {
int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM);
int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM);
std::string name = _getStringParameter(d, NAME_PARAM);
int newId = _tmDAO.addTm(sourceLangId, targetLangId, name);
_addTm(newId);
-
+
jsonWriter.StartObject();
jsonWriter.String("status");
jsonWriter.String("success");
jsonWriter.String("newTmId");
jsonWriter.Int(newId);
jsonWriter.EndObject();
-
+
} else {
JsonGenerator::signalError(jsonWriter, "no such operation: " + operation);
}
}
-
+
} catch (ConcordiaException & e) {
std::stringstream errorstream;
errorstream << "concordia error: " << e.what();
- JsonGenerator::signalError(jsonWriter, errorstream.str());
+ JsonGenerator::signalError(jsonWriter, errorstream.str());
}
outputString << outputJson.GetString();
@@ -182,5 +192,5 @@ void ConcordiaServer::_logPhrase(std::string phraseString) {
std::ofstream logFile;
logFile.open(PHRASE_LOG_FILE_PATH, std::ios::out | std::ios::app);
logFile << phraseString.substr(0, phraseString.size()-1) << ", \"timestamp\":" << std::time(0) << "}\n";
- logFile.close();
+ logFile.close();
}
diff --git a/concordia-server/db_connection.hpp b/concordia-server/db_connection.hpp
index 5b821af..c65fb35 100644
--- a/concordia-server/db_connection.hpp
+++ b/concordia-server/db_connection.hpp
@@ -1,5 +1,5 @@
-#ifndef DB_MANAGER_HDR
-#define DB_MANAGER_HDR
+#ifndef DB_CONNECTION_HDR
+#define DB_CONNECTION_HDR
#include
#include
@@ -17,7 +17,7 @@ public:
/*! Destructor.
*/
virtual ~DBconnection();
-
+
void startTransaction() throw(ConcordiaException);
void endTransaction() throw(ConcordiaException);
@@ -28,16 +28,16 @@ public:
std::vector params) throw(ConcordiaException);
void clearResult(PGresult * result);
-
+
int getIntValue(PGresult * result, int row, int col) throw (ConcordiaException);
-
+
std::string getStringValue(PGresult * result, int row, int col) throw (ConcordiaException);
int getRowCount(PGresult * result) throw (ConcordiaException);
private:
void close();
-
+
PGconn * _connection;
};
diff --git a/concordia-server/socket_lemmatizer.cpp b/concordia-server/socket_lemmatizer.cpp
new file mode 100644
index 0000000..f6170a8
--- /dev/null
+++ b/concordia-server/socket_lemmatizer.cpp
@@ -0,0 +1,90 @@
+#include "socket_lemmatizer.hpp"
+
+#include <unistd.h> //close
+
+// Creates the client with no socket yet (-1) and immediately connects to the
+// lemmatizer service on 127.0.0.1:11000. _connect() reports failures by
+// throwing ConcordiaException.
+SocketLemmatizer::SocketLemmatizer() throw(ConcordiaException)
+    : _sock(-1) {
+    const std::string lemmatizerHost("127.0.0.1");
+    const int lemmatizerPort = 11000;
+    _connect(lemmatizerHost, lemmatizerPort);
+}
+
+// Releases the socket descriptor opened by _connect(). Without this every
+// SocketLemmatizer instance leaks one file descriptor.
+SocketLemmatizer::~SocketLemmatizer() {
+    if (_sock != -1) {
+        ::close(_sock);
+        _sock = -1;
+    }
+}
+
+/**
+    Connect to a host on a certain port number.
+
+    \param address dotted-quad IPv4 address or a host name to resolve
+    \param port TCP port in host byte order
+    \returns true on success
+    \throws ConcordiaException when the name cannot be resolved, the socket
+            cannot be created or the connection is refused
+*/
+bool SocketLemmatizer::_connect(std::string address , int port)
+{
+    //start from an all-zero structure so that no field (e.g. sin_zero)
+    //carries indeterminate bytes into connect()
+    _server = sockaddr_in();
+    _server.sin_family = AF_INET;
+    _server.sin_port = htons(port);
+
+    //setup address structure; the address is resolved before the socket is
+    //created so that a resolution failure cannot leak a descriptor
+    const in_addr_t numericAddress = inet_addr(address.c_str());
+    if (numericAddress == INADDR_NONE) {
+        //resolve the hostname, its not an ip address
+        struct hostent * he = gethostbyname(address.c_str());
+        if (he == NULL || he->h_addr_list == NULL || he->h_addr_list[0] == NULL) {
+            //gethostbyname failed or returned no address
+            throw ConcordiaException("gethostbyname: Failed to resolve hostname");
+        }
+
+        //Cast the h_addr_list entry to in_addr , since h_addr_list also has the ip address in long format only
+        _server.sin_addr = *reinterpret_cast<struct in_addr *>(he->h_addr_list[0]);
+    } else { //plain ip address
+        _server.sin_addr.s_addr = numericAddress;
+    }
+
+    //create socket if it is not already created
+    if (_sock == -1) {
+        _sock = socket(AF_INET , SOCK_STREAM , 0);
+        if (_sock == -1) {
+            throw ConcordiaException("Could not create socket for the lemmatizer.");
+        }
+    }
+
+    //Connect to remote server
+    if (connect(_sock , (struct sockaddr *) & _server , sizeof(_server)) < 0) {
+        //this function is called from the constructor, in which case the
+        //destructor will not run after the throw: release the descriptor here
+        ::close(_sock);
+        _sock = -1;
+        throw ConcordiaException("connect failed. Error");
+    }
+
+    return true;
+}
+
+/**
+    Send data to the connected host.
+
+    Keeps calling send() until every byte of \p data has been handed to the
+    socket, because a single send() on a stream socket may accept only part
+    of the buffer. The length is taken from the string itself, so data
+    containing NUL bytes is not truncated.
+
+    \returns true on success
+    \throws ConcordiaException when send() fails
+*/
+bool SocketLemmatizer::_send_data(std::string data)
+{
+    const char * cursor = data.data();
+    std::string::size_type remaining = data.size();
+
+    while (remaining > 0) {
+        const ssize_t sent = send(_sock , cursor , remaining , 0);
+        if (sent <= 0) {
+            throw ConcordiaException("Send failed");
+        }
+        cursor += sent;
+        remaining -= static_cast<std::string::size_type>(sent);
+    }
+    return true;
+}
+
+/**
+    Receive data from the connected host.
+
+    Reads in chunks of \p size bytes until the reply contains the message
+    terminator "@#@" or the peer closes the connection, and returns everything
+    read so far (terminator included). Only the bytes actually delivered by
+    recv() are appended, so the result never contains uninitialized buffer
+    contents and is not cut off at the first chunk.
+
+    \param size chunk size in bytes; non-positive values fall back to 512
+    \throws ConcordiaException when recv() fails
+*/
+std::string SocketLemmatizer::_receive(int size=512)
+{
+    static const char TERMINATOR[] = "@#@";
+
+    const std::string::size_type chunkSize = size > 0 ? size : 512;
+    std::string chunk(chunkSize, '\0');
+    std::string reply;
+
+    //Receive a reply from the server
+    while (reply.find(TERMINATOR) == std::string::npos) {
+        const ssize_t received = recv(_sock , &chunk[0] , chunk.size() , 0);
+        if (received < 0) {
+            throw ConcordiaException("Receive failed");
+        }
+        if (received == 0) {
+            //peer closed the connection: return what has arrived
+            break;
+        }
+        reply.append(chunk, 0, static_cast<std::string::size_type>(received));
+    }
+    return reply;
+}
+
+// Sends "<languageCode><sentence>@#@" to the lemmatizer service and returns
+// the part of the reply that precedes the first "@#@" (the whole reply when
+// no terminator is present).
+std::string SocketLemmatizer::lemmatizeSentence(std::string languageCode, std::string sentence) {
+    const std::string terminator("@#@");
+
+    std::string request(languageCode);
+    request += sentence;
+    request += terminator;
+    _send_data(request);
+
+    const std::string reply = _receive(512);
+    const std::string::size_type terminatorPos = reply.find(terminator);
+    return reply.substr(0, terminatorPos);
+}
diff --git a/concordia-server/socket_lemmatizer.hpp b/concordia-server/socket_lemmatizer.hpp
new file mode 100644
index 0000000..7f20255
--- /dev/null
+++ b/concordia-server/socket_lemmatizer.hpp
@@ -0,0 +1,35 @@
+#ifndef SOCKET_LEMMATIZER_HDR
+#define SOCKET_LEMMATIZER_HDR
+
+#include
+#include //socket
+#include //inet_addr
+#include //hostent
+
+#include
+
+
+/*! Client of the lemmatizer service, which is reached over a TCP socket.
+    The "@#@" sequence terminates both the request and the reply.
+    NOTE(review): the object owns a raw socket descriptor (_sock) but is
+    copyable; copies would share the descriptor -- consider disabling copying.
+ */
+class SocketLemmatizer {
+public:
+    /*! Constructor. Connects to the lemmatizer service at 127.0.0.1:11000.
+      \throws ConcordiaException when the connection cannot be established
+    */
+    SocketLemmatizer() throw(ConcordiaException);
+
+    /*! Destructor.
+    */
+    virtual ~SocketLemmatizer();
+
+    /*! Lemmatizes a sentence.
+      \param languageCode language code prepended to the request
+      \param sentence text to lemmatize
+      \returns the service's reply up to the "@#@" terminator
+    */
+    std::string lemmatizeSentence(std::string languageCode, std::string sentence);
+
+private:
+    //! Connects _sock to the given address and port; returns true on success.
+    bool _connect(std::string address, int port);
+
+    //! Sends the data over _sock; returns true on success.
+    bool _send_data(std::string data);
+
+    //! Receives a reply from _sock using a buffer of the given size.
+    std::string _receive(int size);
+
+    //! Socket descriptor, -1 while no socket has been created.
+    int _sock;
+
+    //! Address of the lemmatizer service, filled in by _connect.
+    struct sockaddr_in _server;
+
+};
+
+#endif
diff --git a/tests/lemmatizeSentence.py b/tests/lemmatizeSentence.py
new file mode 100755
index 0000000..4874f7c
--- /dev/null
+++ b/tests/lemmatizeSentence.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import unittest
+import json
+import urllib2
+import sys
+import time
+import host
+
+data = {
+ 'operation': 'lemmatize',
+ 'languageCode':sys.argv[1],
+ 'sentence':sys.argv[2]
+}
+
+address = 'http://'+host.concordia_host
+if len(host.concordia_port) > 0:
+ address += ':'+host.concordia_port
+
+start = time.time()
+req = urllib2.Request(address)
+req.add_header('Content-Type', 'application/json')
+response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+end = time.time()
+
+print "Execution time: %.4f seconds." % (end-start)
+print "Result: "
+print response
diff --git a/upstart/README.txt b/upstart/README.txt
index 894ef48..7be3284 100644
--- a/upstart/README.txt
+++ b/upstart/README.txt
@@ -1,4 +1,4 @@
-In order to configure Concordia as upstart job, copy the 2 .conf files into your /etc/init and run:
+In order to configure Concordia as an upstart job, copy the 3 .conf files into your /etc/init and run:
sudo initctl reload-configuration
diff --git a/upstart/cmake_stubs/lemmagen.conf.in b/upstart/cmake_stubs/lemmagen.conf.in
new file mode 100644
index 0000000..86a7714
--- /dev/null
+++ b/upstart/cmake_stubs/lemmagen.conf.in
@@ -0,0 +1,23 @@
+# lemmagen
+#
+# This service runs the LemmaGen lemmatizer (LemmaGenSockets.exe)
+# via mono. The process listens on 127.0.0.1, port 11000.
+
+description LemmaGen
+
+# When to start the service
+start on started concordia-server
+
+# When to stop the service
+stop on runlevel [016]
+
+# Automatically restart process if crashed
+respawn
+
+# Lets upstart know that the job forks once: the script below starts mono in the background with '&'
+expect fork
+
+# Start the process
+script
+ exec mono @LEMMAGEN_BINARIES_PATH@/LemmaGenSockets.exe &
+end script