initial commit from concordia-server repository
This commit is contained in:
commit
e14ff73fc6
BIN
LemmaGen/LemmaSharp.dll
Normal file
BIN
LemmaGen/LemmaSharp.dll
Normal file
Binary file not shown.
BIN
LemmaGen/LemmaSharpPrebuilt.dll
Normal file
BIN
LemmaGen/LemmaSharpPrebuilt.dll
Normal file
Binary file not shown.
BIN
LemmaGen/LemmaSharpPrebuiltCompact.dll
Normal file
BIN
LemmaGen/LemmaSharpPrebuiltCompact.dll
Normal file
Binary file not shown.
BIN
LemmaGen/Lzma#.dll
Normal file
BIN
LemmaGen/Lzma#.dll
Normal file
Binary file not shown.
22
LemmaGenSockets.sln
Normal file
22
LemmaGenSockets.sln
Normal file
@ -0,0 +1,22 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 14
|
||||
VisualStudioVersion = 14.0.25420.1
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaGenSockets", "LemmaGenSockets\LemmaGenSockets.csproj", "{3098BC55-2CC9-4612-9F79-8C812B3BE539}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{3098BC55-2CC9-4612-9F79-8C812B3BE539}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{3098BC55-2CC9-4612-9F79-8C812B3BE539}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{3098BC55-2CC9-4612-9F79-8C812B3BE539}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{3098BC55-2CC9-4612-9F79-8C812B3BE539}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
6
LemmaGenSockets/App.config
Normal file
6
LemmaGenSockets/App.config
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.2" />
|
||||
</startup>
|
||||
</configuration>
|
73
LemmaGenSockets/LemmaGenSockets.csproj
Normal file
73
LemmaGenSockets/LemmaGenSockets.csproj
Normal file
@ -0,0 +1,73 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{3098BC55-2CC9-4612-9F79-8C812B3BE539}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>LemmaGenSockets</RootNamespace>
|
||||
<AssemblyName>LemmaGenSockets</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="LemmaSharp">
|
||||
<HintPath>..\LemmaGen\LemmaSharp.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="LemmaSharpPrebuilt">
|
||||
<HintPath>..\LemmaGen\LemmaSharpPrebuilt.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="LemmaSharpPrebuiltCompact">
|
||||
<HintPath>..\LemmaGen\LemmaSharpPrebuiltCompact.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Lzma#">
|
||||
<HintPath>..\LemmaGen\Lzma#.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Net.Http" />
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="LemmatizerListener.cs" />
|
||||
<Compile Include="Program.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="App.config" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
|
||||
Other similar extension points exist, see Microsoft.Common.targets.
|
||||
<Target Name="BeforeBuild">
|
||||
</Target>
|
||||
<Target Name="AfterBuild">
|
||||
</Target>
|
||||
-->
|
||||
</Project>
|
180
LemmaGenSockets/LemmatizerListener.cs
Normal file
180
LemmaGenSockets/LemmatizerListener.cs
Normal file
@ -0,0 +1,180 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Threading.Tasks;
|
||||
using LemmaSharp;
|
||||
|
||||
namespace LemmaGenSockets
|
||||
{
|
||||
/// <summary>
/// Blocking, single-threaded TCP server that lemmatizes sentences with LemmaSharp.
/// </summary>
/// <remarks>
/// Wire protocol, as implemented by <see cref="DoListening"/>: the client sends UTF-8 text
/// consisting of a two-character language code immediately followed by the sentence and the
/// terminator <c>@#@</c>. The server replies with the lemmatized sentence followed by the same
/// terminator and then closes the connection. One request is served per connection.
/// </remarks>
class LemmatizerListener
{
    // TCP port the server listens on (loopback interface only).
    private const int ListenPort = 11000;

    // Maximum length of the pending-connections queue passed to Socket.Listen.
    private const int PendingConnectionsBacklog = 10;

    // Size, in bytes, of the buffer used for each Receive call.
    private const int ReceiveBufferSize = 1024;

    // Number of leading characters of a request that hold the language code.
    private const int LanguageCodeLength = 2;

    // Marks the end of both the request and the reply.
    private const string MessageTerminator = "@#@";

    // Tokens starting with this prefix are returned unchanged.
    // NOTE(review): presumably these are named-entity placeholders - confirm with the client side.
    private const string PassThroughPrefix = "ne_";

    // Separator of compound words such as "first-second".
    private static readonly char[] wordInnerSeparator = { '-' };

    // Per-language words that must never be lemmatized. Built once instead of once per word.
    private static readonly Dictionary<string, HashSet<string>> lemmatizationExceptions =
        new Dictionary<string, HashSet<string>>
        {
            { "pl", new HashSet<string> { "i", "o", "do" } },
            { "en", new HashSet<string> { "d" } }
        };

    // Lemmatizers keyed by the two-character language code used on the wire.
    private readonly Dictionary<string, ILemmatizer> lemmatizersDict = new Dictionary<string, ILemmatizer>();

    /// <summary>
    /// Creates the listener and loads a prebuilt lemmatizer for every supported language.
    /// </summary>
    public LemmatizerListener()
    {
        initializeLemmatizers();
    }

    /// <summary>
    /// Registers the prebuilt lemmatizers under their language codes.
    /// </summary>
    private void initializeLemmatizers()
    {
        lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
        lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
        // NOTE(review): "hr" is served by the Serbian model - presumably intentional because no
        // Croatian model is available; confirm.
        lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian));
        lemmatizersDict.Add("fr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.French));
    }

    /// <summary>
    /// Lemmatizes every whitespace-separated token of <paramref name="sentence"/>.
    /// </summary>
    /// <param name="languageCode">Two-character language code selecting the lemmatizer.</param>
    /// <param name="sentence">Text to lemmatize.</param>
    /// <returns>
    /// The lemmatized tokens joined by single spaces and trimmed, or the unchanged
    /// <paramref name="sentence"/> when no lemmatizer is registered for the language.
    /// </returns>
    private string lemmatizeSentence(string languageCode, string sentence)
    {
        ILemmatizer lemmatizer;
        if (!lemmatizersDict.TryGetValue(languageCode, out lemmatizer))
        {
            // If we cannot lemmatize, do not touch the text at all (primum non nocere).
            return sentence;
        }

        // A null separator array splits on any Unicode whitespace; empty tokens are kept.
        string[] tokens = sentence.Split((char[])null);

        StringBuilder result = new StringBuilder(sentence.Length);
        foreach (string token in tokens)
        {
            result.Append(lemmatizeWord(lemmatizer, languageCode, token)).Append(' ');
        }

        return result.ToString().Trim();
    }

    /// <summary>
    /// Lemmatizes a single token.
    /// </summary>
    /// <param name="lemmatizer">Lemmatizer registered for <paramref name="languageCode"/>.</param>
    /// <param name="languageCode">Language code used to look up the exception list.</param>
    /// <param name="word">Token to lemmatize.</param>
    /// <returns>
    /// The lemma, or <paramref name="word"/> itself when it is exempt from lemmatization
    /// or the lemmatizer produced an empty result.
    /// </returns>
    private static string lemmatizeWord(ILemmatizer lemmatizer, string languageCode, string word)
    {
        if (word.StartsWith(PassThroughPrefix, StringComparison.Ordinal))
        {
            return word;
        }

        HashSet<string> languageExceptions;
        if (lemmatizationExceptions.TryGetValue(languageCode, out languageExceptions)
            && languageExceptions.Contains(word))
        {
            return word;
        }

        string result;
        string[] parts = word.Split(wordInnerSeparator);
        if (parts.Length == 2)
        {
            // Two-part compound: each part is lemmatized separately, except that a first
            // part ending in "o" is kept verbatim.
            string firstPart = parts[0].EndsWith("o", StringComparison.Ordinal)
                ? parts[0]
                : lemmatizer.Lemmatize(parts[0]);
            string secondPart = lemmatizer.Lemmatize(parts[1]);
            result = firstPart + "-" + secondPart;
        }
        else
        {
            result = lemmatizer.Lemmatize(word);
        }

        return string.IsNullOrEmpty(result) ? word : result;
    }

    /// <summary>
    /// Reads one request from <paramref name="handler"/> up to the message terminator.
    /// </summary>
    /// <param name="handler">Connected client socket.</param>
    /// <returns>
    /// The request text without the terminator, or <c>null</c> when the peer closed the
    /// connection before a terminator arrived.
    /// </returns>
    private static string receiveRequest(Socket handler)
    {
        byte[] buffer = new byte[ReceiveBufferSize];
        char[] chars = new char[Encoding.UTF8.GetMaxCharCount(ReceiveBufferSize)];

        // A stateful decoder keeps the bytes of a multi-byte UTF-8 character that is split
        // across two Receive calls and completes it on the next call.
        Decoder decoder = Encoding.UTF8.GetDecoder();
        StringBuilder received = new StringBuilder();

        while (true)
        {
            int bytesRead = handler.Receive(buffer);
            if (bytesRead == 0)
            {
                // Receive returns 0 once the peer has shut down; without this check the
                // loop would spin forever waiting for a terminator that can never come.
                return null;
            }

            int charCount = decoder.GetChars(buffer, 0, bytesRead, chars, 0);
            received.Append(chars, 0, charCount);

            string text = received.ToString();
            int terminatorIndex = text.IndexOf(MessageTerminator, StringComparison.Ordinal);
            if (terminatorIndex >= 0)
            {
                return text.Substring(0, terminatorIndex);
            }
        }
    }

    /// <summary>
    /// Serves a single client: reads the request, lemmatizes it and sends the reply.
    /// </summary>
    /// <param name="handler">Connected client socket; the caller closes it.</param>
    private void serveClient(Socket handler)
    {
        string request = receiveRequest(handler);
        if (request == null)
        {
            // Client went away without sending a complete request; nothing to answer.
            return;
        }

        string reply;
        if (request.Length < LanguageCodeLength)
        {
            // Malformed request with no room for a sentence: answer with an empty sentence
            // instead of letting Substring throw.
            reply = "";
        }
        else
        {
            string languageCode = request.Substring(0, LanguageCodeLength);
            string sentence = request.Substring(LanguageCodeLength);
            reply = lemmatizeSentence(languageCode, sentence);
        }

        handler.Send(Encoding.UTF8.GetBytes(reply + MessageTerminator));
        handler.Shutdown(SocketShutdown.Both);
    }

    /// <summary>
    /// Binds to 127.0.0.1:11000 and serves clients one at a time until the listening
    /// socket itself fails.
    /// </summary>
    /// <remarks>
    /// Errors while serving an individual client are written to the console and the server
    /// carries on with the next connection. Errors from binding, listening or accepting are
    /// written to the console and end the method.
    /// </remarks>
    public void DoListening()
    {
        IPEndPoint localEndPoint = new IPEndPoint(IPAddress.Loopback, ListenPort);
        Socket listener = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);

        try
        {
            listener.Bind(localEndPoint);
            listener.Listen(PendingConnectionsBacklog);

            while (true)
            {
                // Blocks until a client connects. A failure here is not caught per client,
                // so it ends the loop rather than spinning on a broken listener.
                Socket handler = listener.Accept();
                try
                {
                    serveClient(handler);
                }
                catch (Exception e)
                {
                    // One misbehaving client must not take the whole server down.
                    Console.WriteLine(e.ToString());
                }
                finally
                {
                    handler.Close();
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e.ToString());
        }
        finally
        {
            listener.Close();
        }
    }
}
|
||||
}
|
26
LemmaGenSockets/Program.cs
Normal file
26
LemmaGenSockets/Program.cs
Normal file
@ -0,0 +1,26 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Threading.Tasks;
|
||||
using LemmaSharp;
|
||||
|
||||
namespace LemmaGenSockets
|
||||
{
|
||||
/// <summary>
/// Console entry point of the lemmatization socket server.
/// </summary>
class Program
{
    /// <summary>
    /// Incoming data from the client. Not read or written by <see cref="Main"/>.
    /// </summary>
    public static string data = null;

    /// <summary>
    /// Creates a <see cref="LemmatizerListener"/> and runs its listening loop on the
    /// calling thread; the process ends when that loop returns.
    /// </summary>
    /// <param name="args">Command-line arguments (ignored).</param>
    static void Main(string[] args)
    {
        new LemmatizerListener().DoListening();
    }
}
|
||||
}
|
36
LemmaGenSockets/Properties/AssemblyInfo.cs
Normal file
36
LemmaGenSockets/Properties/AssemblyInfo.cs
Normal file
@ -0,0 +1,36 @@
|
||||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("LemmaGenSockets")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("")]
|
||||
[assembly: AssemblyProduct("LemmaGenSockets")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2017")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
||||
// Setting ComVisible to false makes the types in this assembly not visible
|
||||
// to COM components. If you need to access a type in this assembly from
|
||||
// COM, set the ComVisible attribute to true on that type.
|
||||
[assembly: ComVisible(false)]
|
||||
|
||||
// The following GUID is for the ID of the typelib if this project is exposed to COM
|
||||
[assembly: Guid("3098bc55-2cc9-4612-9f79-8c812b3be539")]
|
||||
|
||||
// Version information for an assembly consists of the following four values:
|
||||
//
|
||||
// Major Version
|
||||
// Minor Version
|
||||
// Build Number
|
||||
// Revision
|
||||
//
|
||||
// You can specify all the values or you can default the Build and Revision Numbers
|
||||
// by using the '*' as shown below:
|
||||
// [assembly: AssemblyVersion("1.0.*")]
|
||||
[assembly: AssemblyVersion("1.0.0.0")]
|
||||
[assembly: AssemblyFileVersion("1.0.0.0")]
|
BIN
LemmaGenSockets/bin/Debug/LemmaGenSockets.exe
Normal file
BIN
LemmaGenSockets/bin/Debug/LemmaGenSockets.exe
Normal file
Binary file not shown.
6
LemmaGenSockets/bin/Debug/LemmaGenSockets.exe.config
Normal file
6
LemmaGenSockets/bin/Debug/LemmaGenSockets.exe.config
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.2" />
|
||||
</startup>
|
||||
</configuration>
|
BIN
LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb
Normal file
BIN
LemmaGenSockets/bin/Debug/LemmaGenSockets.pdb
Normal file
Binary file not shown.
BIN
LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe
Normal file
BIN
LemmaGenSockets/bin/Debug/LemmaGenSockets.vshost.exe
Normal file
Binary file not shown.
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.2" />
|
||||
</startup>
|
||||
</configuration>
|
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
|
||||
<assemblyIdentity version="1.0.0.0" name="MyApplication.app"/>
|
||||
<trustInfo xmlns="urn:schemas-microsoft-com:asm.v2">
|
||||
<security>
|
||||
<requestedPrivileges xmlns="urn:schemas-microsoft-com:asm.v3">
|
||||
<requestedExecutionLevel level="asInvoker" uiAccess="false"/>
|
||||
</requestedPrivileges>
|
||||
</security>
|
||||
</trustInfo>
|
||||
</assembly>
|
BIN
LemmaGenSockets/bin/Debug/LemmaSharp.dll
Normal file
BIN
LemmaGenSockets/bin/Debug/LemmaSharp.dll
Normal file
Binary file not shown.
BIN
LemmaGenSockets/bin/Debug/LemmaSharpPrebuilt.dll
Normal file
BIN
LemmaGenSockets/bin/Debug/LemmaSharpPrebuilt.dll
Normal file
Binary file not shown.
BIN
LemmaGenSockets/bin/Debug/LemmaSharpPrebuiltCompact.dll
Normal file
BIN
LemmaGenSockets/bin/Debug/LemmaSharpPrebuiltCompact.dll
Normal file
Binary file not shown.
BIN
LemmaGenSockets/bin/Debug/Lzma#.dll
Normal file
BIN
LemmaGenSockets/bin/Debug/Lzma#.dll
Normal file
Binary file not shown.
Binary file not shown.
@ -0,0 +1,21 @@
|
||||
j:\documents\visual studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe.config
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.pdb
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharp.dll
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuilt.dll
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuiltCompact.dll
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Debug\Lzma#.dll
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe.config
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.pdb
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharp.dll
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuilt.dll
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuiltCompact.dll
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\Lzma#.dll
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csproj.CoreCompileInputs.cache
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb
|
Binary file not shown.
BIN
LemmaGenSockets/obj/Debug/LemmaGenSockets.exe
Normal file
BIN
LemmaGenSockets/obj/Debug/LemmaGenSockets.exe
Normal file
Binary file not shown.
BIN
LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb
Normal file
BIN
LemmaGenSockets/obj/Debug/LemmaGenSockets.pdb
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user