using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Net; using System.Net.Sockets; using System.Threading.Tasks; using LemmaSharp; namespace LemmaGenSockets { class LemmatizerListener { private Dictionary lemmatizersDict = new Dictionary(); private char[] wordInnerSeparator = { '-' }; private void initializeLemmatizers() { lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish)); lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English)); lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian)); lemmatizersDict.Add("fr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.French)); } public LemmatizerListener() { initializeLemmatizers(); } private string lemmatizeSentence(string languageCode, string sentence) { if (lemmatizersDict.ContainsKey(languageCode)) { string[] tokens = sentence.Split(null); string result = ""; foreach (string token in tokens) { result += lemmatizeWord(languageCode, token) + " "; } return result.Trim(); } else { //if we can not lemmatize, let's not do it at all //primum non nocere return sentence; } } private string lemmatizeWord(string languageCode, string word) { // exceptions if (word.StartsWith("ne_")) { return word; } Dictionary> exceptions = new Dictionary>(); HashSet plExceptions = new HashSet(); plExceptions.Add("i"); plExceptions.Add("o"); plExceptions.Add("do"); exceptions.Add("pl", plExceptions); HashSet enExceptions = new HashSet(); enExceptions.Add("d"); exceptions.Add("en", enExceptions); HashSet languageExceptions; if (exceptions.TryGetValue(languageCode, out languageExceptions)) { if(languageExceptions.Contains(word)) { return word; } } string result = ""; string[] parts = word.Split(wordInnerSeparator); if (parts.Length == 2) { string firstPart = parts[0]; if (!parts[0].EndsWith("o")) { firstPart = lemmatizersDict[languageCode].Lemmatize(firstPart); } string secondPart = lemmatizersDict[languageCode].Lemmatize(parts[1]); result = firstPart + "-" + secondPart; } else { result = lemmatizersDict[languageCode].Lemmatize(word); } if (result == "") { return word; } else { return result; } } public void DoListening() { // Data buffer for incoming data. byte[] bytes = new Byte[1024]; string data; // Establish the local endpoint for the socket. IPAddress ipAddress = IPAddress.Parse("127.0.0.1"); IPEndPoint localEndPoint = new IPEndPoint(ipAddress, 11000); // Create a TCP/IP socket. Socket listener = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); // Bind the socket to the local endpoint and // listen for incoming connections. try { listener.Bind(localEndPoint); listener.Listen(10); // Start listening for connections. while (true) { // Program is suspended while waiting for an incoming connection. Socket handler = listener.Accept(); data = null; // An incoming connection needs to be processed. while (true) { bytes = new byte[1024]; int bytesRec = handler.Receive(bytes); data += Encoding.UTF8.GetString(bytes, 0, bytesRec); if (data.IndexOf("@#@") > -1) { break; } } data = data.Substring(0, data.IndexOf("@#@")); string languageCode = data.Substring(0, 2); string sentence = data.Substring(2); // Show the data on the console. // Console.WriteLine("Sentence received : "+ sentence + ", language code : "+languageCode); // Send lemmatized data back to client. byte[] msg = Encoding.UTF8.GetBytes(lemmatizeSentence(languageCode, sentence) + "@#@"); handler.Send(msg); handler.Shutdown(SocketShutdown.Both); handler.Close(); } } catch (Exception e) { Console.WriteLine(e.ToString()); } } } }