exceptions for lemmatizer - alpha

This commit is contained in:
Rafał Jaworski 2018-12-30 23:34:00 +01:00
parent f799164d64
commit 3c575cd596
6 changed files with 23 additions and 0 deletions

View File

@@ -53,6 +53,29 @@ namespace LemmaGenSockets
private string lemmatizeWord(string languageCode, string word)
{
// exceptions
Dictionary<String, HashSet<String>> exceptions = new Dictionary<string, HashSet<string>>();
HashSet<String> plExceptions = new HashSet<string>();
plExceptions.Add("i");
plExceptions.Add("o");
plExceptions.Add("do");
exceptions.Add("pl", plExceptions);
HashSet<String> enExceptions = new HashSet<string>();
enExceptions.Add("d");
exceptions.Add("en", enExceptions);
HashSet<String> languageExceptions;
if (exceptions.TryGetValue(languageCode, out languageExceptions))
{
if(languageExceptions.Contains(word))
{
return word;
}
}
string[] parts = word.Split(wordInnerSeparator);
if (parts.Length == 2)
{