exceptions for lemmatizer - alpha
This commit is contained in:
parent
f799164d64
commit
3c575cd596
@ -53,6 +53,29 @@ namespace LemmaGenSockets
|
||||
|
||||
private string lemmatizeWord(string languageCode, string word)
|
||||
{
|
||||
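// Exception words (not thrown exceptions): per-language sets of words that are returned unchanged, keyed by language code.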
|
||||
Dictionary<String, HashSet<String>> exceptions = new Dictionary<string, HashSet<string>>();
|
||||
|
||||
HashSet<String> plExceptions = new HashSet<string>();
|
||||
plExceptions.Add("i");
|
||||
plExceptions.Add("o");
|
||||
plExceptions.Add("do");
|
||||
exceptions.Add("pl", plExceptions);
|
||||
|
||||
HashSet<String> enExceptions = new HashSet<string>();
|
||||
enExceptions.Add("d");
|
||||
exceptions.Add("en", enExceptions);
|
||||
|
||||
HashSet<String> languageExceptions;
|
||||
if (exceptions.TryGetValue(languageCode, out languageExceptions))
|
||||
{
|
||||
if(languageExceptions.Contains(word))
|
||||
{
|
||||
return word;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
string[] parts = word.Split(wordInnerSeparator);
|
||||
if (parts.Length == 2)
|
||||
{
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user