exceptions for lemmatizer - alpha
This commit is contained in:
parent
f799164d64
commit
3c575cd596
@ -53,6 +53,29 @@ namespace LemmaGenSockets
|
|||||||
|
|
||||||
private string lemmatizeWord(string languageCode, string word)
|
private string lemmatizeWord(string languageCode, string word)
|
||||||
{
|
{
|
||||||
|
// exceptions
|
||||||
|
Dictionary<String, HashSet<String>> exceptions = new Dictionary<string, HashSet<string>>();
|
||||||
|
|
||||||
|
HashSet<String> plExceptions = new HashSet<string>();
|
||||||
|
plExceptions.Add("i");
|
||||||
|
plExceptions.Add("o");
|
||||||
|
plExceptions.Add("do");
|
||||||
|
exceptions.Add("pl", plExceptions);
|
||||||
|
|
||||||
|
HashSet<String> enExceptions = new HashSet<string>();
|
||||||
|
enExceptions.Add("d");
|
||||||
|
exceptions.Add("en", enExceptions);
|
||||||
|
|
||||||
|
HashSet<String> languageExceptions;
|
||||||
|
if (exceptions.TryGetValue(languageCode, out languageExceptions))
|
||||||
|
{
|
||||||
|
if(languageExceptions.Contains(word))
|
||||||
|
{
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
string[] parts = word.Split(wordInnerSeparator);
|
string[] parts = word.Split(wordInnerSeparator);
|
||||||
if (parts.Length == 2)
|
if (parts.Length == 2)
|
||||||
{
|
{
|
||||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user