French lemmatizer
This commit is contained in:
parent
53d2f47b57
commit
f799164d64
@ -21,6 +21,7 @@ namespace LemmaGenSockets
|
|||||||
lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
|
lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
|
||||||
lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
|
lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
|
||||||
lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian));
|
lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian));
|
||||||
|
lemmatizersDict.Add("fr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.French));
|
||||||
}
|
}
|
||||||
|
|
||||||
public LemmatizerListener()
|
public LemmatizerListener()
|
||||||
@ -52,13 +53,7 @@ namespace LemmaGenSockets
|
|||||||
|
|
||||||
private string lemmatizeWord(string languageCode, string word)
|
private string lemmatizeWord(string languageCode, string word)
|
||||||
{
|
{
|
||||||
if (word.StartsWith("ne_") || word == "i" || word == "o" || word == "do")
|
|
||||||
{
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
string[] parts = word.Split(wordInnerSeparator);
|
string[] parts = word.Split(wordInnerSeparator);
|
||||||
|
|
||||||
string result = "";
|
|
||||||
if (parts.Length == 2)
|
if (parts.Length == 2)
|
||||||
{
|
{
|
||||||
string firstPart = parts[0];
|
string firstPart = parts[0];
|
||||||
@ -67,20 +62,11 @@ namespace LemmaGenSockets
|
|||||||
firstPart = lemmatizersDict[languageCode].Lemmatize(firstPart);
|
firstPart = lemmatizersDict[languageCode].Lemmatize(firstPart);
|
||||||
}
|
}
|
||||||
string secondPart = lemmatizersDict[languageCode].Lemmatize(parts[1]);
|
string secondPart = lemmatizersDict[languageCode].Lemmatize(parts[1]);
|
||||||
result = firstPart + "-" + secondPart;
|
return firstPart + "-" + secondPart;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
result = lemmatizersDict[languageCode].Lemmatize(word);
|
return lemmatizersDict[languageCode].Lemmatize(word);
|
||||||
}
|
|
||||||
|
|
||||||
if (result == "" || result.Contains(" "))
|
|
||||||
{
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -8,3 +8,14 @@ j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Deb
|
|||||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache
|
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache
|
||||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe
|
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe
|
||||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb
|
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe.config
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.pdb
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharp.dll
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuilt.dll
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuiltCompact.dll
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\Lzma#.dll
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csproj.CoreCompileInputs.cache
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe
|
||||||
|
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb
|
||||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -11,10 +11,12 @@ LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
|
|||||||
std::string plCode = "pl";
|
std::string plCode = "pl";
|
||||||
std::string enCode = "en";
|
std::string enCode = "en";
|
||||||
std::string hrCode = "hr";
|
std::string hrCode = "hr";
|
||||||
|
std::string frCode = "fr";
|
||||||
|
|
||||||
_lemmatizersMap.insert(plCode, socketLemmatizer1);
|
_lemmatizersMap.insert(plCode, socketLemmatizer1);
|
||||||
_lemmatizersMap.insert(enCode, socketLemmatizer1);
|
_lemmatizersMap.insert(enCode, socketLemmatizer1);
|
||||||
_lemmatizersMap.insert(hrCode, socketLemmatizer1);
|
_lemmatizersMap.insert(hrCode, socketLemmatizer1);
|
||||||
|
_lemmatizersMap.insert(frCode, socketLemmatizer1);
|
||||||
}
|
}
|
||||||
|
|
||||||
LemmatizerFacade::~LemmatizerFacade() {
|
LemmatizerFacade::~LemmatizerFacade() {
|
||||||
|
Loading…
Reference in New Issue
Block a user