French lemmatizer

This commit is contained in:
Rafał Jaworski 2018-12-05 23:16:46 +01:00
parent 53d2f47b57
commit f799164d64
9 changed files with 16 additions and 17 deletions

View File

@ -21,6 +21,7 @@ namespace LemmaGenSockets
lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish)); lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English)); lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian)); lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian));
lemmatizersDict.Add("fr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.French));
} }
public LemmatizerListener() public LemmatizerListener()
@ -52,13 +53,7 @@ namespace LemmaGenSockets
private string lemmatizeWord(string languageCode, string word) private string lemmatizeWord(string languageCode, string word)
{ {
if (word.StartsWith("ne_") || word == "i" || word == "o" || word == "do")
{
return word;
}
string[] parts = word.Split(wordInnerSeparator); string[] parts = word.Split(wordInnerSeparator);
string result = "";
if (parts.Length == 2) if (parts.Length == 2)
{ {
string firstPart = parts[0]; string firstPart = parts[0];
@ -67,20 +62,11 @@ namespace LemmaGenSockets
firstPart = lemmatizersDict[languageCode].Lemmatize(firstPart); firstPart = lemmatizersDict[languageCode].Lemmatize(firstPart);
} }
string secondPart = lemmatizersDict[languageCode].Lemmatize(parts[1]); string secondPart = lemmatizersDict[languageCode].Lemmatize(parts[1]);
result = firstPart + "-" + secondPart; return firstPart + "-" + secondPart;
} }
else else
{ {
result = lemmatizersDict[languageCode].Lemmatize(word); return lemmatizersDict[languageCode].Lemmatize(word);
}
if (result == "" || result.Contains(" "))
{
return word;
}
else
{
return result;
} }
} }

View File

@ -8,3 +8,14 @@ j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Deb
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe.config
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.pdb
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharp.dll
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuilt.dll
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuiltCompact.dll
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\Lzma#.dll
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csproj.CoreCompileInputs.cache
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb

View File

@ -11,10 +11,12 @@ LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
std::string plCode = "pl"; std::string plCode = "pl";
std::string enCode = "en"; std::string enCode = "en";
std::string hrCode = "hr"; std::string hrCode = "hr";
std::string frCode = "fr";
_lemmatizersMap.insert(plCode, socketLemmatizer1); _lemmatizersMap.insert(plCode, socketLemmatizer1);
_lemmatizersMap.insert(enCode, socketLemmatizer1); _lemmatizersMap.insert(enCode, socketLemmatizer1);
_lemmatizersMap.insert(hrCode, socketLemmatizer1); _lemmatizersMap.insert(hrCode, socketLemmatizer1);
_lemmatizersMap.insert(frCode, socketLemmatizer1);
} }
LemmatizerFacade::~LemmatizerFacade() { LemmatizerFacade::~LemmatizerFacade() {