French lemmatizer
This commit is contained in:
parent
53d2f47b57
commit
f799164d64
@ -21,6 +21,7 @@ namespace LemmaGenSockets
|
||||
lemmatizersDict.Add("pl", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Polish));
|
||||
lemmatizersDict.Add("en", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.English));
|
||||
lemmatizersDict.Add("hr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.Serbian));
|
||||
lemmatizersDict.Add("fr", new LemmatizerPrebuiltCompact(LemmaSharp.LanguagePrebuilt.French));
|
||||
}
|
||||
|
||||
public LemmatizerListener()
|
||||
@ -52,13 +53,7 @@ namespace LemmaGenSockets
|
||||
|
||||
private string lemmatizeWord(string languageCode, string word)
|
||||
{
|
||||
if (word.StartsWith("ne_") || word == "i" || word == "o" || word == "do")
|
||||
{
|
||||
return word;
|
||||
}
|
||||
string[] parts = word.Split(wordInnerSeparator);
|
||||
|
||||
string result = "";
|
||||
if (parts.Length == 2)
|
||||
{
|
||||
string firstPart = parts[0];
|
||||
@ -67,20 +62,11 @@ namespace LemmaGenSockets
|
||||
firstPart = lemmatizersDict[languageCode].Lemmatize(firstPart);
|
||||
}
|
||||
string secondPart = lemmatizersDict[languageCode].Lemmatize(parts[1]);
|
||||
result = firstPart + "-" + secondPart;
|
||||
return firstPart + "-" + secondPart;
|
||||
}
|
||||
else
|
||||
{
|
||||
result = lemmatizersDict[languageCode].Lemmatize(word);
|
||||
}
|
||||
|
||||
if (result == "" || result.Contains(" "))
|
||||
{
|
||||
return word;
|
||||
}
|
||||
else
|
||||
{
|
||||
return result;
|
||||
return lemmatizersDict[languageCode].Lemmatize(word);
|
||||
}
|
||||
}
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -8,3 +8,14 @@ j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\bin\Deb
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe
|
||||
j:\Documents\Visual Studio 2015\Projects\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe.config
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.exe
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaGenSockets.pdb
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharp.dll
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuilt.dll
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\LemmaSharpPrebuiltCompact.dll
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\bin\Debug\Lzma#.dll
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csprojResolveAssemblyReference.cache
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.csproj.CoreCompileInputs.cache
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.exe
|
||||
J:\projects\concordia-server\LemmaGenSockets\LemmaGenSockets\obj\Debug\LemmaGenSockets.pdb
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -11,10 +11,12 @@ LemmatizerFacade::LemmatizerFacade() throw(ConcordiaException) {
|
||||
std::string plCode = "pl";
|
||||
std::string enCode = "en";
|
||||
std::string hrCode = "hr";
|
||||
std::string frCode = "fr";
|
||||
|
||||
_lemmatizersMap.insert(plCode, socketLemmatizer1);
|
||||
_lemmatizersMap.insert(enCode, socketLemmatizer1);
|
||||
_lemmatizersMap.insert(hrCode, socketLemmatizer1);
|
||||
_lemmatizersMap.insert(frCode, socketLemmatizer1);
|
||||
}
|
||||
|
||||
LemmatizerFacade::~LemmatizerFacade() {
|
||||
|
Loading…
Reference in New Issue
Block a user