Added lowercasing when tokenizing by whitespace
This commit is contained in:
parent
0a8d2fdd39
commit
bbf3853d2a
@@ -29,6 +29,8 @@ TokenizedSentence SentenceTokenizer::tokenize(const std::string & sentence,
|
||||
TokenizedSentence result(sentence);
|
||||
|
||||
if (byWhitespace) {
|
||||
result.toLowerCase();
|
||||
|
||||
boost::shared_ptr<RegexRule> whitespaceRule(
|
||||
new RegexRule("\\S+",
|
||||
TokenAnnotation::WORD, ""));
|
||||
|
Loading…
Reference in New Issue
Block a user