Fixed toTitlecase to handle parentheses and other delimiters.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@240 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-03-08 19:40:51 +00:00
parent c147837a3e
commit 4a4ae6bf27

View File

@@ -2,6 +2,7 @@ package com.metaweb.gridworks.expr.functions.strings;
import java.util.Properties; import java.util.Properties;
import org.apache.commons.lang.StringUtils;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONWriter; import org.json.JSONWriter;
@@ -15,17 +16,27 @@ public class ToTitlecase implements Function {
if (args.length == 1 && args[0] != null) { if (args.length == 1 && args[0] != null) {
Object o = args[0]; Object o = args[0];
String s = o instanceof String ? (String) o : o.toString(); String s = o instanceof String ? (String) o : o.toString();
String[] words = s.split("\\s+"); String[] segments = StringUtils.splitByCharacterType(s);
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
for (int i = 0; i < words.length; i++) { boolean startOfWord = true;
String word = words[i]; for (int i = 0; i < segments.length; i++) {
if (word.length() > 0) { String segment = segments[i];
if (sb.length() > 0) { char c = segment.charAt(0);
sb.append(' ');
} if (Character.isWhitespace(c)) {
sb.append(word.substring(0, 1).toUpperCase() + word.substring(1).toLowerCase()); startOfWord = true;
} else if (c == '(' || c == '[' || c == '{' || c == '"' || c == '\'') {
startOfWord = true;
} else if (Character.isLetter(c)) {
if (startOfWord) {
segment = StringUtils.capitalize(segment);
}
startOfWord = false;
} else {
startOfWord = false;
} }
sb.append(segment);
} }
return sb.toString(); return sb.toString();