From 4a4ae6bf272fbe4e55f8eb006a9cd0b6691c56b9 Mon Sep 17 00:00:00 2001 From: David Huynh Date: Mon, 8 Mar 2010 19:40:51 +0000 Subject: [PATCH] Fixed toTitlecase to handle parentheses and other delimiters. git-svn-id: http://google-refine.googlecode.com/svn/trunk@240 7d457c2a-affb-35e4-300a-418c747d4874 --- .../expr/functions/strings/ToTitlecase.java | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/metaweb/gridworks/expr/functions/strings/ToTitlecase.java b/src/main/java/com/metaweb/gridworks/expr/functions/strings/ToTitlecase.java index c7b073d0d..9c3954ddf 100644 --- a/src/main/java/com/metaweb/gridworks/expr/functions/strings/ToTitlecase.java +++ b/src/main/java/com/metaweb/gridworks/expr/functions/strings/ToTitlecase.java @@ -2,6 +2,7 @@ package com.metaweb.gridworks.expr.functions.strings; import java.util.Properties; +import org.apache.commons.lang.StringUtils; import org.json.JSONException; import org.json.JSONWriter; @@ -15,17 +16,27 @@ public class ToTitlecase implements Function { if (args.length == 1 && args[0] != null) { Object o = args[0]; String s = o instanceof String ? (String) o : o.toString(); - String[] words = s.split("\\s+"); + String[] segments = StringUtils.splitByCharacterType(s); StringBuffer sb = new StringBuffer(); - for (int i = 0; i < words.length; i++) { - String word = words[i]; - if (word.length() > 0) { - if (sb.length() > 0) { - sb.append(' '); - } - sb.append(word.substring(0, 1).toUpperCase() + word.substring(1).toLowerCase()); + boolean startOfWord = true; + for (int i = 0; i < segments.length; i++) { + String segment = segments[i]; + char c = segment.charAt(0); + + if (Character.isWhitespace(c)) { + startOfWord = true; + } else if (c == '(' || c == '[' || c == '{' || c == '"' || c == '\'') { + startOfWord = true; + } else if (Character.isLetter(c)) { + if (startOfWord) { + segment = StringUtils.capitalize(segment); + } + startOfWord = false; + } else { + startOfWord = false; } + sb.append(segment); } return sb.toString();