Re-organized column header popup menus and added a bunch of common facets and common cell edit transforms.

Added native syntax for regex in GEL and modified replace, split, partition, and rpartition functions to support regex. Removed function replaceRegexp.


git-svn-id: http://google-refine.googlecode.com/svn/trunk@249 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-03-09 06:57:08 +00:00
parent 5b079b04b7
commit 311d15f493
10 changed files with 328 additions and 148 deletions

View File

@ -1,6 +1,8 @@
package com.metaweb.gridworks.expr.functions.strings;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONException;
import org.json.JSONWriter;
@ -10,22 +12,53 @@ import com.metaweb.gridworks.gel.Function;
public class Partition implements Function {
public Object call(Properties bindings, Object[] args) {
if (args.length == 2) {
if (args.length >= 2 && args.length <= 3) {
Object o1 = args[0];
Object o2 = args[1];
if (o1 != null && o2 != null && o1 instanceof String && o2 instanceof String) {
boolean omitFragment = false;
if (args.length == 3) {
Object o3 = args[2];
if (o3 instanceof Boolean) {
omitFragment = ((Boolean) o3).booleanValue();
}
}
if (o1 != null && o2 != null && o1 instanceof String) {
String s = (String) o1;
String frag = (String) o2;
int index = s.indexOf(frag);
String[] output = new String[3];
if (index > -1) {
output[0] = s.substring(0, index);
output[1] = frag;
output[2] = s.substring(index + frag.length(), s.length());
int from = -1;
int to = -1;
if (o2 instanceof String) {
String frag = (String) o2;
from = s.indexOf(frag);
to = from + frag.length();
} else if (o2 instanceof Pattern) {
Pattern pattern = (Pattern) o2;
Matcher matcher = pattern.matcher(s);
if (matcher.find()) {
from = matcher.start();
to = matcher.end();
}
}
String[] output = omitFragment ? new String[2] : new String[3];
if (from > -1) {
output[0] = s.substring(0, from);
if (omitFragment) {
output[1] = s.substring(to);
} else {
output[1] = s.substring(from, to);
output[2] = s.substring(to);
}
} else {
output[0] = s;
output[1] = "";
output[2] = "";
if (!omitFragment) {
output[2] = "";
}
}
return output;
}
@ -37,8 +70,9 @@ public class Partition implements Function {
throws JSONException {
writer.object();
writer.key("description"); writer.value("Returns an array of strings [a,frag,b] where a is the string part before the first occurrence of frag in s and b is what's left.");
writer.key("params"); writer.value("string s, string frag");
writer.key("description"); writer.value(
"Returns an array of strings [a,frag,b] where a is the string part before the first occurrence of frag in s and b is what's left. If omitFragment is true, frag is not returned.");
writer.key("params"); writer.value("string s, string or regex frag, optional boolean omitFragment");
writer.key("returns"); writer.value("array");
writer.endObject();
}

View File

@ -1,6 +1,8 @@
package com.metaweb.gridworks.expr.functions.strings;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONException;
import org.json.JSONWriter;
@ -10,22 +12,54 @@ import com.metaweb.gridworks.gel.Function;
public class RPartition implements Function {
public Object call(Properties bindings, Object[] args) {
if (args.length == 2) {
if (args.length >= 2 && args.length <= 3) {
Object o1 = args[0];
Object o2 = args[1];
if (o1 != null && o2 != null && o1 instanceof String && o2 instanceof String) {
boolean omitFragment = false;
if (args.length == 3) {
Object o3 = args[2];
if (o3 instanceof Boolean) {
omitFragment = ((Boolean) o3).booleanValue();
}
}
if (o1 != null && o2 != null && o1 instanceof String) {
String s = (String) o1;
String frag = (String) o2;
int index = s.lastIndexOf(frag);
String[] output = new String[3];
if (index > -1) {
output[0] = s.substring(0, index);
output[1] = frag;
output[2] = s.substring(index + frag.length(), s.length());
int from = -1;
int to = -1;
if (o2 instanceof String) {
String frag = (String) o2;
from = s.lastIndexOf(frag);
to = from + frag.length();
} else if (o2 instanceof Pattern) {
Pattern pattern = (Pattern) o2;
Matcher matcher = pattern.matcher(s);
while (matcher.find()) {
from = matcher.start();
to = matcher.end();
}
}
String[] output = omitFragment ? new String[2] : new String[3];
if (from > -1) {
output[0] = s.substring(0, from);
if (omitFragment) {
output[1] = s.substring(to);
} else {
output[1] = s.substring(from, to);
output[2] = s.substring(to);
}
} else {
output[0] = s;
output[1] = "";
output[2] = "";
if (!omitFragment) {
output[2] = "";
}
}
return output;
}
@ -37,8 +71,9 @@ public class RPartition implements Function {
throws JSONException {
writer.object();
writer.key("description"); writer.value("Returns an array of strings [a,frag,b] where a is the string part before the last occurrence of frag in s and b is what's left.");
writer.key("params"); writer.value("string s, string frag");
writer.key("description"); writer.value(
"Returns an array of strings [a,frag,b] where a is the string part before the last occurrence of frag in s and b is what's left. If omitFragment is true, frag is not returned.");
writer.key("params"); writer.value("string s, string or regex frag, optional boolean omitFragment");
writer.key("returns"); writer.value("array");
writer.endObject();
}

View File

@ -1,6 +1,7 @@
package com.metaweb.gridworks.expr.functions.strings;
import java.util.Properties;
import java.util.regex.Pattern;
import org.json.JSONException;
import org.json.JSONWriter;
@ -16,12 +17,18 @@ public class Replace implements Function {
Object o1 = args[0];
Object o2 = args[1];
Object o3 = args[2];
if (o1 != null && o2 != null && o3 != null && o2 instanceof String && o3 instanceof String) {
if (o1 != null && o2 != null && o3 != null && o3 instanceof String) {
String str = (o1 instanceof String) ? (String) o1 : o1.toString();
return str.replace((String) o2, (String) o3);
if (o2 instanceof String) {
return str.replace((String) o2, (String) o3);
} else if (o2 instanceof Pattern) {
Pattern pattern = (Pattern) o2;
return pattern.matcher(str).replaceAll((String) o3);
}
}
}
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 3 strings");
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 3 strings, or 1 string, 1 regex, and 1 string");
}
@ -30,7 +37,7 @@ public class Replace implements Function {
writer.object();
writer.key("description"); writer.value("Returns the string obtained by replacing f with r in s");
writer.key("params"); writer.value("string s, string f, string r");
writer.key("params"); writer.value("string s, string or regex f, string r");
writer.key("returns"); writer.value("string");
writer.endObject();
}

View File

@ -1,37 +0,0 @@
package com.metaweb.gridworks.expr.functions.strings;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONWriter;
import com.metaweb.gridworks.expr.EvalError;
import com.metaweb.gridworks.gel.ControlFunctionRegistry;
import com.metaweb.gridworks.gel.Function;
/**
 * GEL function that replaces every match of a regular expression in a string.
 */
public class ReplaceRegexp implements Function {

    /**
     * Expects exactly three non-null arguments: the subject (any object, converted
     * with {@code toString()}), the regular expression (a String) and the
     * replacement (a String).
     *
     * @param bindings evaluation bindings; not consulted by this function
     * @param args     the call arguments
     * @return the subject with all matches of the regular expression replaced, or an
     *         {@code EvalError} when the arguments do not have the shape described above
     */
    public Object call(Properties bindings, Object[] args) {
        // instanceof is false for null, so it doubles as the null check for the last two arguments.
        boolean wellFormed = args.length == 3
                && args[0] != null
                && args[1] instanceof String
                && args[2] instanceof String;
        if (!wellFormed) {
            return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 3 strings");
        }

        String subject = args[0].toString(); // String.toString() returns the string itself
        String regex = (String) args[1];
        String replacement = (String) args[2];
        return subject.replaceAll(regex, replacement);
    }

    /**
     * Writes this function's description, parameter list and return type as a JSON object.
     */
    public void write(JSONWriter writer, Properties options)
        throws JSONException {

        writer.object()
            .key("description").value("Returns the string obtained by replacing f with r in s")
            .key("params").value("string s, string f, string r")
            .key("returns").value("string")
            .endObject();
    }
}

View File

@ -1,6 +1,7 @@
package com.metaweb.gridworks.expr.functions.strings;
import java.util.Properties;
import java.util.regex.Pattern;
import org.json.JSONException;
import org.json.JSONWriter;
@ -15,11 +16,17 @@ public class Split implements Function {
if (args.length == 2) {
Object v = args[0];
Object split = args[1];
if (v != null && split != null && split instanceof String) {
return (v instanceof String ? (String) v : v.toString()).split((String) split);
if (v != null && split != null) {
String str = (v instanceof String ? (String) v : v.toString());
if (split instanceof String) {
return str.split((String) split);
} else if (split instanceof Pattern) {
Pattern pattern = (Pattern) split;
return pattern.split(str);
}
}
}
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings");
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings, or 1 string and 1 regex");
}
public void write(JSONWriter writer, Properties options)
@ -27,7 +34,7 @@ public class Split implements Function {
writer.object();
writer.key("description"); writer.value("Returns the array of strings obtained by splitting s with separator sep");
writer.key("params"); writer.value("string s, string sep");
writer.key("params"); writer.value("string s, string or regex sep");
writer.key("returns"); writer.value("array");
writer.endObject();
}

View File

@ -43,7 +43,6 @@ import com.metaweb.gridworks.expr.functions.strings.RPartition;
import com.metaweb.gridworks.expr.functions.strings.Reinterpret;
import com.metaweb.gridworks.expr.functions.strings.Replace;
import com.metaweb.gridworks.expr.functions.strings.ReplaceChars;
import com.metaweb.gridworks.expr.functions.strings.ReplaceRegexp;
import com.metaweb.gridworks.expr.functions.strings.SHA1;
import com.metaweb.gridworks.expr.functions.strings.Split;
import com.metaweb.gridworks.expr.functions.strings.SplitByCharType;
@ -117,7 +116,6 @@ public class ControlFunctionRegistry {
registerFunction("slice", new Slice());
registerFunction("substring", new Slice());
registerFunction("replace", new Replace());
registerFunction("replaceRegexp", new ReplaceRegexp());
registerFunction("replaceChars", new ReplaceChars());
registerFunction("split", new Split());
registerFunction("splitByCharType", new SplitByCharType());

View File

@ -2,10 +2,12 @@ package com.metaweb.gridworks.gel;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;
import com.metaweb.gridworks.expr.Evaluable;
import com.metaweb.gridworks.expr.ParsingException;
import com.metaweb.gridworks.gel.Scanner.NumberToken;
import com.metaweb.gridworks.gel.Scanner.RegexToken;
import com.metaweb.gridworks.gel.Scanner.Token;
import com.metaweb.gridworks.gel.Scanner.TokenType;
import com.metaweb.gridworks.gel.ast.ControlCallExpr;
@ -26,7 +28,7 @@ public class Parser {
public Parser(String s, int from, int to) throws ParsingException {
_scanner = new Scanner(s, from, to);
_token = _scanner.next();
_token = _scanner.next(true);
_root = parseExpression();
}
@ -35,8 +37,8 @@ public class Parser {
return _root;
}
protected void next() {
_token = _scanner.next();
protected void next(boolean regexPossible) {
_token = _scanner.next(regexPossible);
}
protected ParsingException makeException(String desc) {
@ -54,7 +56,7 @@ public class Parser {
String op = _token.text;
next();
next(true);
Evaluable sub2 = parseSubExpression();
@ -73,7 +75,7 @@ public class Parser {
String op = _token.text;
next();
next(true);
Evaluable sub2 = parseSubExpression();
@ -92,7 +94,7 @@ public class Parser {
String op = _token.text;
next();
next(true);
Evaluable factor2 = parseFactor();
@ -111,22 +113,27 @@ public class Parser {
if (_token.type == TokenType.String) {
eval = new LiteralExpr(_token.text);
next();
next(false);
} else if (_token.type == TokenType.Regex) {
RegexToken t = (RegexToken) _token;
eval = new LiteralExpr(Pattern.compile(_token.text, t.caseInsensitive ? Pattern.CASE_INSENSITIVE : 0));
next(false);
} else if (_token.type == TokenType.Number) {
eval = new LiteralExpr(((NumberToken)_token).value);
next();
next(false);
} else if (_token.type == TokenType.Operator && _token.text.equals("-")) { // unary minus?
next();
next(true);
if (_token != null && _token.type == TokenType.Number) {
eval = new LiteralExpr(-((NumberToken)_token).value);
next();
next(false);
} else {
throw makeException("Bad negative number");
}
} else if (_token.type == TokenType.Identifier) {
String text = _token.text;
next();
next(false);
if (_token == null || _token.type != TokenType.Delimiter || !_token.text.equals("(")) {
eval = new VariableExpr(text);
@ -137,7 +144,7 @@ public class Parser {
throw makeException("Unknown function or control named " + text);
}
next(); // swallow (
next(true); // swallow (
List<Evaluable> args = parseExpressionList(")");
@ -153,12 +160,12 @@ public class Parser {
}
}
} else if (_token.type == TokenType.Delimiter && _token.text.equals("(")) {
next();
next(true);
eval = parseExpression();
if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals(")")) {
next();
next(false);
} else {
throw makeException("Missing )");
}
@ -168,17 +175,17 @@ public class Parser {
while (_token != null) {
if (_token.type == TokenType.Operator && _token.text.equals(".")) {
next(); // swallow .
next(false); // swallow .
if (_token == null || _token.type != TokenType.Identifier) {
throw makeException("Missing function name");
}
String identifier = _token.text;
next();
next(false);
if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals("(")) {
next(); // swallow (
next(true); // swallow (
Function f = ControlFunctionRegistry.getFunction(identifier);
if (f == null) {
@ -193,7 +200,7 @@ public class Parser {
eval = new FieldAccessorExpr(eval, identifier);
}
} else if (_token.type == TokenType.Delimiter && _token.text.equals("[")) {
next(); // swallow [
next(true); // swallow [
List<Evaluable> args = parseExpressionList("]");
args.add(0, eval);
@ -219,7 +226,7 @@ public class Parser {
l.add(eval);
if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals(",")) {
next(); // swallow comma, loop back for more
next(true); // swallow comma, loop back for more
} else {
break;
}
@ -227,7 +234,7 @@ public class Parser {
}
if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals(closingDelimiter)) {
next(); // swallow closing delimiter
next(false); // swallow closing delimiter
} else {
throw makeException("Missing " + closingDelimiter);
}

View File

@ -7,7 +7,8 @@ public class Scanner {
Operator,
Identifier,
Number,
String
String,
Regex
}
static public class Token {
@ -42,6 +43,15 @@ public class Scanner {
}
}
/**
 * Token for a regex literal. Its text is the pattern source between the two
 * '/' delimiters; the flag records whether an 'i' followed the closing delimiter.
 */
public static class RegexToken extends Token {
    /** True when the literal was written with the trailing 'i' flag. */
    public final boolean caseInsensitive;

    /**
     * @param start           passed through to Token together with end
     * @param end             passed through to Token together with start
     * @param text            the pattern source, without the surrounding delimiters
     * @param caseInsensitive whether the literal carried the 'i' flag
     */
    public RegexToken(int start, int end, String text, boolean caseInsensitive) {
        super(start, end, TokenType.Regex, text);
        this.caseInsensitive = caseInsensitive;
    }
}
protected String _text;
protected int _index;
protected int _limit;
@ -60,7 +70,7 @@ public class Scanner {
return _index;
}
public Token next() {
public Token next(boolean regexPossible) {
// skip whitespace
while (_index < _limit && Character.isWhitespace(_text.charAt(_index))) {
_index++;
@ -148,6 +158,46 @@ public class Scanner {
TokenType.Identifier,
_text.substring(start, _index)
);
} else if (c == '/' && regexPossible) {
/*
* Regex literal
*/
StringBuffer sb = new StringBuffer();
_index++; // skip opening delimiter
while (_index < _limit) {
c = _text.charAt(_index);
if (c == '/') {
_index++; // skip closing delimiter
boolean caseInsensitive = false;
if (_index < _limit && _text.charAt(_index) == 'i') {
caseInsensitive = true;
_index++;
}
return new RegexToken(
start,
_index,
sb.toString(),
caseInsensitive
);
} else if (c == '\\') {
sb.append(c);
_index++; // skip escaping marker
if (_index < _limit) {
sb.append(_text.charAt(_index));
}
} else {
sb.append(c);
}
_index++;
}
detail = "Regex not properly closed";
// fall through
} else if ("+-*/.".indexOf(c) >= 0) { // operator
_index++;

View File

@ -322,7 +322,10 @@ ExpressionPreviewDialog.Widget.prototype._renderPreview = function(expression, d
var renderValue = function(td, v) {
if (v !== null && v !== undefined) {
if ($.isArray(v)) {
td.text(JSON.stringify(v));
var a = [];
$.each(v, function() { a.push(JSON.stringify(this)); });
td.text("[ " + a.join(", ") + " ]");
} else if ($.isPlainObject(v)) {
$('<span></span>').addClass("expression-preview-special-value").text("Error: " + v.message).appendTo(td);
} else if (typeof v === "string" && v.length == 0) {

View File

@ -54,55 +54,6 @@ DataTableColumnHeaderUI.prototype._render = function() {
DataTableColumnHeaderUI.prototype._createMenuForColumnHeader = function(elmt) {
self = this;
MenuSystem.createAndShowStandardMenu([
{
label: "Edit Cells",
submenu: [
{
label: "To Titlecase",
click: function() { self._doTextTransform("toTitlecase(value)", "store-blank", false, ""); }
},
{
label: "To Uppercase",
click: function() { self._doTextTransform("toUppercase(value)", "store-blank", false, ""); }
},
{
label: "To Lowercase",
click: function() { self._doTextTransform("toLowercase(value)", "store-blank", false, ""); }
},
{
label: "Custom Transform ...",
click: function() { self._doTextTransformPrompt(); }
},
{},
{
label: "Split Multi-Valued Cells ...",
click: function() { self._doSplitMultiValueCells(); }
},
{
label: "Join Multi-Valued Cells ...",
click: function() { self._doJoinMultiValueCells(); }
},
{},
{
label: "Cluster & Edit ...",
click: function() { new FacetBasedEditDialog(self._column.name, "value"); }
}
]
},
{
label: "Edit Column",
submenu: [
{
label: "Add Column Based on This Column ...",
click: function() { self._doAddColumn("value"); }
},
{
label: "Remove This Column",
click: function() { self._doRemoveColumn(); }
},
]
},
{},
{
label: "Filter",
tooltip: "Filter rows by this column's cell content or characteristics",
@ -124,6 +75,24 @@ DataTableColumnHeaderUI.prototype._createMenuForColumnHeader = function(elmt) {
label: "Custom Text Facet ...",
click: function() { self._doFilterByExpressionPrompt("value", "list"); }
},
{
label: "Common Text Facets",
submenu: [
{
label: "Word Facet",
click: function() {
ui.browsingEngine.addFacet(
"list",
{
"name" : self._column.name + " value.split(' ')",
"columnName" : self._column.name,
"expression" : "value.split(' ')"
}
);
}
}
]
},
{},
{
label: "Numeric Facet",
@ -134,11 +103,7 @@ DataTableColumnHeaderUI.prototype._createMenuForColumnHeader = function(elmt) {
"name" : self._column.name,
"columnName" : self._column.name,
"expression" : "value",
"mode" : "range",
"min" : 0,
"max" : 1
},
{
"mode" : "range"
}
);
}
@ -147,6 +112,53 @@ DataTableColumnHeaderUI.prototype._createMenuForColumnHeader = function(elmt) {
label: "Custom Numeric Facet ...",
click: function() { self._doFilterByExpressionPrompt("value", "range"); }
},
{
label: "Common Numeric Facets",
submenu: [
{
label: "Text Length Facet",
click: function() {
ui.browsingEngine.addFacet(
"range",
{
"name" : self._column.name + ": value.length()",
"columnName" : self._column.name,
"expression" : "value.length()",
"mode" : "range"
}
);
}
},
{
label: "Log of Text Length Facet",
click: function() {
ui.browsingEngine.addFacet(
"range",
{
"name" : self._column.name + ": value.length().log()",
"columnName" : self._column.name,
"expression" : "value.length().log()",
"mode" : "range"
}
);
}
},
{
label: "Unicode Char-code Facet",
click: function() {
ui.browsingEngine.addFacet(
"range",
{
"name" : self._column.name + ": value.unicode()",
"columnName" : self._column.name,
"expression" : "value.unicode()",
"mode" : "range"
}
);
}
}
]
},
{},
{
label: "Text Search",
@ -205,6 +217,70 @@ DataTableColumnHeaderUI.prototype._createMenuForColumnHeader = function(elmt) {
}
]
},
{},
{
label: "Edit Cells",
submenu: [
{
label: "Transform ...",
click: function() { self._doTextTransformPrompt(); }
},
{
label: "Common Transforms",
submenu: [
{
label: "Unescape HTML entities",
click: function() { self._doTextTransform("value.unescape('html')", "store-blank", true, 10); }
},
{
label: "Collapse whitespace",
click: function() { self._doTextTransform("value.replaceRegexp('\\s+', ' ')", "store-blank", false, ""); }
},
{},
{
label: "To Titlecase",
click: function() { self._doTextTransform("toTitlecase(value)", "store-blank", false, ""); }
},
{
label: "To Uppercase",
click: function() { self._doTextTransform("toUppercase(value)", "store-blank", false, ""); }
},
{
label: "To Lowercase",
click: function() { self._doTextTransform("toLowercase(value)", "store-blank", false, ""); }
}
]
},
{},
{
label: "Split Multi-Valued Cells ...",
click: function() { self._doSplitMultiValueCells(); }
},
{
label: "Join Multi-Valued Cells ...",
click: function() { self._doJoinMultiValueCells(); }
},
{},
{
label: "Cluster & Edit ...",
click: function() { new FacetBasedEditDialog(self._column.name, "value"); }
}
]
},
{
label: "Edit Column",
submenu: [
{
label: "Add Column Based on This Column ...",
click: function() { self._doAddColumn("value"); }
},
{
label: "Remove This Column",
click: function() { self._doRemoveColumn(); }
},
]
},
{},
{
label: "View",
tooltip: "Collapse/expand columns to make viewing the data more convenient",