From 0648e8725ee0b2d292a5065fe886b11cd4a6da3c Mon Sep 17 00:00:00 2001
From: Stefano Mazzocchi
Date: Tue, 1 Jun 2010 08:54:17 +0000
Subject: [PATCH] adding regexp group capturing GEL function

git-svn-id: http://google-refine.googlecode.com/svn/trunk@932 7d457c2a-affb-35e4-300a-418c747d4874
---
 .../expr/functions/strings/Match.java         | 95 +++++++++++++++++++
 .../gel/ControlFunctionRegistry.java          |  2 +
 2 files changed, 97 insertions(+)
 create mode 100644 main/src/com/metaweb/gridworks/expr/functions/strings/Match.java

diff --git a/main/src/com/metaweb/gridworks/expr/functions/strings/Match.java b/main/src/com/metaweb/gridworks/expr/functions/strings/Match.java
new file mode 100644
index 000000000..1c2d2460d
--- /dev/null
+++ b/main/src/com/metaweb/gridworks/expr/functions/strings/Match.java
@@ -0,0 +1,95 @@
+package com.metaweb.gridworks.expr.functions.strings;
+
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import org.json.JSONException;
+import org.json.JSONWriter;
+
+import com.metaweb.gridworks.expr.EvalError;
+import com.metaweb.gridworks.gel.ControlFunctionRegistry;
+import com.metaweb.gridworks.gel.Function;
+
+/**
+ * GEL function that tests a value against a regular expression and returns
+ * the text captured by each of the expression's groups.
+ */
+public class Match implements Function {
+
+    /**
+     * Matches the whole of the first argument against the regular expression
+     * given as the second argument.
+     *
+     * @param bindings evaluation bindings; not read by this function
+     * @param args     the value to match (converted with {@code toString()})
+     *                 followed by the expression, either a {@link String} or
+     *                 a compiled {@link Pattern}
+     * @return a {@code String[]} of capturing groups 1..n when the whole value
+     *         matches; {@code null} when it does not match, when the value is
+     *         {@code null}, or when the expression is neither a string nor a
+     *         pattern; an {@link EvalError} when the argument count is not two
+     *         or the string is not a valid regular expression
+     */
+    public Object call(Properties bindings, Object[] args) {
+        if (args.length != 2) {
+            return new EvalError(ControlFunctionRegistry.getFunctionName(this)
+                    + " expects 2 arguments: a string and a regexp");
+        }
+
+        Object s = args[0];
+        Object p = args[1];
+
+        // A null value or an unsupported expression type yields no result
+        // rather than an error.
+        if (s == null || !(p instanceof String || p instanceof Pattern)) {
+            return null;
+        }
+
+        Pattern pattern;
+        if (p instanceof Pattern) {
+            pattern = (Pattern) p;
+        } else {
+            try {
+                pattern = Pattern.compile((String) p);
+            } catch (PatternSyntaxException e) {
+                // Report a malformed expression as an EvalError, like a wrong
+                // argument count, instead of letting the exception escape.
+                return new EvalError(ControlFunctionRegistry.getFunctionName(this)
+                        + " was given an invalid regexp: " + e.getDescription());
+            }
+        }
+
+        Matcher matcher = pattern.matcher(s.toString());
+        if (!matcher.matches()) {
+            return null;
+        }
+
+        // Group 0 is the whole match; only the capturing groups are returned.
+        int count = matcher.groupCount();
+        String[] groups = new String[count];
+        for (int i = 0; i < count; i++) {
+            groups[i] = matcher.group(i + 1);
+        }
+        return groups;
+    }
+
+    /**
+     * Writes this function's description, parameters and return type as a
+     * JSON object.
+     *
+     * @param writer  receives the JSON object
+     * @param options not read by this function
+     * @throws JSONException propagated from the writer calls
+     */
+    public void write(JSONWriter writer, Properties options)
+        throws JSONException {
+
+        writer.object();
+        writer.key("description"); writer.value("Returns an array of the groups matching the given regular expression");
+        writer.key("params"); writer.value("string s, string or regexp p");
+        writer.key("returns"); writer.value("array of strings");
+        writer.endObject();
+    }
+}
diff --git a/main/src/com/metaweb/gridworks/gel/ControlFunctionRegistry.java b/main/src/com/metaweb/gridworks/gel/ControlFunctionRegistry.java
index cc8b18ab8..e1f80c461 100644
--- a/main/src/com/metaweb/gridworks/gel/ControlFunctionRegistry.java
+++ b/main/src/com/metaweb/gridworks/gel/ControlFunctionRegistry.java
@@ -42,6 +42,7 @@ import com.metaweb.gridworks.expr.functions.strings.Fingerprint;
 import com.metaweb.gridworks.expr.functions.strings.IndexOf;
 import com.metaweb.gridworks.expr.functions.strings.LastIndexOf;
 import com.metaweb.gridworks.expr.functions.strings.MD5;
+import com.metaweb.gridworks.expr.functions.strings.Match;
 import com.metaweb.gridworks.expr.functions.strings.NGram;
 import com.metaweb.gridworks.expr.functions.strings.NGramFingerprint;
 import com.metaweb.gridworks.expr.functions.strings.Partition;
@@ -149,6 +150,7 @@ public class ControlFunctionRegistry {
         registerFunction("reinterpret", new Reinterpret());
         registerFunction("jsonize", new Jsonize());
         registerFunction("ngram", new NGram());
+        registerFunction("match", new Match());
         registerFunction("indexOf", new IndexOf());
         registerFunction("lastIndexOf", new LastIndexOf());
 