diff --git a/main/src/com/google/refine/expr/functions/strings/Find.java b/main/src/com/google/refine/expr/functions/strings/Find.java new file mode 100644 index 000000000..44ade9442 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Find.java @@ -0,0 +1,85 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +package com.google.refine.expr.functions.strings; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.grel.ControlFunctionRegistry; +import com.google.refine.grel.Function; +
/**
 * GREL function that returns every substring of a value matched by a regular
 * expression, in the order the matches occur.
 *
 * <p>The function takes exactly two arguments: the value to search and the
 * pattern. The pattern may be a {@link String}, which is compiled as a regular
 * expression, or an already compiled {@link Pattern}.
 */
public class Find implements Function {

    /**
     * Finds all matches of the pattern {@code args[1]} in the text of {@code args[0]}.
     *
     * @param bindings evaluation bindings; not used by this function
     * @param args     the value to search (converted with {@code toString()}) followed by
     *                 the pattern, given as a {@code String} or a {@code Pattern}
     * @return a {@code String[]} holding each full match (empty when nothing matches, or
     *         when the value is null or the pattern is null or of another type), or an
     *         {@link EvalError} when the argument count is not two or the pattern string
     *         is not a valid regular expression
     */
    @Override
    public Object call(Properties bindings, Object[] args) {
        if (args.length != 2) {
            return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string or a regexp");
        }

        Object s = args[0];
        Object p = args[1];
        List<String> allMatches = new ArrayList<String>();

        // instanceof is false for null, so a null pattern falls through to the empty result.
        if (s != null && (p instanceof String || p instanceof Pattern)) {
            Pattern pattern;
            if (p instanceof Pattern) {
                pattern = (Pattern) p;
            } else {
                try {
                    pattern = Pattern.compile((String) p);
                } catch (java.util.regex.PatternSyntaxException e) {
                    // Report a malformed expression through the function's error value
                    // instead of letting the exception escape from the evaluation.
                    return new EvalError(ControlFunctionRegistry.getFunctionName(this)
                            + " expects a valid regular expression: " + e.getDescription());
                }
            }

            Matcher matcher = pattern.matcher(s.toString());
            while (matcher.find()) {
                // group() is the whole match, not an individual capture group.
                allMatches.add(matcher.group());
            }
        }

        return allMatches.toArray(new String[0]);
    }

    /**
     * Writes this function's description, parameters and return type as a JSON object.
     *
     * @param writer  the writer that receives the JSON object
     * @param options not used by this function
     * @throws JSONException declared for failures raised by the writer
     */
    @Override
    public void write(JSONWriter writer, Properties options)
        throws JSONException {

        writer.object();
        writer.key("description"); writer.value("Returns an array of the groups matching the given regular expression");
        writer.key("params"); writer.value("string or regexp");
        writer.key("returns"); writer.value("array of strings");
        writer.endObject();
    }
}
diff --git a/main/src/com/google/refine/grel/ControlFunctionRegistry.java b/main/src/com/google/refine/grel/ControlFunctionRegistry.java index 6dad3c2dd..4e86dbd92 100644 --- a/main/src/com/google/refine/grel/ControlFunctionRegistry.java +++ b/main/src/com/google/refine/grel/ControlFunctionRegistry.java @@ -109,6 +109,7 @@ import com.google.refine.expr.functions.strings.IndexOf; import com.google.refine.expr.functions.strings.LastIndexOf; import com.google.refine.expr.functions.strings.MD5; import 
com.google.refine.expr.functions.strings.Match; +import com.google.refine.expr.functions.strings.Find; import com.google.refine.expr.functions.strings.NGram; import com.google.refine.expr.functions.strings.NGramFingerprint; import com.google.refine.expr.functions.strings.ParseJson; @@ -226,6 +227,7 @@ public class ControlFunctionRegistry { registerFunction("parseJson", new ParseJson()); registerFunction("ngram", new NGram()); registerFunction("match", new Match()); + registerFunction("find", new Find()); // HTML functions from JSoup registerFunction("parseHtml", new ParseHtml()); diff --git a/main/tests/server/src/com/google/refine/tests/expr/functions/FindFunctionTests.java b/main/tests/server/src/com/google/refine/tests/expr/functions/FindFunctionTests.java new file mode 100644 index 000000000..acfccd90c --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/expr/functions/FindFunctionTests.java @@ -0,0 +1,75 @@ + +package com.google.refine.tests.expr.functions; + +import java.util.Properties; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.RefineServlet; +import com.google.refine.grel.ControlFunctionRegistry; +import com.google.refine.grel.Function; +import com.google.refine.tests.RefineServletStub; +import com.google.refine.tests.RefineTest; + +/** + * Test cases for find function. 
+ */ +public class FindFunctionTests extends RefineTest { + static Properties bindings; + + @Override + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + // dependencies + RefineServlet servlet; + + @BeforeMethod + public void SetUp() { + bindings = new Properties(); + + servlet = new RefineServletStub(); + } + + @AfterMethod + public void TearDown() { + } + + + @Test + public void findFunctionFindAllTest() throws Exception { + String[] matches = (String[]) invoke("find", "This is a test string for testing find.", "test"); + Assert.assertEquals(matches[0], "test"); + Assert.assertEquals(matches[1], "test"); + } + + @Test + public void findFunctionFindAllTest2() throws Exception { + String[] matches = (String[]) invoke("find", "hello 123456 goodbye.", "\\d{6}|hello"); + Assert.assertEquals(matches[0], "hello"); + Assert.assertEquals(matches[1], "123456"); + } + + /** + * Lookup a control function by name and invoke it with a variable number of args + */ + private static Object invoke(String name,Object... args) { + // registry uses static initializer, so no need to set it up + Function function = ControlFunctionRegistry.getFunction(name); + if (function == null) { + throw new IllegalArgumentException("Unknown function "+name); + } + if (args == null) { + return function.call(bindings,new Object[0]); + } else { + return function.call(bindings,args); + } + } +}