Merge pull request #1432 from OpenRefine/issue/1296

add find function. issue 1296
This commit is contained in:
Jacky 2018-01-15 21:33:37 -05:00 committed by GitHub
commit 51296b5cff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 130 additions and 0 deletions

View File

@ -0,0 +1,53 @@
package com.google.refine.expr.functions.strings;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONException;
import org.json.JSONWriter;
import com.google.refine.expr.EvalError;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
/**
 * GREL function that collects every substring of a value matched by a
 * regular expression.
 *
 * <p>The function takes exactly two arguments: the value to search (converted
 * with {@code toString()}) and the pattern, given either as a {@link String}
 * (compiled as a regular expression) or as a precompiled {@link Pattern}.
 */
public class Find implements Function {

    /**
     * Finds all matches of the pattern in the subject value.
     *
     * @param bindings evaluation bindings; not used by this function
     * @param args     expected to hold two elements: the subject value and
     *                 the pattern (a {@code String} or a {@code Pattern})
     * @return a {@code String[]} with every match in order of occurrence
     *         (empty when the subject is {@code null} or the pattern is
     *         neither a {@code String} nor a {@code Pattern}), or an
     *         {@link EvalError} when the argument count is not two
     */
    @Override
    public Object call(Properties bindings, Object[] args) {
        if (args.length != 2) {
            return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string or a regexp");
        }

        List<String> allMatches = new ArrayList<String>();
        Object s = args[0];
        Object p = args[1];

        // instanceof is false for null, so no separate null check on p is needed.
        if (s != null && (p instanceof String || p instanceof Pattern)) {
            Pattern pattern = (p instanceof String) ? Pattern.compile((String) p) : (Pattern) p;
            Matcher matcher = pattern.matcher(s.toString());
            while (matcher.find()) {
                allMatches.add(matcher.group());
            }
        }

        // Unsupported argument types deliberately yield an empty array, not an error.
        return allMatches.toArray(new String[0]);
    }

    /**
     * Writes this function's help metadata (description, params, returns)
     * as a single JSON object.
     *
     * @param writer  destination JSON writer
     * @param options serialization options; not used here
     * @throws JSONException if the writer rejects the output
     */
    @Override
    public void write(JSONWriter writer, Properties options)
            throws JSONException {
        writer.object();
        writer.key("description"); writer.value("Returns all the occurrences that match the given regular expression");
        writer.key("params"); writer.value("string or regexp");
        writer.key("returns"); writer.value("array of strings");
        writer.endObject();
    }
}

View File

@ -109,6 +109,7 @@ import com.google.refine.expr.functions.strings.IndexOf;
import com.google.refine.expr.functions.strings.LastIndexOf; import com.google.refine.expr.functions.strings.LastIndexOf;
import com.google.refine.expr.functions.strings.MD5; import com.google.refine.expr.functions.strings.MD5;
import com.google.refine.expr.functions.strings.Match; import com.google.refine.expr.functions.strings.Match;
import com.google.refine.expr.functions.strings.Find;
import com.google.refine.expr.functions.strings.NGram; import com.google.refine.expr.functions.strings.NGram;
import com.google.refine.expr.functions.strings.NGramFingerprint; import com.google.refine.expr.functions.strings.NGramFingerprint;
import com.google.refine.expr.functions.strings.ParseJson; import com.google.refine.expr.functions.strings.ParseJson;
@ -226,6 +227,7 @@ public class ControlFunctionRegistry {
registerFunction("parseJson", new ParseJson()); registerFunction("parseJson", new ParseJson());
registerFunction("ngram", new NGram()); registerFunction("ngram", new NGram());
registerFunction("match", new Match()); registerFunction("match", new Match());
registerFunction("find", new Find());
// HTML functions from JSoup // HTML functions from JSoup
registerFunction("parseHtml", new ParseHtml()); registerFunction("parseHtml", new ParseHtml());

View File

@ -0,0 +1,75 @@
package com.google.refine.tests.expr.functions;
import java.util.Properties;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.RefineServlet;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
import com.google.refine.tests.RefineServletStub;
import com.google.refine.tests.RefineTest;
/**
 * Test cases for find function.
 *
 * <p>Each test looks the function up in the {@link ControlFunctionRegistry}
 * under the name {@code "find"} and checks both the number of matches and
 * their values.
 */
public class FindFunctionTests extends RefineTest {

    static Properties bindings;

    @Override
    @BeforeTest
    public void init() {
        logger = LoggerFactory.getLogger(this.getClass());
    }

    // dependencies
    RefineServlet servlet;

    @BeforeMethod
    public void SetUp() {
        bindings = new Properties();
        servlet = new RefineServletStub();
    }

    @AfterMethod
    public void TearDown() {
    }

    /** A plain word used as the pattern is found at every occurrence. */
    @Test
    public void findFunctionFindAllTest() throws Exception {
        String[] matches = (String[]) invoke("find", "This is a test string for testing find.", "test");
        // Check the count first so a wrong number of matches fails as an
        // assertion instead of an ArrayIndexOutOfBoundsException.
        Assert.assertEquals(matches.length, 2);
        Assert.assertEquals(matches[0], "test");
        Assert.assertEquals(matches[1], "test");
    }

    /** An alternation pattern returns matches in the order they occur in the input. */
    @Test
    public void findFunctionFindAllTest2() throws Exception {
        String[] matches = (String[]) invoke("find", "hello 123456 goodbye.", "\\d{6}|hello");
        Assert.assertEquals(matches.length, 2);
        Assert.assertEquals(matches[0], "hello");
        Assert.assertEquals(matches[1], "123456");
    }

    /**
     * Lookup a control function by name and invoke it with a variable number of args
     *
     * @param name registered function name
     * @param args arguments passed to the function; {@code null} is treated as no arguments
     * @return whatever the function returns
     * @throws IllegalArgumentException if no function is registered under {@code name}
     */
    private static Object invoke(String name, Object... args) {
        // registry uses static initializer, so no need to set it up
        Function function = ControlFunctionRegistry.getFunction(name);
        if (function == null) {
            throw new IllegalArgumentException("Unknown function " + name);
        }
        Object[] effectiveArgs = (args == null) ? new Object[0] : args;
        return function.call(bindings, effectiveArgs);
    }
}