add find function. issue 1296
This commit is contained in:
parent
df59d3ca0e
commit
3c25647f05
85
main/src/com/google/refine/expr/functions/strings/Find.java
Normal file
85
main/src/com/google/refine/expr/functions/strings/Find.java
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.expr.functions.strings;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.google.refine.expr.EvalError;
|
||||||
|
import com.google.refine.grel.ControlFunctionRegistry;
|
||||||
|
import com.google.refine.grel.Function;
|
||||||
|
|
||||||
|
public class Find implements Function {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object call(Properties bindings, Object[] args) {
|
||||||
|
List<String> allMatches = new ArrayList<String>();
|
||||||
|
|
||||||
|
if (args.length == 2) {
|
||||||
|
Object s = args[0];
|
||||||
|
Object p = args[1];
|
||||||
|
|
||||||
|
if (s != null && p != null && (p instanceof String || p instanceof Pattern)) {
|
||||||
|
|
||||||
|
Pattern pattern = (p instanceof String) ? Pattern.compile((String) p) : (Pattern) p;
|
||||||
|
|
||||||
|
Matcher matcher = pattern.matcher(s.toString());
|
||||||
|
|
||||||
|
while (matcher.find()) {
|
||||||
|
allMatches.add(matcher.group());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return allMatches.toArray(new String[0]);
|
||||||
|
}
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string or a regexp");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
|
||||||
|
writer.object();
|
||||||
|
writer.key("description"); writer.value("Returns an array of the groups matching the given regular expression");
|
||||||
|
writer.key("params"); writer.value("string or regexp");
|
||||||
|
writer.key("returns"); writer.value("array of strings");
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
}
|
@ -109,6 +109,7 @@ import com.google.refine.expr.functions.strings.IndexOf;
|
|||||||
import com.google.refine.expr.functions.strings.LastIndexOf;
|
import com.google.refine.expr.functions.strings.LastIndexOf;
|
||||||
import com.google.refine.expr.functions.strings.MD5;
|
import com.google.refine.expr.functions.strings.MD5;
|
||||||
import com.google.refine.expr.functions.strings.Match;
|
import com.google.refine.expr.functions.strings.Match;
|
||||||
|
import com.google.refine.expr.functions.strings.Find;
|
||||||
import com.google.refine.expr.functions.strings.NGram;
|
import com.google.refine.expr.functions.strings.NGram;
|
||||||
import com.google.refine.expr.functions.strings.NGramFingerprint;
|
import com.google.refine.expr.functions.strings.NGramFingerprint;
|
||||||
import com.google.refine.expr.functions.strings.ParseJson;
|
import com.google.refine.expr.functions.strings.ParseJson;
|
||||||
@ -226,6 +227,7 @@ public class ControlFunctionRegistry {
|
|||||||
registerFunction("parseJson", new ParseJson());
|
registerFunction("parseJson", new ParseJson());
|
||||||
registerFunction("ngram", new NGram());
|
registerFunction("ngram", new NGram());
|
||||||
registerFunction("match", new Match());
|
registerFunction("match", new Match());
|
||||||
|
registerFunction("find", new Find());
|
||||||
|
|
||||||
// HTML functions from JSoup
|
// HTML functions from JSoup
|
||||||
registerFunction("parseHtml", new ParseHtml());
|
registerFunction("parseHtml", new ParseHtml());
|
||||||
|
@ -0,0 +1,75 @@
|
|||||||
|
|
||||||
|
package com.google.refine.tests.expr.functions;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.AfterMethod;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.BeforeTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.refine.RefineServlet;
|
||||||
|
import com.google.refine.grel.ControlFunctionRegistry;
|
||||||
|
import com.google.refine.grel.Function;
|
||||||
|
import com.google.refine.tests.RefineServletStub;
|
||||||
|
import com.google.refine.tests.RefineTest;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test cases for find function.
|
||||||
|
*/
|
||||||
|
public class FindFunctionTests extends RefineTest {
|
||||||
|
static Properties bindings;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@BeforeTest
|
||||||
|
public void init() {
|
||||||
|
logger = LoggerFactory.getLogger(this.getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
// dependencies
|
||||||
|
RefineServlet servlet;
|
||||||
|
|
||||||
|
@BeforeMethod
|
||||||
|
public void SetUp() {
|
||||||
|
bindings = new Properties();
|
||||||
|
|
||||||
|
servlet = new RefineServletStub();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterMethod
|
||||||
|
public void TearDown() {
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void findFunctionFindAllTest() throws Exception {
|
||||||
|
String[] matches = (String[]) invoke("find", "This is a test string for testing find.", "test");
|
||||||
|
Assert.assertEquals(matches[0], "test");
|
||||||
|
Assert.assertEquals(matches[1], "test");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void findFunctionFindAllTest2() throws Exception {
|
||||||
|
String[] matches = (String[]) invoke("find", "hello 123456 goodbye.", "\\d{6}|hello");
|
||||||
|
Assert.assertEquals(matches[0], "hello");
|
||||||
|
Assert.assertEquals(matches[1], "123456");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lookup a control function by name and invoke it with a variable number of args
|
||||||
|
*/
|
||||||
|
private static Object invoke(String name,Object... args) {
|
||||||
|
// registry uses static initializer, so no need to set it up
|
||||||
|
Function function = ControlFunctionRegistry.getFunction(name);
|
||||||
|
if (function == null) {
|
||||||
|
throw new IllegalArgumentException("Unknown function "+name);
|
||||||
|
}
|
||||||
|
if (args == null) {
|
||||||
|
return function.call(bindings,new Object[0]);
|
||||||
|
} else {
|
||||||
|
return function.call(bindings,args);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user