Issue 338 - patch from Thad Guidry to provide a function which calls the JSoup ownText() method
git-svn-id: http://google-refine.googlecode.com/svn/trunk@2025 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
a4572b66c8
commit
c5312a2e6a
@ -1,6 +1,6 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
Copyright 2010,2011 Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -64,7 +64,7 @@ public class HtmlText implements Function {
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("description"); writer.value("Selects the text from within an element");
|
||||
writer.key("description"); writer.value("Selects the text from within an element (including all child elements)");
|
||||
writer.key("params"); writer.value("Element e");
|
||||
writer.key("returns"); writer.value("String text");
|
||||
writer.endObject();
|
||||
|
73
main/src/com/google/refine/expr/functions/html/OwnText.java
Normal file
73
main/src/com/google/refine/expr/functions/html/OwnText.java
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.expr.functions.html;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONWriter;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
import com.google.refine.expr.EvalError;
|
||||
import com.google.refine.grel.ControlFunctionRegistry;
|
||||
import com.google.refine.grel.Function;
|
||||
|
||||
public class OwnText implements Function {
|
||||
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
if (args.length >= 1) {
|
||||
Object o1 = args[0];
|
||||
if (o1 != null && o1 instanceof Element) {
|
||||
Element e1 = (Element)o1;
|
||||
return e1.ownText();
|
||||
|
||||
}else{
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("description"); writer.value("Gets the text owned by this element only; does not get the combined text of all children.");
|
||||
writer.key("params"); writer.value("Element e");
|
||||
writer.key("returns"); writer.value("String ownText");
|
||||
writer.endObject();
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
Copyright 2010,2011 Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -101,6 +101,7 @@ import com.google.refine.expr.functions.strings.Fingerprint;
|
||||
import com.google.refine.expr.functions.html.HtmlAttr;
|
||||
import com.google.refine.expr.functions.html.HtmlText;
|
||||
import com.google.refine.expr.functions.html.InnerHtml;
|
||||
import com.google.refine.expr.functions.html.OwnText;
|
||||
import com.google.refine.expr.functions.strings.IndexOf;
|
||||
import com.google.refine.expr.functions.strings.LastIndexOf;
|
||||
import com.google.refine.expr.functions.strings.MD5;
|
||||
@ -225,11 +226,13 @@ public class ControlFunctionRegistry {
|
||||
registerFunction("ngram", new NGram());
|
||||
registerFunction("match", new Match());
|
||||
|
||||
// HTML functions from JSoup
|
||||
registerFunction("parseHtml", new ParseHtml());
|
||||
registerFunction("select", new SelectHtml());
|
||||
registerFunction("htmlAttr", new HtmlAttr());
|
||||
registerFunction("htmlText", new HtmlText());
|
||||
registerFunction("innerHtml", new InnerHtml());
|
||||
registerFunction("ownText", new OwnText());
|
||||
|
||||
registerFunction("indexOf", new IndexOf());
|
||||
registerFunction("lastIndexOf", new LastIndexOf());
|
||||
|
Loading…
Reference in New Issue
Block a user