commit
95d0cf546d
@ -371,7 +371,7 @@
|
||||
<dependency>
|
||||
<groupId>org.jsoup</groupId>
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.11.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.sf.opencsv</groupId>
|
||||
|
@ -40,6 +40,7 @@ import org.json.JSONWriter;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
import com.google.refine.expr.EvalError;
|
||||
import com.google.refine.expr.functions.xml.InnerXml;
|
||||
import com.google.refine.grel.ControlFunctionRegistry;
|
||||
import com.google.refine.grel.Function;
|
||||
|
||||
@ -47,17 +48,15 @@ public class InnerHtml implements Function {
|
||||
|
||||
@Override
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
if (args.length >= 1) {
|
||||
if (args.length == 1) {
|
||||
Object o1 = args[0];
|
||||
if (o1 != null && o1 instanceof Element) {
|
||||
Element e1 = (Element)o1;
|
||||
return e1.html();
|
||||
|
||||
return new InnerXml().call(bindings, args, "html");
|
||||
}else{
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single String as an argument");
|
||||
}
|
||||
|
||||
|
||||
|
@ -37,21 +37,23 @@ import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONWriter;
|
||||
import org.jsoup.Jsoup;
|
||||
|
||||
import com.google.refine.expr.EvalError;
|
||||
import com.google.refine.expr.functions.xml.ParseXml;
|
||||
import com.google.refine.grel.ControlFunctionRegistry;
|
||||
import com.google.refine.grel.Function;
|
||||
|
||||
public class ParseHtml implements Function {
|
||||
|
||||
@Override
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
if (args.length >= 1) {
|
||||
if (args.length == 1) {
|
||||
Object o1 = args[0];
|
||||
if (o1 != null && o1 instanceof String) {
|
||||
return Jsoup.parse(o1.toString());
|
||||
return new ParseXml().call(bindings,args,"html");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single String as an argument");
|
||||
}
|
||||
|
||||
|
||||
|
84
main/src/com/google/refine/expr/functions/xml/InnerXml.java
Normal file
84
main/src/com/google/refine/expr/functions/xml/InnerXml.java
Normal file
@ -0,0 +1,84 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.expr.functions.xml;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONWriter;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
import com.google.refine.expr.EvalError;
|
||||
import com.google.refine.grel.ControlFunctionRegistry;
|
||||
import com.google.refine.grel.Function;
|
||||
|
||||
public class InnerXml implements Function {
|
||||
|
||||
@Override
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
return call(bindings,args,"xml");
|
||||
}
|
||||
|
||||
public Object call(Properties bindings, Object[] args, String mode) {
|
||||
if (args.length == 1) {
|
||||
Object o1 = args[0];
|
||||
if (o1 != null && o1 instanceof Element) {
|
||||
Element e1 = (Element)o1;
|
||||
if(mode == "xml") {
|
||||
return e1.children().toString();
|
||||
} else if (mode == "html") {
|
||||
return e1.html();
|
||||
} else {
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " unable to determine whether XML or HTML is being used.");
|
||||
}
|
||||
}else{
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select(query) prior to using this function");
|
||||
}
|
||||
}
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single XML or HTML element as an argument");
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("description"); writer.value("The innerXml/innerHtml of an XML/HTML element");
|
||||
writer.key("params"); writer.value("Element e");
|
||||
writer.key("returns"); writer.value("String innerXml/innerHtml");
|
||||
writer.endObject();
|
||||
}
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.expr.functions.html;
|
||||
package com.google.refine.expr.functions.xml;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
@ -47,17 +47,17 @@ public class OwnText implements Function {
|
||||
|
||||
@Override
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
if (args.length >= 1) {
|
||||
if (args.length == 1) {
|
||||
Object o1 = args[0];
|
||||
if (o1 != null && o1 instanceof Element) {
|
||||
Element e1 = (Element)o1;
|
||||
return e1.ownText();
|
||||
|
||||
}else{
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single XML or HTML element as an argument");
|
||||
}
|
||||
|
||||
|
||||
@ -66,7 +66,7 @@ public class OwnText implements Function {
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("description"); writer.value("Gets the text owned by this HTML element only; does not get the combined text of all children.");
|
||||
writer.key("description"); writer.value("Gets the text owned by this XML/HTML element only; does not get the combined text of all children.");
|
||||
writer.key("params"); writer.value("Element e");
|
||||
writer.key("returns"); writer.value("String ownText");
|
||||
writer.endObject();
|
82
main/src/com/google/refine/expr/functions/xml/ParseXml.java
Normal file
82
main/src/com/google/refine/expr/functions/xml/ParseXml.java
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.expr.functions.xml;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONWriter;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.parser.Parser;
|
||||
|
||||
import com.google.refine.expr.EvalError;
|
||||
import com.google.refine.grel.ControlFunctionRegistry;
|
||||
import com.google.refine.grel.Function;
|
||||
|
||||
public class ParseXml implements Function {
|
||||
|
||||
@Override
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
return call(bindings,args,"xml");
|
||||
}
|
||||
|
||||
public Object call(Properties bindings, Object[] args, String mode) {
|
||||
if (args.length == 1) {
|
||||
Object o1 = args[0];
|
||||
if (o1 != null && o1 instanceof String) {
|
||||
if (mode == "html") {
|
||||
return Jsoup.parse(o1.toString());
|
||||
} else if (mode == "xml") {
|
||||
return Jsoup.parse(o1.toString(), "",Parser.xmlParser());
|
||||
} else {
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " unable to identify which parser to use");
|
||||
}
|
||||
}
|
||||
}
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single String as an argument");
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("description"); writer.value("Parses a string as XML");
|
||||
writer.key("params"); writer.value("string s");
|
||||
writer.key("returns"); writer.value("XML object");
|
||||
writer.endObject();
|
||||
}
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.expr.functions.html;
|
||||
package com.google.refine.expr.functions.xml;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
@ -43,11 +43,11 @@ import com.google.refine.expr.EvalError;
|
||||
import com.google.refine.grel.ControlFunctionRegistry;
|
||||
import com.google.refine.grel.Function;
|
||||
|
||||
public class SelectHtml implements Function {
|
||||
public class SelectXml implements Function {
|
||||
|
||||
@Override
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
if (args.length >= 2) {
|
||||
if (args.length == 2) {
|
||||
Object o1 = args[0];
|
||||
Object o2 = args[1];
|
||||
if (o1 != null && o1 instanceof Element) {
|
||||
@ -56,10 +56,10 @@ public class SelectHtml implements Function {
|
||||
return e1.select(o2.toString());
|
||||
}
|
||||
}else{
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string)");
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml()");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects two arguments");
|
||||
}
|
||||
|
||||
|
||||
@ -68,9 +68,9 @@ public class SelectHtml implements Function {
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("description"); writer.value("Selects an element from an HTML elementn using selector syntax");
|
||||
writer.key("description"); writer.value("Selects an element from an XML or HTML element using selector syntax.");
|
||||
writer.key("params"); writer.value("Element e, String s");
|
||||
writer.key("returns"); writer.value("HTML Elements");
|
||||
writer.key("returns"); writer.value("XML/HTML Elements");
|
||||
writer.endObject();
|
||||
}
|
||||
}
|
@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.expr.functions.html;
|
||||
package com.google.refine.expr.functions.xml;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
@ -43,11 +43,11 @@ import com.google.refine.expr.EvalError;
|
||||
import com.google.refine.grel.ControlFunctionRegistry;
|
||||
import com.google.refine.grel.Function;
|
||||
|
||||
public class HtmlAttr implements Function {
|
||||
public class XmlAttr implements Function {
|
||||
|
||||
@Override
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
if (args.length >= 2) {
|
||||
if (args.length == 2) {
|
||||
Object o1 = args[0];
|
||||
Object o2 = args[1];
|
||||
if (o1 != null && o1 instanceof Element) {
|
||||
@ -56,10 +56,10 @@ public class HtmlAttr implements Function {
|
||||
return e1.attr(o2.toString());
|
||||
}
|
||||
}else{
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select() prior to using this function");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects two arguments");
|
||||
}
|
||||
|
||||
|
||||
@ -68,7 +68,7 @@ public class HtmlAttr implements Function {
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("description"); writer.value("Selects a value from an attribute on an Html Element");
|
||||
writer.key("description"); writer.value("Selects a value from an attribute on an xml or html Element.");
|
||||
writer.key("params"); writer.value("Element e, String s");
|
||||
writer.key("returns"); writer.value("String attribute Value");
|
||||
writer.endObject();
|
@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.expr.functions.html;
|
||||
package com.google.refine.expr.functions.xml;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
@ -43,21 +43,21 @@ import com.google.refine.expr.EvalError;
|
||||
import com.google.refine.grel.ControlFunctionRegistry;
|
||||
import com.google.refine.grel.Function;
|
||||
|
||||
public class HtmlText implements Function {
|
||||
public class XmlText implements Function {
|
||||
|
||||
@Override
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
if (args.length >= 1) {
|
||||
if (args.length == 1) {
|
||||
Object o1 = args[0];
|
||||
if (o1 != null && o1 instanceof Element) {
|
||||
Element e1 = (Element)o1;
|
||||
return e1.text();
|
||||
|
||||
}else{
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select(query) prior to using this function");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single XML or HTML element as an argument");
|
||||
}
|
||||
|
||||
|
@ -61,12 +61,14 @@ import com.google.refine.expr.functions.booleans.Xor;
|
||||
import com.google.refine.expr.functions.date.DatePart;
|
||||
import com.google.refine.expr.functions.date.Inc;
|
||||
import com.google.refine.expr.functions.date.Now;
|
||||
import com.google.refine.expr.functions.html.HtmlAttr;
|
||||
import com.google.refine.expr.functions.html.HtmlText;
|
||||
import com.google.refine.expr.functions.html.InnerHtml;
|
||||
import com.google.refine.expr.functions.html.OwnText;
|
||||
import com.google.refine.expr.functions.html.ParseHtml;
|
||||
import com.google.refine.expr.functions.html.SelectHtml;
|
||||
import com.google.refine.expr.functions.xml.XmlAttr;
|
||||
import com.google.refine.expr.functions.xml.XmlText;
|
||||
import com.google.refine.expr.functions.xml.InnerXml;
|
||||
import com.google.refine.expr.functions.xml.OwnText;
|
||||
import com.google.refine.expr.functions.xml.ParseXml;
|
||||
import com.google.refine.expr.functions.xml.SelectXml;
|
||||
import com.google.refine.expr.functions.math.ACos;
|
||||
import com.google.refine.expr.functions.math.ASin;
|
||||
import com.google.refine.expr.functions.math.ATan;
|
||||
@ -234,11 +236,15 @@ public class ControlFunctionRegistry {
|
||||
registerFunction("match", new Match());
|
||||
registerFunction("find", new Find());
|
||||
|
||||
// HTML functions from JSoup
|
||||
// XML and HTML functions from JSoup
|
||||
registerFunction("parseXml", new ParseXml());
|
||||
registerFunction("parseHtml", new ParseHtml());
|
||||
registerFunction("select", new SelectHtml());
|
||||
registerFunction("htmlAttr", new HtmlAttr());
|
||||
registerFunction("htmlText", new HtmlText());
|
||||
registerFunction("select", new SelectXml());
|
||||
registerFunction("xmlAttr", new XmlAttr());
|
||||
registerFunction("htmlAttr", new XmlAttr());
|
||||
registerFunction("xmlText", new XmlText());
|
||||
registerFunction("htmlText", new XmlText());
|
||||
registerFunction("innerXml", new InnerXml());
|
||||
registerFunction("innerHtml", new InnerHtml());
|
||||
registerFunction("ownText", new OwnText());
|
||||
|
||||
|
@ -1,15 +0,0 @@
|
||||
package com.google.refine.tests.expr.functions.html;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.expr.functions.html.HtmlAttr;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
|
||||
public class HtmlAttrTests {
|
||||
@Test
|
||||
public void serializeHtmlAttr() {
|
||||
String json = "{\"description\":\"Selects a value from an attribute on an Html Element\",\"params\":\"Element e, String s\",\"returns\":\"String attribute Value\"}";
|
||||
TestUtils.isSerializedTo(new HtmlAttr(), json);
|
||||
}
|
||||
}
|
||||
|
@ -1,15 +1,86 @@
|
||||
package com.google.refine.tests.expr.functions.html;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.AfterMethod;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
|
||||
import com.google.refine.expr.EvalError;
|
||||
import com.google.refine.expr.functions.html.ParseHtml;
|
||||
import com.google.refine.grel.ControlFunctionRegistry;
|
||||
import com.google.refine.grel.Function;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
|
||||
public class ParseHtmlTests {
|
||||
public class ParseHtmlTests extends RefineTest {
|
||||
|
||||
static Properties bindings;
|
||||
static String h = "<html>\n" +
|
||||
"<head>\n" +
|
||||
"</head>\n" +
|
||||
" <body>\n" +
|
||||
" <h1>head1</h1>\n" +
|
||||
" <div class=\"class1\">\n" +
|
||||
" <p>para1 <strong>strong text</strong></p>\n" +
|
||||
" <p>para2</p>\n" +
|
||||
" </div>\n" +
|
||||
" </body>\n" +
|
||||
"</html>";
|
||||
|
||||
@Override
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
logger = LoggerFactory.getLogger(this.getClass());
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
public void SetUp() {
|
||||
bindings = new Properties();
|
||||
}
|
||||
|
||||
@AfterMethod
|
||||
public void TearDown() {
|
||||
bindings = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lookup a control function by name and invoke it with a variable number of args
|
||||
*/
|
||||
private static Object invoke(String name,Object... args) {
|
||||
// registry uses static initializer, so no need to set it up
|
||||
Function function = ControlFunctionRegistry.getFunction(name);
|
||||
if (function == null) {
|
||||
throw new IllegalArgumentException("Unknown function "+name);
|
||||
}
|
||||
if (args == null) {
|
||||
return function.call(bindings,new Object[0]);
|
||||
} else {
|
||||
return function.call(bindings,args);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void serializeParseHtml() {
|
||||
String json = "{\"description\":\"Parses a string as HTML\",\"params\":\"string s\",\"returns\":\"HTML object\"}";
|
||||
TestUtils.isSerializedTo(new ParseHtml(), json);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseHtml() {
|
||||
Assert.assertTrue(invoke("parseHtml") instanceof EvalError);
|
||||
Assert.assertTrue(invoke("parseHtml","h") instanceof org.jsoup.nodes.Document);
|
||||
Assert.assertTrue(invoke("select",Jsoup.parse(h),"p") instanceof org.jsoup.select.Elements);
|
||||
Assert.assertTrue(invoke("innerHtml",Jsoup.parse(h).select("p").first()) instanceof String);
|
||||
Assert.assertEquals(invoke("innerHtml",Jsoup.parse(h).select("p").first()),"para1 <strong>strong text</strong>");
|
||||
Assert.assertEquals(invoke("htmlAttr",Jsoup.parse(h).select("div").first(),"class"),"class1");
|
||||
Assert.assertEquals(invoke("htmlText",Jsoup.parse(h).select("div").first()),"para1 strong text para2");
|
||||
Assert.assertEquals(invoke("ownText",Jsoup.parse(h).select("p").first()),"para1");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,15 +0,0 @@
|
||||
package com.google.refine.tests.expr.functions.html;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.expr.functions.html.SelectHtml;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
|
||||
public class SelectHtmlTests {
|
||||
@Test
|
||||
public void serializeSelectHtml() {
|
||||
String json = "{\"description\":\"Selects an element from an HTML elementn using selector syntax\",\"params\":\"Element e, String s\",\"returns\":\"HTML Elements\"}";
|
||||
TestUtils.isSerializedTo(new SelectHtml(), json);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,15 @@
|
||||
package com.google.refine.tests.expr.functions.xml;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.expr.functions.xml.InnerXml;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
|
||||
public class InnerXmlTests {
|
||||
@Test
|
||||
public void serializeInnerXml() {
|
||||
String json = "{\"description\":\"The innerXml/innerHtml of an XML/HTML element\",\"params\":\"Element e\",\"returns\":\"String innerXml/innerHtml\"}";
|
||||
TestUtils.isSerializedTo(new InnerXml(), json);
|
||||
}
|
||||
}
|
||||
|
@ -1,14 +1,14 @@
|
||||
package com.google.refine.tests.expr.functions.html;
|
||||
package com.google.refine.tests.expr.functions.xml;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.expr.functions.html.OwnText;
|
||||
import com.google.refine.expr.functions.xml.OwnText;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
|
||||
public class OwnTextTests {
|
||||
@Test
|
||||
public void serializeOwnText() {
|
||||
String json = "{\"description\":\"Gets the text owned by this HTML element only; does not get the combined text of all children.\",\"params\":\"Element e\",\"returns\":\"String ownText\"}";
|
||||
String json = "{\"description\":\"Gets the text owned by this XML/HTML element only; does not get the combined text of all children.\",\"params\":\"Element e\",\"returns\":\"String ownText\"}";
|
||||
TestUtils.isSerializedTo(new OwnText(), json);
|
||||
}
|
||||
}
|
@ -0,0 +1,92 @@
|
||||
package com.google.refine.tests.expr.functions.xml;
|
||||
|
||||
import org.jsoup.parser.Parser;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.AfterMethod;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
|
||||
import com.google.refine.expr.EvalError;
|
||||
import com.google.refine.expr.functions.xml.ParseXml;
|
||||
import com.google.refine.grel.ControlFunctionRegistry;
|
||||
import com.google.refine.grel.Function;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
|
||||
|
||||
public class ParseXmlTests extends RefineTest {
|
||||
|
||||
static Properties bindings;
|
||||
static String x = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
|
||||
"<root xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
|
||||
" <foaf:Person>\n" +
|
||||
" <foaf:name>John Doe</foaf:name>\n" +
|
||||
" <head>head1</head>\n" +
|
||||
" <head>head2</head>\n" +
|
||||
" <BODY>body1</BODY>\n" +
|
||||
" <foaf:homepage rdf:resource=\"http://www.example.com\"/>\n" +
|
||||
" </foaf:Person>\n" +
|
||||
" <foaf:Person>\n" +
|
||||
" <foaf:name>Héloïse Dupont</foaf:name>\n" +
|
||||
" <head>head3</head>\n" +
|
||||
" <BODY>body2</BODY>\n" +
|
||||
" <foaf:title/>\n" +
|
||||
" </foaf:Person>\n" +
|
||||
"</root>";
|
||||
|
||||
@Override
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
logger = LoggerFactory.getLogger(this.getClass());
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
public void SetUp() {
|
||||
bindings = new Properties();
|
||||
}
|
||||
|
||||
@AfterMethod
|
||||
public void TearDown() {
|
||||
bindings = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lookup a control function by name and invoke it with a variable number of args
|
||||
*/
|
||||
private static Object invoke(String name,Object... args) {
|
||||
// registry uses static initializer, so no need to set it up
|
||||
Function function = ControlFunctionRegistry.getFunction(name);
|
||||
if (function == null) {
|
||||
throw new IllegalArgumentException("Unknown function "+name);
|
||||
}
|
||||
if (args == null) {
|
||||
return function.call(bindings,new Object[0]);
|
||||
} else {
|
||||
return function.call(bindings,args);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void serializeParseXml() {
|
||||
String json = "{\"description\":\"Parses a string as XML\",\"params\":\"string s\",\"returns\":\"XML object\"}";
|
||||
TestUtils.isSerializedTo(new ParseXml(), json);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseXml() {
|
||||
Assert.assertTrue(invoke("parseXml") instanceof EvalError);
|
||||
Assert.assertTrue(invoke("parseXml","x") instanceof org.jsoup.nodes.Document);
|
||||
Assert.assertTrue(invoke("select",Jsoup.parse(x,"",Parser.xmlParser()),"foaf|Person") instanceof org.jsoup.select.Elements);
|
||||
Assert.assertEquals(invoke("innerXml",Jsoup.parse(x,"",Parser.xmlParser()).select("foaf|Person").first()),"<foaf:name>\n John Doe\n</foaf:name>\n<head>\n head1\n</head>\n<head>\n head2\n</head>\n<BODY>\n body1\n</BODY>\n<foaf:homepage rdf:resource=\"http://www.example.com\" />");
|
||||
Assert.assertEquals(invoke("xmlAttr",Jsoup.parse(x,"",Parser.xmlParser()).select("foaf|homepage").first(),"rdf:resource"),"http://www.example.com");
|
||||
Assert.assertEquals(invoke("ownText",Jsoup.parse(x,"",Parser.xmlParser()).select("BODY").first()),"body1");
|
||||
Assert.assertEquals(invoke("xmlText",Jsoup.parse(x,"",Parser.xmlParser()).select("foaf|Person").first()),"John Doe head1 head2 body1");
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,15 @@
|
||||
package com.google.refine.tests.expr.functions.xml;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.expr.functions.xml.SelectXml;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
|
||||
public class SelectXmlTests {
|
||||
@Test
|
||||
public void serializeSelectXml() {
|
||||
String json = "{\"description\":\"Selects an element from an XML or HTML element using selector syntax.\",\"params\":\"Element e, String s\",\"returns\":\"XML/HTML Elements\"}";
|
||||
TestUtils.isSerializedTo(new SelectXml(), json);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,15 @@
|
||||
package com.google.refine.tests.expr.functions.xml;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.expr.functions.xml.XmlAttr;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
|
||||
public class xmlAttrTests {
|
||||
@Test
|
||||
public void serializeXmlAttr() {
|
||||
String json = "{\"description\":\"Selects a value from an attribute on an xml or html Element.\",\"params\":\"Element e, String s\",\"returns\":\"String attribute Value\"}";
|
||||
TestUtils.isSerializedTo(new XmlAttr(), json);
|
||||
}
|
||||
}
|
||||
|
@ -1,15 +1,15 @@
|
||||
package com.google.refine.tests.expr.functions.html;
|
||||
package com.google.refine.tests.expr.functions.xml;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.expr.functions.html.HtmlText;
|
||||
import com.google.refine.expr.functions.xml.XmlText;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
|
||||
public class HtmlTextTests {
|
||||
public class xmlTextTests {
|
||||
@Test
|
||||
public void serializeHtmlText() {
|
||||
public void serializeXmlText() {
|
||||
String json = "{\"description\":\"Selects the text from within an element (including all child elements)\",\"params\":\"Element e\",\"returns\":\"String text\"}";
|
||||
TestUtils.isSerializedTo(new HtmlText(), json);
|
||||
TestUtils.isSerializedTo(new XmlText(), json);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user