Merge pull request #1845 from ostephens/parseXml

Parse xml
This commit is contained in:
Antonin Delpeuch 2018-11-25 18:44:38 +00:00 committed by GitHub
commit 95d0cf546d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 431 additions and 80 deletions

View File

@ -371,7 +371,7 @@
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.4.1</version>
<version>1.11.3</version>
</dependency>
<dependency>
<groupId>net.sf.opencsv</groupId>

View File

@ -40,6 +40,7 @@ import org.json.JSONWriter;
import org.jsoup.nodes.Element;
import com.google.refine.expr.EvalError;
import com.google.refine.expr.functions.xml.InnerXml;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
@ -47,17 +48,15 @@ public class InnerHtml implements Function {
@Override
public Object call(Properties bindings, Object[] args) {
if (args.length >= 1) {
if (args.length == 1) {
Object o1 = args[0];
if (o1 != null && o1 instanceof Element) {
Element e1 = (Element)o1;
return e1.html();
return new InnerXml().call(bindings, args, "html");
}else{
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
}
}
return null;
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single String as an argument");
}

View File

@ -37,21 +37,23 @@ import java.util.Properties;
import org.json.JSONException;
import org.json.JSONWriter;
import org.jsoup.Jsoup;
import com.google.refine.expr.EvalError;
import com.google.refine.expr.functions.xml.ParseXml;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
public class ParseHtml implements Function {
@Override
public Object call(Properties bindings, Object[] args) {
if (args.length >= 1) {
if (args.length == 1) {
Object o1 = args[0];
if (o1 != null && o1 instanceof String) {
return Jsoup.parse(o1.toString());
return new ParseXml().call(bindings,args,"html");
}
}
return null;
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single String as an argument");
}

View File

@ -0,0 +1,84 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.expr.functions.xml;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONWriter;
import org.jsoup.nodes.Element;
import com.google.refine.expr.EvalError;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
/**
 * GREL function that returns the inner markup of a jsoup {@link Element}.
 *
 * <p>The one-argument-array entry point works in "xml" mode; the three-parameter
 * overload lets a caller pick "xml" or "html" explicitly.
 */
public class InnerXml implements Function {

    /**
     * Evaluates the function in "xml" mode.
     *
     * @param bindings evaluation bindings (not read by this function)
     * @param args     the call arguments; exactly one {@link Element} is expected
     * @return the markup string, or an {@link EvalError} describing the problem
     */
    @Override
    public Object call(Properties bindings, Object[] args) {
        return call(bindings, args, "xml");
    }

    /**
     * Evaluates the function in the given mode.
     *
     * @param bindings evaluation bindings (not read by this function)
     * @param args     the call arguments; exactly one {@link Element} is expected
     * @param mode     "xml" to return the string form of the element's children,
     *                 "html" to return {@code Element.html()}
     * @return the markup string, or an {@link EvalError} when the argument count,
     *         the argument type, or the mode is not acceptable
     */
    public Object call(Properties bindings, Object[] args, String mode) {
        if (args.length == 1) {
            Object o1 = args[0];
            // instanceof is false for null, so no separate null check is needed.
            if (o1 instanceof Element) {
                Element e1 = (Element) o1;
                // Compare by value: reference comparison (==) only matches interned
                // literals and silently fails for strings built at runtime.
                if ("xml".equals(mode)) {
                    // NOTE(review): children() appears to cover child elements only, so text
                    // held directly by e1 may not be part of the result - confirm this is intended.
                    return e1.children().toString();
                } else if ("html".equals(mode)) {
                    return e1.html();
                } else {
                    return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " unable to determine whether XML or HTML is being used.");
                }
            } else {
                return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select(query) prior to using this function");
            }
        }
        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single XML or HTML element as an argument");
    }

    /**
     * Writes the function's self-description (description, params, returns) as a JSON object.
     *
     * @param writer  destination JSON writer
     * @param options serialization options (not read here)
     * @throws JSONException if the writer rejects the output
     */
    @Override
    public void write(JSONWriter writer, Properties options)
            throws JSONException {
        writer.object();
        writer.key("description"); writer.value("The innerXml/innerHtml of an XML/HTML element");
        writer.key("params"); writer.value("Element e");
        writer.key("returns"); writer.value("String innerXml/innerHtml");
        writer.endObject();
    }
}

View File

@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.expr.functions.html;
package com.google.refine.expr.functions.xml;
import java.util.Properties;
@ -47,17 +47,17 @@ public class OwnText implements Function {
@Override
public Object call(Properties bindings, Object[] args) {
if (args.length >= 1) {
if (args.length == 1) {
Object o1 = args[0];
if (o1 != null && o1 instanceof Element) {
Element e1 = (Element)o1;
return e1.ownText();
}else{
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
}
}
return null;
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single XML or HTML element as an argument");
}
@ -66,7 +66,7 @@ public class OwnText implements Function {
throws JSONException {
writer.object();
writer.key("description"); writer.value("Gets the text owned by this HTML element only; does not get the combined text of all children.");
writer.key("description"); writer.value("Gets the text owned by this XML/HTML element only; does not get the combined text of all children.");
writer.key("params"); writer.value("Element e");
writer.key("returns"); writer.value("String ownText");
writer.endObject();

View File

@ -0,0 +1,82 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.expr.functions.xml;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONWriter;
import org.jsoup.Jsoup;
import org.jsoup.parser.Parser;
import com.google.refine.expr.EvalError;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
/**
 * GREL function that parses a string into a jsoup document.
 *
 * <p>The one-argument-array entry point uses jsoup's XML parser; the three-parameter
 * overload lets a caller choose between "xml" and "html" parsing.
 */
public class ParseXml implements Function {

    /**
     * Parses the single string argument as XML.
     *
     * @param bindings evaluation bindings (not read by this function)
     * @param args     the call arguments; exactly one {@link String} is expected
     * @return the parsed document, or an {@link EvalError} describing the problem
     */
    @Override
    public Object call(Properties bindings, Object[] args) {
        return call(bindings, args, "xml");
    }

    /**
     * Parses the single string argument with the parser selected by {@code mode}.
     *
     * @param bindings evaluation bindings (not read by this function)
     * @param args     the call arguments; exactly one {@link String} is expected
     * @param mode     "html" for {@code Jsoup.parse(String)}, "xml" for jsoup's XML parser
     * @return the parsed document, or an {@link EvalError} when the argument count,
     *         the argument type, or the mode is not acceptable
     */
    public Object call(Properties bindings, Object[] args, String mode) {
        if (args.length == 1) {
            Object o1 = args[0];
            // instanceof is false for null, so no separate null check is needed.
            if (o1 instanceof String) {
                // Compare by value: reference comparison (==) only matches interned
                // literals and silently fails for strings built at runtime.
                if ("html".equals(mode)) {
                    return Jsoup.parse(o1.toString());
                } else if ("xml".equals(mode)) {
                    // Empty base URI: the input is a bare string with no location.
                    return Jsoup.parse(o1.toString(), "", Parser.xmlParser());
                } else {
                    return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " unable to identify which parser to use");
                }
            }
        }
        // Reached for a wrong argument count and for a non-String argument alike.
        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single String as an argument");
    }

    /**
     * Writes the function's self-description (description, params, returns) as a JSON object.
     *
     * @param writer  destination JSON writer
     * @param options serialization options (not read here)
     * @throws JSONException if the writer rejects the output
     */
    @Override
    public void write(JSONWriter writer, Properties options)
            throws JSONException {
        writer.object();
        writer.key("description"); writer.value("Parses a string as XML");
        writer.key("params"); writer.value("string s");
        writer.key("returns"); writer.value("XML object");
        writer.endObject();
    }
}

View File

@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.expr.functions.html;
package com.google.refine.expr.functions.xml;
import java.util.Properties;
@ -43,11 +43,11 @@ import com.google.refine.expr.EvalError;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
public class SelectHtml implements Function {
public class SelectXml implements Function {
@Override
public Object call(Properties bindings, Object[] args) {
if (args.length >= 2) {
if (args.length == 2) {
Object o1 = args[0];
Object o2 = args[1];
if (o1 != null && o1 instanceof Element) {
@ -56,10 +56,10 @@ public class SelectHtml implements Function {
return e1.select(o2.toString());
}
}else{
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string)");
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml()");
}
}
return null;
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects two arguments");
}
@ -68,9 +68,9 @@ public class SelectHtml implements Function {
throws JSONException {
writer.object();
writer.key("description"); writer.value("Selects an element from an HTML elementn using selector syntax");
writer.key("description"); writer.value("Selects an element from an XML or HTML element using selector syntax.");
writer.key("params"); writer.value("Element e, String s");
writer.key("returns"); writer.value("HTML Elements");
writer.key("returns"); writer.value("XML/HTML Elements");
writer.endObject();
}
}

View File

@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.expr.functions.html;
package com.google.refine.expr.functions.xml;
import java.util.Properties;
@ -43,11 +43,11 @@ import com.google.refine.expr.EvalError;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
public class HtmlAttr implements Function {
public class XmlAttr implements Function {
@Override
public Object call(Properties bindings, Object[] args) {
if (args.length >= 2) {
if (args.length == 2) {
Object o1 = args[0];
Object o2 = args[1];
if (o1 != null && o1 instanceof Element) {
@ -56,10 +56,10 @@ public class HtmlAttr implements Function {
return e1.attr(o2.toString());
}
}else{
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select() prior to using this function");
}
}
return null;
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects two arguments");
}
@ -68,7 +68,7 @@ public class HtmlAttr implements Function {
throws JSONException {
writer.object();
writer.key("description"); writer.value("Selects a value from an attribute on an Html Element");
writer.key("description"); writer.value("Selects a value from an attribute on an xml or html Element.");
writer.key("params"); writer.value("Element e, String s");
writer.key("returns"); writer.value("String attribute Value");
writer.endObject();

View File

@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.expr.functions.html;
package com.google.refine.expr.functions.xml;
import java.util.Properties;
@ -43,21 +43,21 @@ import com.google.refine.expr.EvalError;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
public class HtmlText implements Function {
public class XmlText implements Function {
@Override
public Object call(Properties bindings, Object[] args) {
if (args.length >= 1) {
if (args.length == 1) {
Object o1 = args[0];
if (o1 != null && o1 instanceof Element) {
Element e1 = (Element)o1;
return e1.text();
}else{
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select(query) prior to using this function");
}
}
return null;
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single XML or HTML element as an argument");
}

View File

@ -61,12 +61,14 @@ import com.google.refine.expr.functions.booleans.Xor;
import com.google.refine.expr.functions.date.DatePart;
import com.google.refine.expr.functions.date.Inc;
import com.google.refine.expr.functions.date.Now;
import com.google.refine.expr.functions.html.HtmlAttr;
import com.google.refine.expr.functions.html.HtmlText;
import com.google.refine.expr.functions.html.InnerHtml;
import com.google.refine.expr.functions.html.OwnText;
import com.google.refine.expr.functions.html.ParseHtml;
import com.google.refine.expr.functions.html.SelectHtml;
import com.google.refine.expr.functions.xml.XmlAttr;
import com.google.refine.expr.functions.xml.XmlText;
import com.google.refine.expr.functions.xml.InnerXml;
import com.google.refine.expr.functions.xml.OwnText;
import com.google.refine.expr.functions.xml.ParseXml;
import com.google.refine.expr.functions.xml.SelectXml;
import com.google.refine.expr.functions.math.ACos;
import com.google.refine.expr.functions.math.ASin;
import com.google.refine.expr.functions.math.ATan;
@ -234,11 +236,15 @@ public class ControlFunctionRegistry {
registerFunction("match", new Match());
registerFunction("find", new Find());
// HTML functions from JSoup
// XML and HTML functions from JSoup
registerFunction("parseXml", new ParseXml());
registerFunction("parseHtml", new ParseHtml());
registerFunction("select", new SelectHtml());
registerFunction("htmlAttr", new HtmlAttr());
registerFunction("htmlText", new HtmlText());
registerFunction("select", new SelectXml());
registerFunction("xmlAttr", new XmlAttr());
registerFunction("htmlAttr", new XmlAttr());
registerFunction("xmlText", new XmlText());
registerFunction("htmlText", new XmlText());
registerFunction("innerXml", new InnerXml());
registerFunction("innerHtml", new InnerHtml());
registerFunction("ownText", new OwnText());

View File

@ -1,15 +0,0 @@
package com.google.refine.tests.expr.functions.html;
import org.testng.annotations.Test;
import com.google.refine.expr.functions.html.HtmlAttr;
import com.google.refine.tests.util.TestUtils;
public class HtmlAttrTests {
@Test
public void serializeHtmlAttr() {
String json = "{\"description\":\"Selects a value from an attribute on an Html Element\",\"params\":\"Element e, String s\",\"returns\":\"String attribute Value\"}";
TestUtils.isSerializedTo(new HtmlAttr(), json);
}
}

View File

@ -1,15 +1,86 @@
package com.google.refine.tests.expr.functions.html;
import org.jsoup.Jsoup;
import org.testng.annotations.Test;
import java.util.Properties;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import com.google.refine.expr.EvalError;
import com.google.refine.expr.functions.html.ParseHtml;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
import com.google.refine.tests.RefineTest;
import com.google.refine.tests.util.TestUtils;
public class ParseHtmlTests {
/**
 * Tests for the HTML-flavoured GREL functions (parseHtml, select, innerHtml,
 * htmlAttr, htmlText, ownText), looked up through the function registry.
 */
public class ParseHtmlTests extends RefineTest {

    static Properties bindings;

    // Small HTML document used as the fixture for every assertion below.
    static String h = "<html>\n" +
            "<head>\n" +
            "</head>\n" +
            " <body>\n" +
            " <h1>head1</h1>\n" +
            " <div class=\"class1\">\n" +
            " <p>para1 <strong>strong text</strong></p>\n" +
            " <p>para2</p>\n" +
            " </div>\n" +
            " </body>\n" +
            "</html>";

    @Override
    @BeforeTest
    public void init() {
        logger = LoggerFactory.getLogger(this.getClass());
    }

    @BeforeMethod
    public void SetUp() {
        bindings = new Properties();
    }

    @AfterMethod
    public void TearDown() {
        bindings = null;
    }

    /**
     * Lookup a control function by name and invoke it with a variable number of args
     *
     * @param name registered function name
     * @param args arguments forwarded to the function; {@code null} is treated as no arguments
     * @return whatever the function returns
     * @throws IllegalArgumentException if no function is registered under {@code name}
     */
    private static Object invoke(String name, Object... args) {
        // registry uses static initializer, so no need to set it up
        Function function = ControlFunctionRegistry.getFunction(name);
        if (function == null) {
            throw new IllegalArgumentException("Unknown function " + name);
        }
        if (args == null) {
            return function.call(bindings, new Object[0]);
        } else {
            return function.call(bindings, args);
        }
    }

    @Test
    public void serializeParseHtml() {
        String json = "{\"description\":\"Parses a string as HTML\",\"params\":\"string s\",\"returns\":\"HTML object\"}";
        TestUtils.isSerializedTo(new ParseHtml(), json);
    }

    @Test
    public void testParseHtml() {
        Assert.assertTrue(invoke("parseHtml") instanceof EvalError);
        // Pass the fixture document itself; the literal "h" would only parse a one-letter string.
        Assert.assertTrue(invoke("parseHtml", h) instanceof org.jsoup.nodes.Document);
        Assert.assertTrue(invoke("select", Jsoup.parse(h), "p") instanceof org.jsoup.select.Elements);
        Assert.assertTrue(invoke("innerHtml", Jsoup.parse(h).select("p").first()) instanceof String);
        Assert.assertEquals(invoke("innerHtml", Jsoup.parse(h).select("p").first()), "para1 <strong>strong text</strong>");
        Assert.assertEquals(invoke("htmlAttr", Jsoup.parse(h).select("div").first(), "class"), "class1");
        Assert.assertEquals(invoke("htmlText", Jsoup.parse(h).select("div").first()), "para1 strong text para2");
        Assert.assertEquals(invoke("ownText", Jsoup.parse(h).select("p").first()), "para1");
    }
}

View File

@ -1,15 +0,0 @@
package com.google.refine.tests.expr.functions.html;
import org.testng.annotations.Test;
import com.google.refine.expr.functions.html.SelectHtml;
import com.google.refine.tests.util.TestUtils;
public class SelectHtmlTests {
@Test
public void serializeSelectHtml() {
String json = "{\"description\":\"Selects an element from an HTML elementn using selector syntax\",\"params\":\"Element e, String s\",\"returns\":\"HTML Elements\"}";
TestUtils.isSerializedTo(new SelectHtml(), json);
}
}

View File

@ -0,0 +1,15 @@
package com.google.refine.tests.expr.functions.xml;
import org.testng.annotations.Test;
import com.google.refine.expr.functions.xml.InnerXml;
import com.google.refine.tests.util.TestUtils;
/** Serialization check for the {@code InnerXml} function. */
public class InnerXmlTests {

    /** The function's self-description must serialize to exactly this JSON. */
    @Test
    public void serializeInnerXml() {
        final InnerXml function = new InnerXml();
        final String expected = "{\"description\":\"The innerXml/innerHtml of an XML/HTML element\",\"params\":\"Element e\",\"returns\":\"String innerXml/innerHtml\"}";
        TestUtils.isSerializedTo(function, expected);
    }
}

View File

@ -1,14 +1,14 @@
package com.google.refine.tests.expr.functions.html;
package com.google.refine.tests.expr.functions.xml;
import org.testng.annotations.Test;
import com.google.refine.expr.functions.html.OwnText;
import com.google.refine.expr.functions.xml.OwnText;
import com.google.refine.tests.util.TestUtils;
public class OwnTextTests {
@Test
public void serializeOwnText() {
String json = "{\"description\":\"Gets the text owned by this HTML element only; does not get the combined text of all children.\",\"params\":\"Element e\",\"returns\":\"String ownText\"}";
String json = "{\"description\":\"Gets the text owned by this XML/HTML element only; does not get the combined text of all children.\",\"params\":\"Element e\",\"returns\":\"String ownText\"}";
TestUtils.isSerializedTo(new OwnText(), json);
}
}

View File

@ -0,0 +1,92 @@
package com.google.refine.tests.expr.functions.xml;
import org.jsoup.parser.Parser;
import org.jsoup.Jsoup;
import org.testng.annotations.Test;
import java.util.Properties;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import com.google.refine.expr.EvalError;
import com.google.refine.expr.functions.xml.ParseXml;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
import com.google.refine.tests.RefineTest;
import com.google.refine.tests.util.TestUtils;
/**
 * Tests for the XML-flavoured GREL functions (parseXml, select, innerXml,
 * xmlAttr, ownText, xmlText), looked up through the function registry.
 */
public class ParseXmlTests extends RefineTest {

    static Properties bindings;

    // Namespaced XML document used as the fixture for every assertion below.
    static String x = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            "<root xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
            " <foaf:Person>\n" +
            " <foaf:name>John Doe</foaf:name>\n" +
            " <head>head1</head>\n" +
            " <head>head2</head>\n" +
            " <BODY>body1</BODY>\n" +
            " <foaf:homepage rdf:resource=\"http://www.example.com\"/>\n" +
            " </foaf:Person>\n" +
            " <foaf:Person>\n" +
            " <foaf:name>Héloïse Dupont</foaf:name>\n" +
            " <head>head3</head>\n" +
            " <BODY>body2</BODY>\n" +
            " <foaf:title/>\n" +
            " </foaf:Person>\n" +
            "</root>";

    @Override
    @BeforeTest
    public void init() {
        logger = LoggerFactory.getLogger(this.getClass());
    }

    @BeforeMethod
    public void SetUp() {
        bindings = new Properties();
    }

    @AfterMethod
    public void TearDown() {
        bindings = null;
    }

    /**
     * Lookup a control function by name and invoke it with a variable number of args
     *
     * @param name registered function name
     * @param args arguments forwarded to the function; {@code null} is treated as no arguments
     * @return whatever the function returns
     * @throws IllegalArgumentException if no function is registered under {@code name}
     */
    private static Object invoke(String name, Object... args) {
        // registry uses static initializer, so no need to set it up
        Function function = ControlFunctionRegistry.getFunction(name);
        if (function == null) {
            throw new IllegalArgumentException("Unknown function " + name);
        }
        if (args == null) {
            return function.call(bindings, new Object[0]);
        } else {
            return function.call(bindings, args);
        }
    }

    @Test
    public void serializeParseXml() {
        String json = "{\"description\":\"Parses a string as XML\",\"params\":\"string s\",\"returns\":\"XML object\"}";
        TestUtils.isSerializedTo(new ParseXml(), json);
    }

    @Test
    public void testParseXml() {
        Assert.assertTrue(invoke("parseXml") instanceof EvalError);
        // Pass the fixture document itself; the literal "x" would only parse a one-letter string.
        Assert.assertTrue(invoke("parseXml", x) instanceof org.jsoup.nodes.Document);
        Assert.assertTrue(invoke("select", Jsoup.parse(x, "", Parser.xmlParser()), "foaf|Person") instanceof org.jsoup.select.Elements);
        Assert.assertEquals(invoke("innerXml", Jsoup.parse(x, "", Parser.xmlParser()).select("foaf|Person").first()), "<foaf:name>\n John Doe\n</foaf:name>\n<head>\n head1\n</head>\n<head>\n head2\n</head>\n<BODY>\n body1\n</BODY>\n<foaf:homepage rdf:resource=\"http://www.example.com\" />");
        Assert.assertEquals(invoke("xmlAttr", Jsoup.parse(x, "", Parser.xmlParser()).select("foaf|homepage").first(), "rdf:resource"), "http://www.example.com");
        Assert.assertEquals(invoke("ownText", Jsoup.parse(x, "", Parser.xmlParser()).select("BODY").first()), "body1");
        Assert.assertEquals(invoke("xmlText", Jsoup.parse(x, "", Parser.xmlParser()).select("foaf|Person").first()), "John Doe head1 head2 body1");
    }
}

View File

@ -0,0 +1,15 @@
package com.google.refine.tests.expr.functions.xml;
import org.testng.annotations.Test;
import com.google.refine.expr.functions.xml.SelectXml;
import com.google.refine.tests.util.TestUtils;
/** Serialization check for the {@code SelectXml} function. */
public class SelectXmlTests {

    /** The function's self-description must serialize to exactly this JSON. */
    @Test
    public void serializeSelectXml() {
        final SelectXml function = new SelectXml();
        final String expected = "{\"description\":\"Selects an element from an XML or HTML element using selector syntax.\",\"params\":\"Element e, String s\",\"returns\":\"XML/HTML Elements\"}";
        TestUtils.isSerializedTo(function, expected);
    }
}

View File

@ -0,0 +1,15 @@
package com.google.refine.tests.expr.functions.xml;
import org.testng.annotations.Test;
import com.google.refine.expr.functions.xml.XmlAttr;
import com.google.refine.tests.util.TestUtils;
/** Serialization check for the {@code XmlAttr} function. */
public class xmlAttrTests {

    /** The function's self-description must serialize to exactly this JSON. */
    @Test
    public void serializeXmlAttr() {
        final XmlAttr function = new XmlAttr();
        final String expected = "{\"description\":\"Selects a value from an attribute on an xml or html Element.\",\"params\":\"Element e, String s\",\"returns\":\"String attribute Value\"}";
        TestUtils.isSerializedTo(function, expected);
    }
}

View File

@ -1,15 +1,15 @@
package com.google.refine.tests.expr.functions.html;
package com.google.refine.tests.expr.functions.xml;
import org.testng.annotations.Test;
import com.google.refine.expr.functions.html.HtmlText;
import com.google.refine.expr.functions.xml.XmlText;
import com.google.refine.tests.util.TestUtils;
public class HtmlTextTests {
public class xmlTextTests {
@Test
public void serializeHtmlText() {
public void serializeXmlText() {
String json = "{\"description\":\"Selects the text from within an element (including all child elements)\",\"params\":\"Element e\",\"returns\":\"String text\"}";
TestUtils.isSerializedTo(new HtmlText(), json);
TestUtils.isSerializedTo(new XmlText(), json);
}
}