From bae3dbb81259b5c027876190c5627f1705aa092f Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 20 Nov 2018 20:20:51 +0000 Subject: [PATCH] Add XML parsing and update HTML parsing to use same classes --- .../refine/expr/functions/html/InnerHtml.java | 5 +- .../refine/expr/functions/html/ParseHtml.java | 9 +- .../refine/expr/functions/xml/InnerXml.java | 84 +++++++++++++++++++ .../refine/expr/functions/xml/OwnText.java | 75 +++++++++++++++++ .../refine/expr/functions/xml/ParseXml.java | 82 ++++++++++++++++++ .../refine/expr/functions/xml/SelectXml.java | 77 +++++++++++++++++ .../refine/expr/functions/xml/XmlAttr.java | 77 +++++++++++++++++ .../refine/expr/functions/xml/XmlText.java | 75 +++++++++++++++++ .../refine/grel/ControlFunctionRegistry.java | 23 +++-- 9 files changed, 494 insertions(+), 13 deletions(-) create mode 100644 main/src/com/google/refine/expr/functions/xml/InnerXml.java create mode 100644 main/src/com/google/refine/expr/functions/xml/OwnText.java create mode 100644 main/src/com/google/refine/expr/functions/xml/ParseXml.java create mode 100644 main/src/com/google/refine/expr/functions/xml/SelectXml.java create mode 100644 main/src/com/google/refine/expr/functions/xml/XmlAttr.java create mode 100644 main/src/com/google/refine/expr/functions/xml/XmlText.java diff --git a/main/src/com/google/refine/expr/functions/html/InnerHtml.java b/main/src/com/google/refine/expr/functions/html/InnerHtml.java index 2ac98e37d..5ccafdc6b 100644 --- a/main/src/com/google/refine/expr/functions/html/InnerHtml.java +++ b/main/src/com/google/refine/expr/functions/html/InnerHtml.java @@ -40,6 +40,7 @@ import org.json.JSONWriter; import org.jsoup.nodes.Element; import com.google.refine.expr.EvalError; +import com.google.refine.expr.functions.xml.InnerXml; import com.google.refine.grel.ControlFunctionRegistry; import com.google.refine.grel.Function; @@ -50,9 +51,7 @@ public class InnerHtml implements Function { if (args.length >= 1) { Object o1 = args[0]; if (o1 != null 
&& o1 instanceof Element) { - Element e1 = (Element)o1; - return e1.html(); - + return new InnerXml().call(bindings, args, "html"); }else{ return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function"); } diff --git a/main/src/com/google/refine/expr/functions/html/ParseHtml.java b/main/src/com/google/refine/expr/functions/html/ParseHtml.java index 503171bab..28bf387eb 100644 --- a/main/src/com/google/refine/expr/functions/html/ParseHtml.java +++ b/main/src/com/google/refine/expr/functions/html/ParseHtml.java @@ -39,19 +39,22 @@ import org.json.JSONException; import org.json.JSONWriter; import org.jsoup.Jsoup; +import com.google.refine.expr.EvalError; +import com.google.refine.expr.functions.xml.ParseXml; +import com.google.refine.grel.ControlFunctionRegistry; import com.google.refine.grel.Function; public class ParseHtml implements Function { @Override public Object call(Properties bindings, Object[] args) { - if (args.length >= 1) { + if (args.length == 1) { Object o1 = args[0]; if (o1 != null && o1 instanceof String) { - return Jsoup.parse(o1.toString()); + return new ParseXml().call(bindings,args,"html"); } } - return null; + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single String as an argument"); } diff --git a/main/src/com/google/refine/expr/functions/xml/InnerXml.java b/main/src/com/google/refine/expr/functions/xml/InnerXml.java new file mode 100644 index 000000000..a37323c62 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/xml/InnerXml.java @@ -0,0 +1,84 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+package com.google.refine.expr.functions.xml;
+
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONWriter;
+import org.jsoup.nodes.Element;
+
+import com.google.refine.expr.EvalError;
+import com.google.refine.grel.ControlFunctionRegistry;
+import com.google.refine.grel.Function;
+
+public class InnerXml implements Function {
+    /** Returns the inner markup of the Element in args[0], using the "xml" mode of the three-argument overload. */
+    @Override
+    public Object call(Properties bindings, Object[] args) {
+        return call(bindings, args, "xml");
+    }
+    /** Mode "xml" serialises the element's children; mode "html" returns Element.html(); anything else is an EvalError. */
+    public Object call(Properties bindings, Object[] args, String mode) {
+        if (args.length == 1) {
+            Object o1 = args[0];
+            if (o1 instanceof Element) { // instanceof is already false for null
+                Element e1 = (Element) o1;
+                if ("xml".equals(mode)) { // compare by value: == only matches the very same String object
+                    return e1.children().toString();
+                } else if ("html".equals(mode)) {
+                    return e1.html();
+                } else {
+                    return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " unable to determine whether XML or HTML is being used.");
+                }
+            } else {
+                return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select(query) prior to using this function");
+            }
+        }
+        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single XML or HTML element as an argument");
+    }
+
+    /** Writes this function's description, params and returns entries as one JSON object. */
+    @Override
+    public void write(JSONWriter writer, Properties options)
+        throws JSONException {
+
+        writer.object();
+        writer.key("description"); writer.value("The innerXml/innerHtml of an XML/HTML element");
+        writer.key("params"); writer.value("Element e");
+        writer.key("returns"); writer.value("String innerXml/innerHtml");
+        writer.endObject();
+    }
+}
+
diff --git a/main/src/com/google/refine/expr/functions/xml/OwnText.java b/main/src/com/google/refine/expr/functions/xml/OwnText.java
new file mode 100644
index 000000000..b4df9c6d1
--- /dev/null
+++ b/main/src/com/google/refine/expr/functions/xml/OwnText.java
@@ -0,0 +1,75 @@
+/*
+
+Copyright 2011, Google Inc.
+All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+package com.google.refine.expr.functions.xml;
+
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONWriter;
+import org.jsoup.nodes.Element;
+
+import com.google.refine.expr.EvalError;
+import com.google.refine.grel.ControlFunctionRegistry;
+import com.google.refine.grel.Function;
+
+public class OwnText implements Function {
+    /** Returns Element.ownText() for the Element in args[0]; any other input yields an EvalError. */
+    @Override
+    public Object call(Properties bindings, Object[] args) {
+        if (args.length >= 1) {
+            Object o1 = args[0];
+            if (o1 instanceof Element) { // instanceof is already false for null
+                Element e1 = (Element) o1;
+                // Text owned by e1 itself only, not the combined text of its children.
+                return e1.ownText();
+            } else {
+                return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select(query) prior to using this function");
+            }
+        }
+        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an XML or HTML element as its first argument"); // previously a silent null
+    }
+
+    /** Writes this function's description, params and returns entries as one JSON object. */
+    @Override
+    public void write(JSONWriter writer, Properties options)
+        throws JSONException {
+
+        writer.object();
+        writer.key("description"); writer.value("Gets the text owned by this XML/HTML element only; does not get the combined text of all children.");
+        writer.key("params"); writer.value("Element e");
+        writer.key("returns"); writer.value("String ownText");
+        writer.endObject();
+    }
+}
+
diff --git a/main/src/com/google/refine/expr/functions/xml/ParseXml.java b/main/src/com/google/refine/expr/functions/xml/ParseXml.java
new file mode 100644
index 000000000..30f47aed8
--- /dev/null
+++ b/main/src/com/google/refine/expr/functions/xml/ParseXml.java
@@ -0,0 +1,82 @@
+/*
+
+Copyright 2010, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+package com.google.refine.expr.functions.xml;
+
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONWriter;
+import org.jsoup.Jsoup;
+import org.jsoup.parser.Parser;
+
+import com.google.refine.expr.EvalError;
+import com.google.refine.grel.ControlFunctionRegistry;
+import com.google.refine.grel.Function;
+
+public class ParseXml implements Function {
+    /** Parses the String in args[0], using the "xml" mode of the three-argument overload. */
+    @Override
+    public Object call(Properties bindings, Object[] args) {
+        return call(bindings, args, "xml");
+    }
+    /** Mode "html" calls Jsoup.parse(String); mode "xml" parses with Parser.xmlParser(); anything else is an EvalError. */
+    public Object call(Properties bindings, Object[] args, String mode) {
+        if (args.length == 1) {
+            Object o1 = args[0];
+            if (o1 instanceof String) { // instanceof is already false for null
+                if ("html".equals(mode)) { // compare by value: == only matches the very same String object
+                    return Jsoup.parse(o1.toString());
+                } else if ("xml".equals(mode)) {
+                    return Jsoup.parse(o1.toString(), "", Parser.xmlParser());
+                } else {
+                    return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " unable to identify which parser to use");
+                }
+            }
+        }
+        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single String as an argument");
+    }
+
+    /** Writes this function's description, params and returns entries as one JSON object. */
+    @Override
+    public void write(JSONWriter writer, Properties options)
+        throws JSONException {
+
+        writer.object();
+        writer.key("description"); writer.value("Parses a string as XML");
+        writer.key("params"); writer.value("string s");
+        writer.key("returns"); writer.value("XML object");
+        writer.endObject();
+    }
+}
+
diff --git a/main/src/com/google/refine/expr/functions/xml/SelectXml.java b/main/src/com/google/refine/expr/functions/xml/SelectXml.java
new file mode 100644
index 000000000..8bb8d3a84
--- /dev/null
+++ b/main/src/com/google/refine/expr/functions/xml/SelectXml.java
@@ -0,0 +1,77 @@
+/*
+
+Copyright 2010, Google Inc.
+All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+package com.google.refine.expr.functions.xml;
+
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONWriter;
+import org.jsoup.nodes.Element;
+
+import com.google.refine.expr.EvalError;
+import com.google.refine.grel.ControlFunctionRegistry;
+import com.google.refine.grel.Function;
+
+public class SelectXml implements Function {
+    /** Runs the selector String in args[1] against the Element in args[0] via Element.select(). */
+    @Override
+    public Object call(Properties bindings, Object[] args) {
+        if (args.length == 2) {
+            Object o1 = args[0];
+            Object o2 = args[1];
+            if (o1 instanceof Element) { // instanceof is already false for null
+                Element e1 = (Element) o1;
+                if (o2 instanceof String) {
+                    return e1.select(o2.toString());
+                }
+            } else {
+                return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml()");
+            }
+        }
+        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects two arguments: an XML or HTML Element and a selector String"); // also reached when args[1] is not a String
+    }
+
+    /** Writes this function's description, params and returns entries as one JSON object. */
+    @Override
+    public void write(JSONWriter writer, Properties options)
+        throws JSONException {
+
+        writer.object();
+        writer.key("description"); writer.value("Selects an element from an XML or HTML element using selector syntax.");
+        writer.key("params"); writer.value("Element e, String s");
+        writer.key("returns"); writer.value("XML/HTML Elements");
+        writer.endObject();
+    }
+}
+
diff --git a/main/src/com/google/refine/expr/functions/xml/XmlAttr.java b/main/src/com/google/refine/expr/functions/xml/XmlAttr.java
new file mode 100644
index 000000000..269cb6796
--- /dev/null
+++ b/main/src/com/google/refine/expr/functions/xml/XmlAttr.java
@@ -0,0 +1,77 @@
+/*
+
+Copyright 2010, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+package com.google.refine.expr.functions.xml;
+
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONWriter;
+import org.jsoup.nodes.Element;
+
+import com.google.refine.expr.EvalError;
+import com.google.refine.grel.ControlFunctionRegistry;
+import com.google.refine.grel.Function;
+
+public class XmlAttr implements Function {
+    /** Returns Element.attr() for the attribute named by args[1] on the Element in args[0]; bad input yields an EvalError. */
+    @Override
+    public Object call(Properties bindings, Object[] args) {
+        if (args.length >= 2) {
+            Object o1 = args[0];
+            Object o2 = args[1];
+            if (o1 instanceof Element) { // instanceof is already false for null
+                Element e1 = (Element) o1;
+                if (o2 instanceof String) {
+                    return e1.attr(o2.toString());
+                }
+            } else {
+                return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select() prior to using this function");
+            }
+        }
+        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects two arguments: an XML or HTML Element and an attribute name String"); // previously a silent null
+    }
+
+    /** Writes this function's description, params and returns entries as one JSON object. */
+    @Override
+    public void write(JSONWriter writer, Properties options)
+        throws JSONException {
+
+        writer.object();
+        writer.key("description"); writer.value("Selects a value from an attribute on an xml or html Element.");
+        writer.key("params"); writer.value("Element e, String s");
+        writer.key("returns"); writer.value("String attribute Value");
+        writer.endObject();
+    }
+}
+
diff --git a/main/src/com/google/refine/expr/functions/xml/XmlText.java b/main/src/com/google/refine/expr/functions/xml/XmlText.java
new file mode 100644
index 000000000..90dd0fa41
--- /dev/null
+++ b/main/src/com/google/refine/expr/functions/xml/XmlText.java
@@ -0,0 +1,75 @@
+/*
+
+Copyright 2010,2011 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+package com.google.refine.expr.functions.xml;
+
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONWriter;
+import org.jsoup.nodes.Element;
+
+import com.google.refine.expr.EvalError;
+import com.google.refine.grel.ControlFunctionRegistry;
+import com.google.refine.grel.Function;
+
+public class XmlText implements Function {
+    /** Returns Element.text() for the Element in args[0]; any other input yields an EvalError. */
+    @Override
+    public Object call(Properties bindings, Object[] args) {
+        if (args.length >= 1) {
+            Object o1 = args[0];
+            if (o1 instanceof Element) { // instanceof is already false for null
+                Element e1 = (Element) o1;
+                // Text from within e1, including all of its child elements.
+                return e1.text();
+            } else {
+                return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select(query) prior to using this function");
+            }
+        }
+        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an XML or HTML element as its first argument"); // previously a silent null
+    }
+
+    /** Writes this function's description, params and returns entries as one JSON object. */
+    @Override
+    public void write(JSONWriter writer, Properties options)
+        throws JSONException {
+
+        writer.object();
+        writer.key("description"); writer.value("Selects the text from within an element (including all child elements)");
+        writer.key("params"); writer.value("Element e");
+        writer.key("returns"); writer.value("String text");
+        writer.endObject();
+    }
+}
+
diff --git a/main/src/com/google/refine/grel/ControlFunctionRegistry.java b/main/src/com/google/refine/grel/ControlFunctionRegistry.java
index edb57e2f6..e52247024 100644
--- a/main/src/com/google/refine/grel/ControlFunctionRegistry.java
+++ b/main/src/com/google/refine/grel/ControlFunctionRegistry.java
@@ -61,12 +61,14 @@ import com.google.refine.expr.functions.booleans.Xor;
 import com.google.refine.expr.functions.date.DatePart;
 import com.google.refine.expr.functions.date.Inc;
 import com.google.refine.expr.functions.date.Now;
-import com.google.refine.expr.functions.html.HtmlAttr;
-import com.google.refine.expr.functions.html.HtmlText;
 import com.google.refine.expr.functions.html.InnerHtml;
-import com.google.refine.expr.functions.html.OwnText;
 import com.google.refine.expr.functions.html.ParseHtml;
-import com.google.refine.expr.functions.html.SelectHtml;
+import com.google.refine.expr.functions.xml.XmlAttr;
+import com.google.refine.expr.functions.xml.XmlText;
+import com.google.refine.expr.functions.xml.InnerXml;
+import com.google.refine.expr.functions.xml.OwnText;
+import com.google.refine.expr.functions.xml.ParseXml;
+import com.google.refine.expr.functions.xml.SelectXml;
 import com.google.refine.expr.functions.math.ACos;
 import com.google.refine.expr.functions.math.ASin;
 import com.google.refine.expr.functions.math.ATan;
@@ -236,11 +238,18 @@ public class ControlFunctionRegistry {
 
         // HTML functions from JSoup
         registerFunction("parseHtml", new ParseHtml());
-
registerFunction("select", new SelectHtml()); - registerFunction("htmlAttr", new HtmlAttr()); - registerFunction("htmlText", new HtmlText()); + registerFunction("select", new SelectXml()); + registerFunction("htmlAttr", new XmlAttr()); + registerFunction("htmlText", new XmlText()); registerFunction("innerHtml", new InnerHtml()); registerFunction("ownText", new OwnText()); + + // XML functions from JSoup + registerFunction("parseXml", new ParseXml()); + registerFunction("selectx", new SelectXml()); + registerFunction("xmlAttr", new XmlAttr()); + registerFunction("xmlText", new XmlText()); + registerFunction("innerXml", new InnerXml()); registerFunction("indexOf", new IndexOf()); registerFunction("lastIndexOf", new LastIndexOf());