Add XML parsing and update HTML parsing to use same classes
This commit is contained in:
parent
5678c44673
commit
bae3dbb812
@ -40,6 +40,7 @@ import org.json.JSONWriter;
|
|||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
import com.google.refine.expr.EvalError;
|
import com.google.refine.expr.EvalError;
|
||||||
|
import com.google.refine.expr.functions.xml.InnerXml;
|
||||||
import com.google.refine.grel.ControlFunctionRegistry;
|
import com.google.refine.grel.ControlFunctionRegistry;
|
||||||
import com.google.refine.grel.Function;
|
import com.google.refine.grel.Function;
|
||||||
|
|
||||||
@ -50,9 +51,7 @@ public class InnerHtml implements Function {
|
|||||||
if (args.length >= 1) {
|
if (args.length >= 1) {
|
||||||
Object o1 = args[0];
|
Object o1 = args[0];
|
||||||
if (o1 != null && o1 instanceof Element) {
|
if (o1 != null && o1 instanceof Element) {
|
||||||
Element e1 = (Element)o1;
|
return new InnerXml().call(bindings, args, "html");
|
||||||
return e1.html();
|
|
||||||
|
|
||||||
}else{
|
}else{
|
||||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
|
||||||
}
|
}
|
||||||
|
@ -39,19 +39,22 @@ import org.json.JSONException;
|
|||||||
import org.json.JSONWriter;
|
import org.json.JSONWriter;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
|
|
||||||
|
import com.google.refine.expr.EvalError;
|
||||||
|
import com.google.refine.expr.functions.xml.ParseXml;
|
||||||
|
import com.google.refine.grel.ControlFunctionRegistry;
|
||||||
import com.google.refine.grel.Function;
|
import com.google.refine.grel.Function;
|
||||||
|
|
||||||
public class ParseHtml implements Function {
|
public class ParseHtml implements Function {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object call(Properties bindings, Object[] args) {
|
public Object call(Properties bindings, Object[] args) {
|
||||||
if (args.length >= 1) {
|
if (args.length == 1) {
|
||||||
Object o1 = args[0];
|
Object o1 = args[0];
|
||||||
if (o1 != null && o1 instanceof String) {
|
if (o1 != null && o1 instanceof String) {
|
||||||
return Jsoup.parse(o1.toString());
|
return new ParseXml().call(bindings,args,"html");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single String as an argument");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
84
main/src/com/google/refine/expr/functions/xml/InnerXml.java
Normal file
84
main/src/com/google/refine/expr/functions/xml/InnerXml.java
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.expr.functions.xml;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
|
import com.google.refine.expr.EvalError;
|
||||||
|
import com.google.refine.grel.ControlFunctionRegistry;
|
||||||
|
import com.google.refine.grel.Function;
|
||||||
|
|
||||||
|
public class InnerXml implements Function {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object call(Properties bindings, Object[] args) {
|
||||||
|
return call(bindings,args,"xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object call(Properties bindings, Object[] args, String mode) {
|
||||||
|
if (args.length == 1) {
|
||||||
|
Object o1 = args[0];
|
||||||
|
if (o1 != null && o1 instanceof Element) {
|
||||||
|
Element e1 = (Element)o1;
|
||||||
|
if(mode == "xml") {
|
||||||
|
return e1.children().toString();
|
||||||
|
} else if (mode == "html") {
|
||||||
|
return e1.html();
|
||||||
|
} else {
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " unable to determine whether XML or HTML is being used.");
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select(query) prior to using this function");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single XML or HTML element as an argument");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
|
||||||
|
writer.object();
|
||||||
|
writer.key("description"); writer.value("The innerXml/innerHtml of an XML/HTML element");
|
||||||
|
writer.key("params"); writer.value("Element e");
|
||||||
|
writer.key("returns"); writer.value("String innerXml/innerHtml");
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
75
main/src/com/google/refine/expr/functions/xml/OwnText.java
Normal file
75
main/src/com/google/refine/expr/functions/xml/OwnText.java
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.expr.functions.xml;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
|
import com.google.refine.expr.EvalError;
|
||||||
|
import com.google.refine.grel.ControlFunctionRegistry;
|
||||||
|
import com.google.refine.grel.Function;
|
||||||
|
|
||||||
|
public class OwnText implements Function {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object call(Properties bindings, Object[] args) {
|
||||||
|
if (args.length >= 1) {
|
||||||
|
Object o1 = args[0];
|
||||||
|
if (o1 != null && o1 instanceof Element) {
|
||||||
|
Element e1 = (Element)o1;
|
||||||
|
return e1.ownText();
|
||||||
|
|
||||||
|
}else{
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an HTML Element. Please first use parseHtml(string) and select(query) prior to using this function");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
|
||||||
|
writer.object();
|
||||||
|
writer.key("description"); writer.value("Gets the text owned by this XML/HTML element only; does not get the combined text of all children.");
|
||||||
|
writer.key("params"); writer.value("Element e");
|
||||||
|
writer.key("returns"); writer.value("String ownText");
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
82
main/src/com/google/refine/expr/functions/xml/ParseXml.java
Normal file
82
main/src/com/google/refine/expr/functions/xml/ParseXml.java
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.expr.functions.xml;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.parser.Parser;
|
||||||
|
|
||||||
|
import com.google.refine.expr.EvalError;
|
||||||
|
import com.google.refine.grel.ControlFunctionRegistry;
|
||||||
|
import com.google.refine.grel.Function;
|
||||||
|
|
||||||
|
public class ParseXml implements Function {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object call(Properties bindings, Object[] args) {
|
||||||
|
return call(bindings,args,"xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object call(Properties bindings, Object[] args, String mode) {
|
||||||
|
if (args.length == 1) {
|
||||||
|
Object o1 = args[0];
|
||||||
|
if (o1 != null && o1 instanceof String) {
|
||||||
|
if (mode == "html") {
|
||||||
|
return Jsoup.parse(o1.toString());
|
||||||
|
} else if (mode == "xml") {
|
||||||
|
return Jsoup.parse(o1.toString(), "",Parser.xmlParser());
|
||||||
|
} else {
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " unable to identify which parser to use");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a single String as an argument");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
|
||||||
|
writer.object();
|
||||||
|
writer.key("description"); writer.value("Parses a string as XML");
|
||||||
|
writer.key("params"); writer.value("string s");
|
||||||
|
writer.key("returns"); writer.value("XML object");
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
77
main/src/com/google/refine/expr/functions/xml/SelectXml.java
Normal file
77
main/src/com/google/refine/expr/functions/xml/SelectXml.java
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.expr.functions.xml;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
|
import com.google.refine.expr.EvalError;
|
||||||
|
import com.google.refine.grel.ControlFunctionRegistry;
|
||||||
|
import com.google.refine.grel.Function;
|
||||||
|
|
||||||
|
public class SelectXml implements Function {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object call(Properties bindings, Object[] args) {
|
||||||
|
if (args.length == 2) {
|
||||||
|
Object o1 = args[0];
|
||||||
|
Object o2 = args[1];
|
||||||
|
if (o1 != null && o1 instanceof Element) {
|
||||||
|
Element e1 = (Element)o1;
|
||||||
|
if(o2 != null && o2 instanceof String){
|
||||||
|
return e1.select(o2.toString());
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml()");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects two arguments");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
|
||||||
|
writer.object();
|
||||||
|
writer.key("description"); writer.value("Selects an element from an XML or HTML element using selector syntax.");
|
||||||
|
writer.key("params"); writer.value("Element e, String s");
|
||||||
|
writer.key("returns"); writer.value("XML/HTML Elements");
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
77
main/src/com/google/refine/expr/functions/xml/XmlAttr.java
Normal file
77
main/src/com/google/refine/expr/functions/xml/XmlAttr.java
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.expr.functions.xml;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
|
import com.google.refine.expr.EvalError;
|
||||||
|
import com.google.refine.grel.ControlFunctionRegistry;
|
||||||
|
import com.google.refine.grel.Function;
|
||||||
|
|
||||||
|
public class XmlAttr implements Function {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object call(Properties bindings, Object[] args) {
|
||||||
|
if (args.length >= 2) {
|
||||||
|
Object o1 = args[0];
|
||||||
|
Object o2 = args[1];
|
||||||
|
if (o1 != null && o1 instanceof Element) {
|
||||||
|
Element e1 = (Element)o1;
|
||||||
|
if(o2 != null && o2 instanceof String){
|
||||||
|
return e1.attr(o2.toString());
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select() prior to using this function");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
|
||||||
|
writer.object();
|
||||||
|
writer.key("description"); writer.value("Selects a value from an attribute on an xml or html Element.");
|
||||||
|
writer.key("params"); writer.value("Element e, String s");
|
||||||
|
writer.key("returns"); writer.value("String attribute Value");
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
75
main/src/com/google/refine/expr/functions/xml/XmlText.java
Normal file
75
main/src/com/google/refine/expr/functions/xml/XmlText.java
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010,2011 Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.expr.functions.xml;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
|
import com.google.refine.expr.EvalError;
|
||||||
|
import com.google.refine.grel.ControlFunctionRegistry;
|
||||||
|
import com.google.refine.grel.Function;
|
||||||
|
|
||||||
|
public class XmlText implements Function {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object call(Properties bindings, Object[] args) {
|
||||||
|
if (args.length >= 1) {
|
||||||
|
Object o1 = args[0];
|
||||||
|
if (o1 != null && o1 instanceof Element) {
|
||||||
|
Element e1 = (Element)o1;
|
||||||
|
return e1.text();
|
||||||
|
|
||||||
|
}else{
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed as the first parameter is not an XML or HTML Element. Please first use parseXml() or parseHtml() and select(query) prior to using this function");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
|
||||||
|
writer.object();
|
||||||
|
writer.key("description"); writer.value("Selects the text from within an element (including all child elements)");
|
||||||
|
writer.key("params"); writer.value("Element e");
|
||||||
|
writer.key("returns"); writer.value("String text");
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -61,12 +61,14 @@ import com.google.refine.expr.functions.booleans.Xor;
|
|||||||
import com.google.refine.expr.functions.date.DatePart;
|
import com.google.refine.expr.functions.date.DatePart;
|
||||||
import com.google.refine.expr.functions.date.Inc;
|
import com.google.refine.expr.functions.date.Inc;
|
||||||
import com.google.refine.expr.functions.date.Now;
|
import com.google.refine.expr.functions.date.Now;
|
||||||
import com.google.refine.expr.functions.html.HtmlAttr;
|
|
||||||
import com.google.refine.expr.functions.html.HtmlText;
|
|
||||||
import com.google.refine.expr.functions.html.InnerHtml;
|
import com.google.refine.expr.functions.html.InnerHtml;
|
||||||
import com.google.refine.expr.functions.html.OwnText;
|
|
||||||
import com.google.refine.expr.functions.html.ParseHtml;
|
import com.google.refine.expr.functions.html.ParseHtml;
|
||||||
import com.google.refine.expr.functions.html.SelectHtml;
|
import com.google.refine.expr.functions.xml.XmlAttr;
|
||||||
|
import com.google.refine.expr.functions.xml.XmlText;
|
||||||
|
import com.google.refine.expr.functions.xml.InnerXml;
|
||||||
|
import com.google.refine.expr.functions.xml.OwnText;
|
||||||
|
import com.google.refine.expr.functions.xml.ParseXml;
|
||||||
|
import com.google.refine.expr.functions.xml.SelectXml;
|
||||||
import com.google.refine.expr.functions.math.ACos;
|
import com.google.refine.expr.functions.math.ACos;
|
||||||
import com.google.refine.expr.functions.math.ASin;
|
import com.google.refine.expr.functions.math.ASin;
|
||||||
import com.google.refine.expr.functions.math.ATan;
|
import com.google.refine.expr.functions.math.ATan;
|
||||||
@ -236,12 +238,19 @@ public class ControlFunctionRegistry {
|
|||||||
|
|
||||||
// HTML functions from JSoup
|
// HTML functions from JSoup
|
||||||
registerFunction("parseHtml", new ParseHtml());
|
registerFunction("parseHtml", new ParseHtml());
|
||||||
registerFunction("select", new SelectHtml());
|
registerFunction("select", new SelectXml());
|
||||||
registerFunction("htmlAttr", new HtmlAttr());
|
registerFunction("htmlAttr", new XmlAttr());
|
||||||
registerFunction("htmlText", new HtmlText());
|
registerFunction("htmlText", new XmlText());
|
||||||
registerFunction("innerHtml", new InnerHtml());
|
registerFunction("innerHtml", new InnerHtml());
|
||||||
registerFunction("ownText", new OwnText());
|
registerFunction("ownText", new OwnText());
|
||||||
|
|
||||||
|
// XML functions from JSoup
|
||||||
|
registerFunction("parseXml", new ParseXml());
|
||||||
|
registerFunction("selectx", new SelectXml());
|
||||||
|
registerFunction("xmlAttr", new XmlAttr());
|
||||||
|
registerFunction("xmlText", new XmlText());
|
||||||
|
registerFunction("innerXml", new InnerXml());
|
||||||
|
|
||||||
registerFunction("indexOf", new IndexOf());
|
registerFunction("indexOf", new IndexOf());
|
||||||
registerFunction("lastIndexOf", new LastIndexOf());
|
registerFunction("lastIndexOf", new LastIndexOf());
|
||||||
registerFunction("startsWith", new StartsWith());
|
registerFunction("startsWith", new StartsWith());
|
||||||
|
Loading…
Reference in New Issue
Block a user