From f6e4a1f7e49c9068a612d739292d01ffb91d2245 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Najborowski?=
Date: Sun, 30 Jan 2022 21:12:55 +0100
Subject: [PATCH] add unversioned files from new version of OpenRefine

---
 .../wikidata/manifests/ManifestV2.java        | 167 ++++++++++++
 .../schema/WbEntityIdValueConstant.java       |  59 +++++
 .../entityvalues/SuggestedFormIdValue.java    |  36 +++
 .../entityvalues/SuggestedLexemeIdValue.java  |  18 ++
 .../SuggestedMediaInfoIdValue.java            |  48 ++++
 .../entityvalues/SuggestedSenseIdValue.java   |  36 +++
 .../TermedStatementEntityUpdateBuilder.java   | 233 ++++++++++++++++++
 .../refine/expr/functions/xml/Parent.java     |  85 +++++++
 .../refine/expr/functions/xml/ScriptText.java |  90 +++++++
 9 files changed, 772 insertions(+)
 create mode 100644 OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/manifests/ManifestV2.java
 create mode 100644 OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/WbEntityIdValueConstant.java
 create mode 100644 OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedFormIdValue.java
 create mode 100644 OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedLexemeIdValue.java
 create mode 100644 OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedMediaInfoIdValue.java
 create mode 100644 OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedSenseIdValue.java
 create mode 100644 OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/updates/TermedStatementEntityUpdateBuilder.java
 create mode 100644 OpenRefine/main/src/com/google/refine/expr/functions/xml/Parent.java
 create mode 100644 OpenRefine/main/src/com/google/refine/expr/functions/xml/ScriptText.java

diff --git a/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/manifests/ManifestV2.java b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/manifests/ManifestV2.java
new file mode 100644
index 000000000..3dadc1cfa
--- /dev/null
+++ b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/manifests/ManifestV2.java
@@ -0,0 +1,167 @@
+package org.openrefine.wikidata.manifests;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.JsonMappingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.refine.util.ParsingUtilities;
+
+/**
+ * {@link Manifest} implementation backed by a JSON document with "version",
+ * "mediawiki", "wikibase", "entity_types" and "editgroups" sections. All values
+ * are read once, in the constructor.
+ */
+public class ManifestV2 implements Manifest {
+
+    private String version;
+    private String name;
+    private String siteIri;
+    private int maxlag;
+    private String instanceOfPid;
+    private String subclassOfPid;
+    private String mediaWikiApiEndpoint;
+    private String editGroupsUrlSchema;
+
+    private Map<String, EntityTypeSettings> entityTypeSettings;
+
+    private Map<String, String> constraintsRelatedIdMap = new HashMap<>();
+
+    public ManifestV2(JsonNode manifest) throws JsonParseException, JsonMappingException, IOException {
+        version = manifest.path("version").textValue();
+
+        JsonNode mediawiki = manifest.path("mediawiki");
+        name = mediawiki.path("name").textValue();
+        mediaWikiApiEndpoint = mediawiki.path("api").textValue();
+
+        JsonNode wikibase = manifest.path("wikibase");
+        siteIri = wikibase.path("site_iri").textValue();
+        maxlag = wikibase.path("maxlag").intValue();
+        JsonNode properties = wikibase.path("properties");
+        instanceOfPid = properties.path("instance_of").textValue();
+        subclassOfPid = properties.path("subclass_of").textValue();
+
+        JsonNode constraints = wikibase.path("constraints");
+        Iterator<Map.Entry<String, JsonNode>> fields = constraints.fields();
+        while (fields.hasNext()) {
+            Map.Entry<String, JsonNode> entry = fields.next();
+            String constraintName = entry.getKey();
+            String value = entry.getValue().textValue();
+            constraintsRelatedIdMap.put(constraintName, value);
+        }
+
+        JsonNode entityTypesJson = manifest.path("entity_types");
+        entityTypeSettings = ParsingUtilities.mapper.readValue(
+                ParsingUtilities.mapper.treeAsTokens(entityTypesJson),
+                new TypeReference<Map<String, EntityTypeSettings>>() {});
+        JsonNode editGroups = manifest.path("editgroups");
+        editGroupsUrlSchema = editGroups.path("url_schema").textValue();
+    }
+
+    private static class EntityTypeSettings {
+
+        protected String siteIri;
+        protected String reconEndpoint;
+        protected String mediaWikiApi;
+
+        @JsonCreator
+        protected EntityTypeSettings(
+                @JsonProperty("site_iri") String siteIri,
+                @JsonProperty("reconciliation_endpoint") String reconEndpoint,
+                @JsonProperty("mediawiki_api") String mediawikiEndpoint) {
+            this.siteIri = siteIri;
+            this.reconEndpoint = reconEndpoint;
+            this.mediaWikiApi = mediawikiEndpoint;
+        }
+    }
+
+    @Override
+    public String getVersion() {
+        return version;
+    }
+
+    @Override
+    public String getName() {
+        return name;
+    }
+
+    @Override
+    public String getSiteIri() {
+        return siteIri;
+    }
+
+    @Override
+    public int getMaxlag() {
+        return maxlag;
+    }
+
+    @Override
+    public String getInstanceOfPid() {
+        return instanceOfPid;
+    }
+
+    @Override
+    public String getSubclassOfPid() {
+        return subclassOfPid;
+    }
+
+    @Override
+    public String getMediaWikiApiEndpoint() {
+        return mediaWikiApiEndpoint;
+    }
+
+    @Override
+    public String getReconServiceEndpoint() {
+        return getReconServiceEndpoint(ITEM_TYPE);
+    }
+
+    @Override
+    public String getConstraintsRelatedId(String name) {
+        return constraintsRelatedIdMap.get(name);
+    }
+
+    @Override
+    public String getEditGroupsUrlSchema() {
+        return editGroupsUrlSchema;
+    }
+
+    @Override
+    public String getReconServiceEndpoint(String entityType) {
+        EntityTypeSettings setting = entityTypeSettings.get(entityType);
+        if (setting == null) {
+            return null;
+        }
+        return setting.reconEndpoint;
+    }
+
+    @Override
+    public String getEntityTypeSiteIri(String entityType) {
+        EntityTypeSettings setting = entityTypeSettings.get(entityType);
+        if (setting == null) {
+            return null;
+        }
+        return setting.siteIri;
+    }
+
+    @Override
+    public String getMediaWikiApiEndpoint(String entityType) {
+        EntityTypeSettings setting = entityTypeSettings.get(entityType);
+        if (setting == null) {
+            return null;
+        }
+        return setting.mediaWikiApi != null ? setting.mediaWikiApi : getMediaWikiApiEndpoint();
+    }
+
+    @Override
+    public List<String> getAvailableEntityTypes() {
+        return entityTypeSettings.keySet().stream().collect(Collectors.toList());
+    }
+}
diff --git a/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/WbEntityIdValueConstant.java b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/WbEntityIdValueConstant.java
new file mode 100644
index 000000000..1c02f0b70
--- /dev/null
+++ b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/WbEntityIdValueConstant.java
@@ -0,0 +1,59 @@
+package org.openrefine.wikidata.schema;
+
+import org.jsoup.helper.Validate;
+import org.openrefine.wikidata.schema.entityvalues.SuggestedEntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * A constant entity id value, that does not change depending on the row
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WbEntityIdValueConstant implements WbExpression<EntityIdValue> {
+
+    private String id;
+    private String label;
+
+    @JsonCreator
+    public WbEntityIdValueConstant(
+            @JsonProperty("id") String id,
+            @JsonProperty("label") String label) {
+        Validate.notNull(id, "id cannot be null");
+        this.id = id;
+        Validate.notNull(label, "label cannot be null");
+        this.label = label;
+    }
+
+    @Override
+    public EntityIdValue evaluate(ExpressionContext ctxt) {
+        return SuggestedEntityIdValue.build(id, ctxt.getBaseIRI(), label);
+    }
+
+    @JsonProperty("id")
+    public String getId() {
+        return id;
+    }
+
+    @JsonProperty("label")
+    public String getLabel() {
+        return label;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (!(other instanceof WbEntityIdValueConstant)) {
+            return false;
+        }
+        WbEntityIdValueConstant otherConstant = (WbEntityIdValueConstant) other;
+        return id.equals(otherConstant.getId()) && label.equals(otherConstant.getLabel());
+    }
+
+    @Override
+    public int hashCode() {
+        return id.hashCode() + label.hashCode();
+    }
+}
diff --git a/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedFormIdValue.java b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedFormIdValue.java
new file mode 100644
index 000000000..5a798cb94
--- /dev/null
+++ b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedFormIdValue.java
@@ -0,0 +1,36 @@
+package org.openrefine.wikidata.schema.entityvalues;
+
+import org.wikidata.wdtk.datamodel.implementation.EntityIdValueImpl;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.FormIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.LexemeIdValue;
+
+/**
+ * A form id value that carries a label alongside its id. The id is parsed in the
+ * constructor, so an id that does not designate a form is rejected immediately.
+ */
+public class SuggestedFormIdValue extends SuggestedEntityIdValue implements FormIdValue {
+
+    private FormIdValue parsedId;
+
+    public SuggestedFormIdValue(String id, String siteIRI, String label) {
+        super(id, siteIRI, label);
+        EntityIdValue parsed = EntityIdValueImpl.fromId(id, siteIRI);
+        if (parsed instanceof FormIdValue) {
+            parsedId = (FormIdValue) parsed;
+        } else {
+            throw new IllegalArgumentException(String.format("Invalid id for a form: %s", id));
+        }
+    }
+
+    @Override
+    public String getEntityType() {
+        return ET_FORM;
+    }
+
+    @Override
+    public LexemeIdValue getLexemeId() {
+        return parsedId.getLexemeId();
+    }
+
+}
diff --git a/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedLexemeIdValue.java b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedLexemeIdValue.java
new file mode 100644
index 000000000..fa7a34241
--- /dev/null
+++ b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedLexemeIdValue.java
@@ -0,0 +1,18 @@
+package org.openrefine.wikidata.schema.entityvalues;
+
+import org.wikidata.wdtk.datamodel.interfaces.LexemeIdValue;
+
+/**
+ * A lexeme id value that carries a label alongside its id.
+ */
+public class SuggestedLexemeIdValue extends SuggestedEntityIdValue implements LexemeIdValue {
+
+    public SuggestedLexemeIdValue(String id, String siteIRI, String label) {
+        super(id, siteIRI, label);
+    }
+
+    @Override
+    public String getEntityType() {
+        return ET_LEXEME;
+    }
+}
diff --git a/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedMediaInfoIdValue.java b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedMediaInfoIdValue.java
new file mode 100644
index 000000000..0da792ade
--- /dev/null
+++ b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedMediaInfoIdValue.java
@@ -0,0 +1,48 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema.entityvalues;
+
+import org.wikidata.wdtk.datamodel.helpers.ToString;
+import org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue;
+
+/**
+ * A media info id value that carries a label alongside its id.
+ */
+public class SuggestedMediaInfoIdValue extends SuggestedEntityIdValue implements MediaInfoIdValue {
+
+    public SuggestedMediaInfoIdValue(String id, String siteIRI, String label) {
+        super(id, siteIRI, label);
+    }
+
+    @Override
+    public String getEntityType() {
+        return ET_MEDIA_INFO;
+    }
+
+    @Override
+    public String toString() {
+        return "suggested " + ToString.toString(this) + " (\"" + getLabel() + "\")";
+    }
+
+}
diff --git a/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedSenseIdValue.java b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedSenseIdValue.java
new file mode 100644
index 000000000..437556bc2
--- /dev/null
+++ b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedSenseIdValue.java
@@ -0,0 +1,36 @@
+package org.openrefine.wikidata.schema.entityvalues;
+
+import org.wikidata.wdtk.datamodel.implementation.EntityIdValueImpl;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.LexemeIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.SenseIdValue;
+
+/**
+ * A sense id value that carries a label alongside its id. The id is parsed in the
+ * constructor, so an id that does not designate a sense is rejected immediately.
+ */
+public class SuggestedSenseIdValue extends SuggestedEntityIdValue implements SenseIdValue {
+
+    private SenseIdValue parsedId;
+
+    public SuggestedSenseIdValue(String id, String siteIRI, String label) {
+        super(id, siteIRI, label);
+        EntityIdValue parsed = EntityIdValueImpl.fromId(id, siteIRI);
+        if (parsed instanceof SenseIdValue) {
+            parsedId = (SenseIdValue) parsed;
+        } else {
+            throw new IllegalArgumentException(String.format("Invalid id for a sense: %s", id));
+        }
+    }
+
+    @Override
+    public LexemeIdValue getLexemeId() {
+        return parsedId.getLexemeId();
+    }
+
+    @Override
+    public String getEntityType() {
+        return ET_SENSE; // entity-type constant, like ET_FORM / ET_LEXEME in the sibling classes
+    }
+
+}
diff --git a/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/updates/TermedStatementEntityUpdateBuilder.java b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/updates/TermedStatementEntityUpdateBuilder.java
new file mode 100644
index 000000000..10873f4e9
--- /dev/null
+++ b/OpenRefine/extensions/wikidata/src/org/openrefine/wikidata/updates/TermedStatementEntityUpdateBuilder.java
@@ -0,0 +1,233 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.updates;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.jsoup.helper.Validate;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+/**
+ * Constructs a {@link TermedStatementEntityUpdate} incrementally.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class TermedStatementEntityUpdateBuilder {
+
+    private EntityIdValue id;
+    private List<Statement> addedStatements;
+    private Set<Statement> deletedStatements;
+    private Set<MonolingualTextValue> labels;
+    private Set<MonolingualTextValue> labelsIfNew;
+    private Set<MonolingualTextValue> descriptions;
+    private Set<MonolingualTextValue> descriptionsIfNew;
+    private Set<MonolingualTextValue> aliases;
+    private boolean built;
+
+    /**
+     * Constructor.
+     *
+     * @param id
+     *            the subject of the document. It can be a reconciled item value for
+     *            new items.
+     */
+    public TermedStatementEntityUpdateBuilder(EntityIdValue id) {
+        Validate.notNull(id);
+        this.id = id;
+        this.addedStatements = new ArrayList<>();
+        this.deletedStatements = new HashSet<>();
+        this.labels = new HashSet<>();
+        this.labelsIfNew = new HashSet<>();
+        this.descriptions = new HashSet<>();
+        this.descriptionsIfNew = new HashSet<>();
+        this.aliases = new HashSet<>();
+        this.built = false;
+    }
+
+    /**
+     * Mark a statement for insertion. If it matches an existing statement, it will
+     * update the statement instead.
+     *
+     * @param statement
+     *            the statement to add or update
+     */
+    public TermedStatementEntityUpdateBuilder addStatement(Statement statement) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        addedStatements.add(statement);
+        return this;
+    }
+
+    /**
+     * Mark a statement for deletion. If no such statement exists, nothing will be
+     * deleted.
+     *
+     * @param statement
+     *            the statement to delete
+     */
+    public TermedStatementEntityUpdateBuilder deleteStatement(Statement statement) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        deletedStatements.add(statement);
+        return this;
+    }
+
+    /**
+     * Add a set of statements, as in {@link #addStatement(Statement)}.
+     *
+     * @param statements
+     *            the statements to add
+     */
+    public TermedStatementEntityUpdateBuilder addStatements(Set<Statement> statements) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        addedStatements.addAll(statements);
+        return this;
+    }
+
+    /**
+     * Delete a set of statements, as in {@link #deleteStatement(Statement)}.
+     *
+     * @param statements
+     *            the statements to delete
+     */
+    public TermedStatementEntityUpdateBuilder deleteStatements(Set<Statement> statements) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        deletedStatements.addAll(statements);
+        return this;
+    }
+
+    /**
+     * Adds a label to the item.
+     *
+     * @param label
+     *            the label to add
+     * @param override
+     *            whether the label should be added even if there is already a label in that language
+     */
+    public TermedStatementEntityUpdateBuilder addLabel(MonolingualTextValue label, boolean override) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        if (override) {
+            labels.add(label);
+        } else {
+            labelsIfNew.add(label);
+        }
+        return this;
+    }
+
+    /**
+     * Adds a set of labels to the item.
+     *
+     * @param labels
+     *            the labels to add
+     * @param override
+     *            whether the label should be added even if there is already a label in that language
+     */
+    public TermedStatementEntityUpdateBuilder addLabels(Set<MonolingualTextValue> labels, boolean override) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        if (override) {
+            this.labels.addAll(labels);
+        } else {
+            labelsIfNew.addAll(labels);
+        }
+        return this;
+    }
+
+    /**
+     * Adds a description to the item.
+     *
+     * @param description
+     *            the description to add
+     * @param override
+     *            whether the description should be added even if there is already a description in that language
+     */
+    public TermedStatementEntityUpdateBuilder addDescription(MonolingualTextValue description, boolean override) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        if (override) {
+            descriptions.add(description);
+        } else {
+            descriptionsIfNew.add(description);
+        }
+        return this;
+    }
+
+    /**
+     * Adds a set of descriptions to the item.
+     *
+     * @param descriptions
+     *            the descriptions to add
+     * @param override
+     *            whether the description should be added even if there is already a description in that language
+     */
+    public TermedStatementEntityUpdateBuilder addDescriptions(Set<MonolingualTextValue> descriptions, boolean override) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        if (override) {
+            this.descriptions.addAll(descriptions);
+        } else {
+            descriptionsIfNew.addAll(descriptions);
+        }
+        return this;
+    }
+
+    /**
+     * Adds an alias to the item. It will be added to any existing aliases in that
+     * language.
+     *
+     * @param alias
+     *            the alias to add
+     */
+    public TermedStatementEntityUpdateBuilder addAlias(MonolingualTextValue alias) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        aliases.add(alias);
+        return this;
+    }
+
+    /**
+     * Adds a set of aliases to the item. They will be added to any existing
+     * aliases in each language.
+     *
+     * @param aliases
+     *            the aliases to add
+     */
+    public TermedStatementEntityUpdateBuilder addAliases(Set<MonolingualTextValue> aliases) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        this.aliases.addAll(aliases);
+        return this;
+    }
+
+    /**
+     * Constructs the {@link TermedStatementEntityUpdate}.
+     * Once built, the add and delete methods of this builder fail their validation.
+     * @return the update holding everything registered on this builder
+     */
+    public TermedStatementEntityUpdate build() {
+        built = true;
+        return new TermedStatementEntityUpdate(id, addedStatements, deletedStatements, labels, labelsIfNew, descriptions, descriptionsIfNew, aliases);
+    }
+
+}
diff --git a/OpenRefine/main/src/com/google/refine/expr/functions/xml/Parent.java b/OpenRefine/main/src/com/google/refine/expr/functions/xml/Parent.java
new file mode 100644
index 000000000..fe0258c8b
--- /dev/null
+++ b/OpenRefine/main/src/com/google/refine/expr/functions/xml/Parent.java
@@ -0,0 +1,85 @@
+/*
+
+Copyright 2010, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+package com.google.refine.expr.functions.xml;
+
+import java.util.Properties;
+
+import org.jsoup.nodes.Element;
+
+import com.google.refine.expr.EvalError;
+import com.google.refine.grel.ControlFunctionRegistry;
+import com.google.refine.grel.Function;
+import com.google.refine.expr.functions.Type;
+
+/**
+ * Function taking a single jsoup {@link Element} and returning its parent element.
+ */
+public class Parent implements Function {
+
+    @Override
+    public Object call(Properties bindings, Object[] args) {
+
+        if (args.length == 1) {
+            Object o1 = args[0];
+            if (o1 instanceof Element) {
+                Element e1 = (Element) o1;
+                return e1.parent();
+
+            } else {
+
+                return new EvalError(ControlFunctionRegistry.getFunctionName(this) + "() cannot work with this '"
+                        + new Type().call(bindings, args)
+                        + "'"
+                        + " but instead needs a jsoup XML or HTML Element to work with."
+                        + " For arrays, you might select an index or loop over them with forEach().");
+            }
+        }
+        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one argument");
+    }
+
+    @Override
+    public String getDescription() {
+        return "Returns the parent node or null if no parent. Use it in conjunction with parseHtml() and select() to provide an element.";
+    }
+
+    @Override
+    public String getParams() {
+        return "element e";
+    }
+
+    @Override
+    public String getReturns() {
+        return "HTML/XML Element";
+    }
+}
diff --git a/OpenRefine/main/src/com/google/refine/expr/functions/xml/ScriptText.java b/OpenRefine/main/src/com/google/refine/expr/functions/xml/ScriptText.java
new file mode 100644
index 000000000..cdbeb3fef
--- /dev/null
+++ b/OpenRefine/main/src/com/google/refine/expr/functions/xml/ScriptText.java
@@ -0,0 +1,90 @@
+/*
+
+Copyright 2010, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+package com.google.refine.expr.functions.xml;
+
+import java.util.Properties;
+
+import org.jsoup.nodes.Element;
+
+import com.google.refine.expr.EvalError;
+import com.google.refine.grel.ControlFunctionRegistry;
+import com.google.refine.grel.Function;
+import com.google.refine.expr.functions.Type;
+
+/**
+ * Function taking a single jsoup {@link Element} and returning the result of its data() call.
+ */
+public class ScriptText implements Function {
+
+    @Override
+    public Object call(Properties bindings, Object[] args) {
+
+        if (args.length == 1) {
+            Object o1 = args[0];
+            if (o1 instanceof Element) {
+                Element e1 = (Element) o1;
+                return e1.data();
+
+            } else {
+
+                return new EvalError(ControlFunctionRegistry.getFunctionName(this) + "() cannot work with this '"
+                        + new Type().call(bindings, args) + "'"
+                        + " but instead needs a jsoup DataNode from style, script tags, etc. to work with."
+                        + " See https://jsoup.org/apidocs/org/jsoup/nodes/Element.html#data()"
+                        + " For arrays, you might select an index or loop over them with forEach()."
+                        + " dataNodes() is currently not implemented.");
+            }
+        }
+        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one argument");
+    }
+
+    @Override
+    public String getDescription() {
+        // the description needs to be valid HTML, hence the escaped angle brackets.
+        return "Returns the combined data of an HTML/XML Element. Data is e.g. the inside of a &lt;script&gt; tag.\n"
+                + "Note that data is NOT the text of the element.\n"
+                + "Use htmlText() to get the text that would be visible to a user, and scriptText() for the contents of &lt;script&gt;, &lt;style&gt;, etc.\n"
+                + "Use scriptText() in conjunction with parseHtml() and select().";
+    }
+
+    @Override
+    public String getParams() {
+        return "element e";
+    }
+
+    @Override
+    public String getReturns() {
+        return "string";
+    }
+}