add unversioned files from new version of OpenRefine

This commit is contained in:
Michał Najborowski 2022-01-30 21:12:55 +01:00
parent b37b4153bf
commit f6e4a1f7e4
9 changed files with 751 additions and 0 deletions

View File

@ -0,0 +1,162 @@
package org.openrefine.wikidata.manifests;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.refine.util.ParsingUtilities;
/**
 * A {@link Manifest} whose values are read once, at construction time, from a
 * JSON document with the top-level sections {@code version}, {@code mediawiki},
 * {@code wikibase}, {@code entity_types} and {@code editgroups}.
 */
public class ManifestV2 implements Manifest {

    private String version;
    private String name;
    private String siteIri;
    private int maxlag;
    private String instanceOfPid;
    private String subclassOfPid;
    private String mediaWikiApiEndpoint;
    private String editGroupsUrlSchema;

    /** Per-entity-type settings, keyed by the entity type name used in the manifest. */
    private Map<String, EntityTypeSettings> entityTypeSettings;
    /** Maps each key of the {@code wikibase.constraints} object to its textual value. */
    private Map<String, String> constraintsRelatedIdMap = new HashMap<>();

    /**
     * Reads all settings out of the supplied JSON tree.
     *
     * @param manifest
     *            the root node of the manifest document
     * @throws JsonParseException
     *             if the {@code entity_types} section cannot be parsed
     * @throws JsonMappingException
     *             if the {@code entity_types} section cannot be mapped to settings objects
     * @throws IOException
     *             on any other failure while reading the {@code entity_types} section
     */
    public ManifestV2(JsonNode manifest) throws JsonParseException, JsonMappingException, IOException {
        JsonNode mediawikiNode = manifest.path("mediawiki");
        JsonNode wikibaseNode = manifest.path("wikibase");
        JsonNode propertiesNode = wikibaseNode.path("properties");
        JsonNode constraintsNode = wikibaseNode.path("constraints");
        JsonNode entityTypesNode = manifest.path("entity_types");
        JsonNode editGroupsNode = manifest.path("editgroups");

        this.version = manifest.path("version").textValue();
        this.name = mediawikiNode.path("name").textValue();
        this.mediaWikiApiEndpoint = mediawikiNode.path("api").textValue();
        this.siteIri = wikibaseNode.path("site_iri").textValue();
        this.maxlag = wikibaseNode.path("maxlag").intValue();
        this.instanceOfPid = propertiesNode.path("instance_of").textValue();
        this.subclassOfPid = propertiesNode.path("subclass_of").textValue();

        // Copy every constraint entry; the stored value is the node's text value.
        for (Iterator<Map.Entry<String, JsonNode>> it = constraintsNode.fields(); it.hasNext();) {
            Map.Entry<String, JsonNode> constraint = it.next();
            constraintsRelatedIdMap.put(constraint.getKey(), constraint.getValue().textValue());
        }

        TypeReference<Map<String, EntityTypeSettings>> settingsType =
                new TypeReference<Map<String, EntityTypeSettings>>() {};
        this.entityTypeSettings = ParsingUtilities.mapper.readValue(
                ParsingUtilities.mapper.treeAsTokens(entityTypesNode), settingsType);

        this.editGroupsUrlSchema = editGroupsNode.path("url_schema").textValue();
    }

    /**
     * Settings attached to one entity type in the {@code entity_types} section,
     * deserialized from the JSON properties {@code site_iri},
     * {@code reconciliation_endpoint} and {@code mediawiki_api}.
     */
    private static class EntityTypeSettings {

        protected String siteIri;
        protected String reconEndpoint;
        protected String mediaWikiApi;

        @JsonCreator
        protected EntityTypeSettings(
                @JsonProperty("site_iri") String siteIri,
                @JsonProperty("reconciliation_endpoint") String reconEndpoint,
                @JsonProperty("mediawiki_api") String mediawikiEndpoint) {
            this.siteIri = siteIri;
            this.reconEndpoint = reconEndpoint;
            this.mediaWikiApi = mediawikiEndpoint;
        }
    }

    /**
     * Looks up the settings registered for an entity type.
     *
     * @return the settings, or {@code null} when the type is not in the map
     */
    private EntityTypeSettings settingsFor(String entityType) {
        return entityTypeSettings.get(entityType);
    }

    @Override
    public String getVersion() {
        return version;
    }

    @Override
    public String getName() {
        return name;
    }

    @Override
    public String getSiteIri() {
        return siteIri;
    }

    @Override
    public int getMaxlag() {
        return maxlag;
    }

    @Override
    public String getInstanceOfPid() {
        return instanceOfPid;
    }

    @Override
    public String getSubclassOfPid() {
        return subclassOfPid;
    }

    @Override
    public String getMediaWikiApiEndpoint() {
        return mediaWikiApiEndpoint;
    }

    /** Returns the reconciliation endpoint registered for {@code ITEM_TYPE}. */
    @Override
    public String getReconServiceEndpoint() {
        return getReconServiceEndpoint(ITEM_TYPE);
    }

    @Override
    public String getConstraintsRelatedId(String name) {
        return constraintsRelatedIdMap.get(name);
    }

    @Override
    public String getEditGroupsUrlSchema() {
        return editGroupsUrlSchema;
    }

    /** Returns the reconciliation endpoint of the entity type, or {@code null} if the type is unknown. */
    @Override
    public String getReconServiceEndpoint(String entityType) {
        EntityTypeSettings found = settingsFor(entityType);
        return found == null ? null : found.reconEndpoint;
    }

    /** Returns the site IRI of the entity type, or {@code null} if the type is unknown. */
    @Override
    public String getEntityTypeSiteIri(String entityType) {
        EntityTypeSettings found = settingsFor(entityType);
        return found == null ? null : found.siteIri;
    }

    /**
     * Returns the MediaWiki API endpoint of the entity type, falling back to the
     * manifest-wide endpoint when the type declares none. Unknown types yield
     * {@code null}.
     */
    @Override
    public String getMediaWikiApiEndpoint(String entityType) {
        EntityTypeSettings found = settingsFor(entityType);
        if (found == null) {
            return null;
        }
        if (found.mediaWikiApi != null) {
            return found.mediaWikiApi;
        }
        return getMediaWikiApiEndpoint();
    }

    /** Returns the names of all entity types declared in the manifest, as a new list. */
    @Override
    public List<String> getAvailableEntityTypes() {
        return entityTypeSettings.keySet().stream().collect(Collectors.toList());
    }
}

View File

@ -0,0 +1,59 @@
package org.openrefine.wikidata.schema;
import org.jsoup.helper.Validate;
import org.openrefine.wikidata.schema.entityvalues.SuggestedEntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * Schema expression that always evaluates to the same entity id, whatever the
 * row: it carries a fixed identifier together with a display label.
 *
 * @author Antonin Delpeuch
 *
 */
public class WbEntityIdValueConstant implements WbExpression<EntityIdValue> {

    private String id;
    private String label;

    /**
     * Creates the constant from its JSON representation.
     *
     * @param id
     *            the entity identifier; must not be null
     * @param label
     *            the label associated with the entity; must not be null
     */
    @JsonCreator
    public WbEntityIdValueConstant(@JsonProperty("id") String id, @JsonProperty("label") String label) {
        // Both arguments are checked, id first, before any state is stored.
        Validate.notNull(id, "id cannot be null");
        Validate.notNull(label, "label cannot be null");
        this.id = id;
        this.label = label;
    }

    /**
     * Builds the entity id value from the stored id and label, using the base
     * IRI supplied by the evaluation context.
     */
    @Override
    public EntityIdValue evaluate(ExpressionContext ctxt) {
        String baseIri = ctxt.getBaseIRI();
        return SuggestedEntityIdValue.build(id, baseIri, label);
    }

    @JsonProperty("id")
    public String getId() {
        return id;
    }

    @JsonProperty("label")
    public String getLabel() {
        return label;
    }

    /** Two constants are equal when both their ids and their labels are equal. */
    @Override
    public boolean equals(Object other) {
        if (!(other instanceof WbEntityIdValueConstant)) {
            return false;
        }
        WbEntityIdValueConstant that = (WbEntityIdValueConstant) other;
        boolean sameId = id.equals(that.getId());
        return sameId && label.equals(that.getLabel());
    }

    @Override
    public int hashCode() {
        int idHash = id.hashCode();
        int labelHash = label.hashCode();
        return idHash + labelHash;
    }
}

View File

@ -0,0 +1,32 @@
package org.openrefine.wikidata.schema.entityvalues;
import org.wikidata.wdtk.datamodel.implementation.EntityIdValueImpl;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.FormIdValue;
import org.wikidata.wdtk.datamodel.interfaces.LexemeIdValue;
/**
 * A suggested entity id value that designates a form. The identifier is parsed
 * at construction time so that the id of the parent lexeme can be returned later.
 */
public class SuggestedFormIdValue extends SuggestedEntityIdValue implements FormIdValue {

    /** The identifier, parsed as a form id. */
    private FormIdValue parsedId;

    /**
     * @param id
     *            the form identifier
     * @param siteIRI
     *            the IRI of the site the identifier belongs to
     * @param label
     *            the label associated with the form
     * @throws IllegalArgumentException
     *             if parsing {@code id} does not yield a form id
     */
    public SuggestedFormIdValue(String id, String siteIRI, String label) {
        super(id, siteIRI, label);
        EntityIdValue candidate = EntityIdValueImpl.fromId(id, siteIRI);
        if (!(candidate instanceof FormIdValue)) {
            throw new IllegalArgumentException(String.format("Invalid id for a form: %s", id));
        }
        this.parsedId = (FormIdValue) candidate;
    }

    @Override
    public String getEntityType() {
        return ET_FORM;
    }

    /** Returns the id of the lexeme this form belongs to, as given by the parsed identifier. */
    @Override
    public LexemeIdValue getLexemeId() {
        return parsedId.getLexemeId();
    }
}

View File

@ -0,0 +1,15 @@
package org.openrefine.wikidata.schema.entityvalues;
import org.wikidata.wdtk.datamodel.interfaces.LexemeIdValue;
/**
 * A suggested entity id value that designates a lexeme.
 */
public class SuggestedLexemeIdValue extends SuggestedEntityIdValue implements LexemeIdValue {

    /**
     * @param id
     *            the lexeme identifier
     * @param siteIRI
     *            the IRI of the site the identifier belongs to
     * @param label
     *            the label associated with the lexeme
     */
    public SuggestedLexemeIdValue(String id, String siteIRI, String label) {
        super(id, siteIRI, label);
    }

    /** Always reports the lexeme entity type. */
    @Override
    public String getEntityType() {
        return ET_LEXEME;
    }
}

View File

@ -0,0 +1,45 @@
/*******************************************************************************
* MIT License
*
* Copyright (c) 2018 Antonin Delpeuch
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
******************************************************************************/
package org.openrefine.wikidata.schema.entityvalues;
import org.wikidata.wdtk.datamodel.helpers.ToString;
import org.wikidata.wdtk.datamodel.interfaces.MediaInfoIdValue;
/**
 * A suggested entity id value that designates a media info entity.
 */
public class SuggestedMediaInfoIdValue extends SuggestedEntityIdValue implements MediaInfoIdValue {

    /**
     * @param id
     *            the media info identifier
     * @param siteIRI
     *            the IRI of the site the identifier belongs to
     * @param label
     *            the label associated with the entity
     */
    public SuggestedMediaInfoIdValue(String id, String siteIRI, String label) {
        super(id, siteIRI, label);
    }

    /** Always reports the media info entity type. */
    @Override
    public String getEntityType() {
        return ET_MEDIA_INFO;
    }

    /** Renders the id prefixed with "suggested" and followed by the quoted label in parentheses. */
    @Override
    public String toString() {
        StringBuilder rendered = new StringBuilder("suggested ");
        rendered.append(ToString.toString(this));
        rendered.append(" (\"").append(getLabel()).append("\")");
        return rendered.toString();
    }
}

View File

@ -0,0 +1,35 @@
package org.openrefine.wikidata.schema.entityvalues;
import org.wikidata.wdtk.datamodel.implementation.EntityIdValueImpl;
import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.LexemeIdValue;
import org.wikidata.wdtk.datamodel.interfaces.SenseIdValue;
/**
 * A suggested entity id value that designates a sense. The identifier is parsed
 * at construction time so that the id of the parent lexeme can be returned later.
 */
public class SuggestedSenseIdValue extends SuggestedEntityIdValue implements SenseIdValue {

    /** The identifier, parsed as a sense id. */
    private SenseIdValue parsedId;

    /**
     * @param id
     *            the sense identifier
     * @param siteIRI
     *            the IRI of the site the identifier belongs to
     * @param label
     *            the label associated with the sense
     * @throws IllegalArgumentException
     *             if parsing {@code id} does not yield a sense id
     */
    public SuggestedSenseIdValue(String id, String siteIRI, String label) {
        super(id, siteIRI, label);
        EntityIdValue parsed = EntityIdValueImpl.fromId(id, siteIRI);
        if (parsed instanceof SenseIdValue) {
            parsedId = (SenseIdValue) parsed;
        } else {
            // The message previously said "form", copied from SuggestedFormIdValue.
            throw new IllegalArgumentException(String.format("Invalid id for a sense: %s", id));
        }
    }

    /** Returns the id of the lexeme this sense belongs to, as given by the parsed identifier. */
    @Override
    public LexemeIdValue getLexemeId() {
        return parsedId.getLexemeId();
    }

    /**
     * Reports the sense entity type. This returns the entity-type constant, like
     * the other suggested id value classes, rather than the datatype constant
     * {@code DatatypeIdValue.DT_SENSE}.
     */
    @Override
    public String getEntityType() {
        return ET_SENSE;
    }
}

View File

@ -0,0 +1,233 @@
/*******************************************************************************
* MIT License
*
* Copyright (c) 2018 Antonin Delpeuch
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
******************************************************************************/
package org.openrefine.wikidata.updates;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.jsoup.helper.Validate;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
/**
 * Constructs a {@link TermedStatementEntityUpdate} incrementally: statements,
 * labels, descriptions and aliases are accumulated through chained calls and
 * handed to the update when {@link #build()} is called.
 *
 * @author Antonin Delpeuch
 *
 */
public class TermedStatementEntityUpdateBuilder {

    private EntityIdValue id;
    private List<Statement> addedStatements = new ArrayList<>();
    private Set<Statement> deletedStatements = new HashSet<>();
    private Set<MonolingualTextValue> labels = new HashSet<>();
    private Set<MonolingualTextValue> labelsIfNew = new HashSet<>();
    private Set<MonolingualTextValue> descriptions = new HashSet<>();
    private Set<MonolingualTextValue> descriptionsIfNew = new HashSet<>();
    private Set<MonolingualTextValue> aliases = new HashSet<>();
    /** Set once {@link #build()} has run; every mutator refuses to work afterwards. */
    private boolean built = false;

    /**
     * Constructor.
     *
     * @param id
     *            the subject of the update; must not be null
     */
    public TermedStatementEntityUpdateBuilder(EntityIdValue id) {
        Validate.notNull(id);
        this.id = id;
    }

    /**
     * Rejects any modification attempted after {@link #build()} was called.
     */
    private void ensureNotBuilt() {
        Validate.isTrue(!built, "ItemUpdate has already been built");
    }

    /**
     * Records a statement in the list of added statements.
     *
     * @param statement
     *            the statement to add or update
     * @return this builder
     */
    public TermedStatementEntityUpdateBuilder addStatement(Statement statement) {
        ensureNotBuilt();
        addedStatements.add(statement);
        return this;
    }

    /**
     * Records a statement in the set of deleted statements.
     *
     * @param statement
     *            the statement to delete
     * @return this builder
     */
    public TermedStatementEntityUpdateBuilder deleteStatement(Statement statement) {
        ensureNotBuilt();
        deletedStatements.add(statement);
        return this;
    }

    /**
     * Records several statements at once, as in {@link #addStatement}.
     *
     * @param statements
     *            the statements to add
     * @return this builder
     */
    public TermedStatementEntityUpdateBuilder addStatements(Set<Statement> statements) {
        ensureNotBuilt();
        addedStatements.addAll(statements);
        return this;
    }

    /**
     * Records several statements for deletion, as in {@link #deleteStatement}.
     *
     * @param statements
     *            the statements to delete
     * @return this builder
     */
    public TermedStatementEntityUpdateBuilder deleteStatements(Set<Statement> statements) {
        ensureNotBuilt();
        deletedStatements.addAll(statements);
        return this;
    }

    /**
     * Records a label.
     *
     * @param label
     *            the label to add
     * @param override
     *            {@code true} to store it among the overriding labels,
     *            {@code false} to store it among the labels added only if new
     * @return this builder
     */
    public TermedStatementEntityUpdateBuilder addLabel(MonolingualTextValue label, boolean override) {
        ensureNotBuilt();
        Set<MonolingualTextValue> target = override ? labels : labelsIfNew;
        target.add(label);
        return this;
    }

    /**
     * Records several labels, as in {@link #addLabel}.
     *
     * @param labels
     *            the labels to add
     * @param override
     *            {@code true} to store them among the overriding labels,
     *            {@code false} to store them among the labels added only if new
     * @return this builder
     */
    public TermedStatementEntityUpdateBuilder addLabels(Set<MonolingualTextValue> labels, boolean override) {
        ensureNotBuilt();
        Set<MonolingualTextValue> target = override ? this.labels : labelsIfNew;
        target.addAll(labels);
        return this;
    }

    /**
     * Records a description.
     *
     * @param description
     *            the description to add
     * @param override
     *            {@code true} to store it among the overriding descriptions,
     *            {@code false} to store it among the descriptions added only if new
     * @return this builder
     */
    public TermedStatementEntityUpdateBuilder addDescription(MonolingualTextValue description, boolean override) {
        ensureNotBuilt();
        Set<MonolingualTextValue> target = override ? descriptions : descriptionsIfNew;
        target.add(description);
        return this;
    }

    /**
     * Records several descriptions, as in {@link #addDescription}.
     *
     * @param descriptions
     *            the descriptions to add
     * @param override
     *            {@code true} to store them among the overriding descriptions,
     *            {@code false} to store them among the descriptions added only if new
     * @return this builder
     */
    public TermedStatementEntityUpdateBuilder addDescriptions(Set<MonolingualTextValue> descriptions, boolean override) {
        ensureNotBuilt();
        Set<MonolingualTextValue> target = override ? this.descriptions : descriptionsIfNew;
        target.addAll(descriptions);
        return this;
    }

    /**
     * Records an alias.
     *
     * @param alias
     *            the alias to add
     * @return this builder
     */
    public TermedStatementEntityUpdateBuilder addAlias(MonolingualTextValue alias) {
        ensureNotBuilt();
        aliases.add(alias);
        return this;
    }

    /**
     * Records several aliases, as in {@link #addAlias}.
     *
     * @param aliases
     *            the aliases to add
     * @return this builder
     */
    public TermedStatementEntityUpdateBuilder addAliases(Set<MonolingualTextValue> aliases) {
        ensureNotBuilt();
        this.aliases.addAll(aliases);
        return this;
    }

    /**
     * Constructs the {@link TermedStatementEntityUpdate} and marks this builder
     * as built, so that later calls to the mutators fail.
     *
     * @return the update holding everything recorded so far
     */
    public TermedStatementEntityUpdate build() {
        built = true;
        return new TermedStatementEntityUpdate(
                id,
                addedStatements,
                deletedStatements,
                labels,
                labelsIfNew,
                descriptions,
                descriptionsIfNew,
                aliases);
    }
}

View File

@ -0,0 +1,83 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.expr.functions.xml;
import java.util.Properties;
import org.jsoup.nodes.Element;
import com.google.refine.expr.EvalError;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
import com.google.refine.expr.functions.Type;
/**
 * Expression function returning the parent of a jsoup {@link Element}.
 */
public class Parent implements Function {

    /**
     * Returns the parent of the element passed as the single argument.
     *
     * @param bindings
     *            the evaluation bindings, forwarded to {@code Type} when building the error message
     * @param args
     *            the call arguments; exactly one jsoup {@link Element} is expected
     * @return the result of {@link Element#parent()}, or an {@link EvalError} when
     *         the argument count or the argument type is wrong
     */
    @Override
    public Object call(Properties bindings, Object[] args) {
        if (args.length == 1) {
            Object o1 = args[0];
            // instanceof is already false for null, so no separate null check is needed
            if (o1 instanceof Element) {
                Element e1 = (Element) o1;
                return e1.parent();
            } else {
                return new EvalError(ControlFunctionRegistry.getFunctionName(this)
                        + "() cannot work with this '"
                        + new Type().call(bindings, args)
                        + "'"
                        + " but instead needs a jsoup XML or HTML Element to work with."
                        + " For arrays, you might select an index or loop over them with forEach().");
            }
        }
        return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one argument");
    }

    @Override
    public String getDescription() {
        return "Returns the parent node or null if no parent. Use it in conjunction with parseHtml() and select() to provide an element.";
    }

    /**
     * Describes the parameters. The function accepts a single element, so only
     * that parameter is listed (the former text also advertised a string
     * parameter that {@link #call} never accepted).
     */
    @Override
    public String getParams() {
        return "element e";
    }

    @Override
    public String getReturns() {
        return "HTML/XML Element";
    }
}

View File

@ -0,0 +1,87 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.expr.functions.xml;
import java.util.Properties;
import org.jsoup.nodes.Element;
import com.google.refine.expr.EvalError;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
import com.google.refine.expr.functions.Type;
/**
 * Expression function returning the combined data of a jsoup {@link Element},
 * i.e. the result of {@link Element#data()}.
 */
public class ScriptText implements Function {

    /**
     * Returns the data of the element passed as the single argument.
     *
     * @param bindings
     *            the evaluation bindings, forwarded to {@code Type} when building the error message
     * @param args
     *            the call arguments; exactly one jsoup {@link Element} is expected
     * @return the element's data, or an {@link EvalError} when the argument
     *         count or the argument type is wrong
     */
    @Override
    public Object call(Properties bindings, Object[] args) {
        if (args.length != 1) {
            return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one argument");
        }

        Object candidate = args[0];
        if (!(candidate instanceof Element)) {
            String functionName = ControlFunctionRegistry.getFunctionName(this);
            Object actualType = new Type().call(bindings, args);
            return new EvalError(functionName + "() cannot work with this '" + actualType + "'"
                    + " but instead needs a jsoup DataNode from style, script tags, etc. to work with."
                    + " See https://jsoup.org/apidocs/org/jsoup/nodes/Element.html#data()"
                    + " For arrays, you might select an index or loop over them with forEach()."
                    + " dataNodes() is currently not implemented.");
        }

        return ((Element) candidate).data();
    }

    @Override
    public String getDescription() {
        // The text is rendered as HTML, hence the escaped angle brackets.
        String summary = "Returns the combined data of an HTML/XML Element. Data is e.g. the inside of a &lt;script&gt; tag.\n";
        String caveat = "Note that data is NOT the text of the element.\n";
        String comparison = "Use htmlText() to get the text that would be visible to a user, and scriptText() for the contents of &lt;script&gt;, &lt;style&gt;, etc.\n";
        String usage = "Use scriptText() in conjunction with parseHtml() and select().";
        return summary + caveat + comparison + usage;
    }

    @Override
    public String getParams() {
        return "element e";
    }

    @Override
    public String getReturns() {
        return "string";
    }
}