Merge pull request #2066 from OpenRefine/issue-2064

Update to Wikidata-Toolkit 0.10.0.
This commit is contained in:
Antonin Delpeuch 2019-07-11 11:24:10 +02:00 committed by GitHub
commit a735ed7d57
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 179 additions and 68 deletions

View File

@ -93,45 +93,6 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
<executions>
<execution>
<id>install-wdtk-datamodel</id>
<phase>process-resources</phase>
<configuration>
<file>${basedir}/lib-local/wdtk-datamodel-0.9.0-SNAPSHOT-jar-with-dependencies.jar</file>
<repositoryLayout>default</repositoryLayout>
<groupId>org.wikidata.wdtk</groupId>
<artifactId>wdtk-datamodel</artifactId>
<version>0.9.0-SNAPSHOT</version>
<packaging>jar</packaging>
<generatePom>true</generatePom>
</configuration>
<goals>
<goal>install-file</goal>
</goals>
</execution>
<execution>
<id>install-wdtk-wikibaseapi</id>
<phase>process-resources</phase>
<configuration>
<file>${basedir}/lib-local/wdtk-wikibaseapi-0.9.0-SNAPSHOT.jar</file>
<repositoryLayout>default</repositoryLayout>
<groupId>org.wikidata.wdtk</groupId>
<artifactId>wdtk-wikibaseapi</artifactId>
<version>0.9.0-SNAPSHOT</version>
<packaging>jar</packaging>
<generatePom>true</generatePom>
</configuration>
<goals>
<goal>install-file</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-clean-plugin</artifactId>
@ -163,12 +124,12 @@
<dependency>
<groupId>org.wikidata.wdtk</groupId>
<artifactId>wdtk-wikibaseapi</artifactId>
<version>0.9.0-SNAPSHOT</version>
<version>0.10.0</version>
</dependency>
<dependency>
<groupId>org.wikidata.wdtk</groupId>
<artifactId>wdtk-datamodel</artifactId>
<version>0.9.0-SNAPSHOT</version>
<version>0.10.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>

View File

@ -211,7 +211,10 @@ public class EditBatchProcessor {
} catch (MediaWikiApiErrorException e) {
e.printStackTrace();
Thread.sleep(5000);
}
} catch (IOException e) {
e.printStackTrace();
Thread.sleep(5000);
}
retries--;
}
if (currentDocs == null) {

View File

@ -31,6 +31,7 @@ import org.openrefine.wikidata.updates.scheduler.QuickStatementsUpdateScheduler;
import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
@ -50,12 +51,6 @@ import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
*/
public class QSValuePrinter implements ValueVisitor<String> {
@Override
public String visit(DatatypeIdValue value) {
// impossible case (this is actually a bug in WDTK, DatatypeIdValue should not subclass Value)
throw new IllegalArgumentException();
}
@Override
public String visit(EntityIdValue value) {
if (ReconEntityIdValue.class.isInstance(value) && ((ReconEntityIdValue) value).isNew()) {
@ -76,12 +71,10 @@ public class QSValuePrinter implements ValueVisitor<String> {
@Override
public String visit(QuantityValue value) {
String unitPrefix = "http://www.wikidata.org/entity/Q";
String unitIri = value.getUnit();
ItemIdValue unit = value.getUnitItemId();
String unitRepresentation = "", boundsRepresentation = "";
if (!unitIri.isEmpty()) {
if (!unitIri.startsWith(unitPrefix)) return null; // QuickStatements only accepts Qids as units
unitRepresentation = "U" + unitIri.substring(unitPrefix.length());
if (unit != null) {
unitRepresentation = "U" + unit.getId().substring(1);
}
if (value.getLowerBound() != null) {
// bounds are always null at the same time so we know they are both not null

View File

@ -45,22 +45,19 @@ public class QuantityScrutinizer extends SnakScrutinizer {
addIssue(issue);
}
Set<ItemIdValue> allowedUnits = _fetcher.allowedUnits(pid);
String currentUnit = null;
if (value.getUnit() != null && !value.getUnit().equals("")) {
currentUnit = value.getUnit();
ItemIdValue currentUnit = null;
if (value.getUnitItemId() != null) {
currentUnit = value.getUnitItemId();
}
if(allowedUnits != null &&
!allowedUnits.stream().map(u -> u != null ? u.getIri() : null)
.collect(Collectors.toSet()).contains(currentUnit)) {
!allowedUnits.contains(currentUnit)) {
String issueType = currentUnit == null ? noUnitProvidedType : invalidUnitType;
QAWarning issue = new QAWarning(issueType, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
issue.setProperty("property_entity", pid);
issue.setProperty("example_value", value.getNumericValue().toString());
issue.setProperty("example_item_entity", entityId);
if (currentUnit != null) {
issue.setProperty("unit_entity",
// this is a hack but it will not be needed anymore in the upcoming version of Wikidata-Toolkit
Datamodel.makeWikidataItemIdValue(currentUnit.substring(currentUnit.indexOf("Q"))));
issue.setProperty("unit_entity", value.getUnitItemId());
}
addIssue(issue);
}

View File

@ -28,6 +28,7 @@ import java.util.List;
import org.jsoup.helper.Validate;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.openrefine.wikidata.utils.StatementGroupJson;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;

View File

@ -36,6 +36,8 @@ import java.util.function.Function;
import java.util.stream.Collectors;
import org.jsoup.helper.Validate;
import org.openrefine.wikidata.utils.StatementGroupJson;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.implementation.StatementGroupImpl;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
@ -176,7 +178,7 @@ public class ItemUpdate {
*
* @return the list of all added statements
*/
@JsonProperty("addedStatements")
@JsonIgnore // exposed as statement groups below
public List<Statement> getAddedStatements() {
return addedStatements;
}
@ -295,6 +297,7 @@ public class ItemUpdate {
*
* @return a grouped version of getAddedStatements()
*/
@JsonIgnore
public List<StatementGroup> getAddedStatementGroups() {
Map<PropertyIdValue, List<Statement>> map = new HashMap<>();
for (Statement statement : getAddedStatements()) {
@ -306,10 +309,22 @@ public class ItemUpdate {
}
List<StatementGroup> result = new ArrayList<>();
for (Map.Entry<PropertyIdValue, List<Statement>> entry : map.entrySet()) {
// We have to do this rather than use Datamodel in order to preserve the
// custom entity id values which can link to new items.
result.add(new StatementGroupImpl(entry.getValue()));
}
return result;
}
/**
 * Exposes the added statement groups in a form suitable for JSON
 * serialization, which is used for the preview of item updates.
 * StatementGroup is not designed for serialization (WDTK does not specify
 * its format), so every group is wrapped in a {@link StatementGroupJson}
 * that fixes the serialized shape.
 *
 * @return one wrapper per group returned by getAddedStatementGroups(),
 *         in the same order
 */
@JsonProperty("addedStatementGroups")
public List<StatementGroupJson> getAddedStatementGroupsJson() {
    List<StatementGroup> groups = getAddedStatementGroups();
    return groups.stream()
            .map(StatementGroupJson::new)
            .collect(Collectors.toList());
}
/**
* Group a list of ItemUpdates by subject: this is useful to make one single
@ -339,6 +354,7 @@ public class ItemUpdate {
/**
 * Tells whether this update is about a new item, i.e. whether the site IRI
 * of its subject is EntityIdValue.SITE_LOCAL.
 *
 * @return true if the subject's site IRI equals EntityIdValue.SITE_LOCAL
 */
@JsonProperty("new")
public boolean isNew() {
    String subjectSiteIri = getItemId().getSiteIri();
    return EntityIdValue.SITE_LOCAL.equals(subjectSiteIri);
}

View File

@ -122,11 +122,6 @@ public class PointerExtractor implements ValueVisitor<Set<ReconItemIdValue>> {
return pointers;
}
@Override
public Set<ReconItemIdValue> visit(DatatypeIdValue value) {
return null;
}
@Override
public Set<ReconItemIdValue> visit(EntityIdValue value) {
if (ReconItemIdValue.class.isInstance(value)) {

View File

@ -0,0 +1,43 @@
package org.openrefine.wikidata.utils;
import java.util.List;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * Serialization wrapper around a Wikidata-Toolkit StatementGroup.
 *
 * StatementGroup is not designed to be serialized, so what Jackson would
 * produce for it is not specified. This class exposes the group's subject,
 * property and statements through explicitly annotated getters, so that the
 * JSON property names are fixed here.
 *
 * @author Antonin Delpeuch
 */
public class StatementGroupJson {

    /** The wrapped statement group; every getter delegates to it. */
    protected final StatementGroup statementGroup;

    /**
     * Wraps the given statement group.
     *
     * @param s the statement group to expose for serialization
     */
    public StatementGroupJson(StatementGroup s) {
        this.statementGroup = s;
    }

    /**
     * @return the subject of the wrapped group, serialized as "subject"
     */
    @JsonProperty("subject")
    public EntityIdValue getSubject() {
        return this.statementGroup.getSubject();
    }

    /**
     * @return the property of the wrapped group, serialized as "property"
     */
    @JsonProperty("property")
    public PropertyIdValue getProperty() {
        return this.statementGroup.getProperty();
    }

    /**
     * @return the statements of the wrapped group, serialized as "statements"
     */
    @JsonProperty("statements")
    public List<Statement> getStatements() {
        return this.statementGroup.getStatements();
    }
}

View File

@ -0,0 +1,83 @@
{
"addedAliases": [],
"addedStatementGroups": [
{
"property": {
"type": "wikibase-entityid",
"value": {
"entity-type": "property",
"id": "P348",
"numeric-id": 348
}
},
"statements": [
{
"mainsnak": {
"property": "P348",
"snaktype": "novalue"
},
"rank": "normal",
"type": "statement"
}
],
"subject": {
"type": "wikibase-entityid",
"value": {
"entity-type": "item",
"id": "Q34",
"numeric-id": 34
}
}
},
{
"property": {
"type": "wikibase-entityid",
"value": {
"entity-type": "property",
"id": "P52",
"numeric-id": 52
}
},
"statements": [
{
"mainsnak": {
"datatype": "wikibase-item",
"datavalue": {
"entityType": "http://www.wikidata.org/ontology#Item",
"id": "Q1234",
"iri": "http://localhost/entity/Q1234",
"label": "new item",
"reconInternalId": 1234,
"siteIri": "http://localhost/entity/",
"types": []
},
"property": "P52",
"snaktype": "value"
},
"rank": "normal",
"type": "statement"
}
],
"subject": {
"type": "wikibase-entityid",
"value": {
"entity-type": "item",
"id": "Q34",
"numeric-id": 34
}
}
}
],
"deletedStatements": [],
"descriptions": [],
"labels": [],
"new": false,
"subject": {
"type": "wikibase-entityid",
"value": {
"entity-type": "item",
"id": "Q34",
"numeric-id": 34
}
}
}

View File

@ -28,6 +28,7 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@ -48,6 +49,8 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
import com.google.refine.tests.util.TestUtils;
public class ItemUpdateTest {
private ItemIdValue existingSubject = Datamodel.makeWikidataItemIdValue("Q34");
@ -110,6 +113,23 @@ public class ItemUpdateTest {
assertEquals(Arrays.asList(statement1, statement2), update.getAddedStatements());
assertEquals(statementGroups, update.getAddedStatementGroups().stream().collect(Collectors.toSet()));
}
/**
 * Checks that an update with two added statements on an existing subject
 * is serialized to the JSON stored in updates/statement_groups.json.
 *
 * The test is currently disabled because it fails due to
 * https://github.com/Wikidata/Wikidata-Toolkit/issues/417
 * (not fixed as of WDTK 0.10.0). That bug is not critical, as the
 * extraneous serialized data is ignored by Wikibase.
 *
 * @todo reenable once a later version is released
 */
@Test(enabled=false)
public void testSerializeStatements() throws IOException {
    ItemUpdate update = new ItemUpdateBuilder(existingSubject)
            .addStatement(statement1)
            .addStatement(statement2)
            .build();
    String expectedJson = TestingData.jsonFromFile("updates/statement_groups.json");
    TestUtils.isSerializedTo(update, expectedJson);
}
@Test
public void testDeleteStatements() {

View File

@ -66,7 +66,6 @@ public class PointerExtractorTest {
@Test
public void testExtractDatavalues() {
assertEmpty(Datamodel.makeDatatypeIdValue("string"));
assertEmpty(Datamodel.makeGlobeCoordinatesValue(1.34, 2.354, 0.1, GlobeCoordinatesValue.GLOBE_EARTH));
assertEmpty(Datamodel.makeStringValue("est"));
assertEmpty(Datamodel.makeMonolingualTextValue("srtu", "en"));