Merge pull request #2207 from OpenRefine/issue-2206-double-creation
Fix duplicate creations of Wikidata items
This commit is contained in:
commit
286915ed83
@ -31,6 +31,7 @@ import java.util.Map;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
||||||
|
import org.openrefine.wikidata.schema.exceptions.NewItemNotCreatedYetException;
|
||||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
|
import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -128,7 +129,12 @@ public class EditBatchProcessor {
|
|||||||
|
|
||||||
// Rewrite mentions to new items
|
// Rewrite mentions to new items
|
||||||
ReconEntityRewriter rewriter = new ReconEntityRewriter(library, update.getItemId());
|
ReconEntityRewriter rewriter = new ReconEntityRewriter(library, update.getItemId());
|
||||||
update = rewriter.rewrite(update);
|
try {
|
||||||
|
update = rewriter.rewrite(update);
|
||||||
|
} catch (NewItemNotCreatedYetException e) {
|
||||||
|
logger.warn("Failed to rewrite update on entity "+update.getItemId()+". Missing entity: "+e.getMissingEntity()+". Skipping update.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// New item
|
// New item
|
||||||
|
@ -28,10 +28,12 @@ import java.util.Set;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
|
import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
|
||||||
|
import org.openrefine.wikidata.schema.exceptions.NewItemNotCreatedYetException;
|
||||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter;
|
import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter;
|
||||||
import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl;
|
import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
@ -53,53 +55,87 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
|||||||
*/
|
*/
|
||||||
public class ReconEntityRewriter extends DatamodelConverter {
|
public class ReconEntityRewriter extends DatamodelConverter {
|
||||||
|
|
||||||
private NewItemLibrary library;
|
private final NewItemLibrary library;
|
||||||
private ItemIdValue subject;
|
private final ItemIdValue subject;
|
||||||
|
|
||||||
/**
|
protected static final String notCreatedYetMessage = "Trying to rewrite an update where a new item was not created yet.";
|
||||||
* Constructor. Sets up a rewriter which uses the provided library to look up
|
|
||||||
* qids of new items, and the subject (which should not be rewritten).
|
|
||||||
*
|
|
||||||
* @param library
|
|
||||||
* @param subject
|
|
||||||
*/
|
|
||||||
public ReconEntityRewriter(NewItemLibrary library, ItemIdValue subject) {
|
|
||||||
super(new DataObjectFactoryImpl());
|
|
||||||
this.library = library;
|
|
||||||
this.subject = subject;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
/**
|
||||||
public ItemIdValue copy(ItemIdValue value) {
|
* Constructor. Sets up a rewriter which uses the provided library to look up
|
||||||
if (subject.equals(value)) {
|
* qids of new items.
|
||||||
return value;
|
*
|
||||||
}
|
* @param library
|
||||||
if (value instanceof ReconItemIdValue) {
|
* the collection of items already created
|
||||||
ReconItemIdValue recon = (ReconItemIdValue) value;
|
* @param subject
|
||||||
if (recon.isNew()) {
|
* the subject id of the entity to rewrite
|
||||||
String newId = library.getQid(recon.getReconInternalId());
|
*/
|
||||||
if (newId == null) {
|
public ReconEntityRewriter(NewItemLibrary library, ItemIdValue subject) {
|
||||||
throw new IllegalArgumentException(
|
super(new DataObjectFactoryImpl());
|
||||||
"Trying to rewrite an update where a new item was not created yet.");
|
this.library = library;
|
||||||
}
|
this.subject = subject;
|
||||||
return Datamodel.makeItemIdValue(newId, recon.getRecon().identifierSpace);
|
}
|
||||||
}
|
|
||||||
}
|
@Override
|
||||||
return super.copy(value);
|
public ItemIdValue copy(ItemIdValue value) {
|
||||||
}
|
if (value instanceof ReconItemIdValue) {
|
||||||
|
ReconItemIdValue recon = (ReconItemIdValue) value;
|
||||||
|
if (recon.isNew()) {
|
||||||
|
String newId = library.getQid(recon.getReconInternalId());
|
||||||
|
if (newId == null) {
|
||||||
|
if (subject.equals(recon)) {
|
||||||
|
return subject;
|
||||||
|
} else {
|
||||||
|
throw new MissingEntityIdFound(recon);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Datamodel.makeItemIdValue(newId, recon.getRecon().identifierSpace);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return super.copy(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rewrite an update, replacing references to all entities already
|
||||||
|
* created by their fresh identifiers. The subject id might not have been
|
||||||
|
* created already, in which case it will be left untouched. All the other
|
||||||
|
* entities need to have been created already.
|
||||||
|
*
|
||||||
|
* @param update
|
||||||
|
* the update to rewrite
|
||||||
|
* @return
|
||||||
|
* the rewritten update
|
||||||
|
* @throws NewItemNotCreatedYetException
|
||||||
|
* if any non-subject entity had not been created yet
|
||||||
|
*/
|
||||||
|
public ItemUpdate rewrite(ItemUpdate update) throws NewItemNotCreatedYetException {
|
||||||
|
try {
|
||||||
|
ItemIdValue subject = copy(update.getItemId());
|
||||||
|
Set<MonolingualTextValue> labels = update.getLabels().stream().map(l -> copy(l)).collect(Collectors.toSet());
|
||||||
|
Set<MonolingualTextValue> labelsIfNew = update.getLabelsIfNew().stream().map(l -> copy(l)).collect(Collectors.toSet());
|
||||||
|
Set<MonolingualTextValue> descriptions = update.getDescriptions().stream().map(l -> copy(l))
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
Set<MonolingualTextValue> descriptionsIfNew = update.getDescriptionsIfNew().stream().map(l -> copy(l))
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
Set<MonolingualTextValue> aliases = update.getAliases().stream().map(l -> copy(l)).collect(Collectors.toSet());
|
||||||
|
List<Statement> addedStatements = update.getAddedStatements().stream().map(l -> copy(l))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
Set<Statement> deletedStatements = update.getDeletedStatements().stream().map(l -> copy(l))
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
return new ItemUpdate(subject, addedStatements, deletedStatements, labels, labelsIfNew, descriptions, descriptionsIfNew, aliases);
|
||||||
|
} catch(MissingEntityIdFound e) {
|
||||||
|
throw new NewItemNotCreatedYetException(e.value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unchecked version of {@class NewItemNotCreatedYetException}, for internal use only.
|
||||||
|
*/
|
||||||
|
protected static class MissingEntityIdFound extends Error {
|
||||||
|
private static final long serialVersionUID = 1L;
|
||||||
|
protected EntityIdValue value;
|
||||||
|
public MissingEntityIdFound(EntityIdValue missing) {
|
||||||
|
this.value = missing;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public ItemUpdate rewrite(ItemUpdate update) {
|
|
||||||
Set<MonolingualTextValue> labels = update.getLabels().stream().map(l -> copy(l)).collect(Collectors.toSet());
|
|
||||||
Set<MonolingualTextValue> labelsIfNew = update.getLabelsIfNew().stream().map(l -> copy(l)).collect(Collectors.toSet());
|
|
||||||
Set<MonolingualTextValue> descriptions = update.getDescriptions().stream().map(l -> copy(l))
|
|
||||||
.collect(Collectors.toSet());
|
|
||||||
Set<MonolingualTextValue> descriptionsIfNew = update.getDescriptionsIfNew().stream().map(l -> copy(l))
|
|
||||||
.collect(Collectors.toSet());
|
|
||||||
Set<MonolingualTextValue> aliases = update.getAliases().stream().map(l -> copy(l)).collect(Collectors.toSet());
|
|
||||||
List<Statement> addedStatements = update.getAddedStatements().stream().map(l -> copy(l))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
Set<Statement> deletedStatements = update.getDeletedStatements().stream().map(l -> copy(l))
|
|
||||||
.collect(Collectors.toSet());
|
|
||||||
return new ItemUpdate(update.getItemId(), addedStatements, deletedStatements, labels, labelsIfNew, descriptions, descriptionsIfNew, aliases);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,19 @@
|
|||||||
|
package org.openrefine.wikidata.schema.exceptions;
|
||||||
|
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
|
|
||||||
|
public class NewItemNotCreatedYetException extends Exception {
|
||||||
|
private static final long serialVersionUID = -563535295696710197L;
|
||||||
|
|
||||||
|
private final EntityIdValue value;
|
||||||
|
|
||||||
|
public NewItemNotCreatedYetException(EntityIdValue value) {
|
||||||
|
super("Attempted to rewrite an entity which was not created yet: "+value);
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public EntityIdValue getMissingEntity() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -407,13 +407,21 @@ public class ItemUpdate {
|
|||||||
builder.append("<Update on ");
|
builder.append("<Update on ");
|
||||||
builder.append(qid);
|
builder.append(qid);
|
||||||
if (!labels.isEmpty()) {
|
if (!labels.isEmpty()) {
|
||||||
builder.append("\n Labels: ");
|
builder.append("\n Labels (override): ");
|
||||||
builder.append(labels);
|
builder.append(labels);
|
||||||
}
|
}
|
||||||
|
if (!labelsIfNew.isEmpty()) {
|
||||||
|
builder.append("\n Labels (if new): ");
|
||||||
|
builder.append(labelsIfNew);
|
||||||
|
}
|
||||||
if (!descriptions.isEmpty()) {
|
if (!descriptions.isEmpty()) {
|
||||||
builder.append("\n Descriptions: ");
|
builder.append("\n Descriptions (override): ");
|
||||||
builder.append(descriptions);
|
builder.append(descriptions);
|
||||||
}
|
}
|
||||||
|
if (!descriptionsIfNew.isEmpty()) {
|
||||||
|
builder.append("\n Descriptions (if new): ");
|
||||||
|
builder.append(descriptionsIfNew);
|
||||||
|
}
|
||||||
if (!aliases.isEmpty()) {
|
if (!aliases.isEmpty()) {
|
||||||
builder.append("\n Aliases: ");
|
builder.append("\n Aliases: ");
|
||||||
builder.append(aliases);
|
builder.append(aliases);
|
||||||
|
@ -25,6 +25,7 @@ package org.openrefine.wikidata.editing;
|
|||||||
|
|
||||||
import static org.testng.Assert.assertEquals;
|
import static org.testng.Assert.assertEquals;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.schema.exceptions.NewItemNotCreatedYetException;
|
||||||
import org.openrefine.wikidata.testing.TestingData;
|
import org.openrefine.wikidata.testing.TestingData;
|
||||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
@ -35,54 +36,103 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
|||||||
|
|
||||||
public class ReconEntityRewriterTest {
|
public class ReconEntityRewriterTest {
|
||||||
|
|
||||||
NewItemLibrary library = null;
|
NewItemLibrary library = null;
|
||||||
ReconEntityRewriter rewriter = null;
|
ReconEntityRewriter rewriter = null;
|
||||||
ItemIdValue subject = TestingData.newIdA;
|
ItemIdValue newlyCreated = Datamodel.makeWikidataItemIdValue("Q1234");
|
||||||
ItemIdValue newlyCreated = Datamodel.makeWikidataItemIdValue("Q1234");
|
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void setUp() {
|
public void setUp() {
|
||||||
library = new NewItemLibrary();
|
library = new NewItemLibrary();
|
||||||
rewriter = new ReconEntityRewriter(library, subject);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
@Test(expectedExceptions = ReconEntityRewriter.MissingEntityIdFound.class)
|
||||||
public void testNotCreatedYet() {
|
public void testNotCreatedYet() {
|
||||||
rewriter.copy(TestingData.newIdB);
|
rewriter = new ReconEntityRewriter(library, TestingData.newIdA);
|
||||||
}
|
rewriter.copy(TestingData.newIdB);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSuccessfulRewrite() {
|
public void testSuccessfulRewrite() {
|
||||||
library.setQid(4567L, "Q1234");
|
rewriter = new ReconEntityRewriter(library, TestingData.newIdA);
|
||||||
assertEquals(newlyCreated, rewriter.copy(TestingData.newIdB));
|
library.setQid(4567L, "Q1234");
|
||||||
}
|
assertEquals(newlyCreated, rewriter.copy(TestingData.newIdB));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSubjectNotRewriten() {
|
public void testSubjectNotRewritten() {
|
||||||
assertEquals(subject, rewriter.copy(subject));
|
ItemIdValue subject = TestingData.newIdA;
|
||||||
}
|
rewriter = new ReconEntityRewriter(library, subject);
|
||||||
|
assertEquals(subject, rewriter.copy(subject));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMatched() {
|
public void testSubjectRewritten() {
|
||||||
assertEquals(TestingData.matchedId, rewriter.copy(TestingData.matchedId));
|
ItemIdValue subject = TestingData.newIdB;
|
||||||
}
|
library.setQid(4567L, "Q1234");
|
||||||
|
rewriter = new ReconEntityRewriter(library, subject);
|
||||||
|
assertEquals(newlyCreated, rewriter.copy(subject));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRewriteUpdate() {
|
public void testMatched() {
|
||||||
library.setQid(4567L, "Q1234");
|
rewriter = new ReconEntityRewriter(library, TestingData.newIdA);
|
||||||
ItemUpdate update = new ItemUpdateBuilder(subject)
|
assertEquals(TestingData.matchedId, rewriter.copy(TestingData.matchedId));
|
||||||
.addStatement(TestingData.generateStatement(subject, TestingData.newIdB))
|
}
|
||||||
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
|
|
||||||
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true)
|
@Test
|
||||||
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
|
public void testRewriteCreate() throws NewItemNotCreatedYetException {
|
||||||
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
|
ItemIdValue subject = TestingData.newIdA;
|
||||||
ItemUpdate rewritten = rewriter.rewrite(update);
|
rewriter = new ReconEntityRewriter(library, subject);
|
||||||
ItemUpdate expected = new ItemUpdateBuilder(subject)
|
library.setQid(4567L, "Q1234");
|
||||||
.addStatement(TestingData.generateStatement(subject, newlyCreated))
|
ItemUpdate update = new ItemUpdateBuilder(subject)
|
||||||
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
|
.addStatement(TestingData.generateStatement(subject, TestingData.newIdB))
|
||||||
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true)
|
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
|
||||||
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
|
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true)
|
||||||
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
|
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
|
||||||
assertEquals(expected, rewritten);
|
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
|
||||||
}
|
ItemUpdate rewritten = rewriter.rewrite(update);
|
||||||
|
ItemUpdate expected = new ItemUpdateBuilder(subject)
|
||||||
|
.addStatement(TestingData.generateStatement(subject, newlyCreated))
|
||||||
|
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
|
||||||
|
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true)
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
|
||||||
|
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
|
||||||
|
assertEquals(rewritten, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRewriteUpdateOnPreviouslyCreatedEntity() throws NewItemNotCreatedYetException {
|
||||||
|
ItemIdValue subject = TestingData.newIdA;
|
||||||
|
rewriter = new ReconEntityRewriter(library, subject);
|
||||||
|
library.setQid(4567L, "Q1234");
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdB)
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
|
||||||
|
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
|
||||||
|
ItemUpdate rewritten = rewriter.rewrite(update);
|
||||||
|
ItemUpdate expected = new ItemUpdateBuilder(newlyCreated)
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
|
||||||
|
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
|
||||||
|
assertEquals(rewritten, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRewriteUpdateOnExistingEntity() throws NewItemNotCreatedYetException {
|
||||||
|
ItemIdValue subject = TestingData.matchedId;
|
||||||
|
rewriter = new ReconEntityRewriter(library, subject);
|
||||||
|
library.setQid(4567L, "Q1234");
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(subject)
|
||||||
|
.addStatement(TestingData.generateStatement(subject, TestingData.newIdB))
|
||||||
|
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
|
||||||
|
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true)
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
|
||||||
|
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
|
||||||
|
ItemUpdate rewritten = rewriter.rewrite(update);
|
||||||
|
ItemUpdate expected = new ItemUpdateBuilder(subject)
|
||||||
|
.addStatement(TestingData.generateStatement(subject, newlyCreated))
|
||||||
|
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
|
||||||
|
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true)
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
|
||||||
|
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
|
||||||
|
assertEquals(rewritten, expected);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user