From ec5c9cd4189ca8845b34864245482bf7776e8370 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 4 Nov 2019 21:17:18 +0000 Subject: [PATCH 1/2] Fix duplicate creations, closes #2206 --- .../wikidata/editing/ReconEntityRewriter.java | 3 ++- .../openrefine/wikidata/updates/ItemUpdate.java | 12 ++++++++++-- .../wikidata/editing/ReconEntityRewriterTest.java | 15 ++++++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java index 195862ff6..e9dbf10f5 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java @@ -89,6 +89,7 @@ public class ReconEntityRewriter extends DatamodelConverter { } public ItemUpdate rewrite(ItemUpdate update) { + ItemIdValue subject = copy(update.getItemId()); Set labels = update.getLabels().stream().map(l -> copy(l)).collect(Collectors.toSet()); Set labelsIfNew = update.getLabelsIfNew().stream().map(l -> copy(l)).collect(Collectors.toSet()); Set descriptions = update.getDescriptions().stream().map(l -> copy(l)) @@ -100,6 +101,6 @@ public class ReconEntityRewriter extends DatamodelConverter { .collect(Collectors.toList()); Set deletedStatements = update.getDeletedStatements().stream().map(l -> copy(l)) .collect(Collectors.toSet()); - return new ItemUpdate(update.getItemId(), addedStatements, deletedStatements, labels, labelsIfNew, descriptions, descriptionsIfNew, aliases); + return new ItemUpdate(subject, addedStatements, deletedStatements, labels, labelsIfNew, descriptions, descriptionsIfNew, aliases); } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java index 46def6711..90713d10a 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java @@ -407,13 +407,21 @@ public class ItemUpdate { builder.append(" Date: Tue, 5 Nov 2019 17:49:47 +0000 Subject: [PATCH 2/2] Skip update if rewriting failed --- .../wikidata/editing/EditBatchProcessor.java | 8 +- .../wikidata/editing/ReconEntityRewriter.java | 143 ++++++++++------ .../NewItemNotCreatedYetException.java | 19 +++ .../editing/ReconEntityRewriterTest.java | 159 +++++++++++------- 4 files changed, 213 insertions(+), 116 deletions(-) create mode 100644 extensions/wikidata/src/org/openrefine/wikidata/schema/exceptions/NewItemNotCreatedYetException.java diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java index 5c8e98ef7..ab01dff17 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java @@ -31,6 +31,7 @@ import java.util.Map; import java.util.stream.Collectors; import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue; +import org.openrefine.wikidata.schema.exceptions.NewItemNotCreatedYetException; import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler; import org.slf4j.Logger; @@ -128,7 +129,12 @@ public class EditBatchProcessor { // Rewrite mentions to new items ReconEntityRewriter rewriter = new ReconEntityRewriter(library, update.getItemId()); - update = rewriter.rewrite(update); + try { + update = rewriter.rewrite(update); + } catch (NewItemNotCreatedYetException e) { + logger.warn("Failed to rewrite update on entity "+update.getItemId()+". Missing entity: "+e.getMissingEntity()+". Skipping update."); + return; + } try { // New item diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java index e9dbf10f5..776b8240b 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java @@ -1,18 +1,18 @@ /******************************************************************************* * MIT License - * + * * Copyright (c) 2018 Antonin Delpeuch - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -28,10 +28,12 @@ import java.util.Set; import java.util.stream.Collectors; import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue; +import org.openrefine.wikidata.schema.exceptions.NewItemNotCreatedYetException; import org.openrefine.wikidata.updates.ItemUpdate; import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter; import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.datamodel.interfaces.Statement; @@ -39,68 +41,101 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement; /** * A class that rewrites an {@link ItemUpdate}, replacing reconciled entity id * values by their concrete values after creation of all the new items involved. - * + * * If an item has not been created yet, an {@link IllegalArgumentException} will * be raised. - * + * * The subject is treated as a special case: it is returned unchanged. This is * because it is guaranteed not to appear in the update (but it does appear in * the datamodel representation as the subject is passed around to the Claim * objects its document contains). - * + * * @author Antonin Delpeuch * */ public class ReconEntityRewriter extends DatamodelConverter { - private NewItemLibrary library; - private ItemIdValue subject; + private final NewItemLibrary library; + private final ItemIdValue subject; - /** - * Constructor. Sets up a rewriter which uses the provided library to look up - * qids of new items, and the subject (which should not be rewritten). - * - * @param library - * @param subject - */ - public ReconEntityRewriter(NewItemLibrary library, ItemIdValue subject) { - super(new DataObjectFactoryImpl()); - this.library = library; - this.subject = subject; - } + protected static final String notCreatedYetMessage = "Trying to rewrite an update where a new item was not created yet."; - @Override - public ItemIdValue copy(ItemIdValue value) { - if (subject.equals(value)) { - return value; - } - if (value instanceof ReconItemIdValue) { - ReconItemIdValue recon = (ReconItemIdValue) value; - if (recon.isNew()) { - String newId = library.getQid(recon.getReconInternalId()); - if (newId == null) { - throw new IllegalArgumentException( - "Trying to rewrite an update where a new item was not created yet."); - } - return Datamodel.makeItemIdValue(newId, recon.getRecon().identifierSpace); - } - } - return super.copy(value); - } + /** + * Constructor. Sets up a rewriter which uses the provided library to look up + * qids of new items. + * + * @param library + * the collection of items already created + * @param subject + * the subject id of the entity to rewrite + */ + public ReconEntityRewriter(NewItemLibrary library, ItemIdValue subject) { + super(new DataObjectFactoryImpl()); + this.library = library; + this.subject = subject; + } + + @Override + public ItemIdValue copy(ItemIdValue value) { + if (value instanceof ReconItemIdValue) { + ReconItemIdValue recon = (ReconItemIdValue) value; + if (recon.isNew()) { + String newId = library.getQid(recon.getReconInternalId()); + if (newId == null) { + if (subject.equals(recon)) { + return subject; + } else { + throw new MissingEntityIdFound(recon); + } + } + return Datamodel.makeItemIdValue(newId, recon.getRecon().identifierSpace); + } + } + return super.copy(value); + } + + /** + * Rewrite an update, replacing references to all entities already + * created by their fresh identifiers. The subject id might not have been + * created already, in which case it will be left untouched. All the other + * entities need to have been created already. + * + * @param update + * the update to rewrite + * @return + * the rewritten update + * @throws NewItemNotCreatedYetException + * if any non-subject entity had not been created yet + */ + public ItemUpdate rewrite(ItemUpdate update) throws NewItemNotCreatedYetException { + try { + ItemIdValue subject = copy(update.getItemId()); + Set labels = update.getLabels().stream().map(l -> copy(l)).collect(Collectors.toSet()); + Set labelsIfNew = update.getLabelsIfNew().stream().map(l -> copy(l)).collect(Collectors.toSet()); + Set descriptions = update.getDescriptions().stream().map(l -> copy(l)) + .collect(Collectors.toSet()); + Set descriptionsIfNew = update.getDescriptionsIfNew().stream().map(l -> copy(l)) + .collect(Collectors.toSet()); + Set aliases = update.getAliases().stream().map(l -> copy(l)).collect(Collectors.toSet()); + List addedStatements = update.getAddedStatements().stream().map(l -> copy(l)) + .collect(Collectors.toList()); + Set deletedStatements = update.getDeletedStatements().stream().map(l -> copy(l)) + .collect(Collectors.toSet()); + return new ItemUpdate(subject, addedStatements, deletedStatements, labels, labelsIfNew, descriptions, descriptionsIfNew, aliases); + } catch(MissingEntityIdFound e) { + throw new NewItemNotCreatedYetException(e.value); + } + } + + /** + * Unchecked version of {@class NewItemNotCreatedYetException}, for internal use only. + */ + protected static class MissingEntityIdFound extends Error { + private static final long serialVersionUID = 1L; + protected EntityIdValue value; + public MissingEntityIdFound(EntityIdValue missing) { + this.value = missing; + } + } - public ItemUpdate rewrite(ItemUpdate update) { - ItemIdValue subject = copy(update.getItemId()); - Set labels = update.getLabels().stream().map(l -> copy(l)).collect(Collectors.toSet()); - Set labelsIfNew = update.getLabelsIfNew().stream().map(l -> copy(l)).collect(Collectors.toSet()); - Set descriptions = update.getDescriptions().stream().map(l -> copy(l)) - .collect(Collectors.toSet()); - Set descriptionsIfNew = update.getDescriptionsIfNew().stream().map(l -> copy(l)) - .collect(Collectors.toSet()); - Set aliases = update.getAliases().stream().map(l -> copy(l)).collect(Collectors.toSet()); - List addedStatements = update.getAddedStatements().stream().map(l -> copy(l)) - .collect(Collectors.toList()); - Set deletedStatements = update.getDeletedStatements().stream().map(l -> copy(l)) - .collect(Collectors.toSet()); - return new ItemUpdate(subject, addedStatements, deletedStatements, labels, labelsIfNew, descriptions, descriptionsIfNew, aliases); - } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/exceptions/NewItemNotCreatedYetException.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/exceptions/NewItemNotCreatedYetException.java new file mode 100644 index 000000000..38880be96 --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/exceptions/NewItemNotCreatedYetException.java @@ -0,0 +1,19 @@ +package org.openrefine.wikidata.schema.exceptions; + +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; + +public class NewItemNotCreatedYetException extends Exception { + private static final long serialVersionUID = -563535295696710197L; + + private final EntityIdValue value; + + public NewItemNotCreatedYetException(EntityIdValue value) { + super("Attempted to rewrite an entity which was not created yet: "+value); + this.value = value; + } + + public EntityIdValue getMissingEntity() { + return value; + } + +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java index 24ca115e0..2cc84f6fa 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java @@ -1,18 +1,18 @@ /******************************************************************************* * MIT License - * + * * Copyright (c) 2018 Antonin Delpeuch - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -25,6 +25,7 @@ package org.openrefine.wikidata.editing; import static org.testng.Assert.assertEquals; +import org.openrefine.wikidata.schema.exceptions.NewItemNotCreatedYetException; import org.openrefine.wikidata.testing.TestingData; import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdateBuilder; @@ -35,67 +36,103 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; public class ReconEntityRewriterTest { - NewItemLibrary library = null; - ReconEntityRewriter rewriter = null; - ItemIdValue subject = TestingData.newIdA; - ItemIdValue newlyCreated = Datamodel.makeWikidataItemIdValue("Q1234"); + NewItemLibrary library = null; + ReconEntityRewriter rewriter = null; + ItemIdValue newlyCreated = Datamodel.makeWikidataItemIdValue("Q1234"); - @BeforeMethod - public void setUp() { - library = new NewItemLibrary(); - rewriter = new ReconEntityRewriter(library, subject); - } + @BeforeMethod + public void setUp() { + library = new NewItemLibrary(); + } - @Test(expectedExceptions = IllegalArgumentException.class) - public void testNotCreatedYet() { - rewriter.copy(TestingData.newIdB); - } + @Test(expectedExceptions = ReconEntityRewriter.MissingEntityIdFound.class) + public void testNotCreatedYet() { + rewriter = new ReconEntityRewriter(library, TestingData.newIdA); + rewriter.copy(TestingData.newIdB); + } - @Test - public void testSuccessfulRewrite() { - library.setQid(4567L, "Q1234"); - assertEquals(newlyCreated, rewriter.copy(TestingData.newIdB)); - } + @Test + public void testSuccessfulRewrite() { + rewriter = new ReconEntityRewriter(library, TestingData.newIdA); + library.setQid(4567L, "Q1234"); + assertEquals(newlyCreated, rewriter.copy(TestingData.newIdB)); + } - @Test - public void testSubjectNotRewriten() { - assertEquals(subject, rewriter.copy(subject)); - } + @Test + public void testSubjectNotRewritten() { + ItemIdValue subject = TestingData.newIdA; + rewriter = new ReconEntityRewriter(library, subject); + assertEquals(subject, rewriter.copy(subject)); + } - @Test - public void testMatched() { - assertEquals(TestingData.matchedId, rewriter.copy(TestingData.matchedId)); - } + @Test + public void testSubjectRewritten() { + ItemIdValue subject = TestingData.newIdB; + library.setQid(4567L, "Q1234"); + rewriter = new ReconEntityRewriter(library, subject); + assertEquals(newlyCreated, rewriter.copy(subject)); + } - @Test - public void testRewriteUpdate() { - library.setQid(4567L, "Q1234"); - ItemUpdate update = new ItemUpdateBuilder(subject) - .addStatement(TestingData.generateStatement(subject, TestingData.newIdB)) - .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId)) - .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true) - .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) - .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); - ItemUpdate rewritten = rewriter.rewrite(update); - ItemUpdate expected = new ItemUpdateBuilder(subject) - .addStatement(TestingData.generateStatement(subject, newlyCreated)) - .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId)) - .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true) - .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) - .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); - assertEquals(rewritten, expected); - } - - @Test - public void testRewriteCreation() { - library.setQid(4567L, "Q1234"); - ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdB) - .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) - .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); - ItemUpdate rewritten = rewriter.rewrite(update); - ItemUpdate expected = new ItemUpdateBuilder(newlyCreated) - .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) - .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); - assertEquals(rewritten, expected); - } + @Test + public void testMatched() { + rewriter = new ReconEntityRewriter(library, TestingData.newIdA); + assertEquals(TestingData.matchedId, rewriter.copy(TestingData.matchedId)); + } + + @Test + public void testRewriteCreate() throws NewItemNotCreatedYetException { + ItemIdValue subject = TestingData.newIdA; + rewriter = new ReconEntityRewriter(library, subject); + library.setQid(4567L, "Q1234"); + ItemUpdate update = new ItemUpdateBuilder(subject) + .addStatement(TestingData.generateStatement(subject, TestingData.newIdB)) + .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId)) + .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true) + .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) + .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); + ItemUpdate rewritten = rewriter.rewrite(update); + ItemUpdate expected = new ItemUpdateBuilder(subject) + .addStatement(TestingData.generateStatement(subject, newlyCreated)) + .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId)) + .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true) + .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) + .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); + assertEquals(rewritten, expected); + } + + @Test + public void testRewriteUpdateOnPreviouslyCreatedEntity() throws NewItemNotCreatedYetException { + ItemIdValue subject = TestingData.newIdA; + rewriter = new ReconEntityRewriter(library, subject); + library.setQid(4567L, "Q1234"); + ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdB) + .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) + .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); + ItemUpdate rewritten = rewriter.rewrite(update); + ItemUpdate expected = new ItemUpdateBuilder(newlyCreated) + .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) + .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); + assertEquals(rewritten, expected); + } + + @Test + public void testRewriteUpdateOnExistingEntity() throws NewItemNotCreatedYetException { + ItemIdValue subject = TestingData.matchedId; + rewriter = new ReconEntityRewriter(library, subject); + library.setQid(4567L, "Q1234"); + ItemUpdate update = new ItemUpdateBuilder(subject) + .addStatement(TestingData.generateStatement(subject, TestingData.newIdB)) + .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId)) + .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true) + .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) + .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); + ItemUpdate rewritten = rewriter.rewrite(update); + ItemUpdate expected = new ItemUpdateBuilder(subject) + .addStatement(TestingData.generateStatement(subject, newlyCreated)) + .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId)) + .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true) + .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) + .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); + assertEquals(rewritten, expected); + } }