From 773be2e1610affbd07509f5023b00e1b2bcfee79 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sat, 3 Mar 2018 02:36:32 +0000 Subject: [PATCH] Introduce a rewriter to update references to new items after creation --- .../wikidata/editing/NewItemLibrary.java | 43 +++++++-- .../wikidata/editing/ReconEntityRewriter.java | 88 +++++++++++++++++++ .../PerformWikibaseEditsOperation.java | 6 +- .../wikidata/editing/NewItemLibraryTest.java | 64 ++++++++++++++ .../editing/ReconEntityRewriterTest.java | 67 ++++++++++++++ 5 files changed, 258 insertions(+), 10 deletions(-) create mode 100644 extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java create mode 100644 extensions/wikidata/tests/src/org/openrefine/wikidata/editing/NewItemLibraryTest.java create mode 100644 extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java index c66f2493b..f8c944716 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java @@ -7,6 +7,10 @@ import java.util.Set; import java.util.HashSet; import com.google.refine.model.Project; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + import com.google.refine.model.Cell; import com.google.refine.model.Column; import com.google.refine.model.Recon; @@ -29,6 +33,11 @@ public class NewItemLibrary { map = new HashMap<>(); } + @JsonCreator + public NewItemLibrary(@JsonProperty("qidMap") Map map) { + this.map = map; + } + /** * Retrieves the Qid allocated to a given new cell * @param id: the fake ItemId generated by the cell @@ -67,23 +76,26 @@ public class NewItemLibrary { */ for(Row row : project.rows) { - for(Cell cell : row.cells) { + for(int i = 0; i != 
row.cells.size(); i++) { + Cell cell = row.cells.get(i); if (cell == null || cell.recon == null) { continue; } Recon recon = cell.recon; if (Recon.Judgment.New.equals(recon.judgment) && !reset && - map.containsKey(recon.id)) { + map.containsKey(recon.judgmentHistoryEntry)) { recon.judgment = Recon.Judgment.Matched; recon.match = new ReconCandidate( - map.get(recon.id), + map.get(recon.judgmentHistoryEntry), cell.value.toString(), new String[0], 100); + impactedColumns.add(i); } else if (Recon.Judgment.Matched.equals(recon.judgment) && reset && - map.containsKey(recon.id)) { + map.containsKey(recon.judgmentHistoryEntry)) { recon.judgment = Recon.Judgment.New; recon.match = null; + impactedColumns.add(i); } } } @@ -98,14 +110,27 @@ public class NewItemLibrary { * Getter, only meant to be used by Jackson * @return the underlying map */ + @JsonProperty("qidMap") public Map getQidMap() { return map; } - /** - * Setter, only meant to be used by Jackson - */ - public void setQidMap(Map newMap) { - map = newMap; + @Override + public boolean equals(Object other) { + if(other == null || !NewItemLibrary.class.isInstance(other)) { + return false; + } + NewItemLibrary otherLibrary = (NewItemLibrary)other; + return map.equals(otherLibrary.getQidMap()); + } + + @Override + public int hashCode() { + return map.hashCode(); + } + + @Override + public String toString() { + return map.toString(); } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java new file mode 100644 index 000000000..493664914 --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java @@ -0,0 +1,88 @@ +package org.openrefine.wikidata.editing; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue; +import 
org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter; +import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; +import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; +import org.wikidata.wdtk.datamodel.interfaces.Statement; +import org.wikidata.wdtk.datamodel.interfaces.Value; + +/** + * A class that rewrites an {@link ItemUpdate}, + * replacing new reconciled item id values with the Qids recorded + * for them in a {@link NewItemLibrary}. + * + * If a new item has no Qid in the library yet, {@link #copy(ItemIdValue)} + * throws an {@link IllegalArgumentException}. + * + * The subject is treated as a special case: it is returned unchanged. + * This is because it is guaranteed not to appear in the update (but + * it does appear in the datamodel representation as the subject is passed around + * to the Claim objects its document contains). + * + * @author Antonin Delpeuch + * + */ +public class ReconEntityRewriter extends DatamodelConverter { + + private NewItemLibrary library; + private ItemIdValue subject; + + /** + * Constructor. Sets up a rewriter which uses the provided library + * to look up qids of new items, and the subject (which should not be + * rewritten).
+ * + * @param library the library in which the Qids of new items are looked up + * @param subject the subject of the update, which {@link #copy(ItemIdValue)} returns unchanged + */ + public ReconEntityRewriter(NewItemLibrary library, ItemIdValue subject) { + super(new DataObjectFactoryImpl()); + this.library = library; + this.subject = subject; + } + + @Override + public ItemIdValue copy(ItemIdValue value) { + if(subject.equals(value)) { + return value; + } + if(value instanceof ReconItemIdValue) { + ReconItemIdValue recon = (ReconItemIdValue)value; + if(recon.isNew()) { + String newId = library.getQid(recon.getReconInternalId()); + if(newId == null) { + throw new IllegalArgumentException( + "Trying to rewrite an update where a new item was not created yet."); + } + return Datamodel.makeItemIdValue(newId, + recon.getSiteIri()); + } + } + return super.copy(value); + } + + public ItemUpdate rewrite(ItemUpdate update) { + Set labels = update.getLabels().stream() + .map(l -> copy(l)).collect(Collectors.toSet()); + Set descriptions = update.getDescriptions().stream() + .map(l -> copy(l)).collect(Collectors.toSet()); + Set aliases = update.getAliases().stream() + .map(l -> copy(l)).collect(Collectors.toSet()); + List addedStatements = update.getAddedStatements().stream() + .map(l -> copy(l)).collect(Collectors.toList()); + Set deletedStatements = update.getDeletedStatements().stream() + .map(l -> copy(l)).collect(Collectors.toSet()); + return new ItemUpdate(update.getItemId(), addedStatements, + deletedStatements, labels, descriptions, aliases); + } +} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java b/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java index 5315759db..48d95bc8b 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java @@ -16,6 +16,7 @@ import org.json.JSONWriter; import org.openrefine.wikidata.editing.ConnectionManager; import
org.openrefine.wikidata.editing.NewItemLibrary; +import org.openrefine.wikidata.editing.ReconEntityRewriter; import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.scheduler.ImpossibleSchedulingException; import org.openrefine.wikidata.updates.scheduler.UpdateScheduler; @@ -279,10 +280,13 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation { logger.info("Performing edits"); for(ItemUpdate update : batch) { + // Rewrite the update so that references to new items use the Qids stored in newItemLibrary + ReconEntityRewriter rewriter = new ReconEntityRewriter(newItemLibrary, update.getItemId()); + update = rewriter.rewrite(update); try { // New item - if (update.getItemId().getId() == "Q0") { + if (update.getItemId().getId().equals("Q0")) { ReconEntityIdValue newCell = (ReconEntityIdValue)update.getItemId(); update.normalizeLabelsAndAliases(); diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/NewItemLibraryTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/NewItemLibraryTest.java new file mode 100644 index 000000000..f8616b070 --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/NewItemLibraryTest.java @@ -0,0 +1,64 @@ +package org.openrefine.wikidata.editing; + +import static org.junit.Assert.assertEquals; + +import org.openrefine.wikidata.testing.JacksonSerializationTest; +import org.openrefine.wikidata.testing.TestingData; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.tests.RefineTest; + +public class NewItemLibraryTest extends RefineTest { + private NewItemLibrary library; + + @BeforeMethod + public void setUp() { + library = new NewItemLibrary(); + library.setQid(1234L, "Q345"); + library.setQid(3289L, "Q384"); + } + + @Test + public void testRetrieveItem() { + assertEquals("Q345", library.getQid(1234L)); + } + + @Test
+ public void testUpdateReconciledCells() { + Project project = createCSVProject(TestingData.inceptionWithNewCsv); + project.rows.get(0).cells.set(0, TestingData.makeNewItemCell(3289L, "University of Ljubljana")); + project.rows.get(1).cells.set(0, TestingData.makeMatchedCell("Q865528", "University of Warwick")); + project.rows.get(2).cells.set(0, TestingData.makeNewItemCell(1234L, "new uni")); + isNewTo(3289L, project.rows.get(0).cells.get(0)); + isMatchedTo("Q865528", project.rows.get(1).cells.get(0)); + isNewTo(1234L, project.rows.get(2).cells.get(0)); + library.updateReconciledCells(project, false); + isMatchedTo("Q384", project.rows.get(0).cells.get(0)); + isMatchedTo("Q865528", project.rows.get(1).cells.get(0)); + isMatchedTo("Q345", project.rows.get(2).cells.get(0)); + library.updateReconciledCells(project, true); + isNewTo(3289L, project.rows.get(0).cells.get(0)); + isMatchedTo("Q865528", project.rows.get(1).cells.get(0)); + isNewTo(1234L, project.rows.get(2).cells.get(0)); + } + + @Test + public void testSerialize() { + JacksonSerializationTest.canonicalSerialization(NewItemLibrary.class, library, + "{\"qidMap\":{\"1234\":\"Q345\",\"3289\":\"Q384\"}}"); + } + + private void isMatchedTo(String qid, Cell cell) { + assertEquals(Recon.Judgment.Matched, cell.recon.judgment); + assertEquals(qid, cell.recon.match.id); + } + + private void isNewTo(long id, Cell cell) { + assertEquals(Recon.Judgment.New, cell.recon.judgment); + assertEquals(id, cell.recon.judgmentHistoryEntry); + } +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java new file mode 100644 index 000000000..814c8df1c --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java @@ -0,0 +1,67 @@ +package org.openrefine.wikidata.editing; + +import static org.junit.Assert.assertEquals; + +import
org.openrefine.wikidata.testing.TestingData; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.openrefine.wikidata.updates.ItemUpdateBuilder; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; + +public class ReconEntityRewriterTest { + + NewItemLibrary library = null; + ReconEntityRewriter rewriter = null; + ItemIdValue subject = TestingData.newIdA; + ItemIdValue newlyCreated = Datamodel.makeWikidataItemIdValue("Q1234"); + + @BeforeMethod + public void setUp() { + library = new NewItemLibrary(); + rewriter = new ReconEntityRewriter(library, subject); + } + + @Test(expectedExceptions=IllegalArgumentException.class) + public void testNotCreatedYet() { + rewriter.copy(TestingData.newIdB); + } + + @Test + public void testSuccessfulRewrite() { + library.setQid(4567L, "Q1234"); + assertEquals(newlyCreated, rewriter.copy(TestingData.newIdB)); + } + + @Test + public void testSubjectNotRewriten() { + assertEquals(subject, rewriter.copy(subject)); + } + + @Test + public void testMatched() { + assertEquals(TestingData.matchedId, rewriter.copy(TestingData.matchedId)); + } + + @Test + public void testRewriteUpdate() { + library.setQid(4567L, "Q1234"); + ItemUpdate update = new ItemUpdateBuilder(subject) + .addStatement(TestingData.generateStatement(subject, TestingData.newIdB)) + .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId)) + .addLabel(Datamodel.makeMonolingualTextValue("label", "de")) + .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de")) + .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")) + .build(); + ItemUpdate rewritten = rewriter.rewrite(update); + ItemUpdate expected = new ItemUpdateBuilder(subject) + .addStatement(TestingData.generateStatement(subject, newlyCreated)) + .deleteStatement(TestingData.generateStatement(subject,
TestingData.existingId)) + .addLabel(Datamodel.makeMonolingualTextValue("label", "de")) + .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de")) + .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")) + .build(); + assertEquals(expected, rewritten); + } +}