Introduce a rewriter to update references to new items after creation
This commit is contained in:
parent
7cb8757028
commit
773be2e161
@ -7,6 +7,10 @@ import java.util.Set;
|
|||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Column;
|
import com.google.refine.model.Column;
|
||||||
import com.google.refine.model.Recon;
|
import com.google.refine.model.Recon;
|
||||||
@ -29,6 +33,11 @@ public class NewItemLibrary {
|
|||||||
map = new HashMap<>();
|
map = new HashMap<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creator used by Jackson when deserializing a library: the value of the
 * "qidMap" JSON property becomes the underlying map. The supplied map is
 * stored by reference, not copied.
 *
 * @param map the map to use as backing storage
 */
@JsonCreator
|
||||||
|
public NewItemLibrary(@JsonProperty("qidMap") Map<Long, String> map) {
|
||||||
|
this.map = map;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the Qid allocated to a given new cell
|
* Retrieves the Qid allocated to a given new cell
|
||||||
* @param id: the fake ItemId generated by the cell
|
* @param id: the fake ItemId generated by the cell
|
||||||
@ -67,23 +76,26 @@ public class NewItemLibrary {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
for(Row row : project.rows) {
|
for(Row row : project.rows) {
|
||||||
for(Cell cell : row.cells) {
|
for(int i = 0; i != row.cells.size(); i++) {
|
||||||
|
Cell cell = row.cells.get(i);
|
||||||
if (cell == null || cell.recon == null) {
|
if (cell == null || cell.recon == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Recon recon = cell.recon;
|
Recon recon = cell.recon;
|
||||||
if (Recon.Judgment.New.equals(recon.judgment) && !reset &&
|
if (Recon.Judgment.New.equals(recon.judgment) && !reset &&
|
||||||
map.containsKey(recon.id)) {
|
map.containsKey(recon.judgmentHistoryEntry)) {
|
||||||
recon.judgment = Recon.Judgment.Matched;
|
recon.judgment = Recon.Judgment.Matched;
|
||||||
recon.match = new ReconCandidate(
|
recon.match = new ReconCandidate(
|
||||||
map.get(recon.id),
|
map.get(recon.judgmentHistoryEntry),
|
||||||
cell.value.toString(),
|
cell.value.toString(),
|
||||||
new String[0],
|
new String[0],
|
||||||
100);
|
100);
|
||||||
|
impactedColumns.add(i);
|
||||||
} else if (Recon.Judgment.Matched.equals(recon.judgment) && reset &&
|
} else if (Recon.Judgment.Matched.equals(recon.judgment) && reset &&
|
||||||
map.containsKey(recon.id)) {
|
map.containsKey(recon.judgmentHistoryEntry)) {
|
||||||
recon.judgment = Recon.Judgment.New;
|
recon.judgment = Recon.Judgment.New;
|
||||||
recon.match = null;
|
recon.match = null;
|
||||||
|
impactedColumns.add(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -98,14 +110,27 @@ public class NewItemLibrary {
|
|||||||
* Getter, only meant to be used by Jackson
|
* Getter, only meant to be used by Jackson
|
||||||
* @return the underlying map
|
* @return the underlying map
|
||||||
*/
|
*/
|
||||||
|
@JsonProperty("qidMap")
|
||||||
public Map<Long, String> getQidMap() {
|
public Map<Long, String> getQidMap() {
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
@Override
|
||||||
* Setter, only meant to be used by Jackson
|
public boolean equals(Object other) {
|
||||||
*/
|
if(other == null || !NewItemLibrary.class.isInstance(other)) {
|
||||||
public void setQidMap(Map<Long, String> newMap) {
|
return false;
|
||||||
map = newMap;
|
}
|
||||||
|
NewItemLibrary otherLibrary = (NewItemLibrary)other;
|
||||||
|
return map.equals(otherLibrary.getQidMap());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Hash code of the library, delegated entirely to the underlying map.
 *
 * @return the hash code of the underlying map
 */
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return map.hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return map.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,88 @@
|
|||||||
|
package org.openrefine.wikidata.editing;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
||||||
|
import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter;
|
||||||
|
import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class that rewrites an {@link ItemUpdate},
|
||||||
|
* replacing reconciled entity id values by their concrete
|
||||||
|
* values after creation of all the new items involved.
|
||||||
|
*
|
||||||
|
* If an item has not been created yet, an {@link IllegalArgumentException}
|
||||||
|
* will be raised.
|
||||||
|
*
|
||||||
|
* The subject is treated as a special case: it is returned unchanged.
|
||||||
|
* This is because it is guaranteed not to appear in the update (but
|
||||||
|
* it does appear in the datamodel representation as the subject is passed around
|
||||||
|
* to the Claim objects its document contains).
|
||||||
|
*
|
||||||
|
* @author Antonin Delpeuch
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class ReconEntityRewriter extends DatamodelConverter {
|
||||||
|
|
||||||
|
private NewItemLibrary library;
|
||||||
|
private ItemIdValue subject;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor. Sets up a rewriter which uses the provided library
|
||||||
|
* to look up qids of new items, and the subject (which should not be
|
||||||
|
* rewritten).
|
||||||
|
*
|
||||||
|
* @param library
|
||||||
|
* @param subject
|
||||||
|
*/
|
||||||
|
public ReconEntityRewriter(NewItemLibrary library, ItemIdValue subject) {
|
||||||
|
super(new DataObjectFactoryImpl());
|
||||||
|
this.library = library;
|
||||||
|
this.subject = subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ItemIdValue copy(ItemIdValue value) {
|
||||||
|
if(subject.equals(value)) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
if(value instanceof ReconItemIdValue) {
|
||||||
|
ReconItemIdValue recon = (ReconItemIdValue)value;
|
||||||
|
if(recon.isNew()) {
|
||||||
|
String newId = library.getQid(recon.getReconInternalId());
|
||||||
|
if(newId == null) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"Trying to rewrite an update where a new item was not created yet.");
|
||||||
|
}
|
||||||
|
return Datamodel.makeItemIdValue(newId,
|
||||||
|
recon.getSiteIri());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return super.copy(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ItemUpdate rewrite(ItemUpdate update) {
|
||||||
|
Set<MonolingualTextValue> labels = update.getLabels().stream()
|
||||||
|
.map(l -> copy(l)).collect(Collectors.toSet());
|
||||||
|
Set<MonolingualTextValue> descriptions = update.getDescriptions().stream()
|
||||||
|
.map(l -> copy(l)).collect(Collectors.toSet());
|
||||||
|
Set<MonolingualTextValue> aliases = update.getAliases().stream()
|
||||||
|
.map(l -> copy(l)).collect(Collectors.toSet());
|
||||||
|
List<Statement> addedStatements = update.getAddedStatements().stream()
|
||||||
|
.map(l -> copy(l)).collect(Collectors.toList());
|
||||||
|
Set<Statement> deletedStatements = update.getDeletedStatements().stream()
|
||||||
|
.map(l -> copy(l)).collect(Collectors.toSet());
|
||||||
|
return new ItemUpdate(update.getItemId(), addedStatements,
|
||||||
|
deletedStatements, labels, descriptions, aliases);
|
||||||
|
}
|
||||||
|
}
|
@ -16,6 +16,7 @@ import org.json.JSONWriter;
|
|||||||
|
|
||||||
import org.openrefine.wikidata.editing.ConnectionManager;
|
import org.openrefine.wikidata.editing.ConnectionManager;
|
||||||
import org.openrefine.wikidata.editing.NewItemLibrary;
|
import org.openrefine.wikidata.editing.NewItemLibrary;
|
||||||
|
import org.openrefine.wikidata.editing.ReconEntityRewriter;
|
||||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
import org.openrefine.wikidata.updates.scheduler.ImpossibleSchedulingException;
|
import org.openrefine.wikidata.updates.scheduler.ImpossibleSchedulingException;
|
||||||
import org.openrefine.wikidata.updates.scheduler.UpdateScheduler;
|
import org.openrefine.wikidata.updates.scheduler.UpdateScheduler;
|
||||||
@ -279,10 +280,13 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
|
|||||||
logger.info("Performing edits");
|
logger.info("Performing edits");
|
||||||
|
|
||||||
for(ItemUpdate update : batch) {
|
for(ItemUpdate update : batch) {
|
||||||
|
// Rewrite the update
|
||||||
|
ReconEntityRewriter rewriter = new ReconEntityRewriter(newItemLibrary, update.getItemId());
|
||||||
|
update = rewriter.rewrite(update);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// New item
|
// New item
|
||||||
if (update.getItemId().getId() == "Q0") {
|
if (update.getItemId().getId().equals("Q0")) {
|
||||||
ReconEntityIdValue newCell = (ReconEntityIdValue)update.getItemId();
|
ReconEntityIdValue newCell = (ReconEntityIdValue)update.getItemId();
|
||||||
update.normalizeLabelsAndAliases();
|
update.normalizeLabelsAndAliases();
|
||||||
|
|
||||||
|
@ -0,0 +1,64 @@
|
|||||||
|
package org.openrefine.wikidata.editing;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.JacksonSerializationTest;
|
||||||
|
import org.openrefine.wikidata.testing.TestingData;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.refine.model.Cell;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.model.Recon;
|
||||||
|
import com.google.refine.tests.RefineTest;
|
||||||
|
|
||||||
|
public class NewItemLibraryTest extends RefineTest {
|
||||||
|
private NewItemLibrary library;
|
||||||
|
|
||||||
|
@BeforeMethod
|
||||||
|
public void setUp() {
|
||||||
|
library = new NewItemLibrary();
|
||||||
|
library.setQid(1234L, "Q345");
|
||||||
|
library.setQid(3289L, "Q384");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRetrieveItem() {
|
||||||
|
assertEquals("Q345", library.getQid(1234L));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateReconciledCells() {
|
||||||
|
Project project = createCSVProject(TestingData.inceptionWithNewCsv);
|
||||||
|
project.rows.get(0).cells.set(0, TestingData.makeNewItemCell(3289L, "University of Ljubljana"));
|
||||||
|
project.rows.get(1).cells.set(0, TestingData.makeMatchedCell("Q865528", "University of Warwick"));
|
||||||
|
project.rows.get(2).cells.set(0, TestingData.makeNewItemCell(1234L, "new uni"));
|
||||||
|
isNewTo(3289L, project.rows.get(0).cells.get(0));
|
||||||
|
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
||||||
|
isNewTo(1234L, project.rows.get(2).cells.get(0));
|
||||||
|
library.updateReconciledCells(project, false);
|
||||||
|
isMatchedTo("Q384", project.rows.get(0).cells.get(0));
|
||||||
|
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
||||||
|
isMatchedTo("Q345", project.rows.get(2).cells.get(0));
|
||||||
|
library.updateReconciledCells(project, true);
|
||||||
|
isNewTo(3289L, project.rows.get(0).cells.get(0));
|
||||||
|
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
||||||
|
isNewTo(1234L, project.rows.get(2).cells.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSerialize() {
|
||||||
|
JacksonSerializationTest.canonicalSerialization(NewItemLibrary.class, library,
|
||||||
|
"{\"qidMap\":{\"1234\":\"Q345\",\"3289\":\"Q384\"}}");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void isMatchedTo(String qid, Cell cell) {
|
||||||
|
assertEquals(Recon.Judgment.Matched, cell.recon.judgment);
|
||||||
|
assertEquals(qid, cell.recon.match.id);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void isNewTo(long id, Cell cell) {
|
||||||
|
assertEquals(Recon.Judgment.New, cell.recon.judgment);
|
||||||
|
assertEquals(id, cell.recon.judgmentHistoryEntry);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,67 @@
|
|||||||
|
package org.openrefine.wikidata.editing;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingData;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
|
||||||
|
public class ReconEntityRewriterTest {
|
||||||
|
|
||||||
|
NewItemLibrary library = null;
|
||||||
|
ReconEntityRewriter rewriter = null;
|
||||||
|
ItemIdValue subject = TestingData.newIdA;
|
||||||
|
ItemIdValue newlyCreated = Datamodel.makeWikidataItemIdValue("Q1234");
|
||||||
|
|
||||||
|
@BeforeMethod
|
||||||
|
public void setUp() {
|
||||||
|
library = new NewItemLibrary();
|
||||||
|
rewriter = new ReconEntityRewriter(library, subject);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions=IllegalArgumentException.class)
|
||||||
|
public void testNotCreatedYet() {
|
||||||
|
rewriter.copy(TestingData.newIdB);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSuccessfulRewrite() {
|
||||||
|
library.setQid(4567L, "Q1234");
|
||||||
|
assertEquals(newlyCreated, rewriter.copy(TestingData.newIdB));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSubjectNotRewriten() {
|
||||||
|
assertEquals(subject, rewriter.copy(subject));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMatched() {
|
||||||
|
assertEquals(TestingData.matchedId, rewriter.copy(TestingData.matchedId));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRewriteUpdate() {
|
||||||
|
library.setQid(4567L, "Q1234");
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(subject)
|
||||||
|
.addStatement(TestingData.generateStatement(subject, TestingData.newIdB))
|
||||||
|
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
|
||||||
|
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"))
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"))
|
||||||
|
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de"))
|
||||||
|
.build();
|
||||||
|
ItemUpdate rewritten = rewriter.rewrite(update);
|
||||||
|
ItemUpdate expected = new ItemUpdateBuilder(subject)
|
||||||
|
.addStatement(TestingData.generateStatement(subject, newlyCreated))
|
||||||
|
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
|
||||||
|
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"))
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"))
|
||||||
|
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de"))
|
||||||
|
.build();
|
||||||
|
assertEquals(expected, rewritten);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user