Introduce a rewriter to update references to new items after creation
This commit is contained in:
parent
7cb8757028
commit
773be2e161
@ -7,6 +7,10 @@ import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
import com.google.refine.model.Project;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.Recon;
|
||||
@ -29,6 +33,11 @@ public class NewItemLibrary {
|
||||
map = new HashMap<>();
|
||||
}
|
||||
|
||||
/**
 * Deserialization constructor, used by Jackson to restore a library
 * from its "qidMap" JSON field.
 *
 * @param map
 *     the map from the ids of new cells to the Qids stored for them;
 *     when null (for instance because the "qidMap" field is absent
 *     from the JSON), the library starts out empty
 */
@JsonCreator
public NewItemLibrary(@JsonProperty("qidMap") Map<Long, String> map) {
    // Jackson hands over null for a creator property missing from the input.
    // Methods such as equals, hashCode and toString dereference the map
    // unconditionally, so fall back to an empty map instead of storing null.
    this.map = map != null ? map : new HashMap<>();
}
|
||||
|
||||
/**
|
||||
* Retrieves the Qid allocated to a given new cell
|
||||
* @param id: the fake ItemId generated by the cell
|
||||
@ -67,23 +76,26 @@ public class NewItemLibrary {
|
||||
*/
|
||||
|
||||
for(Row row : project.rows) {
|
||||
for(Cell cell : row.cells) {
|
||||
for(int i = 0; i != row.cells.size(); i++) {
|
||||
Cell cell = row.cells.get(i);
|
||||
if (cell == null || cell.recon == null) {
|
||||
continue;
|
||||
}
|
||||
Recon recon = cell.recon;
|
||||
if (Recon.Judgment.New.equals(recon.judgment) && !reset &&
|
||||
map.containsKey(recon.id)) {
|
||||
map.containsKey(recon.judgmentHistoryEntry)) {
|
||||
recon.judgment = Recon.Judgment.Matched;
|
||||
recon.match = new ReconCandidate(
|
||||
map.get(recon.id),
|
||||
map.get(recon.judgmentHistoryEntry),
|
||||
cell.value.toString(),
|
||||
new String[0],
|
||||
100);
|
||||
impactedColumns.add(i);
|
||||
} else if (Recon.Judgment.Matched.equals(recon.judgment) && reset &&
|
||||
map.containsKey(recon.id)) {
|
||||
map.containsKey(recon.judgmentHistoryEntry)) {
|
||||
recon.judgment = Recon.Judgment.New;
|
||||
recon.match = null;
|
||||
impactedColumns.add(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -98,14 +110,27 @@ public class NewItemLibrary {
|
||||
* Getter, only meant to be used by Jackson
|
||||
* @return the underlying map
|
||||
*/
|
||||
@JsonProperty("qidMap")
|
||||
public Map<Long, String> getQidMap() {
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setter, only meant to be used by Jackson
|
||||
*/
|
||||
public void setQidMap(Map<Long, String> newMap) {
|
||||
map = newMap;
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if(other == null || !NewItemLibrary.class.isInstance(other)) {
|
||||
return false;
|
||||
}
|
||||
NewItemLibrary otherLibrary = (NewItemLibrary)other;
|
||||
return map.equals(otherLibrary.getQidMap());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return map.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return map.toString();
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,88 @@
|
||||
package org.openrefine.wikidata.editing;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
||||
import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter;
|
||||
import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
/**
|
||||
* A class that rewrites an {@link ItemUpdate},
|
||||
* replacing reconciled entity id values by their concrete
|
||||
* values after creation of all the new items involved.
|
||||
*
|
||||
* If an item has not been created yet, an {@link IllegalArgumentException}
|
||||
* will be raised.
|
||||
*
|
||||
* The subject is treated as a special case: it is returned unchanged.
|
||||
* This is because it is guaranteed not to appear in the update (but
|
||||
* it does appear in the datamodel representation as the subject is passed around
|
||||
* to the Claim objects its document contains).
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class ReconEntityRewriter extends DatamodelConverter {
|
||||
|
||||
private NewItemLibrary library;
|
||||
private ItemIdValue subject;
|
||||
|
||||
/**
|
||||
* Constructor. Sets up a rewriter which uses the provided library
|
||||
* to look up qids of new items, and the subject (which should not be
|
||||
* rewritten).
|
||||
*
|
||||
* @param library
|
||||
* @param subject
|
||||
*/
|
||||
public ReconEntityRewriter(NewItemLibrary library, ItemIdValue subject) {
|
||||
super(new DataObjectFactoryImpl());
|
||||
this.library = library;
|
||||
this.subject = subject;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ItemIdValue copy(ItemIdValue value) {
|
||||
if(subject.equals(value)) {
|
||||
return value;
|
||||
}
|
||||
if(value instanceof ReconItemIdValue) {
|
||||
ReconItemIdValue recon = (ReconItemIdValue)value;
|
||||
if(recon.isNew()) {
|
||||
String newId = library.getQid(recon.getReconInternalId());
|
||||
if(newId == null) {
|
||||
throw new IllegalArgumentException(
|
||||
"Trying to rewrite an update where a new item was not created yet.");
|
||||
}
|
||||
return Datamodel.makeItemIdValue(newId,
|
||||
recon.getSiteIri());
|
||||
}
|
||||
}
|
||||
return super.copy(value);
|
||||
}
|
||||
|
||||
public ItemUpdate rewrite(ItemUpdate update) {
|
||||
Set<MonolingualTextValue> labels = update.getLabels().stream()
|
||||
.map(l -> copy(l)).collect(Collectors.toSet());
|
||||
Set<MonolingualTextValue> descriptions = update.getDescriptions().stream()
|
||||
.map(l -> copy(l)).collect(Collectors.toSet());
|
||||
Set<MonolingualTextValue> aliases = update.getAliases().stream()
|
||||
.map(l -> copy(l)).collect(Collectors.toSet());
|
||||
List<Statement> addedStatements = update.getAddedStatements().stream()
|
||||
.map(l -> copy(l)).collect(Collectors.toList());
|
||||
Set<Statement> deletedStatements = update.getDeletedStatements().stream()
|
||||
.map(l -> copy(l)).collect(Collectors.toSet());
|
||||
return new ItemUpdate(update.getItemId(), addedStatements,
|
||||
deletedStatements, labels, descriptions, aliases);
|
||||
}
|
||||
}
|
@ -16,6 +16,7 @@ import org.json.JSONWriter;
|
||||
|
||||
import org.openrefine.wikidata.editing.ConnectionManager;
|
||||
import org.openrefine.wikidata.editing.NewItemLibrary;
|
||||
import org.openrefine.wikidata.editing.ReconEntityRewriter;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.scheduler.ImpossibleSchedulingException;
|
||||
import org.openrefine.wikidata.updates.scheduler.UpdateScheduler;
|
||||
@ -279,10 +280,13 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
|
||||
logger.info("Performing edits");
|
||||
|
||||
for(ItemUpdate update : batch) {
|
||||
// Rewrite the update
|
||||
ReconEntityRewriter rewriter = new ReconEntityRewriter(newItemLibrary, update.getItemId());
|
||||
update = rewriter.rewrite(update);
|
||||
|
||||
try {
|
||||
// New item
|
||||
if (update.getItemId().getId() == "Q0") {
|
||||
if (update.getItemId().getId().equals("Q0")) {
|
||||
ReconEntityIdValue newCell = (ReconEntityIdValue)update.getItemId();
|
||||
update.normalizeLabelsAndAliases();
|
||||
|
||||
|
@ -0,0 +1,64 @@
|
||||
package org.openrefine.wikidata.editing;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import org.openrefine.wikidata.testing.JacksonSerializationTest;
|
||||
import org.openrefine.wikidata.testing.TestingData;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Recon;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
|
||||
public class NewItemLibraryTest extends RefineTest {
|
||||
private NewItemLibrary library;
|
||||
|
||||
@BeforeMethod
|
||||
public void setUp() {
|
||||
library = new NewItemLibrary();
|
||||
library.setQid(1234L, "Q345");
|
||||
library.setQid(3289L, "Q384");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRetrieveItem() {
|
||||
assertEquals("Q345", library.getQid(1234L));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUpdateReconciledCells() {
|
||||
Project project = createCSVProject(TestingData.inceptionWithNewCsv);
|
||||
project.rows.get(0).cells.set(0, TestingData.makeNewItemCell(3289L, "University of Ljubljana"));
|
||||
project.rows.get(1).cells.set(0, TestingData.makeMatchedCell("Q865528", "University of Warwick"));
|
||||
project.rows.get(2).cells.set(0, TestingData.makeNewItemCell(1234L, "new uni"));
|
||||
isNewTo(3289L, project.rows.get(0).cells.get(0));
|
||||
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
||||
isNewTo(1234L, project.rows.get(2).cells.get(0));
|
||||
library.updateReconciledCells(project, false);
|
||||
isMatchedTo("Q384", project.rows.get(0).cells.get(0));
|
||||
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
||||
isMatchedTo("Q345", project.rows.get(2).cells.get(0));
|
||||
library.updateReconciledCells(project, true);
|
||||
isNewTo(3289L, project.rows.get(0).cells.get(0));
|
||||
isMatchedTo("Q865528", project.rows.get(1).cells.get(0));
|
||||
isNewTo(1234L, project.rows.get(2).cells.get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerialize() {
|
||||
JacksonSerializationTest.canonicalSerialization(NewItemLibrary.class, library,
|
||||
"{\"qidMap\":{\"1234\":\"Q345\",\"3289\":\"Q384\"}}");
|
||||
}
|
||||
|
||||
private void isMatchedTo(String qid, Cell cell) {
|
||||
assertEquals(Recon.Judgment.Matched, cell.recon.judgment);
|
||||
assertEquals(qid, cell.recon.match.id);
|
||||
}
|
||||
|
||||
private void isNewTo(long id, Cell cell) {
|
||||
assertEquals(Recon.Judgment.New, cell.recon.judgment);
|
||||
assertEquals(id, cell.recon.judgmentHistoryEntry);
|
||||
}
|
||||
}
|
@ -0,0 +1,67 @@
|
||||
package org.openrefine.wikidata.editing;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingData;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
|
||||
public class ReconEntityRewriterTest {
|
||||
|
||||
NewItemLibrary library = null;
|
||||
ReconEntityRewriter rewriter = null;
|
||||
ItemIdValue subject = TestingData.newIdA;
|
||||
ItemIdValue newlyCreated = Datamodel.makeWikidataItemIdValue("Q1234");
|
||||
|
||||
@BeforeMethod
|
||||
public void setUp() {
|
||||
library = new NewItemLibrary();
|
||||
rewriter = new ReconEntityRewriter(library, subject);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions=IllegalArgumentException.class)
|
||||
public void testNotCreatedYet() {
|
||||
rewriter.copy(TestingData.newIdB);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSuccessfulRewrite() {
|
||||
library.setQid(4567L, "Q1234");
|
||||
assertEquals(newlyCreated, rewriter.copy(TestingData.newIdB));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSubjectNotRewriten() {
|
||||
assertEquals(subject, rewriter.copy(subject));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatched() {
|
||||
assertEquals(TestingData.matchedId, rewriter.copy(TestingData.matchedId));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRewriteUpdate() {
|
||||
library.setQid(4567L, "Q1234");
|
||||
ItemUpdate update = new ItemUpdateBuilder(subject)
|
||||
.addStatement(TestingData.generateStatement(subject, TestingData.newIdB))
|
||||
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
|
||||
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"))
|
||||
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"))
|
||||
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de"))
|
||||
.build();
|
||||
ItemUpdate rewritten = rewriter.rewrite(update);
|
||||
ItemUpdate expected = new ItemUpdateBuilder(subject)
|
||||
.addStatement(TestingData.generateStatement(subject, newlyCreated))
|
||||
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
|
||||
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"))
|
||||
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"))
|
||||
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de"))
|
||||
.build();
|
||||
assertEquals(expected, rewritten);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user