Skip update if rewriting failed

This commit is contained in:
Antonin Delpeuch 2019-11-05 17:49:47 +00:00
parent ec5c9cd418
commit de6dedb43d
4 changed files with 213 additions and 116 deletions

View File

@ -31,6 +31,7 @@ import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue; import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
import org.openrefine.wikidata.schema.exceptions.NewItemNotCreatedYetException;
import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler; import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -128,7 +129,12 @@ public class EditBatchProcessor {
// Rewrite mentions to new items // Rewrite mentions to new items
ReconEntityRewriter rewriter = new ReconEntityRewriter(library, update.getItemId()); ReconEntityRewriter rewriter = new ReconEntityRewriter(library, update.getItemId());
update = rewriter.rewrite(update); try {
update = rewriter.rewrite(update);
} catch (NewItemNotCreatedYetException e) {
logger.warn("Failed to rewrite update on entity "+update.getItemId()+". Missing entity: "+e.getMissingEntity()+". Skipping update.");
return;
}
try { try {
// New item // New item

View File

@ -1,18 +1,18 @@
/******************************************************************************* /*******************************************************************************
* MIT License * MIT License
* *
* Copyright (c) 2018 Antonin Delpeuch * Copyright (c) 2018 Antonin Delpeuch
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights * in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is * copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions: * furnished to do so, subject to the following conditions:
* *
* The above copyright notice and this permission notice shall be included in all * The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software. * copies or substantial portions of the Software.
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@ -28,10 +28,12 @@ import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue; import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
import org.openrefine.wikidata.schema.exceptions.NewItemNotCreatedYetException;
import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter; import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter;
import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl; import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.Statement;
@ -39,68 +41,101 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
/** /**
* A class that rewrites an {@link ItemUpdate}, replacing reconciled entity id * A class that rewrites an {@link ItemUpdate}, replacing reconciled entity id
* values by their concrete values after creation of all the new items involved. * values by their concrete values after creation of all the new items involved.
* *
* If an item has not been created yet, an {@link IllegalArgumentException} will * If an item has not been created yet, an {@link IllegalArgumentException} will
* be raised. * be raised.
* *
* The subject is treated as a special case: it is returned unchanged. This is * The subject is treated as a special case: it is returned unchanged. This is
* because it is guaranteed not to appear in the update (but it does appear in * because it is guaranteed not to appear in the update (but it does appear in
* the datamodel representation as the subject is passed around to the Claim * the datamodel representation as the subject is passed around to the Claim
* objects its document contains). * objects its document contains).
* *
* @author Antonin Delpeuch * @author Antonin Delpeuch
* *
*/ */
public class ReconEntityRewriter extends DatamodelConverter { public class ReconEntityRewriter extends DatamodelConverter {
private NewItemLibrary library; private final NewItemLibrary library;
private ItemIdValue subject; private final ItemIdValue subject;
/** protected static final String notCreatedYetMessage = "Trying to rewrite an update where a new item was not created yet.";
* Constructor. Sets up a rewriter which uses the provided library to look up
* qids of new items, and the subject (which should not be rewritten).
*
* @param library
* @param subject
*/
public ReconEntityRewriter(NewItemLibrary library, ItemIdValue subject) {
super(new DataObjectFactoryImpl());
this.library = library;
this.subject = subject;
}
@Override /**
public ItemIdValue copy(ItemIdValue value) { * Constructor. Sets up a rewriter which uses the provided library to look up
if (subject.equals(value)) { * qids of new items.
return value; *
} * @param library
if (value instanceof ReconItemIdValue) { * the collection of items already created
ReconItemIdValue recon = (ReconItemIdValue) value; * @param subject
if (recon.isNew()) { * the subject id of the entity to rewrite
String newId = library.getQid(recon.getReconInternalId()); */
if (newId == null) { public ReconEntityRewriter(NewItemLibrary library, ItemIdValue subject) {
throw new IllegalArgumentException( super(new DataObjectFactoryImpl());
"Trying to rewrite an update where a new item was not created yet."); this.library = library;
} this.subject = subject;
return Datamodel.makeItemIdValue(newId, recon.getRecon().identifierSpace); }
}
} @Override
return super.copy(value); public ItemIdValue copy(ItemIdValue value) {
} if (value instanceof ReconItemIdValue) {
ReconItemIdValue recon = (ReconItemIdValue) value;
if (recon.isNew()) {
String newId = library.getQid(recon.getReconInternalId());
if (newId == null) {
if (subject.equals(recon)) {
return subject;
} else {
throw new MissingEntityIdFound(recon);
}
}
return Datamodel.makeItemIdValue(newId, recon.getRecon().identifierSpace);
}
}
return super.copy(value);
}
/**
 * Rewrites an update, replacing references to all entities already
 * created by their fresh identifiers. The subject id might not have been
 * created already, in which case it will be left untouched. All the other
 * entities need to have been created already.
 *
 * @param update
 *            the update to rewrite
 * @return the rewritten update
 * @throws NewItemNotCreatedYetException
 *             if any non-subject entity had not been created yet; the
 *             internal {@link MissingEntityIdFound} which signalled the
 *             problem is attached as the cause
 */
public ItemUpdate rewrite(ItemUpdate update) throws NewItemNotCreatedYetException {
    try {
        // Named differently from the "subject" field to avoid shadowing it.
        ItemIdValue rewrittenSubject = copy(update.getItemId());
        Set<MonolingualTextValue> labels = update.getLabels().stream()
                .map(l -> copy(l))
                .collect(Collectors.toSet());
        Set<MonolingualTextValue> labelsIfNew = update.getLabelsIfNew().stream()
                .map(l -> copy(l))
                .collect(Collectors.toSet());
        Set<MonolingualTextValue> descriptions = update.getDescriptions().stream()
                .map(l -> copy(l))
                .collect(Collectors.toSet());
        Set<MonolingualTextValue> descriptionsIfNew = update.getDescriptionsIfNew().stream()
                .map(l -> copy(l))
                .collect(Collectors.toSet());
        Set<MonolingualTextValue> aliases = update.getAliases().stream()
                .map(l -> copy(l))
                .collect(Collectors.toSet());
        List<Statement> addedStatements = update.getAddedStatements().stream()
                .map(l -> copy(l))
                .collect(Collectors.toList());
        Set<Statement> deletedStatements = update.getDeletedStatements().stream()
                .map(l -> copy(l))
                .collect(Collectors.toSet());
        return new ItemUpdate(rewrittenSubject, addedStatements, deletedStatements, labels, labelsIfNew,
                descriptions, descriptionsIfNew, aliases);
    } catch (MissingEntityIdFound e) {
        // Convert the unchecked internal signal into the checked exception
        // callers handle, keeping the original throwable as the cause so the
        // stack trace of where the missing id was found is not lost.
        NewItemNotCreatedYetException notCreatedYet = new NewItemNotCreatedYetException(e.value);
        notCreatedYet.initCause(e);
        throw notCreatedYet;
    }
}
/**
 * Unchecked version of {@link NewItemNotCreatedYetException}, for internal use only.
 *
 * It is thrown from {@code copy(ItemIdValue)} when a new item has no
 * identifier in the library, and it is translated into the checked
 * exception by {@code rewrite(ItemUpdate)}.
 *
 * NOTE(review): this extends {@link Error} rather than
 * {@link RuntimeException}, presumably so that it is not intercepted by
 * generic exception handlers on its way up; confirm before changing it.
 */
protected static class MissingEntityIdFound extends Error {
    private static final long serialVersionUID = 1L;

    /** The entity id for which no created identifier was found. */
    protected final EntityIdValue value;

    /**
     * @param missing
     *            the entity id which could not be rewritten
     */
    public MissingEntityIdFound(EntityIdValue missing) {
        this.value = missing;
    }
}
/**
 * Builds a new {@link ItemUpdate} in which the item id, the terms and the
 * statements of the given update have each been passed through this
 * rewriter's {@code copy} methods.
 *
 * @param update
 *            the update to rewrite
 * @return the rewritten update
 */
public ItemUpdate rewrite(ItemUpdate update) {
    ItemIdValue rewrittenSubject = copy(update.getItemId());

    // Terms
    Set<MonolingualTextValue> rewrittenLabels = update.getLabels().stream()
            .map(term -> copy(term))
            .collect(Collectors.toSet());
    Set<MonolingualTextValue> rewrittenLabelsIfNew = update.getLabelsIfNew().stream()
            .map(term -> copy(term))
            .collect(Collectors.toSet());
    Set<MonolingualTextValue> rewrittenDescriptions = update.getDescriptions().stream()
            .map(term -> copy(term))
            .collect(Collectors.toSet());
    Set<MonolingualTextValue> rewrittenDescriptionsIfNew = update.getDescriptionsIfNew().stream()
            .map(term -> copy(term))
            .collect(Collectors.toSet());
    Set<MonolingualTextValue> rewrittenAliases = update.getAliases().stream()
            .map(term -> copy(term))
            .collect(Collectors.toSet());

    // Statements
    List<Statement> rewrittenAdded = update.getAddedStatements().stream()
            .map(statement -> copy(statement))
            .collect(Collectors.toList());
    Set<Statement> rewrittenDeleted = update.getDeletedStatements().stream()
            .map(statement -> copy(statement))
            .collect(Collectors.toSet());

    return new ItemUpdate(
            rewrittenSubject,
            rewrittenAdded,
            rewrittenDeleted,
            rewrittenLabels,
            rewrittenLabelsIfNew,
            rewrittenDescriptions,
            rewrittenDescriptionsIfNew,
            rewrittenAliases);
}
} }

View File

@ -0,0 +1,19 @@
package org.openrefine.wikidata.schema.exceptions;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
/**
 * Checked exception signalling that an entity was referenced in a rewrite
 * although it had not been created yet. The offending entity id is
 * available through {@link #getMissingEntity()}.
 */
public class NewItemNotCreatedYetException extends Exception {

    private static final long serialVersionUID = -563535295696710197L;

    private final EntityIdValue value;

    /**
     * @param value
     *            the entity id which had not been created yet
     */
    public NewItemNotCreatedYetException(EntityIdValue value) {
        super(messageFor(value));
        this.value = value;
    }

    /**
     * @return the entity id which had not been created yet
     */
    public EntityIdValue getMissingEntity() {
        return this.value;
    }

    /** Builds the exception message for the given missing entity id. */
    private static String messageFor(EntityIdValue missing) {
        return "Attempted to rewrite an entity which was not created yet: " + missing;
    }
}

View File

@ -1,18 +1,18 @@
/******************************************************************************* /*******************************************************************************
* MIT License * MIT License
* *
* Copyright (c) 2018 Antonin Delpeuch * Copyright (c) 2018 Antonin Delpeuch
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights * in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is * copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions: * furnished to do so, subject to the following conditions:
* *
* The above copyright notice and this permission notice shall be included in all * The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software. * copies or substantial portions of the Software.
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@ -25,6 +25,7 @@ package org.openrefine.wikidata.editing;
import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertEquals;
import org.openrefine.wikidata.schema.exceptions.NewItemNotCreatedYetException;
import org.openrefine.wikidata.testing.TestingData; import org.openrefine.wikidata.testing.TestingData;
import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder; import org.openrefine.wikidata.updates.ItemUpdateBuilder;
@ -35,67 +36,103 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
public class ReconEntityRewriterTest { public class ReconEntityRewriterTest {
NewItemLibrary library = null; NewItemLibrary library = null;
ReconEntityRewriter rewriter = null; ReconEntityRewriter rewriter = null;
ItemIdValue subject = TestingData.newIdA; ItemIdValue newlyCreated = Datamodel.makeWikidataItemIdValue("Q1234");
ItemIdValue newlyCreated = Datamodel.makeWikidataItemIdValue("Q1234");
@BeforeMethod @BeforeMethod
public void setUp() { public void setUp() {
library = new NewItemLibrary(); library = new NewItemLibrary();
rewriter = new ReconEntityRewriter(library, subject); }
}
@Test(expectedExceptions = IllegalArgumentException.class) @Test(expectedExceptions = ReconEntityRewriter.MissingEntityIdFound.class)
public void testNotCreatedYet() { public void testNotCreatedYet() {
rewriter.copy(TestingData.newIdB); rewriter = new ReconEntityRewriter(library, TestingData.newIdA);
} rewriter.copy(TestingData.newIdB);
}
@Test @Test
public void testSuccessfulRewrite() { public void testSuccessfulRewrite() {
library.setQid(4567L, "Q1234"); rewriter = new ReconEntityRewriter(library, TestingData.newIdA);
assertEquals(newlyCreated, rewriter.copy(TestingData.newIdB)); library.setQid(4567L, "Q1234");
} assertEquals(newlyCreated, rewriter.copy(TestingData.newIdB));
}
@Test @Test
public void testSubjectNotRewriten() { public void testSubjectNotRewritten() {
assertEquals(subject, rewriter.copy(subject)); ItemIdValue subject = TestingData.newIdA;
} rewriter = new ReconEntityRewriter(library, subject);
assertEquals(subject, rewriter.copy(subject));
}
@Test @Test
public void testMatched() { public void testSubjectRewritten() {
assertEquals(TestingData.matchedId, rewriter.copy(TestingData.matchedId)); ItemIdValue subject = TestingData.newIdB;
} library.setQid(4567L, "Q1234");
rewriter = new ReconEntityRewriter(library, subject);
assertEquals(newlyCreated, rewriter.copy(subject));
}
@Test @Test
public void testRewriteUpdate() { public void testMatched() {
library.setQid(4567L, "Q1234"); rewriter = new ReconEntityRewriter(library, TestingData.newIdA);
ItemUpdate update = new ItemUpdateBuilder(subject) assertEquals(TestingData.matchedId, rewriter.copy(TestingData.matchedId));
.addStatement(TestingData.generateStatement(subject, TestingData.newIdB)) }
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true) @Test
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) public void testRewriteCreate() throws NewItemNotCreatedYetException {
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); ItemIdValue subject = TestingData.newIdA;
ItemUpdate rewritten = rewriter.rewrite(update); rewriter = new ReconEntityRewriter(library, subject);
ItemUpdate expected = new ItemUpdateBuilder(subject) library.setQid(4567L, "Q1234");
.addStatement(TestingData.generateStatement(subject, newlyCreated)) ItemUpdate update = new ItemUpdateBuilder(subject)
.deleteStatement(TestingData.generateStatement(subject, TestingData.existingId)) .addStatement(TestingData.generateStatement(subject, TestingData.newIdB))
.addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true) .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true)
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
assertEquals(rewritten, expected); .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
} ItemUpdate rewritten = rewriter.rewrite(update);
ItemUpdate expected = new ItemUpdateBuilder(subject)
@Test .addStatement(TestingData.generateStatement(subject, newlyCreated))
public void testRewriteCreation() { .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId))
library.setQid(4567L, "Q1234"); .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true)
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdB) .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); assertEquals(rewritten, expected);
ItemUpdate rewritten = rewriter.rewrite(update); }
ItemUpdate expected = new ItemUpdateBuilder(newlyCreated)
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) @Test
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); public void testRewriteUpdateOnPreviouslyCreatedEntity() throws NewItemNotCreatedYetException {
assertEquals(rewritten, expected); ItemIdValue subject = TestingData.newIdA;
} rewriter = new ReconEntityRewriter(library, subject);
library.setQid(4567L, "Q1234");
ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdB)
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
ItemUpdate rewritten = rewriter.rewrite(update);
ItemUpdate expected = new ItemUpdateBuilder(newlyCreated)
.addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
.addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build();
assertEquals(rewritten, expected);
}
/**
 * Rewrites an update whose subject is an already matched item: the subject
 * is expected to be kept as is, while the new item mentioned in the added
 * statement is expected to be replaced by its registered id.
 */
@Test
public void testRewriteUpdateOnExistingEntity() throws NewItemNotCreatedYetException {
    ItemIdValue matched = TestingData.matchedId;
    // Register the id "Q1234" for the internal id 4567 before rewriting.
    library.setQid(4567L, "Q1234");
    rewriter = new ReconEntityRewriter(library, matched);

    ItemUpdate original = new ItemUpdateBuilder(matched)
            .addStatement(TestingData.generateStatement(matched, TestingData.newIdB))
            .deleteStatement(TestingData.generateStatement(matched, TestingData.existingId))
            .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true)
            .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
            .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de"))
            .build();

    // Same update, with the new item replaced by its registered id.
    ItemUpdate expected = new ItemUpdateBuilder(matched)
            .addStatement(TestingData.generateStatement(matched, newlyCreated))
            .deleteStatement(TestingData.generateStatement(matched, TestingData.existingId))
            .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true)
            .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false)
            .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de"))
            .build();

    assertEquals(rewriter.rewrite(original), expected);
}
} }