Introduce a scheduler package to reorganize item updates

parent 88178d7c04, commit bb044612e0
QSValuePrinter.java

@@ -18,35 +18,15 @@ import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
  * Format documentation:
  * https://www.wikidata.org/wiki/Help:QuickStatements
  *
+ * Any new entity id will be
+ * assumed to be the last one created, represented with "LAST". It is
+ * fine to do this assumption because we are working on edit batches
+ * previously scheduled by {@link QuickStatementsUpdateScheduler}.
+ *
  * @author Antonin Delpeuch
  *
  */
 public class QSValuePrinter implements ValueVisitor<String> {
-
-    private final ReconEntityIdValue lastCreatedEntityIdValue;
-
-    /**
-     * Constructor.
-     *
-     * Creates a printer for a context where no entity was previously
-     * created with the "CREATE" command. Any new entity id will not
-     * be printed.
-     */
-    public QSValuePrinter() {
-        lastCreatedEntityIdValue = null;
-    }
-
-    /**
-     * Creates a printer for a context where an entity was previously
-     * created with the "CREATE" command. If this id is encountered,
-     * it will be printed as "LAST".
-     *
-     * @param lastCreatedEntityIdValue
-     *     the virtual id of the last created entity
-     */
-    public QSValuePrinter(ReconEntityIdValue lastCreatedEntityIdValue) {
-        this.lastCreatedEntityIdValue = lastCreatedEntityIdValue;
-    }
-
     @Override
     public String visit(DatatypeIdValue value) {

@@ -57,11 +37,8 @@ public class QSValuePrinter implements ValueVisitor<String> {
 
     @Override
     public String visit(EntityIdValue value) {
-        if (lastCreatedEntityIdValue != null && lastCreatedEntityIdValue.equals(value)) {
+        if (ReconEntityIdValue.class.isInstance(value) && ((ReconEntityIdValue)value).isNew()) {
             return "LAST";
-        } else if (ReconEntityIdValue.class.isInstance(value)) {
-            // oops, we are trying to print another newly created entity (not the last one)
-            return null;
         }
         return value.getId();
     }
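Since batches are now pre-scheduled, the printer no longer tracks which entity was created last: every new id it can encounter is, by construction, the one just created. A usage sketch (not part of the diff; the TestingDataGenerator helpers are the ones used in the test suite further down):

    QSValuePrinter printer = new QSValuePrinter();
    ReconEntityIdValue newId = TestingDataGenerator.makeNewItemIdValue(12345L, "my new item");
    assert "LAST".equals(newId.accept(printer));    // any new id now prints as LAST
    ReconEntityIdValue matched = TestingDataGenerator.makeMatchedItemIdValue("Q78", "my existing item");
    assert "Q78".equals(matched.accept(printer));   // matched ids keep their QID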
QuickStatementsExporter.java

@@ -12,6 +12,8 @@ import com.google.refine.model.Project;
 
 import org.openrefine.wikidata.schema.WikibaseSchema;
 import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.scheduler.ImpossibleSchedulingException;
+import org.openrefine.wikidata.updates.scheduler.QuickStatementsUpdateScheduler;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.wikidata.wdtk.datamodel.interfaces.Claim;

@@ -27,6 +29,9 @@ import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
 public class QuickStatementsExporter implements WriterExporter {
 
     final static Logger logger = LoggerFactory.getLogger("QuickStatementsExporter");
 
+    public static final String impossibleSchedulingErrorMessage =
+            "This edit batch cannot be performed with QuickStatements due to the structure of its new items.";
+
     public QuickStatementsExporter(){
     }

@@ -64,10 +69,17 @@ public class QuickStatementsExporter implements WriterExporter {
         translateItemList(items, writer);
     }
 
-    public void translateItemList(List<ItemUpdate> editBatch, Writer writer) throws IOException {
-        for (ItemUpdate item : editBatch) {
-            translateItem(item, writer);
+    public void translateItemList(List<ItemUpdate> updates, Writer writer) throws IOException {
+        QuickStatementsUpdateScheduler scheduler = new QuickStatementsUpdateScheduler();
+        try {
+            List<ItemUpdate> scheduled = scheduler.schedule(updates);
+            for (ItemUpdate item : scheduled) {
+                translateItem(item, writer);
+            }
+        } catch(ImpossibleSchedulingException e) {
+            writer.write(impossibleSchedulingErrorMessage);
         }
 
     }
 
     protected void translateNameDescr(String qid, Set<MonolingualTextValue> values, String prefix, ItemIdValue id, Writer writer) throws IOException {

@@ -86,7 +98,7 @@ public class QuickStatementsExporter implements WriterExporter {
         if (item.isNew()) {
             writer.write("CREATE\n");
             qid = "LAST";
-            item.normalizeLabelsAndAliases();
+            item = item.normalizeLabelsAndAliases();
         }
 
         translateNameDescr(qid, item.getLabels(), "L", item.getItemId(), writer);
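A hedged sketch of calling the new translateItemList (assuming updates came from WikibaseSchema.evaluate; the StringWriter is only for illustration): the exporter now orders the batch itself and degrades gracefully when it cannot be expressed in QuickStatements:

    java.io.StringWriter writer = new java.io.StringWriter();
    new QuickStatementsExporter().translateItemList(updates, writer);
    // writer now holds the QS commands in a safe order, or
    // impossibleSchedulingErrorMessage if a statement refers to two new items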
PerformWikibaseEditsOperation.java

@@ -17,6 +17,9 @@ import org.json.JSONWriter;
 import org.openrefine.wikidata.editing.ConnectionManager;
 import org.openrefine.wikidata.editing.NewItemLibrary;
 import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.scheduler.ImpossibleSchedulingException;
+import org.openrefine.wikidata.updates.scheduler.UpdateScheduler;
+import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
 import org.openrefine.wikidata.schema.WikibaseSchema;
 import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
 import org.slf4j.Logger;

@@ -29,6 +32,7 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
 import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
 import org.wikidata.wdtk.util.WebResourceFetcherImpl;
 import org.wikidata.wdtk.wikibaseapi.ApiConnection;
+import org.wikidata.wdtk.wikibaseapi.TermStatementUpdate;
 import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor;
 import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
 import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;

@@ -215,8 +219,10 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
             // Evaluate the schema
             List<ItemUpdate> itemDocuments = _schema.evaluate(_project, _engine);
 
-            // Group statements by item
-            Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(itemDocuments);
+            // Schedule the edit batch
+            WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
+            List<ItemUpdate> updates = null;
+            updates = scheduler.schedule(itemDocuments);
 
             /**
              * TODO:

@@ -228,7 +234,7 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
             NewItemLibrary newItemLibrary = new NewItemLibrary();
             DataObjectFactory factory = new DataObjectFactoryImpl();
             List<ItemUpdate> remainingItemUpdates = new ArrayList<>();
-            remainingItemUpdates.addAll(updates.values());
+            remainingItemUpdates.addAll(updates);
             int totalItemUpdates = updates.size();
             int updatesDone = 0;
             int batchSize = 50;

@@ -295,6 +301,20 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
                 } else {
                     // Existing item
                     ItemDocument currentDocument = (ItemDocument)currentDocs.get(update.getItemId().getId());
+                    /*
+                    TermStatementUpdate tsUpdate = new TermStatementUpdate(
+                            currentDocument,
+                            update.getAddedStatements().stream().collect(Collectors.toList()),
+                            update.getDeletedStatements().stream().collect(Collectors.toList()),
+                            update.getLabels().stream().collect(Collectors.toList()),
+                            update.getDescriptions().stream().collect(Collectors.toList()),
+                            update.getAliases().stream().collect(Collectors.toList()),
+                            new ArrayList<MonolingualTextValue>()
+                    );
+                    ObjectMapper mapper = new ObjectMapper();
+                    logger.info(mapper.writeValueAsString(update));
+                    logger.info(update.toString());
+                    logger.info(tsUpdate.getJsonUpdateString()); */
                     wbde.updateTermsStatements(currentDocument,
                             update.getLabels().stream().collect(Collectors.toList()),
                             update.getDescriptions().stream().collect(Collectors.toList()),
EditInspector.java

@@ -18,11 +18,15 @@ import org.openrefine.wikidata.qa.scrutinizers.SingleValueScrutinizer;
 import org.openrefine.wikidata.qa.scrutinizers.UnsourcedScrutinizer;
 import org.openrefine.wikidata.qa.scrutinizers.WhitespaceScrutinizer;
 import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.scheduler.ImpossibleSchedulingException;
+import org.openrefine.wikidata.updates.scheduler.UpdateScheduler;
+import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
 import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
 
 /**
- * Runs a collection of edit scrutinizers on an edit batch
- * @author antonin
+ * Runs a collection of edit scrutinizers on an edit batch.
+ *
+ * @author Antonin Delpeuch
  *
  */
 public class EditInspector {

@@ -63,10 +67,19 @@ public class EditInspector {
      * @param editBatch
      */
     public void inspect(List<ItemUpdate> editBatch) {
-        Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(editBatch);
-        List<ItemUpdate> mergedUpdates = updates.values().stream().collect(Collectors.toList());
-        for(EditScrutinizer scrutinizer : scrutinizers.values()) {
-            scrutinizer.scrutinize(mergedUpdates);
+        // First, schedule them with some scheduler,
+        // so that all newly created entities appear in the batch
+        UpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
+        try {
+            editBatch = scheduler.schedule(editBatch);
+            Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(editBatch);
+            List<ItemUpdate> mergedUpdates = updates.values().stream().collect(Collectors.toList());
+            for(EditScrutinizer scrutinizer : scrutinizers.values()) {
+                scrutinizer.scrutinize(mergedUpdates);
+            }
+        } catch(ImpossibleSchedulingException e) {
+            warningStore.addWarning(new QAWarning(
+                    "scheduling-failed", null, QAWarning.Severity.CRITICAL, 1));
         }
 
         if (warningStore.getNbWarnings() == 0) {
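Scheduling before scrutiny matters because the scheduler can introduce blank creation updates for new items that are only referenced, so grouping by subject afterwards gives each scrutinizer one merged update per entity the batch will actually touch. A sketch of the flow (hedged; assume the surrounding method declares throws ImpossibleSchedulingException):

    UpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
    List<ItemUpdate> scheduled = scheduler.schedule(editBatch);           // may add blank creations
    Map<EntityIdValue, ItemUpdate> bySubject = ItemUpdate.groupBySubject(scheduled);
    List<ItemUpdate> merged = bySubject.values().stream().collect(Collectors.toList());
    // merged now covers every subject touched by the batch, new items included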
ReconEntityIdValue.java

@@ -8,6 +8,8 @@ import org.wikidata.wdtk.datamodel.helpers.Hash;
 import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
 import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
 
+import com.fasterxml.jackson.annotation.JsonIgnore;
+
 import com.google.refine.model.Recon;
 
 /**

@@ -38,11 +40,13 @@ public abstract class ReconEntityIdValue implements PrefetchedEntityIdValue {
                 Recon.Judgment.New.equals(_recon.judgment));
     }
 
-    protected boolean isMatched() {
+    @JsonIgnore
+    public boolean isMatched() {
         return Recon.Judgment.Matched.equals(_recon.judgment) && _recon.match != null;
     }
 
-    protected boolean isNew() {
+    @JsonIgnore
+    public boolean isNew() {
         return !isMatched();
     }
 
ItemUpdate.java

@@ -31,7 +31,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
  */
 public class ItemUpdate {
     private final ItemIdValue qid;
-    private final Set<Statement> addedStatements;
+    private final List<Statement> addedStatements;
     private final Set<Statement> deletedStatements;
     private final Set<MonolingualTextValue> labels;
     private final Set<MonolingualTextValue> descriptions;

@@ -42,11 +42,24 @@ public class ItemUpdate {
      *
      * @param qid
      *     the subject of the document. It can be a reconciled item value for new items.
+     * @param addedStatements
+     *     the statements to add on the item. They should be distinct. They
+     *     are modelled as a list because their insertion order matters.
+     * @param deletedStatements
+     *     the statements to remove from the item
+     * @param labels
+     *     the labels to add on the item
+     * @param descriptions
+     *     the descriptions to add on the item
+     * @param aliases
+     *     the aliases to add on the item. In theory their order should matter
+     *     but in practice people rarely rely on the order of aliases so this
+     *     is just kept as a set for simplicity.
      */
     @JsonCreator
     public ItemUpdate(
             @JsonProperty("subject") ItemIdValue qid,
-            @JsonProperty("addedStatements") Set<Statement> addedStatements,
+            @JsonProperty("addedStatements") List<Statement> addedStatements,
             @JsonProperty("deletedStatements") Set<Statement> deletedStatements,
             @JsonProperty("labels") Set<MonolingualTextValue> labels,
             @JsonProperty("descriptions") Set<MonolingualTextValue> descriptions,

@@ -54,7 +67,7 @@ public class ItemUpdate {
         Validate.notNull(qid);
         this.qid = qid;
         if(addedStatements == null) {
-            addedStatements = Collections.emptySet();
+            addedStatements = Collections.emptyList();
         }
         this.addedStatements = addedStatements;
         if(deletedStatements == null) {

@@ -84,10 +97,13 @@ public class ItemUpdate {
     }
 
     /**
-     * @return the set of all added statements
+     * Added statements are recorded as a list because
+     * their order of insertion matters.
+     *
+     * @return the list of all added statements
      */
     @JsonProperty("addedStatements")
-    public Set<Statement> getAddedStatements() {
+    public List<Statement> getAddedStatements() {
         return addedStatements;
     }
 

@@ -124,11 +140,18 @@ public class ItemUpdate {
     }
 
     /**
-     * @return true when this change is empty
-     *     (no statements or terms changed)
+     * @return true when this change is empty and its subject is not new
     */
     @JsonIgnore
     public boolean isNull() {
+        return isEmpty() && !isNew();
+    }
+
+    /**
+     * @return true when this change leaves the content of the document untouched
+     */
+    @JsonIgnore
+    public boolean isEmpty() {
         return (addedStatements.isEmpty()
                 && deletedStatements.isEmpty()
                 && labels.isEmpty()

@@ -145,8 +168,12 @@ public class ItemUpdate {
      */
     public ItemUpdate merge(ItemUpdate other) {
         Validate.isTrue(qid.equals(other.getItemId()));
-        Set<Statement> newAddedStatements = new HashSet<>(addedStatements);
-        newAddedStatements.addAll(other.getAddedStatements());
+        List<Statement> newAddedStatements = new ArrayList<>(addedStatements);
+        for(Statement statement : other.getAddedStatements()) {
+            if (!newAddedStatements.contains(statement)) {
+                newAddedStatements.add(statement);
+            }
+        }
         Set<Statement> newDeletedStatements = new HashSet<>(deletedStatements);
         newDeletedStatements.addAll(other.getDeletedStatements());
         Set<MonolingualTextValue> newLabels = new HashSet<>(labels);

@@ -264,16 +291,29 @@ public class ItemUpdate {
         StringBuilder builder = new StringBuilder();
         builder.append("<Update on ");
         builder.append(qid);
-        builder.append("\n Labels: ");
-        builder.append(labels);
-        builder.append("\n Descriptions: ");
-        builder.append(descriptions);
-        builder.append("\n Aliases: ");
-        builder.append(aliases);
-        builder.append("\n Added statements: ");
-        builder.append(addedStatements);
-        builder.append("\n Deleted statements: ");
-        builder.append(deletedStatements);
+        if (!labels.isEmpty()) {
+            builder.append("\n Labels: ");
+            builder.append(labels);
+        }
+        if (!descriptions.isEmpty()) {
+            builder.append("\n Descriptions: ");
+            builder.append(descriptions);
+        }
+        if (!aliases.isEmpty()) {
+            builder.append("\n Aliases: ");
+            builder.append(aliases);
+        }
+        if (!addedStatements.isEmpty()) {
+            builder.append("\n Added statements: ");
+            builder.append(addedStatements);
+        }
+        if (!deletedStatements.isEmpty()) {
+            builder.append("\n Deleted statements: ");
+            builder.append(deletedStatements);
+        }
+        if (isNull()) {
+            builder.append(" (null update)");
+        }
         builder.append("\n>");
         return builder.toString();
     }
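Two consequences are worth spelling out. First, isEmpty and isNull differ exactly on new subjects: a blank update on a new item still creates it. Second, merge now deduplicates added statements while keeping their first-seen order, which a set cannot do. A sketch using the fixture names from ItemUpdateTest below:

    ItemUpdate blankNew = new ItemUpdateBuilder(newSubject).build();
    blankNew.isEmpty();   // true: nothing to change
    blankNew.isNull();    // false: it still creates the item

    ItemUpdate a = new ItemUpdateBuilder(existingSubject).addStatement(statement1).build();
    ItemUpdate b = new ItemUpdateBuilder(existingSubject)
            .addStatement(statement1).addStatement(statement2).build();
    a.merge(b).getAddedStatements();   // [statement1, statement2]: deduplicated, order preserved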
ItemUpdateBuilder.java

@@ -1,7 +1,9 @@
 package org.openrefine.wikidata.updates;
 
 import java.util.Set;
+import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.List;
 
 import org.jsoup.helper.Validate;
 import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;

@@ -17,7 +19,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
  */
 public class ItemUpdateBuilder {
     private ItemIdValue qid;
-    private Set<Statement> addedStatements;
+    private List<Statement> addedStatements;
     private Set<Statement> deletedStatements;
     private Set<MonolingualTextValue> labels;
     private Set<MonolingualTextValue> descriptions;

@@ -33,7 +35,7 @@ public class ItemUpdateBuilder {
     public ItemUpdateBuilder(ItemIdValue qid) {
         Validate.notNull(qid);
         this.qid = qid;
-        this.addedStatements = new HashSet<>();
+        this.addedStatements = new ArrayList<>();
         this.deletedStatements = new HashSet<Statement>();
         this.labels = new HashSet<MonolingualTextValue>();
         this.descriptions = new HashSet<MonolingualTextValue>();

@@ -103,6 +105,19 @@ public class ItemUpdateBuilder {
         labels.add(label);
         return this;
     }
+
+    /**
+     * Adds a list of labels to the item. It will override any
+     * existing label in each language.
+     *
+     * @param labels
+     *     the labels to add
+     */
+    public ItemUpdateBuilder addLabels(Set<MonolingualTextValue> labels) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        this.labels.addAll(labels);
+        return this;
+    }
 
     /**
      * Adds a description to the item. It will override any existing

@@ -116,6 +131,19 @@ public class ItemUpdateBuilder {
         descriptions.add(description);
         return this;
     }
+
+    /**
+     * Adds a list of descriptions to the item. It will override any
+     * existing description in each language.
+     *
+     * @param descriptions
+     *     the descriptions to add
+     */
+    public ItemUpdateBuilder addDescriptions(Set<MonolingualTextValue> descriptions) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        this.descriptions.addAll(descriptions);
+        return this;
+    }
 
     /**
      * Adds an alias to the item. It will be added to any existing

@@ -129,6 +157,19 @@ public class ItemUpdateBuilder {
         aliases.add(alias);
         return this;
     }
+
+    /**
+     * Adds a list of aliases to the item. They will be added to any
+     * existing aliases in each language.
+     *
+     * @param aliases
+     *     the aliases to add
+     */
+    public ItemUpdateBuilder addAliases(Set<MonolingualTextValue> aliases) {
+        Validate.isTrue(!built, "ItemUpdate has already been built");
+        this.aliases.addAll(aliases);
+        return this;
+    }
 
     /**
      * Constructs the {@link ItemUpdate}.
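The bulk setters exist mainly so a scheduler can split an update while carrying its terms over in a single chain, which is exactly what both schedulers below do. Sketch (hedged):

    ItemUpdate termsOnly = new ItemUpdateBuilder(update.getItemId())
            .addLabels(update.getLabels())
            .addDescriptions(update.getDescriptions())
            .addAliases(update.getAliases())
            .deleteStatements(update.getDeletedStatements())
            .build();   // same subject, same terms and deletions, no added statements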
ImpossibleSchedulingException.java (new file)

@@ -0,0 +1,8 @@
+package org.openrefine.wikidata.updates.scheduler;
+
+
+public class ImpossibleSchedulingException extends Exception {
+
+    private static final long serialVersionUID = 6621563898380564148L;
+
+}
PointerExtractor.java (new file)

@@ -0,0 +1,152 @@
+package org.openrefine.wikidata.updates.scheduler;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
+import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.StringValue;
+import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
+
+/**
+ * A class that extracts the new entity ids referred to
+ * in a statement.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class PointerExtractor implements ValueVisitor<Set<ReconItemIdValue>> {
+
+    /**
+     * Extracts all the new entities mentioned by this statement. This
+     * does not include the subject of the statement.
+     *
+     * @param statement
+     *     the statement to inspect
+     * @return
+     *     the set of all new entities mentioned by the statement
+     */
+    public Set<ReconItemIdValue> extractPointers(Statement statement) {
+        Set<ReconItemIdValue> result = new HashSet<>();
+        result.addAll(extractPointers(statement.getClaim().getMainSnak()));
+        result.addAll(extractPointers(statement.getClaim().getQualifiers()));
+        statement.getReferences().stream()
+                .map(l -> extractPointers(l.getSnakGroups()))
+                .forEach(s -> result.addAll(s));
+        return result;
+    }
+
+    /**
+     * Extracts all the new entities mentioned by this list of snak groups.
+     *
+     * @param snakGroups
+     * @return
+     */
+    public Set<ReconItemIdValue> extractPointers(List<SnakGroup> snakGroups) {
+        Set<ReconItemIdValue> result = new HashSet<>();
+        snakGroups.stream()
+                .map(s -> extractPointers(s))
+                .forEach(s -> result.addAll(s));
+        return result;
+    }
+
+    /***
+     * Extracts all the new entities mentioned by this snak group.
+     *
+     * @param snakGroup
+     * @return
+     */
+    public Set<ReconItemIdValue> extractPointers(SnakGroup snakGroup) {
+        Set<ReconItemIdValue> result = new HashSet<>();
+        snakGroup.getSnaks().stream()
+                .map(s -> extractPointers(s))
+                .forEach(s -> result.addAll(s));
+        return result;
+    }
+
+    /**
+     * Extracts all new entities mentioned by this snak group.
+     * Currently there will be at most one: the target of the snak
+     * (as property ids cannot be new for now).
+     *
+     * @param snak
+     * @return
+     */
+    public Set<ReconItemIdValue> extractPointers(Snak snak) {
+        Set<ReconItemIdValue> result = new HashSet<>();
+        result.addAll(extractPointers(snak.getPropertyId()));
+        result.addAll(extractPointers(snak.getValue()));
+        return result;
+    }
+
+    /**
+     * Extracts any new entity from the value.
+     *
+     * @param value
+     * @return
+     */
+    public Set<ReconItemIdValue> extractPointers(Value value) {
+        if (value == null) {
+            return Collections.emptySet();
+        }
+        Set<ReconItemIdValue> pointers = value.accept(this);
+        if (pointers == null) {
+            return Collections.emptySet();
+        }
+        return pointers;
+    }
+
+    @Override
+    public Set<ReconItemIdValue> visit(DatatypeIdValue value) {
+        return null;
+    }
+
+    @Override
+    public Set<ReconItemIdValue> visit(EntityIdValue value) {
+        if(ReconItemIdValue.class.isInstance(value)) {
+            ReconItemIdValue recon = (ReconItemIdValue)value;
+            if(recon.isNew()) {
+                return Collections.singleton(recon);
+            }
+        }
+        return null;
+    }
+
+    @Override
+    public Set<ReconItemIdValue> visit(GlobeCoordinatesValue value) {
+        return null;
+    }
+
+    @Override
+    public Set<ReconItemIdValue> visit(MonolingualTextValue value) {
+        return null;
+    }
+
+    @Override
+    public Set<ReconItemIdValue> visit(QuantityValue value) {
+        // units cannot be new because WDTK represents them as strings already
+        return null;
+    }
+
+    @Override
+    public Set<ReconItemIdValue> visit(StringValue value) {
+        return null;
+    }
+
+    @Override
+    public Set<ReconItemIdValue> visit(TimeValue value) {
+        return null;
+    }
+}
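A sketch of what the extractor reports, mirroring PointerExtractorTest below (newIdA is a reconciled new item id, existingId a plain Wikidata id, pid a property id):

    PointerExtractor extractor = new PointerExtractor();
    Snak snak = Datamodel.makeValueSnak(pid, newIdA);
    Statement statement = Datamodel.makeStatement(
            Datamodel.makeClaim(existingId, snak, Collections.emptyList()),
            Collections.emptyList(), StatementRank.NORMAL, "");
    extractor.extractPointers(statement);   // {newIdA}; the statement's subject is never included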
QuickStatementsUpdateScheduler.java (new file)

@@ -0,0 +1,118 @@
+package org.openrefine.wikidata.updates.scheduler;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
+import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+
+public class QuickStatementsUpdateScheduler implements UpdateScheduler {
+
+    private PointerExtractor extractor = new PointerExtractor();
+
+    /**
+     * This map holds for each new entity id value a list of updates
+     * that refer to this id (and should hence be scheduled right after
+     * creation of that entity).
+     */
+    private Map<ItemIdValue, UpdateSequence> pointerUpdates;
+
+    /**
+     * This contains all updates which do not refer to any new entity
+     * apart from possibly the subject, in the order that they were supplied to us.
+     */
+    private UpdateSequence pointerFreeUpdates;
+
+    /**
+     * Separates out the statements which refer to new items from the rest
+     * of the update. The resulting updates are stored in {@link referencingUpdates}
+     * and {@link updatesWithoutReferences}.
+     *
+     * @param update
+     * @throws ImpossibleSchedulingException
+     *     if two new item ids are referred to in the same statement
+     */
+    protected void splitUpdate(ItemUpdate update) throws ImpossibleSchedulingException {
+        ItemUpdateBuilder remainingUpdateBuilder = new ItemUpdateBuilder(update.getItemId())
+                .addLabels(update.getLabels())
+                .addDescriptions(update.getDescriptions())
+                .addAliases(update.getAliases())
+                .deleteStatements(update.getDeletedStatements());
+        Map<ItemIdValue, ItemUpdateBuilder> referencingUpdates = new HashMap<>();
+
+        for(Statement statement : update.getAddedStatements()) {
+            Set<ReconItemIdValue> pointers = extractor.extractPointers(statement);
+            if (pointers.isEmpty()) {
+                remainingUpdateBuilder.addStatement(statement);
+            } else if (pointers.size() == 1 && !update.isNew()) {
+                ItemIdValue pointer = pointers.stream().findFirst().get();
+                ItemUpdateBuilder referencingBuilder = referencingUpdates.get(pointer);
+                if (referencingBuilder == null) {
+                    referencingBuilder = new ItemUpdateBuilder(update.getItemId());
+                }
+                referencingBuilder.addStatement(statement);
+                referencingUpdates.put(pointer, referencingBuilder);
+            } else {
+                throw new ImpossibleSchedulingException();
+            }
+        }
+
+        // Add the update that is not referring to anything to the schedule
+        ItemUpdate pointerFree = remainingUpdateBuilder.build();
+        if (!pointerFree.isNull()) {
+            pointerFreeUpdates.add(pointerFree);
+        }
+        // Add the other updates to the map
+        for(Entry<ItemIdValue, ItemUpdateBuilder> entry : referencingUpdates.entrySet()) {
+            ItemUpdate pointerUpdate = entry.getValue().build();
+            UpdateSequence pointerUpdatesForKey = pointerUpdates.get(entry.getKey());
+            if (pointerUpdatesForKey == null) {
+                pointerUpdatesForKey = new UpdateSequence();
+            }
+            pointerUpdatesForKey.add(pointerUpdate);
+            pointerUpdates.put(entry.getKey(), pointerUpdatesForKey);
+        }
+    }
+
+    @Override
+    public List<ItemUpdate> schedule(List<ItemUpdate> updates) throws ImpossibleSchedulingException {
+        pointerUpdates = new HashMap<>();
+        pointerFreeUpdates = new UpdateSequence();
+
+        for(ItemUpdate update : updates) {
+            splitUpdate(update);
+        }
+
+        // Reconstruct
+        List<ItemUpdate> fullSchedule = new ArrayList<>();
+        Set<ItemIdValue> mentionedNewEntities = new HashSet<>(pointerUpdates.keySet());
+        for(ItemUpdate update : pointerFreeUpdates.getUpdates()) {
+            fullSchedule.add(update);
+            UpdateSequence backPointers = pointerUpdates.get(update.getItemId());
+            if (backPointers != null) {
+                fullSchedule.addAll(backPointers.getUpdates());
+            }
+            mentionedNewEntities.remove(update.getItemId());
+        }
+
+        // Create any item that was referred to but untouched
+        // (this is just for the sake of correctness, it would be bad to do that
+        // as the items would remain blank in this batch).
+        for(ItemIdValue missingId : mentionedNewEntities) {
+            fullSchedule.add(new ItemUpdateBuilder(missingId).build());
+            fullSchedule.addAll(pointerUpdates.get(missingId).getUpdates());
+        }
+        return fullSchedule;
+    }
+
+}
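Concretely: an update whose statement points at a new item is deferred until just after that item's creation, so that "LAST" resolves to the right entity, and a statement pointing at two distinct new items cannot be expressed at all. A hedged sketch of the resulting order (fixture-style names; schedule throws the checked ImpossibleSchedulingException):

    // createA creates new item A; bToA adds, on existing item B, a statement pointing at A
    List<ItemUpdate> scheduled = new QuickStatementsUpdateScheduler()
            .schedule(Arrays.asList(bToA, createA));
    // scheduled places createA first and bToA immediately after it, so in the QS
    // output B's statement can print A as "LAST" right after the CREATE command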
UpdateScheduler.java (new file)

@@ -0,0 +1,32 @@
+package org.openrefine.wikidata.updates.scheduler;
+
+import java.util.List;
+
+import org.openrefine.wikidata.updates.ItemUpdate;
+
+/**
+ * A scheduling strategy for item updates.
+ * Given a list of initial updates, the scheduler
+ * reorganizes these updates (possibly splitting them
+ * or merging them) to create a sequence that is suitable
+ * for a particular import process.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public interface UpdateScheduler {
+
+    /**
+     * Performs the scheduling. The initial updates are provided
+     * as a list so that the scheduler can attempt to respect the
+     * initial order (but no guarantee is made for that in general).
+     *
+     * @param updates
+     *     the updates to schedule
+     * @return
+     *     the reorganized updates
+     * @throws ImpossibleSchedulingException
+     *     when the scheduler cannot cope with a particular edit plan.
+     */
+    public List<ItemUpdate> schedule(List<ItemUpdate> updates) throws ImpossibleSchedulingException;
+}
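Callers pick the strategy matching their output channel behind this interface; both implementations in this commit satisfy it. Sketch (the boolean flag is hypothetical):

    UpdateScheduler scheduler = exportingToQuickStatements
            ? new QuickStatementsUpdateScheduler()
            : new WikibaseAPIUpdateScheduler();
    List<ItemUpdate> ordered = scheduler.schedule(updates);   // may throw ImpossibleSchedulingException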
UpdateSequence.java (new file)

@@ -0,0 +1,59 @@
+package org.openrefine.wikidata.updates.scheduler;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+
+/**
+ * Helper class to store a list of updates where each subject
+ * appears at most once. It preserves order of insertion.
+ *
+ * @author Antonin Delpeuch
+ */
+public class UpdateSequence {
+    /**
+     * The list of updates stored by this container
+     */
+    private List<ItemUpdate> updates = new ArrayList<>();
+    /**
+     * An index to keep track of where each item is touched in the sequence
+     */
+    private Map<ItemIdValue, Integer> index = new HashMap<>();
+
+    /**
+     * Adds a new update to the list, merging it with any existing
+     * one with the same subject.
+     *
+     * @param update
+     */
+    public void add(ItemUpdate update) {
+        ItemIdValue subject = update.getItemId();
+        if(index.containsKey(subject)) {
+            int i = index.get(subject);
+            ItemUpdate oldUpdate = updates.get(i);
+            updates.set(i, oldUpdate.merge(update));
+        } else {
+            index.put(subject, updates.size());
+            updates.add(update);
+        }
+    }
+
+    /**
+     * @return the list of merged updates
+     */
+    public List<ItemUpdate> getUpdates() {
+        return updates;
+    }
+
+    /**
+     * @return the set of touched subjects
+     */
+    public Set<ItemIdValue> getSubjects() {
+        return index.keySet();
+    }
+}
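In effect this is an insertion-ordered map keyed by subject: a second update for an already-seen subject merges into the existing slot instead of appending. Sketch (hedged, fixture-style names):

    UpdateSequence seq = new UpdateSequence();
    seq.add(new ItemUpdateBuilder(qidA).addStatement(statement1).build());
    seq.add(new ItemUpdateBuilder(qidB).addStatement(statement2).build());
    seq.add(new ItemUpdateBuilder(qidA).addStatement(statement2).build());
    seq.getUpdates().size();   // 2: both qidA updates were merged in place at index 0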
WikibaseAPIUpdateScheduler.java (new file)

@@ -0,0 +1,115 @@
+package org.openrefine.wikidata.updates.scheduler;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+/**
+ * A simple scheduler for batches commited via the Wikibase API.
+ *
+ * The strategy is quite simple and makes at most two edits
+ * per touched item (which is not minimal though). Each update
+ * is split between statements making references to new items,
+ * and statements not making these references. All updates with no
+ * references to new items are done first (which creates all new
+ * items), then all other updates are done.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WikibaseAPIUpdateScheduler implements UpdateScheduler {
+
+    /**
+     * The first part of updates: the ones which create new items
+     * without referring to any other new item.
+     */
+    private UpdateSequence pointerFreeUpdates;
+    /**
+     * The second part of the updates: all existing items, plus
+     * all parts of new items that refer to other new items.
+     */
+    private UpdateSequence pointerFullUpdates;
+    /**
+     * The set of all new items referred to in the whole batch.
+     */
+    private Set<ItemIdValue> allPointers;
+
+    private PointerExtractor extractor = new PointerExtractor();
+
+    @Override
+    public List<ItemUpdate> schedule(List<ItemUpdate> updates) {
+        List<ItemUpdate> result = new ArrayList<>();
+        pointerFreeUpdates = new UpdateSequence();
+        pointerFullUpdates = new UpdateSequence();
+        allPointers = new HashSet<>();
+
+        for(ItemUpdate update : updates) {
+            splitUpdate(update);
+        }
+
+        // Part 1: add all the pointer free updates
+        result.addAll(pointerFreeUpdates.getUpdates());
+
+        // Part 1': add the remaining new items that have not been touched
+        Set<ItemIdValue> unseenPointers = new HashSet<>(allPointers);
+        unseenPointers.removeAll(pointerFreeUpdates.getSubjects());
+
+        result.addAll(unseenPointers.stream()
+                .map(e -> new ItemUpdateBuilder(e).build())
+                .collect(Collectors.toList()));
+
+        // Part 2: add all the pointer full updates
+        result.addAll(pointerFullUpdates.getUpdates());
+
+        return result;
+    }
+
+    /**
+     * Splits an update into two parts
+     * @param update
+     */
+    protected void splitUpdate(ItemUpdate update) {
+        ItemUpdateBuilder pointerFreeBuilder = new ItemUpdateBuilder(update.getItemId())
+                .addLabels(update.getLabels())
+                .addDescriptions(update.getDescriptions())
+                .addAliases(update.getAliases())
+                .deleteStatements(update.getDeletedStatements());
+        ItemUpdateBuilder pointerFullBuilder = new ItemUpdateBuilder(update.getItemId());
+
+        for(Statement statement : update.getAddedStatements()) {
+            Set<ReconItemIdValue> pointers = extractor.extractPointers(statement);
+            if (pointers.isEmpty()) {
+                pointerFreeBuilder.addStatement(statement);
+            } else {
+                pointerFullBuilder.addStatement(statement);
+            }
+            allPointers.addAll(pointers);
+        }
+
+        if(update.isNew()) {
+            // If the update is new, we might need to split it
+            // in two (if it refers to any other new entity).
+            ItemUpdate pointerFree = pointerFreeBuilder.build();
+            if (!pointerFree.isNull()) {
+                pointerFreeUpdates.add(pointerFree);
+            }
+            ItemUpdate pointerFull = pointerFullBuilder.build();
+            if (!pointerFull.isEmpty()) {
+                pointerFullUpdates.add(pointerFull);
+            }
+        } else {
+            // Otherwise, we just make sure this edit is done after
+            // all item creations.
+            pointerFullUpdates.add(update);
+        }
+    }
+
+}
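So a new item whose statements point at another new item is edited twice: once to create it with its terms and pointer-free statements, and once, after all creations, to add the cross-referencing statements. A hedged sketch of the shape of the result:

    List<ItemUpdate> scheduled = new WikibaseAPIUpdateScheduler().schedule(updates);
    // [ creations without cross-references...,
    //   blank creations for referenced-but-untouched new ids...,
    //   edits on existing items plus the deferred cross-referencing statements... ]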
QSValuePrinterTest.java

@@ -40,15 +40,11 @@ public class QSValuePrinterTest {
     @Test
     public void printNewItemId() {
         ReconEntityIdValue id = TestingDataGenerator.makeNewItemIdValue(12345L, "my new item");
-        assertNull(id.accept(printer));
+        assertEquals("LAST", id.accept(printer));
         // because no entity was previously created
 
-        QSValuePrinter printerAfterCreate = new QSValuePrinter(id);
-        ReconEntityIdValue equalId = TestingDataGenerator.makeNewItemIdValue(12345L, "my other new item");
-        assertEquals("LAST", printerAfterCreate.visit(equalId));
-
-        ReconEntityIdValue differentId = TestingDataGenerator.makeNewItemIdValue(34567L, "my new item");
-        assertNull(printerAfterCreate.visit(differentId));
+        ReconEntityIdValue differentId = TestingDataGenerator.makeMatchedItemIdValue("Q78", "my existing item");
+        assertEquals("Q78", differentId.accept(printer));
     }
 
     // Globe coordinates
WikibaseSchemaTest.java

@@ -18,6 +18,7 @@ import org.json.JSONWriter;
 import org.openrefine.wikidata.testing.TestingDataGenerator;
 import org.openrefine.wikidata.updates.ItemUpdate;
 import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 import org.wikidata.wdtk.datamodel.helpers.Datamodel;
 import org.wikidata.wdtk.datamodel.interfaces.Claim;

@@ -63,12 +64,24 @@ public class WikibaseSchemaTest extends RefineTest {
             Collections.singletonList(Datamodel.makeReference(Collections.singletonList(retrievedSnakGroup))),
             StatementRank.NORMAL, "");
 
+    private Project project;
+
     static JSONObject jsonFromFile(String filename) throws IOException, JSONException {
         byte[] contents = Files.readAllBytes(Paths.get(filename));
         String decoded = new String(contents, "utf-8");
         return ParsingUtilities.evaluateJsonStringToObject(decoded);
     }
 
+    @BeforeMethod
+    public void setUpProject() {
+        project = this.createCSVProject(
+                "subject,inception,reference\n"+
+                "Q1377,1919,http://www.ljubljana-slovenia.com/university-ljubljana\n"+
+                "Q865528,1965,");
+        project.rows.get(0).cells.set(0, TestingDataGenerator.makeMatchedCell("Q1377", "University of Ljubljana"));
+        project.rows.get(1).cells.set(0, TestingDataGenerator.makeMatchedCell("Q865528", "University of Warwick"));
+    }
+
     @Test
     public void testSerialize() throws JSONException, IOException {
         JSONObject serialized = jsonFromFile("data/schema/history_of_medicine.json");

@@ -94,12 +107,6 @@ public class WikibaseSchemaTest extends RefineTest {
     public void testEvaluate() throws JSONException, IOException {
         JSONObject serialized = jsonFromFile("data/schema/inception.json");
         WikibaseSchema schema = WikibaseSchema.reconstruct(serialized);
-        Project project = this.createCSVProject(
-                "subject,inception,reference\n"+
-                "Q1377,1919,http://www.ljubljana-slovenia.com/university-ljubljana\n"+
-                "Q865528,1965,");
-        project.rows.get(0).cells.set(0, TestingDataGenerator.makeMatchedCell("Q1377", "University of Ljubljana"));
-        project.rows.get(1).cells.set(0, TestingDataGenerator.makeMatchedCell("Q865528", "University of Warwick"));
         Engine engine = new Engine(project);
         List<ItemUpdate> updates = schema.evaluate(project, engine);
         List<ItemUpdate> expected = new ArrayList<>();

@@ -109,4 +116,31 @@ public class WikibaseSchemaTest extends RefineTest {
         expected.add(update2);
         assertEquals(expected, updates);
     }
+
+    @Test
+    public void testEvaluateRespectsFacets() throws JSONException, IOException {
+        JSONObject serialized = jsonFromFile("data/schema/inception.json");
+        WikibaseSchema schema = WikibaseSchema.reconstruct(serialized);
+        Engine engine = new Engine(project);
+        JSONObject engineConfig = new JSONObject("{\n" +
+                "      \"mode\": \"row-based\",\n" +
+                "      \"facets\": [\n" +
+                "        {\n" +
+                "          \"mode\": \"text\",\n" +
+                "          \"invert\": false,\n" +
+                "          \"caseSensitive\": false,\n" +
+                "          \"query\": \"www\",\n" +
+                "          \"name\": \"reference\",\n" +
+                "          \"type\": \"text\",\n" +
+                "          \"columnName\": \"reference\"\n" +
+                "        }\n" +
+                "      ]\n" +
+                "    }");
+        engine.initializeFromJSON(engineConfig);
+        List<ItemUpdate> updates = schema.evaluate(project, engine);
+        List<ItemUpdate> expected = new ArrayList<>();
+        ItemUpdate update1 = new ItemUpdateBuilder(qid1).addStatement(statement1).build();
+        expected.add(update1);
+        assertEquals(expected, updates);
+    }
 }
ItemUpdateTest.java

@@ -62,6 +62,16 @@ public class ItemUpdateTest {
     public void testIsNull() {
         ItemUpdate update = new ItemUpdateBuilder(existingSubject).build();
         assertTrue(update.isNull());
+        ItemUpdate update2 = new ItemUpdateBuilder(newSubject).build();
+        assertFalse(update2.isNull());
+    }
+
+    @Test
+    public void testIsEmpty() {
+        ItemUpdate update = new ItemUpdateBuilder(existingSubject).build();
+        assertTrue(update.isEmpty());
+        ItemUpdate update2 = new ItemUpdateBuilder(newSubject).build();
+        assertTrue(update2.isEmpty());
     }
 
     @Test

@@ -78,8 +88,8 @@ public class ItemUpdateTest {
                 .addStatement(statement1)
                 .addStatement(statement2)
                 .build();
-        assertEquals(Arrays.asList(statement1, statement2).stream().collect(Collectors.toSet()),
-                update.getAddedStatements());
+        assertFalse(update.isNull());
+        assertEquals(Arrays.asList(statement1, statement2), update.getAddedStatements());
         assertEquals(statementGroups, update.getAddedStatementGroups().stream().collect(Collectors.toSet()));
     }
 

@@ -130,6 +140,7 @@ public class ItemUpdateTest {
                 .addAlias(aliasEn)
                 .addAlias(aliasFr)
                 .build();
+        assertFalse(updateA.isNull());
         ItemUpdate normalized = updateA.normalizeLabelsAndAliases();
         ItemUpdate expectedUpdate = new ItemUpdateBuilder(newSubject)
                 .addLabel(label)
@ -0,0 +1,95 @@
package org.openrefine.wikidata.updates.scheduler;

import static org.junit.Assert.assertEquals;

import java.math.BigDecimal;
import java.util.Collections;
import java.util.Set;

import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.Claim;
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Reference;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;

public class PointerExtractorTest {

    private ItemIdValue existingId = Datamodel.makeWikidataItemIdValue("Q43");
    private ItemIdValue matchedId = TestingDataGenerator.makeMatchedItemIdValue("Q89", "eist");
    private ItemIdValue newIdA = TestingDataGenerator.makeNewItemIdValue(1234L, "new item A");
    private ItemIdValue newIdB = TestingDataGenerator.makeNewItemIdValue(4567L, "new item B");

    private PropertyIdValue pid = Datamodel.makeWikidataPropertyIdValue("P89");
    private Snak snakWithNew = Datamodel.makeValueSnak(pid, newIdA);
    private Snak snakWithoutNew = Datamodel.makeValueSnak(pid, matchedId);
    private SnakGroup snakGroupWithNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithNew));
    private SnakGroup snakGroupWithoutNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithoutNew));
    private Claim claimWithNew = Datamodel.makeClaim(existingId, snakWithNew, Collections.emptyList());
    private Claim claimNewSubject = Datamodel.makeClaim(newIdB, snakWithoutNew, Collections.emptyList());
    private Claim claimNewQualifier = Datamodel.makeClaim(matchedId, snakWithoutNew,
            Collections.singletonList(snakGroupWithNew));

    private static PointerExtractor e = new PointerExtractor();

    @Test
    public void testExtractEntityId() {
        assertEquals(Collections.singleton(newIdA), e.extractPointers(newIdA));
        assertEmpty(e.extractPointers(existingId));
        assertEmpty(e.extractPointers(matchedId));
    }

    @Test
    public void testExtractDatavalues() {
        assertEmpty(Datamodel.makeDatatypeIdValue("string"));
        assertEmpty(Datamodel.makeGlobeCoordinatesValue(1.34, 2.354, 0.1, GlobeCoordinatesValue.GLOBE_EARTH));
        assertEmpty(Datamodel.makeStringValue("est"));
        assertEmpty(Datamodel.makeMonolingualTextValue("srtu", "en"));
        assertEmpty(Datamodel.makeWikidataPropertyIdValue("P78"));
        assertEmpty(Datamodel.makeQuantityValue(new BigDecimal("898")));
        assertEmpty(Datamodel.makeQuantityValue(new BigDecimal("7.87"), "http://www.wikidata.org/entity/Q34"));
        assertEmpty(Datamodel.makeTimeValue(1898, (byte)2, (byte)3, TimeValue.CM_GREGORIAN_PRO));
    }

    @Test
    public void testSnak() {
        assertEmpty(e.extractPointers(snakWithoutNew));
        assertEquals(Collections.singleton(newIdA), e.extractPointers(snakWithNew));
        assertEmpty(e.extractPointers(Datamodel.makeNoValueSnak(pid)));
    }

    @Test
    public void testSnakGroup() {
        assertEmpty(e.extractPointers(snakGroupWithoutNew));
        assertEquals(Collections.singleton(newIdA), e.extractPointers(snakGroupWithNew));
    }

    @Test
    public void testStatement() {
        assertEmpty(e.extractPointers(Datamodel.makeStatement(claimNewSubject,
                Collections.emptyList(), StatementRank.NORMAL, "")));
        assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimWithNew,
                Collections.emptyList(), StatementRank.NORMAL, "")));
        assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimNewQualifier,
                Collections.emptyList(), StatementRank.NORMAL, "")));
        Reference reference = Datamodel.makeReference(Collections.singletonList(snakGroupWithNew));
        assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimNewSubject,
                Collections.singletonList(reference), StatementRank.NORMAL, "")));
    }

    private static void assertEmpty(Value v) {
        assertEmpty(e.extractPointers(v));
    }

    private static void assertEmpty(Set<ReconItemIdValue> pointers) {
        assertEquals(Collections.emptySet(), pointers);
    }
}
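Taken together, these tests pin down the contract of PointerExtractor: from a value, snak, snak group, or statement it collects every recon item id that denotes a not-yet-created item (a "pointer"), while ignoring the statement's own subject. A minimal sketch consistent with the assertions above, assuming the isNew() accessor on recon entity ids used elsewhere in this commit; the actual class introduced here may be organized differently:

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Reference;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import org.wikidata.wdtk.datamodel.interfaces.ValueSnak;

// Sketch only: a PointerExtractor consistent with PointerExtractorTest.
public class PointerExtractor {

    public Set<ReconItemIdValue> extractPointers(Value value) {
        // Only reconciled ids that are still new count as pointers.
        if (value instanceof ReconItemIdValue && ((ReconItemIdValue) value).isNew()) {
            return Collections.singleton((ReconItemIdValue) value);
        }
        return Collections.emptySet();
    }

    public Set<ReconItemIdValue> extractPointers(Snak snak) {
        if (snak instanceof ValueSnak) {
            return extractPointers(((ValueSnak) snak).getValue());
        }
        // no-value and some-value snaks carry no datavalue, hence no pointer
        return Collections.emptySet();
    }

    public Set<ReconItemIdValue> extractPointers(SnakGroup group) {
        Set<ReconItemIdValue> pointers = new HashSet<>();
        for (Snak snak : group.getSnaks()) {
            pointers.addAll(extractPointers(snak));
        }
        return pointers;
    }

    public Set<ReconItemIdValue> extractPointers(Statement statement) {
        // The subject is deliberately skipped: testStatement expects
        // claimNewSubject to yield no pointers.
        Set<ReconItemIdValue> pointers = new HashSet<>();
        pointers.addAll(extractPointers(statement.getClaim().getMainSnak()));
        for (SnakGroup qualifier : statement.getClaim().getQualifiers()) {
            pointers.addAll(extractPointers(qualifier));
        }
        for (Reference reference : statement.getReferences()) {
            for (SnakGroup group : reference.getSnakGroups()) {
                pointers.addAll(extractPointers(group));
            }
        }
        return pointers;
    }
}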
@ -0,0 +1,51 @@
package org.openrefine.wikidata.updates.scheduler;

import static org.junit.Assert.assertEquals;

import java.util.Arrays;
import java.util.List;

import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;

public class QuickStatementsUpdateSchedulerTest extends UpdateSchedulerTest {

    @Test
    public void testNoNewItem() throws ImpossibleSchedulingException {
        ItemUpdate updateA = new ItemUpdateBuilder(existingIdA).addStatement(sAtoB).build();
        ItemUpdate updateB = new ItemUpdateBuilder(existingIdB).addStatement(sBtoA).build();
        List<ItemUpdate> scheduled = schedule(updateA, updateB);
        assertEquals(Arrays.asList(updateA, updateB), scheduled);
    }

    @Test
    public void testSplitUpdate() throws ImpossibleSchedulingException {
        ItemUpdate updateA = new ItemUpdateBuilder(existingIdA)
                .addStatement(sAtoNewA)
                .addStatement(sAtoNewB)
                .build();
        ItemUpdate newUpdateA = new ItemUpdateBuilder(newIdA).build();
        ItemUpdate newUpdateB = new ItemUpdateBuilder(newIdB).build();
        ItemUpdate splitUpdateA = new ItemUpdateBuilder(existingIdA)
                .addStatement(sAtoNewA)
                .build();
        ItemUpdate splitUpdateB = new ItemUpdateBuilder(existingIdA)
                .addStatement(sAtoNewB)
                .build();
        List<ItemUpdate> scheduled = schedule(updateA);
        assertSetEquals(Arrays.asList(newUpdateA, splitUpdateA, newUpdateB, splitUpdateB), scheduled);
    }

    @Test(expectedExceptions=ImpossibleSchedulingException.class)
    public void testImpossibleForQS() throws ImpossibleSchedulingException {
        ItemUpdate update = new ItemUpdateBuilder(newIdA).addStatement(sNewAtoNewB).build();
        schedule(update);
    }

    @Override
    public UpdateScheduler getScheduler() {
        return new QuickStatementsUpdateScheduler();
    }
}
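The split behaviour follows from the QuickStatements format: a batch can only refer to the single most recently created item, via "LAST", so an edit on an existing item that mentions two distinct new items must be broken into one batch per new item, and a statement linking one new item to another can never be expressed. A hedged usage sketch of the scheduler as exercised by these tests (the helper name is illustrative, not part of this commit):

import java.util.List;

import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.scheduler.ImpossibleSchedulingException;
import org.openrefine.wikidata.updates.scheduler.QuickStatementsUpdateScheduler;
import org.openrefine.wikidata.updates.scheduler.UpdateScheduler;

// Hypothetical helper showing how the scheduler is meant to be called.
static List<ItemUpdate> scheduleForQuickStatements(List<ItemUpdate> updates) {
    UpdateScheduler scheduler = new QuickStatementsUpdateScheduler();
    try {
        // Each returned update mentions at most one new item, so the
        // QuickStatements printer can represent it as "LAST".
        return scheduler.schedule(updates);
    } catch (ImpossibleSchedulingException e) {
        // Thrown when a statement links one new item to another,
        // which QuickStatements cannot express.
        throw new IllegalArgumentException(e);
    }
}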
@ -0,0 +1,94 @@
package org.openrefine.wikidata.updates.scheduler;

import static org.junit.Assert.assertEquals;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.Claim;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;

public abstract class UpdateSchedulerTest {

    protected ItemIdValue existingIdA = Datamodel.makeWikidataItemIdValue("Q43");
    protected ItemIdValue existingIdB = Datamodel.makeWikidataItemIdValue("Q538");
    protected ItemIdValue newIdA = TestingDataGenerator.makeNewItemIdValue(1234L, "new item A");
    protected ItemIdValue newIdB = TestingDataGenerator.makeNewItemIdValue(5678L, "new item B");

    protected static PropertyIdValue pid = Datamodel.makeWikidataPropertyIdValue("P38");

    protected Statement sAtoB = generateStatement(existingIdA, existingIdB);
    protected Statement sBtoA = generateStatement(existingIdB, existingIdA);
    protected Statement sAtoNewA = generateStatement(existingIdA, newIdA);
    protected Statement sAtoNewB = generateStatement(existingIdA, newIdB);
    protected Statement sNewAtoB = generateStatement(newIdA, existingIdB);
    protected Statement sNewAtoNewB = generateStatement(newIdA, newIdB);

    public static Statement generateStatement(ItemIdValue from, ItemIdValue to) {
        Claim claim = Datamodel.makeClaim(from, Datamodel.makeValueSnak(pid, to), Collections.emptyList());
        return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
    }

    public abstract UpdateScheduler getScheduler();

    protected List<ItemUpdate> schedule(ItemUpdate... itemUpdates) throws ImpossibleSchedulingException {
        return getScheduler().schedule(Arrays.asList(itemUpdates));
    }

    protected static void assertSetEquals(List<ItemUpdate> expected, List<ItemUpdate> actual) {
        assertEquals(expected.stream().collect(Collectors.toSet()),
                actual.stream().collect(Collectors.toSet()));
    }

    @Test
    public void testNewItemNotMentioned() throws ImpossibleSchedulingException {
        ItemUpdate updateA = new ItemUpdateBuilder(existingIdA).addStatement(sAtoNewA).build();
        List<ItemUpdate> scheduled = schedule(updateA);
        ItemUpdate newUpdate = new ItemUpdateBuilder(newIdA).build();
        assertEquals(Arrays.asList(newUpdate, updateA), scheduled);
    }

    @Test
    public void testNewItemMentioned() throws ImpossibleSchedulingException {
        ItemUpdate updateA = new ItemUpdateBuilder(existingIdA).addStatement(sAtoNewA).build();
        ItemUpdate newUpdate = new ItemUpdateBuilder(newIdA).addStatement(sNewAtoB).build();
        List<ItemUpdate> scheduled = schedule(updateA, newUpdate);
        assertEquals(Arrays.asList(newUpdate, updateA), scheduled);
    }

    @Test
    public void testMerge() throws ImpossibleSchedulingException {
        ItemUpdate update1 = new ItemUpdateBuilder(existingIdA)
                .addStatement(sAtoB)
                .build();
        ItemUpdate update2 = new ItemUpdateBuilder(existingIdA)
                .addLabel(Datamodel.makeMonolingualTextValue("hello", "fr"))
                .addStatement(sAtoB)
                .build();
        ItemUpdate merged = update1.merge(update2);
        assertEquals(Collections.singletonList(merged), schedule(update1, update2));
    }

    @Test
    public void testMergeNew() throws ImpossibleSchedulingException {
        ItemUpdate update1 = new ItemUpdateBuilder(newIdA)
                .addLabel(Datamodel.makeMonolingualTextValue("hello", "fr"))
                .addStatement(sNewAtoB)
                .build();
        ItemUpdate update2 = new ItemUpdateBuilder(newIdA)
                .addLabel(Datamodel.makeMonolingualTextValue("hello", "fr"))
                .build();
        ItemUpdate merged = update1.merge(update2);
        assertEquals(Collections.singletonList(merged), schedule(update1, update2));
    }
}
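This base class fixes two guarantees that every scheduler must provide: an update creating a new item is always scheduled before any update whose statements point at it (a new item only has a usable id once it exists), and several updates on the same subject collapse into their ItemUpdate.merge so each item is edited only once. A brief illustration of the first guarantee, mirroring testNewItemNotMentioned and usable against any concrete scheduler:

// Illustration of the shared contract; reuses the fixtures above and
// works for any scheduler returned by getScheduler().
ItemUpdate refersToNew = new ItemUpdateBuilder(existingIdA)
        .addStatement(sAtoNewA)
        .build();
List<ItemUpdate> scheduled = getScheduler()
        .schedule(Collections.singletonList(refersToNew));
// The scheduler prepends an (initially empty) update creating newIdA:
assertEquals(Arrays.asList(new ItemUpdateBuilder(newIdA).build(), refersToNew),
        scheduled);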
@ -0,0 +1,58 @@
package org.openrefine.wikidata.updates.scheduler;

import static org.junit.Assert.assertEquals;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;

public class WikibaseAPIUpdateSchedulerTest extends UpdateSchedulerTest {

    @Test
    public void testOrderPreserved() throws ImpossibleSchedulingException {
        ItemUpdate updateA = new ItemUpdateBuilder(existingIdA).addStatement(sAtoB).build();
        ItemUpdate updateB = new ItemUpdateBuilder(existingIdB).addStatement(sBtoA).build();
        List<ItemUpdate> scheduled = schedule(updateA, updateB);
        assertEquals(Arrays.asList(updateA, updateB), scheduled);
    }

    @Test
    public void testUpdateIsNotSplit() throws ImpossibleSchedulingException {
        ItemUpdate updateA = new ItemUpdateBuilder(existingIdA)
                .addStatement(sAtoNewA)
                .addStatement(sAtoNewB)
                .build();
        ItemUpdate newUpdateA = new ItemUpdateBuilder(newIdA).build();
        ItemUpdate newUpdateB = new ItemUpdateBuilder(newIdB).build();
        List<ItemUpdate> scheduled = schedule(updateA);
        assertSetEquals(Arrays.asList(newUpdateA, newUpdateB, updateA), scheduled);
    }

    @Test
    public void testMixedUpdate() throws ImpossibleSchedulingException {
        ItemUpdate updateA = new ItemUpdateBuilder(existingIdA)
                .addStatement(sAtoNewA)
                .addStatement(sAtoNewB)
                .addStatement(sAtoB)
                .build();
        ItemUpdate newUpdateA = new ItemUpdateBuilder(newIdA)
                .addStatement(sNewAtoB)
                .build();
        ItemUpdate newUpdateB = new ItemUpdateBuilder(newIdB)
                .build();
        List<ItemUpdate> scheduled = schedule(updateA, newUpdateA);
        assertEquals(Arrays.asList(newUpdateA, newUpdateB, updateA), scheduled);
    }

    @Override
    public UpdateScheduler getScheduler() {
        return new WikibaseAPIUpdateScheduler();
    }
}
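Unlike QuickStatements, the Wikibase API hands back a real id for every created item, so this scheduler never splits an update that mentions several new items; it only orders the (possibly empty) creation updates ahead of the edits that reference them. A sketch of the contrast between the two schedulers, written as if inside a subclass of UpdateSchedulerTest so the fixtures above are in scope:

// Sketch only: contrasts the behaviours pinned down by the two test classes.
void compareSchedulers() throws ImpossibleSchedulingException {
    ItemUpdate updateA = new ItemUpdateBuilder(existingIdA)
            .addStatement(sAtoNewA)   // mentions new item A
            .addStatement(sAtoNewB)   // mentions new item B
            .build();
    // QuickStatements: updateA must be split, one part per new item,
    // each part preceded by the creation of the item it refers to.
    List<ItemUpdate> qs = new QuickStatementsUpdateScheduler()
            .schedule(Collections.singletonList(updateA));   // 4 updates
    // Wikibase API: real ids are available after creation, so updateA
    // stays whole after the two empty creation updates.
    List<ItemUpdate> api = new WikibaseAPIUpdateScheduler()
            .schedule(Collections.singletonList(updateA));   // 3 updates
}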