Refactor ItemUpdate and introduce ItemUpdateBuilder

This commit is contained in:
Antonin Delpeuch 2018-03-01 02:09:56 +00:00
parent e0cdb91520
commit 88178d7c04
21 changed files with 336 additions and 221 deletions

View File

@ -54,8 +54,9 @@ import org.openrefine.wikidata.exporters.QuickStatementsExporter;
import org.openrefine.wikidata.qa.EditInspector; import org.openrefine.wikidata.qa.EditInspector;
import org.openrefine.wikidata.qa.QAWarning; import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.qa.QAWarningStore; import org.openrefine.wikidata.qa.QAWarningStore;
import org.openrefine.wikidata.schema.ItemUpdate;
import org.openrefine.wikidata.schema.WikibaseSchema; import org.openrefine.wikidata.schema.WikibaseSchema;
import org.openrefine.wikidata.updates.ItemUpdate;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.util.ParsingUtilities; import com.google.refine.util.ParsingUtilities;

View File

@ -10,8 +10,8 @@ import com.google.refine.browsing.Engine;
import com.google.refine.exporters.WriterExporter; import com.google.refine.exporters.WriterExporter;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import org.openrefine.wikidata.schema.ItemUpdate;
import org.openrefine.wikidata.schema.WikibaseSchema; import org.openrefine.wikidata.schema.WikibaseSchema;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.wikidata.wdtk.datamodel.interfaces.Claim; import org.wikidata.wdtk.datamodel.interfaces.Claim;

View File

@ -6,7 +6,6 @@ import java.io.Writer;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.Map; import java.util.Map;
import java.util.Properties; import java.util.Properties;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -17,7 +16,7 @@ import org.json.JSONWriter;
import org.openrefine.wikidata.editing.ConnectionManager; import org.openrefine.wikidata.editing.ConnectionManager;
import org.openrefine.wikidata.editing.NewItemLibrary; import org.openrefine.wikidata.editing.NewItemLibrary;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.schema.WikibaseSchema; import org.openrefine.wikidata.schema.WikibaseSchema;
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue; import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -30,7 +29,6 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.util.WebResourceFetcherImpl; import org.wikidata.wdtk.util.WebResourceFetcherImpl;
import org.wikidata.wdtk.wikibaseapi.ApiConnection; import org.wikidata.wdtk.wikibaseapi.ApiConnection;
import org.wikidata.wdtk.wikibaseapi.TermStatementUpdate;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor; import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher; import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException; import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;

View File

@ -17,7 +17,7 @@ import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.SingleValueScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.SingleValueScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.UnsourcedScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.UnsourcedScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.WhitespaceScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.WhitespaceScrutinizer;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
/** /**

View File

@ -1,13 +1,12 @@
package org.openrefine.wikidata.qa.scrutinizers; package org.openrefine.wikidata.qa.scrutinizers;
import java.util.List; import java.util.List;
import java.util.Map;
import org.openrefine.wikidata.qa.ConstraintFetcher; import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.openrefine.wikidata.qa.QAWarning; import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.qa.QAWarning.Severity; import org.openrefine.wikidata.qa.QAWarning.Severity;
import org.openrefine.wikidata.qa.QAWarningStore; import org.openrefine.wikidata.qa.QAWarningStore;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
/** /**
* Interface for any class that * Interface for any class that

View File

@ -2,7 +2,7 @@ package org.openrefine.wikidata.qa.scrutinizers;
import java.util.List; import java.util.List;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
public abstract class ItemEditScrutinizer extends EditScrutinizer { public abstract class ItemEditScrutinizer extends EditScrutinizer {

View File

@ -1,7 +1,7 @@
package org.openrefine.wikidata.qa.scrutinizers; package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.qa.QAWarning; import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
/** /**

View File

@ -2,7 +2,7 @@ package org.openrefine.wikidata.qa.scrutinizers;
import java.util.List; import java.util.List;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
public class NoEditsMadeScrutinizer extends EditScrutinizer { public class NoEditsMadeScrutinizer extends EditScrutinizer {

View File

@ -4,7 +4,7 @@ import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.openrefine.wikidata.qa.QAWarning; import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.Statement;

View File

@ -1,6 +1,6 @@
package org.openrefine.wikidata.qa.scrutinizers; package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
public abstract class StatementGroupScrutinizer extends ItemEditScrutinizer { public abstract class StatementGroupScrutinizer extends ItemEditScrutinizer {

View File

@ -1,6 +1,6 @@
package org.openrefine.wikidata.qa.scrutinizers; package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.Statement;

View File

@ -1,6 +1,6 @@
package org.openrefine.wikidata.qa.scrutinizers; package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak; import org.wikidata.wdtk.datamodel.interfaces.Snak;

View File

@ -3,6 +3,8 @@ package org.openrefine.wikidata.schema;
import java.util.List; import java.util.List;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.openrefine.wikidata.utils.JacksonJsonizable; import org.openrefine.wikidata.utils.JacksonJsonizable;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.Statement;
@ -40,7 +42,7 @@ public class WbItemDocumentExpr extends JacksonJsonizable implements WbExpressio
@Override @Override
public ItemUpdate evaluate(ExpressionContext ctxt) throws SkipSchemaExpressionException { public ItemUpdate evaluate(ExpressionContext ctxt) throws SkipSchemaExpressionException {
ItemIdValue subjectId = getSubject().evaluate(ctxt); ItemIdValue subjectId = getSubject().evaluate(ctxt);
ItemUpdate update = new ItemUpdate(subjectId); ItemUpdateBuilder update = new ItemUpdateBuilder(subjectId);
for(WbStatementGroupExpr expr : getStatementGroups()) { for(WbStatementGroupExpr expr : getStatementGroups()) {
try { try {
for(Statement s : expr.evaluate(ctxt, subjectId).getStatements()) { for(Statement s : expr.evaluate(ctxt, subjectId).getStatements()) {
@ -53,7 +55,7 @@ public class WbItemDocumentExpr extends JacksonJsonizable implements WbExpressio
for(WbNameDescExpr expr : getNameDescs()) { for(WbNameDescExpr expr : getNameDescs()) {
expr.contributeTo(update, ctxt); expr.contributeTo(update, ctxt);
} }
return update; return update.build();
} }
@JsonProperty("subject") @JsonProperty("subject")

View File

@ -2,6 +2,7 @@ package org.openrefine.wikidata.schema;
import org.jsoup.helper.Validate; import org.jsoup.helper.Validate;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonCreator;
@ -11,7 +12,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
/** /**
* An expression that represent a term (label, description or alias). * An expression that represent a term (label, description or alias).
* The structure is slightly different from other expressions because * The structure is slightly different from other expressions because
* we need to call different methods on {@link ItemUpdate}. * we need to call different methods on {@link ItemUpdateBuilder}.
* *
* @author Antonin Delpeuch * @author Antonin Delpeuch
* *
@ -46,7 +47,7 @@ public class WbNameDescExpr {
* @param ctxt * @param ctxt
* the evaluation context for the expression * the evaluation context for the expression
*/ */
public void contributeTo(ItemUpdate item, ExpressionContext ctxt) { public void contributeTo(ItemUpdateBuilder item, ExpressionContext ctxt) {
try { try {
MonolingualTextValue val = getValue().evaluate(ctxt); MonolingualTextValue val = getValue().evaluate(ctxt);
switch (getType()) { switch (getType()) {

View File

@ -19,6 +19,8 @@ import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
import org.openrefine.wikidata.schema.WbItemDocumentExpr; import org.openrefine.wikidata.schema.WbItemDocumentExpr;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.openrefine.wikidata.qa.QAWarningStore; import org.openrefine.wikidata.qa.QAWarningStore;
import org.openrefine.wikidata.schema.ExpressionContext; import org.openrefine.wikidata.schema.ExpressionContext;
import org.openrefine.wikidata.utils.JacksonJsonizable; import org.openrefine.wikidata.utils.JacksonJsonizable;
@ -73,7 +75,7 @@ public class WikibaseSchema implements OverlayModel {
* @return * @return
*/ */
public List<ItemUpdate> evaluateItemDocuments(ExpressionContext ctxt) { public List<ItemUpdate> evaluateItemDocuments(ExpressionContext ctxt) {
List<ItemUpdate> result = new ArrayList<ItemUpdate>(); List<ItemUpdate> result = new ArrayList<>();
for (WbItemDocumentExpr expr : itemDocumentExprs) { for (WbItemDocumentExpr expr : itemDocumentExprs) {
try { try {
@ -104,7 +106,7 @@ public class WikibaseSchema implements OverlayModel {
* generating order (not merged yet). * generating order (not merged yet).
*/ */
public List<ItemUpdate> evaluate(Project project, Engine engine, QAWarningStore warningStore) { public List<ItemUpdate> evaluate(Project project, Engine engine, QAWarningStore warningStore) {
List<ItemUpdate> result = new ArrayList<ItemUpdate>(); List<ItemUpdate> result = new ArrayList<>();
FilteredRows filteredRows = engine.getAllFilteredRows(); FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(project, new EvaluatingRowVisitor(result, warningStore)); filteredRows.accept(project, new EvaluatingRowVisitor(result, warningStore));
return result; return result;

View File

@ -1,6 +1,7 @@
package org.openrefine.wikidata.schema; package org.openrefine.wikidata.updates;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -17,6 +18,9 @@ import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
/** /**
* A class to plan an update of an item, after evaluating the statements * A class to plan an update of an item, after evaluating the statements
@ -26,12 +30,12 @@ import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
* @author Antonin Delpeuch * @author Antonin Delpeuch
*/ */
public class ItemUpdate { public class ItemUpdate {
private ItemIdValue qid; private final ItemIdValue qid;
private Set<Statement> addedStatements; private final Set<Statement> addedStatements;
private Set<Statement> deletedStatements; private final Set<Statement> deletedStatements;
private Set<MonolingualTextValue> labels; private final Set<MonolingualTextValue> labels;
private Set<MonolingualTextValue> descriptions; private final Set<MonolingualTextValue> descriptions;
private Set<MonolingualTextValue> aliases; private final Set<MonolingualTextValue> aliases;
/** /**
* Constructor. * Constructor.
@ -39,61 +43,42 @@ public class ItemUpdate {
* @param qid * @param qid
* the subject of the document. It can be a reconciled item value for new items. * the subject of the document. It can be a reconciled item value for new items.
*/ */
public ItemUpdate(ItemIdValue qid) { @JsonCreator
public ItemUpdate(
@JsonProperty("subject") ItemIdValue qid,
@JsonProperty("addedStatements") Set<Statement> addedStatements,
@JsonProperty("deletedStatements") Set<Statement> deletedStatements,
@JsonProperty("labels") Set<MonolingualTextValue> labels,
@JsonProperty("descriptions") Set<MonolingualTextValue> descriptions,
@JsonProperty("addedAliases") Set<MonolingualTextValue> aliases) {
Validate.notNull(qid); Validate.notNull(qid);
this.qid = qid; this.qid = qid;
this.addedStatements = new HashSet<>(); if(addedStatements == null) {
this.deletedStatements = new HashSet<Statement>(); addedStatements = Collections.emptySet();
this.labels = new HashSet<MonolingualTextValue>(); }
this.descriptions = new HashSet<MonolingualTextValue>(); this.addedStatements = addedStatements;
this.aliases = new HashSet<MonolingualTextValue>(); if(deletedStatements == null) {
} deletedStatements = Collections.emptySet();
}
/** this.deletedStatements = deletedStatements;
* Mark a statement for insertion. If it matches an existing if(labels == null) {
* statement, it will update the statement instead. labels = Collections.emptySet();
* }
* @param statement this.labels = labels;
* the statement to add or update if(descriptions == null) {
*/ descriptions = Collections.emptySet();
public void addStatement(Statement statement) { }
addedStatements.add(statement); this.descriptions = descriptions;
} if(aliases == null) {
aliases = Collections.emptySet();
/** }
* Mark a statement for deletion. If no such statement exists, this.aliases = aliases;
* nothing will be deleted.
*
* @param statement
* the statement to delete
*/
public void deleteStatement(Statement statement) {
deletedStatements.add(statement);
}
/**
* Add a list of statement, as in {@link addStatement}.
*
* @param statements
* the statements to add
*/
public void addStatements(Set<Statement> statements) {
addedStatements.addAll(statements);
}
/**
* Delete a list of statements, as in {@link deleteStatement}.
*
* @param statements
* the statements to delete
*/
public void deleteStatements(Set<Statement> statements) {
deletedStatements.addAll(statements);
} }
/** /**
* @return the subject of the item * @return the subject of the item
*/ */
@JsonProperty("subject")
public ItemIdValue getItemId() { public ItemIdValue getItemId() {
return qid; return qid;
} }
@ -101,6 +86,7 @@ public class ItemUpdate {
/** /**
* @return the set of all added statements * @return the set of all added statements
*/ */
@JsonProperty("addedStatements")
public Set<Statement> getAddedStatements() { public Set<Statement> getAddedStatements() {
return addedStatements; return addedStatements;
} }
@ -108,74 +94,15 @@ public class ItemUpdate {
/** /**
* @return the list of all deleted statements * @return the list of all deleted statements
*/ */
@JsonProperty("deletedStatements")
public Set<Statement> getDeletedStatements() { public Set<Statement> getDeletedStatements() {
return deletedStatements; return deletedStatements;
} }
/**
* Merges all the changes in other into this instance.
* Both updates should have the same subject.
*
* @param other
* the other change that should be merged
*/
public void merge(ItemUpdate other) {
Validate.isTrue(qid.equals(other.getItemId()));
addStatements(other.getAddedStatements());
deleteStatements(other.getDeletedStatements());
labels.addAll(other.getLabels());
descriptions.addAll(other.getDescriptions());
aliases.addAll(other.getAliases());
}
/**
* @return true when this change is empty
* (no statements or terms changed)
*/
public boolean isNull() {
return (addedStatements.isEmpty()
&& deletedStatements.isEmpty()
&& labels.isEmpty()
&& descriptions.isEmpty()
&& aliases.isEmpty());
}
/**
* Adds a label to the item. It will override any
* existing label in this language.
*
* @param label
* the label to add
*/
public void addLabel(MonolingualTextValue label) {
labels.add(label);
}
/**
* Adds a description to the item. It will override any existing
* description in this language.
*
* @param description
* the description to add
*/
public void addDescription(MonolingualTextValue description) {
descriptions.add(description);
}
/**
* Adds an alias to the item. It will be added to any existing
* aliases in that language.
*
* @param alias
* the alias to add
*/
public void addAlias(MonolingualTextValue alias) {
aliases.add(alias);
}
/** /**
* @return the list of updated labels * @return the list of updated labels
*/ */
@JsonProperty("labels")
public Set<MonolingualTextValue> getLabels() { public Set<MonolingualTextValue> getLabels() {
return labels; return labels;
} }
@ -183,6 +110,7 @@ public class ItemUpdate {
/** /**
* @return the list of updated descriptions * @return the list of updated descriptions
*/ */
@JsonProperty("descriptions")
public Set<MonolingualTextValue> getDescriptions() { public Set<MonolingualTextValue> getDescriptions() {
return descriptions; return descriptions;
} }
@ -190,10 +118,48 @@ public class ItemUpdate {
/** /**
* @return the list of updated aliases * @return the list of updated aliases
*/ */
@JsonProperty("addedAliases")
public Set<MonolingualTextValue> getAliases() { public Set<MonolingualTextValue> getAliases() {
return aliases; return aliases;
} }
/**
 * Tells whether this update carries no change at all.
 *
 * @return true when no statement is added or deleted and
 *         no label, description or alias is set
 */
@JsonIgnore
public boolean isNull() {
    // Any statement change is enough to make the update non-empty
    if (!addedStatements.isEmpty() || !deletedStatements.isEmpty()) {
        return false;
    }
    return labels.isEmpty() && descriptions.isEmpty() && aliases.isEmpty();
}
/**
 * Combines the changes of this update with those of another update.
 * Both updates should have the same subject. Neither this instance nor
 * the other one is modified: the union of their changes is returned
 * as a new instance.
 *
 * @param other
 *          the other change that should be merged
 * @return a new update on the same subject, holding the added statements,
 *         deleted statements, labels, descriptions and aliases of both updates
 */
public ItemUpdate merge(ItemUpdate other) {
    // Merging updates about different items is not allowed
    Validate.isTrue(qid.equals(other.getItemId()));
    Set<Statement> newAddedStatements = new HashSet<>(addedStatements);
    newAddedStatements.addAll(other.getAddedStatements());
    Set<Statement> newDeletedStatements = new HashSet<>(deletedStatements);
    newDeletedStatements.addAll(other.getDeletedStatements());
    Set<MonolingualTextValue> newLabels = new HashSet<>(labels);
    newLabels.addAll(other.getLabels());
    Set<MonolingualTextValue> newDescriptions = new HashSet<>(descriptions);
    newDescriptions.addAll(other.getDescriptions());
    Set<MonolingualTextValue> newAliases = new HashSet<>(aliases);
    // Aliases are taken from the other update's aliases (not its descriptions)
    newAliases.addAll(other.getAliases());
    return new ItemUpdate(
            qid, newAddedStatements, newDeletedStatements,
            newLabels, newDescriptions, newAliases);
}
/** /**
* Group added statements in StatementGroups: useful if the * Group added statements in StatementGroups: useful if the
* item is new. * item is new.
@ -215,7 +181,7 @@ public class ItemUpdate {
} }
return result; return result;
} }
/** /**
* Group a list of ItemUpdates by subject: this is useful to make one single edit * Group a list of ItemUpdates by subject: this is useful to make one single edit
* per item. * per item.
@ -224,7 +190,7 @@ public class ItemUpdate {
* @return a map from item ids to merged ItemUpdate for that id * @return a map from item ids to merged ItemUpdate for that id
*/ */
public static Map<EntityIdValue, ItemUpdate> groupBySubject(List<ItemUpdate> itemDocuments) { public static Map<EntityIdValue, ItemUpdate> groupBySubject(List<ItemUpdate> itemDocuments) {
Map<EntityIdValue, ItemUpdate> map = new HashMap<EntityIdValue, ItemUpdate>(); Map<EntityIdValue, ItemUpdate> map = new HashMap<>();
for(ItemUpdate update : itemDocuments) { for(ItemUpdate update : itemDocuments) {
if (update.isNull()) { if (update.isNull()) {
continue; continue;
@ -233,38 +199,14 @@ public class ItemUpdate {
ItemIdValue qid = update.getItemId(); ItemIdValue qid = update.getItemId();
if (map.containsKey(qid)) { if (map.containsKey(qid)) {
ItemUpdate oldUpdate = map.get(qid); ItemUpdate oldUpdate = map.get(qid);
oldUpdate.merge(update); map.put(qid, oldUpdate.merge(update));
} else { } else {
map.put(qid, update); map.put(qid, update);
} }
} }
return map; return map;
} }
/**
* This should only be used when creating a new item.
* This ensures that we never add an alias without adding
* a label in the same language.
*/
public void normalizeLabelsAndAliases() {
// Ensure that we are only adding aliases with labels
Set<String> labelLanguages = labels.stream()
.map(l -> l.getLanguageCode())
.collect(Collectors.toSet());
System.out.println(labelLanguages);
Set<MonolingualTextValue> filteredAliases = new HashSet<>();
for(MonolingualTextValue alias : aliases) {
if(!labelLanguages.contains(alias.getLanguageCode())) {
labelLanguages.add(alias.getLanguageCode());
labels.add(alias);
} else {
filteredAliases.add(alias);
}
}
aliases = filteredAliases;
}
/** /**
* Is this update about a new item? * Is this update about a new item?
*/ */
@ -272,6 +214,31 @@ public class ItemUpdate {
return "Q0".equals(getItemId().getId()); return "Q0".equals(getItemId().getId());
} }
/**
 * Returns a copy of this update in which every alias whose language
 * has no label yet is used as the label for that language; the other
 * aliases are kept as aliases.
 * This should only be used when creating a new item, so that we never
 * add an alias without adding a label in the same language.
 *
 * @return a new update with the rearranged labels and aliases; the
 *         subject, statements and descriptions are passed on unchanged
 */
public ItemUpdate normalizeLabelsAndAliases() {
    // Languages which already have a label
    Set<String> languagesWithLabel = new HashSet<>();
    for (MonolingualTextValue label : labels) {
        languagesWithLabel.add(label.getLanguageCode());
    }

    Set<MonolingualTextValue> promotedLabels = new HashSet<>(labels);
    Set<MonolingualTextValue> remainingAliases = new HashSet<>();
    for (MonolingualTextValue alias : aliases) {
        // Set.add returns true only if the language was not covered yet
        if (languagesWithLabel.add(alias.getLanguageCode())) {
            promotedLabels.add(alias);
        } else {
            remainingAliases.add(alias);
        }
    }
    return new ItemUpdate(qid, addedStatements, deletedStatements,
            promotedLabels, descriptions, remainingAliases);
}
@Override @Override
public boolean equals(Object other) { public boolean equals(Object other) {
if(other == null || !ItemUpdate.class.isInstance(other)) { if(other == null || !ItemUpdate.class.isInstance(other)) {
@ -310,4 +277,5 @@ public class ItemUpdate {
builder.append("\n>"); builder.append("\n>");
return builder.toString(); return builder.toString();
} }
} }

View File

@ -0,0 +1,143 @@
package org.openrefine.wikidata.updates;
import java.util.Set;
import java.util.HashSet;
import org.jsoup.helper.Validate;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
/**
 * Accumulates the changes to make on an item and turns them
 * into an {@link ItemUpdate} once they have all been collected.
 * After {@link #build()} has been called, every method that adds
 * a change fails.
 *
 * @author Antonin Delpeuch
 *
 */
public class ItemUpdateBuilder {
    private final ItemIdValue qid;
    private final Set<Statement> addedStatements = new HashSet<>();
    private final Set<Statement> deletedStatements = new HashSet<>();
    private final Set<MonolingualTextValue> labels = new HashSet<>();
    private final Set<MonolingualTextValue> descriptions = new HashSet<>();
    private final Set<MonolingualTextValue> aliases = new HashSet<>();
    // Switched on by build(), after which the builder is locked
    private boolean built = false;

    /**
     * Creates a builder without any change registered.
     *
     * @param qid
     *          the subject of the document. It can be a reconciled item value for new items.
     */
    public ItemUpdateBuilder(ItemIdValue qid) {
        Validate.notNull(qid);
        this.qid = qid;
    }

    /**
     * Guard shared by all the methods which register a change:
     * fails once the update has been built.
     */
    private void checkNotBuilt() {
        Validate.isTrue(!built, "ItemUpdate has already been built");
    }

    /**
     * Mark a statement for insertion. If it matches an existing
     * statement, it will update the statement instead.
     *
     * @param statement
     *          the statement to add or update
     * @return this builder, to chain calls
     */
    public ItemUpdateBuilder addStatement(Statement statement) {
        checkNotBuilt();
        addedStatements.add(statement);
        return this;
    }

    /**
     * Mark a statement for deletion. If no such statement exists,
     * nothing will be deleted.
     *
     * @param statement
     *          the statement to delete
     * @return this builder, to chain calls
     */
    public ItemUpdateBuilder deleteStatement(Statement statement) {
        checkNotBuilt();
        deletedStatements.add(statement);
        return this;
    }

    /**
     * Add a set of statements, as in {@link #addStatement(Statement)}.
     *
     * @param statements
     *          the statements to add
     * @return this builder, to chain calls
     */
    public ItemUpdateBuilder addStatements(Set<Statement> statements) {
        checkNotBuilt();
        addedStatements.addAll(statements);
        return this;
    }

    /**
     * Delete a set of statements, as in {@link #deleteStatement(Statement)}.
     *
     * @param statements
     *          the statements to delete
     * @return this builder, to chain calls
     */
    public ItemUpdateBuilder deleteStatements(Set<Statement> statements) {
        checkNotBuilt();
        deletedStatements.addAll(statements);
        return this;
    }

    /**
     * Adds a label to the item. It will override any
     * existing label in this language.
     *
     * @param label
     *          the label to add
     * @return this builder, to chain calls
     */
    public ItemUpdateBuilder addLabel(MonolingualTextValue label) {
        checkNotBuilt();
        labels.add(label);
        return this;
    }

    /**
     * Adds a description to the item. It will override any existing
     * description in this language.
     *
     * @param description
     *          the description to add
     * @return this builder, to chain calls
     */
    public ItemUpdateBuilder addDescription(MonolingualTextValue description) {
        checkNotBuilt();
        descriptions.add(description);
        return this;
    }

    /**
     * Adds an alias to the item. It will be added to any existing
     * aliases in that language.
     *
     * @param alias
     *          the alias to add
     * @return this builder, to chain calls
     */
    public ItemUpdateBuilder addAlias(MonolingualTextValue alias) {
        checkNotBuilt();
        aliases.add(alias);
        return this;
    }

    /**
     * Constructs the {@link ItemUpdate} and locks this builder:
     * the sets collected so far are handed over to the new update.
     *
     * @return the update holding all the changes registered on this builder
     */
    public ItemUpdate build() {
        built = true;
        return new ItemUpdate(qid, addedStatements, deletedStatements,
                labels, descriptions, aliases);
    }
}

View File

@ -3,6 +3,8 @@ package org.openrefine.wikidata.schema;
import java.util.Collections; import java.util.Collections;
import org.openrefine.wikidata.testing.JacksonSerializationTest; import org.openrefine.wikidata.testing.JacksonSerializationTest;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
@ -39,9 +41,7 @@ public class WbItemDocumentExprTest extends WbExpressionTest<ItemUpdate> {
@Test @Test
public void testEvaluate() { public void testEvaluate() {
setRow(recon("Q3434"), "2010-07-23", "3.898,4.389", "my alias", recon("Q23")); setRow(recon("Q3434"), "2010-07-23", "3.898,4.389", "my alias", recon("Q23"));
ItemUpdate result = new ItemUpdate(subject); ItemUpdate result = new ItemUpdateBuilder(subject).addAlias(alias).addStatement(fullStatement).build();
result.addAlias(alias);
result.addStatement(fullStatement);
evaluatesTo(result, expr); evaluatesTo(result, expr);
} }
@ -54,16 +54,14 @@ public class WbItemDocumentExprTest extends WbExpressionTest<ItemUpdate> {
@Test @Test
public void testStatementSkipped() { public void testStatementSkipped() {
setRow(recon("Q3434"), "2010-07-23", "3.898,invalid4.389", "my alias", recon("Q23")); setRow(recon("Q3434"), "2010-07-23", "3.898,invalid4.389", "my alias", recon("Q23"));
ItemUpdate result = new ItemUpdate(subject); ItemUpdate result = new ItemUpdateBuilder(subject).addAlias(alias).build();
result.addAlias(alias);
evaluatesTo(result, expr); evaluatesTo(result, expr);
} }
@Test @Test
public void testAliasSkipped() { public void testAliasSkipped() {
setRow(recon("Q3434"), "2010-07-23", "3.898,4.389", "", recon("Q23")); setRow(recon("Q3434"), "2010-07-23", "3.898,4.389", "", recon("Q23"));
ItemUpdate result = new ItemUpdate(subject); ItemUpdate result = new ItemUpdateBuilder(subject).addStatement(fullStatement).build();
result.addStatement(fullStatement);
evaluatesTo(result, expr); evaluatesTo(result, expr);
} }

View File

@ -6,6 +6,7 @@ import java.util.Collections;
import org.openrefine.wikidata.testing.JacksonSerializationTest; import org.openrefine.wikidata.testing.JacksonSerializationTest;
import org.openrefine.wikidata.testing.TestingDataGenerator; import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
@ -25,38 +26,38 @@ public class WbNameDescExprTest extends WbExpressionTest<MonolingualTextValue> {
public void testContributeToLabel() { public void testContributeToLabel() {
WbNameDescExpr labelExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.LABEL, WbNameDescExpr labelExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.LABEL,
TestingDataGenerator.getTestMonolingualExpr("fr", "français", "le croissant magnifique")); TestingDataGenerator.getTestMonolingualExpr("fr", "français", "le croissant magnifique"));
ItemUpdate update = new ItemUpdate(subject); ItemUpdateBuilder update = new ItemUpdateBuilder(subject);
labelExpr.contributeTo(update, ctxt); labelExpr.contributeTo(update, ctxt);
assertEquals(Collections.singleton(Datamodel.makeMonolingualTextValue("le croissant magnifique", "fr")), assertEquals(Collections.singleton(Datamodel.makeMonolingualTextValue("le croissant magnifique", "fr")),
update.getLabels()); update.build().getLabels());
} }
@Test @Test
public void testContributeToDescription() { public void testContributeToDescription() {
WbNameDescExpr descriptionExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.DESCRIPTION, WbNameDescExpr descriptionExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.DESCRIPTION,
TestingDataGenerator.getTestMonolingualExpr("de", "Deutsch", "wunderschön")); TestingDataGenerator.getTestMonolingualExpr("de", "Deutsch", "wunderschön"));
ItemUpdate update = new ItemUpdate(subject); ItemUpdateBuilder update = new ItemUpdateBuilder(subject);
descriptionExpr.contributeTo(update, ctxt); descriptionExpr.contributeTo(update, ctxt);
assertEquals(Collections.singleton(Datamodel.makeMonolingualTextValue("wunderschön", "de")), assertEquals(Collections.singleton(Datamodel.makeMonolingualTextValue("wunderschön", "de")),
update.getDescriptions()); update.build().getDescriptions());
} }
@Test @Test
public void testContributeToAlias() { public void testContributeToAlias() {
WbNameDescExpr aliasExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.ALIAS, WbNameDescExpr aliasExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.ALIAS,
TestingDataGenerator.getTestMonolingualExpr("en", "English", "snack")); TestingDataGenerator.getTestMonolingualExpr("en", "English", "snack"));
ItemUpdate update = new ItemUpdate(subject); ItemUpdateBuilder update = new ItemUpdateBuilder(subject);
aliasExpr.contributeTo(update, ctxt); aliasExpr.contributeTo(update, ctxt);
assertEquals(Collections.singleton(Datamodel.makeMonolingualTextValue("snack", "en")), assertEquals(Collections.singleton(Datamodel.makeMonolingualTextValue("snack", "en")),
update.getAliases()); update.build().getAliases());
} }
@Test @Test
public void testSkipped() { public void testSkipped() {
ItemUpdate update = new ItemUpdate(subject); ItemUpdateBuilder update = new ItemUpdateBuilder(subject);
setRow(""); setRow("");
expr.contributeTo(update, ctxt); expr.contributeTo(update, ctxt);
assertEquals(new ItemUpdate(subject), update); assertEquals(new ItemUpdateBuilder(subject).build(), update.build());
} }
@Test @Test

View File

@ -16,6 +16,8 @@ import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.json.JSONWriter; import org.json.JSONWriter;
import org.openrefine.wikidata.testing.TestingDataGenerator; import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.Claim; import org.wikidata.wdtk.datamodel.interfaces.Claim;
@ -101,11 +103,9 @@ public class WikibaseSchemaTest extends RefineTest {
Engine engine = new Engine(project); Engine engine = new Engine(project);
List<ItemUpdate> updates = schema.evaluate(project, engine); List<ItemUpdate> updates = schema.evaluate(project, engine);
List<ItemUpdate> expected = new ArrayList<>(); List<ItemUpdate> expected = new ArrayList<>();
ItemUpdate update1 = new ItemUpdate(qid1); ItemUpdate update1 = new ItemUpdateBuilder(qid1).addStatement(statement1).build();
update1.addStatement(statement1);
expected.add(update1); expected.add(update1);
ItemUpdate update2 = new ItemUpdate(qid2); ItemUpdate update2 = new ItemUpdateBuilder(qid2).addStatement(statement2).build();
update2.addStatement(statement2);
expected.add(update2); expected.add(update2);
assertEquals(expected, updates); assertEquals(expected, updates);
} }

View File

@ -1,4 +1,4 @@
package org.openrefine.wikidata.schema; package org.openrefine.wikidata.updates;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
@ -14,6 +14,7 @@ import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.openrefine.wikidata.testing.TestingDataGenerator; import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.Claim; import org.wikidata.wdtk.datamodel.interfaces.Claim;
@ -31,8 +32,7 @@ public class ItemUpdateTest {
private ItemIdValue newSubject = TestingDataGenerator.makeNewItemIdValue(1234L, "new item"); private ItemIdValue newSubject = TestingDataGenerator.makeNewItemIdValue(1234L, "new item");
private ItemIdValue sameNewSubject = TestingDataGenerator.makeNewItemIdValue(1234L, "other new item"); private ItemIdValue sameNewSubject = TestingDataGenerator.makeNewItemIdValue(1234L, "other new item");
private ItemIdValue matchedSubject = TestingDataGenerator.makeMatchedItemIdValue("Q78", "well known item"); private ItemIdValue matchedSubject = TestingDataGenerator.makeMatchedItemIdValue("Q78", "well known item");
private ItemUpdate update = new ItemUpdate(existingSubject);
private PropertyIdValue pid1 = Datamodel.makeWikidataPropertyIdValue("P348"); private PropertyIdValue pid1 = Datamodel.makeWikidataPropertyIdValue("P348");
private PropertyIdValue pid2 = Datamodel.makeWikidataPropertyIdValue("P52"); private PropertyIdValue pid2 = Datamodel.makeWikidataPropertyIdValue("P52");
private Claim claim1 = Datamodel.makeClaim(existingSubject, private Claim claim1 = Datamodel.makeClaim(existingSubject,
@ -55,26 +55,29 @@ public class ItemUpdateTest {
@Test(expectedExceptions=IllegalArgumentException.class) @Test(expectedExceptions=IllegalArgumentException.class)
public void testCreateWithoutSubject() { public void testCreateWithoutSubject() {
new ItemUpdate(null); new ItemUpdateBuilder(null);
} }
@Test @Test
public void testIsNull() { public void testIsNull() {
ItemUpdate update = new ItemUpdateBuilder(existingSubject).build();
assertTrue(update.isNull()); assertTrue(update.isNull());
} }
@Test @Test
public void testIsNew() { public void testIsNew() {
ItemUpdate newUpdate = new ItemUpdate(newSubject); ItemUpdate newUpdate = new ItemUpdateBuilder(newSubject).build();
assertTrue(newUpdate.isNew()); assertTrue(newUpdate.isNew());
ItemUpdate update = new ItemUpdateBuilder(existingSubject).build();
assertFalse(update.isNew()); assertFalse(update.isNew());
} }
@Test @Test
public void testAddStatements() { public void testAddStatements() {
ItemUpdate update = new ItemUpdate(existingSubject); ItemUpdate update = new ItemUpdateBuilder(existingSubject)
update.addStatement(statement1); .addStatement(statement1)
update.addStatement(statement2); .addStatement(statement2)
.build();
assertEquals(Arrays.asList(statement1, statement2).stream().collect(Collectors.toSet()), assertEquals(Arrays.asList(statement1, statement2).stream().collect(Collectors.toSet()),
update.getAddedStatements()); update.getAddedStatements());
assertEquals(statementGroups, update.getAddedStatementGroups().stream().collect(Collectors.toSet())); assertEquals(statementGroups, update.getAddedStatementGroups().stream().collect(Collectors.toSet()));
@ -82,39 +85,36 @@ public class ItemUpdateTest {
@Test @Test
public void testDeleteStatements() { public void testDeleteStatements() {
ItemUpdate update = new ItemUpdate(existingSubject); ItemUpdate update = new ItemUpdateBuilder(existingSubject)
update.deleteStatement(statement1); .deleteStatement(statement1)
update.deleteStatement(statement2); .deleteStatement(statement2)
.build();
assertEquals(Arrays.asList(statement1, statement2).stream().collect(Collectors.toSet()), assertEquals(Arrays.asList(statement1, statement2).stream().collect(Collectors.toSet()),
update.getDeletedStatements()); update.getDeletedStatements());
} }
@Test @Test
public void testMerge() { public void testMerge() {
ItemUpdate updateA = new ItemUpdate(existingSubject); ItemUpdate updateA = new ItemUpdateBuilder(existingSubject).addStatement(statement1).build();
updateA.addStatement(statement1); ItemUpdate updateB = new ItemUpdateBuilder(existingSubject).addStatement(statement2).build();
ItemUpdate updateB = new ItemUpdate(existingSubject);
updateB.addStatement(statement2);
assertNotEquals(updateA, updateB); assertNotEquals(updateA, updateB);
updateA.merge(updateB); ItemUpdate merged = updateA.merge(updateB);
assertEquals(statementGroups, assertEquals(statementGroups,
updateA.getAddedStatementGroups().stream().collect(Collectors.toSet())); merged.getAddedStatementGroups().stream().collect(Collectors.toSet()));
} }
@Test @Test
public void testGroupBySubject() { public void testGroupBySubject() {
ItemUpdate updateA = new ItemUpdate(newSubject); ItemUpdate updateA = new ItemUpdateBuilder(newSubject).addStatement(statement1).build();
updateA.addStatement(statement1); ItemUpdate updateB = new ItemUpdateBuilder(sameNewSubject).addStatement(statement2).build();
ItemUpdate updateB = new ItemUpdate(sameNewSubject); ItemUpdate updateC = new ItemUpdateBuilder(existingSubject).addLabel(label).build();
updateB.addStatement(statement2); ItemUpdate updateD = new ItemUpdateBuilder(matchedSubject).build();
ItemUpdate updateC = new ItemUpdate(existingSubject);
updateC.addLabel(label);
ItemUpdate updateD = new ItemUpdate(matchedSubject);
Map<EntityIdValue, ItemUpdate> grouped = ItemUpdate.groupBySubject( Map<EntityIdValue, ItemUpdate> grouped = ItemUpdate.groupBySubject(
Arrays.asList(updateA, updateB, updateC, updateD)); Arrays.asList(updateA, updateB, updateC, updateD));
ItemUpdate mergedUpdate = new ItemUpdate(newSubject); ItemUpdate mergedUpdate = new ItemUpdateBuilder(newSubject)
mergedUpdate.addStatement(statement1); .addStatement(statement1)
mergedUpdate.addStatement(statement2); .addStatement(statement2)
.build();
Map<EntityIdValue, ItemUpdate> expected = new HashMap<>(); Map<EntityIdValue, ItemUpdate> expected = new HashMap<>();
expected.put(newSubject, mergedUpdate); expected.put(newSubject, mergedUpdate);
expected.put(existingSubject, updateC); expected.put(existingSubject, updateC);
@ -125,15 +125,17 @@ public class ItemUpdateTest {
public void testNormalizeTerms() { public void testNormalizeTerms() {
MonolingualTextValue aliasEn = Datamodel.makeMonolingualTextValue("alias", "en"); MonolingualTextValue aliasEn = Datamodel.makeMonolingualTextValue("alias", "en");
MonolingualTextValue aliasFr = Datamodel.makeMonolingualTextValue("coucou", "fr"); MonolingualTextValue aliasFr = Datamodel.makeMonolingualTextValue("coucou", "fr");
ItemUpdate updateA = new ItemUpdate(newSubject); ItemUpdate updateA = new ItemUpdateBuilder(newSubject)
updateA.addLabel(label); .addLabel(label)
updateA.addAlias(aliasEn); .addAlias(aliasEn)
updateA.addAlias(aliasFr); .addAlias(aliasFr)
updateA.normalizeLabelsAndAliases(); .build();
ItemUpdate expectedUpdate = new ItemUpdate(newSubject); ItemUpdate normalized = updateA.normalizeLabelsAndAliases();
expectedUpdate.addLabel(label); ItemUpdate expectedUpdate = new ItemUpdateBuilder(newSubject)
expectedUpdate.addAlias(aliasEn); .addLabel(label)
expectedUpdate.addLabel(aliasFr); .addAlias(aliasEn)
assertEquals(expectedUpdate, updateA); .addLabel(aliasFr)
.build();
assertEquals(expectedUpdate, normalized);
} }
} }