Refactor ItemUpdate and introduce ItemUpdateBuilder
This commit is contained in:
parent
e0cdb91520
commit
88178d7c04
@ -54,8 +54,9 @@ import org.openrefine.wikidata.exporters.QuickStatementsExporter;
|
||||
import org.openrefine.wikidata.qa.EditInspector;
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.qa.QAWarningStore;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.schema.WikibaseSchema;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
|
@ -10,8 +10,8 @@ import com.google.refine.browsing.Engine;
|
||||
import com.google.refine.exporters.WriterExporter;
|
||||
import com.google.refine.model.Project;
|
||||
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.schema.WikibaseSchema;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||
|
@ -6,7 +6,6 @@ import java.io.Writer;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.stream.Collectors;
|
||||
@ -17,7 +16,7 @@ import org.json.JSONWriter;
|
||||
|
||||
import org.openrefine.wikidata.editing.ConnectionManager;
|
||||
import org.openrefine.wikidata.editing.NewItemLibrary;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.schema.WikibaseSchema;
|
||||
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
||||
import org.slf4j.Logger;
|
||||
@ -30,7 +29,6 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||
import org.wikidata.wdtk.util.WebResourceFetcherImpl;
|
||||
import org.wikidata.wdtk.wikibaseapi.ApiConnection;
|
||||
import org.wikidata.wdtk.wikibaseapi.TermStatementUpdate;
|
||||
import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor;
|
||||
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
|
||||
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
|
||||
|
@ -17,7 +17,7 @@ import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.SingleValueScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.UnsourcedScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.WhitespaceScrutinizer;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
|
||||
/**
|
||||
|
@ -1,13 +1,12 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.qa.QAWarning.Severity;
|
||||
import org.openrefine.wikidata.qa.QAWarningStore;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
|
||||
/**
|
||||
* Interface for any class that
|
||||
|
@ -2,7 +2,7 @@ package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
|
||||
public abstract class ItemEditScrutinizer extends EditScrutinizer {
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
|
||||
/**
|
||||
|
@ -2,7 +2,7 @@ package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
|
||||
|
||||
public class NoEditsMadeScrutinizer extends EditScrutinizer {
|
||||
|
@ -4,7 +4,7 @@ import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
|
||||
public abstract class StatementGroupScrutinizer extends ItemEditScrutinizer {
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
|
@ -3,6 +3,8 @@ package org.openrefine.wikidata.schema;
|
||||
import java.util.List;
|
||||
|
||||
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.openrefine.wikidata.utils.JacksonJsonizable;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
@ -40,7 +42,7 @@ public class WbItemDocumentExpr extends JacksonJsonizable implements WbExpressio
|
||||
@Override
|
||||
public ItemUpdate evaluate(ExpressionContext ctxt) throws SkipSchemaExpressionException {
|
||||
ItemIdValue subjectId = getSubject().evaluate(ctxt);
|
||||
ItemUpdate update = new ItemUpdate(subjectId);
|
||||
ItemUpdateBuilder update = new ItemUpdateBuilder(subjectId);
|
||||
for(WbStatementGroupExpr expr : getStatementGroups()) {
|
||||
try {
|
||||
for(Statement s : expr.evaluate(ctxt, subjectId).getStatements()) {
|
||||
@ -53,7 +55,7 @@ public class WbItemDocumentExpr extends JacksonJsonizable implements WbExpressio
|
||||
for(WbNameDescExpr expr : getNameDescs()) {
|
||||
expr.contributeTo(update, ctxt);
|
||||
}
|
||||
return update;
|
||||
return update.build();
|
||||
}
|
||||
|
||||
@JsonProperty("subject")
|
||||
|
@ -2,6 +2,7 @@ package org.openrefine.wikidata.schema;
|
||||
|
||||
import org.jsoup.helper.Validate;
|
||||
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
@ -11,7 +12,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
/**
|
||||
* An expression that represents a term (label, description or alias).
|
||||
* The structure is slightly different from other expressions because
|
||||
* we need to call different methods on {@link ItemUpdate}.
|
||||
* we need to call different methods on {@link ItemUpdateBuilder}.
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
@ -46,7 +47,7 @@ public class WbNameDescExpr {
|
||||
* @param ctxt
|
||||
* the evaluation context for the expression
|
||||
*/
|
||||
public void contributeTo(ItemUpdate item, ExpressionContext ctxt) {
|
||||
public void contributeTo(ItemUpdateBuilder item, ExpressionContext ctxt) {
|
||||
try {
|
||||
MonolingualTextValue val = getValue().evaluate(ctxt);
|
||||
switch (getType()) {
|
||||
|
@ -19,6 +19,8 @@ import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import org.openrefine.wikidata.schema.WbItemDocumentExpr;
|
||||
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.openrefine.wikidata.qa.QAWarningStore;
|
||||
import org.openrefine.wikidata.schema.ExpressionContext;
|
||||
import org.openrefine.wikidata.utils.JacksonJsonizable;
|
||||
@ -73,7 +75,7 @@ public class WikibaseSchema implements OverlayModel {
|
||||
* @return
|
||||
*/
|
||||
public List<ItemUpdate> evaluateItemDocuments(ExpressionContext ctxt) {
|
||||
List<ItemUpdate> result = new ArrayList<ItemUpdate>();
|
||||
List<ItemUpdate> result = new ArrayList<>();
|
||||
for (WbItemDocumentExpr expr : itemDocumentExprs) {
|
||||
|
||||
try {
|
||||
@ -104,7 +106,7 @@ public class WikibaseSchema implements OverlayModel {
|
||||
* generating order (not merged yet).
|
||||
*/
|
||||
public List<ItemUpdate> evaluate(Project project, Engine engine, QAWarningStore warningStore) {
|
||||
List<ItemUpdate> result = new ArrayList<ItemUpdate>();
|
||||
List<ItemUpdate> result = new ArrayList<>();
|
||||
FilteredRows filteredRows = engine.getAllFilteredRows();
|
||||
filteredRows.accept(project, new EvaluatingRowVisitor(result, warningStore));
|
||||
return result;
|
||||
|
@ -1,6 +1,7 @@
|
||||
package org.openrefine.wikidata.schema;
|
||||
package org.openrefine.wikidata.updates;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@ -17,6 +18,9 @@ import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
/**
|
||||
* A class to plan an update of an item, after evaluating the statements
|
||||
@ -26,12 +30,12 @@ import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
* @author Antonin Delpeuch
|
||||
*/
|
||||
public class ItemUpdate {
|
||||
private ItemIdValue qid;
|
||||
private Set<Statement> addedStatements;
|
||||
private Set<Statement> deletedStatements;
|
||||
private Set<MonolingualTextValue> labels;
|
||||
private Set<MonolingualTextValue> descriptions;
|
||||
private Set<MonolingualTextValue> aliases;
|
||||
private final ItemIdValue qid;
|
||||
private final Set<Statement> addedStatements;
|
||||
private final Set<Statement> deletedStatements;
|
||||
private final Set<MonolingualTextValue> labels;
|
||||
private final Set<MonolingualTextValue> descriptions;
|
||||
private final Set<MonolingualTextValue> aliases;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
@ -39,61 +43,42 @@ public class ItemUpdate {
|
||||
* @param qid
|
||||
* the subject of the document. It can be a reconciled item value for new items.
|
||||
*/
|
||||
public ItemUpdate(ItemIdValue qid) {
|
||||
@JsonCreator
|
||||
public ItemUpdate(
|
||||
@JsonProperty("subject") ItemIdValue qid,
|
||||
@JsonProperty("addedStatements") Set<Statement> addedStatements,
|
||||
@JsonProperty("deletedStatements") Set<Statement> deletedStatements,
|
||||
@JsonProperty("labels") Set<MonolingualTextValue> labels,
|
||||
@JsonProperty("descriptions") Set<MonolingualTextValue> descriptions,
|
||||
@JsonProperty("addedAliases") Set<MonolingualTextValue> aliases) {
|
||||
Validate.notNull(qid);
|
||||
this.qid = qid;
|
||||
this.addedStatements = new HashSet<>();
|
||||
this.deletedStatements = new HashSet<Statement>();
|
||||
this.labels = new HashSet<MonolingualTextValue>();
|
||||
this.descriptions = new HashSet<MonolingualTextValue>();
|
||||
this.aliases = new HashSet<MonolingualTextValue>();
|
||||
if(addedStatements == null) {
|
||||
addedStatements = Collections.emptySet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a statement for insertion. If it matches an existing
|
||||
* statement, it will update the statement instead.
|
||||
*
|
||||
* @param statement
|
||||
* the statement to add or update
|
||||
*/
|
||||
public void addStatement(Statement statement) {
|
||||
addedStatements.add(statement);
|
||||
this.addedStatements = addedStatements;
|
||||
if(deletedStatements == null) {
|
||||
deletedStatements = Collections.emptySet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a statement for deletion. If no such statement exists,
|
||||
* nothing will be deleted.
|
||||
*
|
||||
* @param statement
|
||||
* the statement to delete
|
||||
*/
|
||||
public void deleteStatement(Statement statement) {
|
||||
deletedStatements.add(statement);
|
||||
this.deletedStatements = deletedStatements;
|
||||
if(labels == null) {
|
||||
labels = Collections.emptySet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a list of statement, as in {@link addStatement}.
|
||||
*
|
||||
* @param statements
|
||||
* the statements to add
|
||||
*/
|
||||
public void addStatements(Set<Statement> statements) {
|
||||
addedStatements.addAll(statements);
|
||||
this.labels = labels;
|
||||
if(descriptions == null) {
|
||||
descriptions = Collections.emptySet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a list of statements, as in {@link deleteStatement}.
|
||||
*
|
||||
* @param statements
|
||||
* the statements to delete
|
||||
*/
|
||||
public void deleteStatements(Set<Statement> statements) {
|
||||
deletedStatements.addAll(statements);
|
||||
this.descriptions = descriptions;
|
||||
if(aliases == null) {
|
||||
aliases = Collections.emptySet();
|
||||
}
|
||||
this.aliases = aliases;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the subject of the item
|
||||
*/
|
||||
@JsonProperty("subject")
|
||||
public ItemIdValue getItemId() {
|
||||
return qid;
|
||||
}
|
||||
@ -101,6 +86,7 @@ public class ItemUpdate {
|
||||
/**
|
||||
* @return the set of all added statements
|
||||
*/
|
||||
@JsonProperty("addedStatements")
|
||||
public Set<Statement> getAddedStatements() {
|
||||
return addedStatements;
|
||||
}
|
||||
@ -108,30 +94,40 @@ public class ItemUpdate {
|
||||
/**
|
||||
* @return the list of all deleted statements
|
||||
*/
|
||||
@JsonProperty("deletedStatements")
|
||||
public Set<Statement> getDeletedStatements() {
|
||||
return deletedStatements;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges all the changes in other into this instance.
|
||||
* Both updates should have the same subject.
|
||||
*
|
||||
* @param other
|
||||
* the other change that should be merged
|
||||
* @return the list of updated labels
|
||||
*/
|
||||
public void merge(ItemUpdate other) {
|
||||
Validate.isTrue(qid.equals(other.getItemId()));
|
||||
addStatements(other.getAddedStatements());
|
||||
deleteStatements(other.getDeletedStatements());
|
||||
labels.addAll(other.getLabels());
|
||||
descriptions.addAll(other.getDescriptions());
|
||||
aliases.addAll(other.getAliases());
|
||||
@JsonProperty("labels")
|
||||
public Set<MonolingualTextValue> getLabels() {
|
||||
return labels;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the list of updated descriptions
|
||||
*/
|
||||
@JsonProperty("descriptions")
|
||||
public Set<MonolingualTextValue> getDescriptions() {
|
||||
return descriptions;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the list of updated aliases
|
||||
*/
|
||||
@JsonProperty("addedAliases")
|
||||
public Set<MonolingualTextValue> getAliases() {
|
||||
return aliases;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true when this change is empty
|
||||
* (no statements or terms changed)
|
||||
*/
|
||||
@JsonIgnore
|
||||
public boolean isNull() {
|
||||
return (addedStatements.isEmpty()
|
||||
&& deletedStatements.isEmpty()
|
||||
@ -141,57 +137,27 @@ public class ItemUpdate {
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a label to the item. It will override any
|
||||
* existing label in this language.
|
||||
* Merges all the changes in other into this instance.
|
||||
* Both updates should have the same subject.
|
||||
*
|
||||
* @param label
|
||||
* the label to add
|
||||
* @param other
|
||||
* the other change that should be merged
|
||||
*/
|
||||
public void addLabel(MonolingualTextValue label) {
|
||||
labels.add(label);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a description to the item. It will override any existing
|
||||
* description in this language.
|
||||
*
|
||||
* @param description
|
||||
* the description to add
|
||||
*/
|
||||
public void addDescription(MonolingualTextValue description) {
|
||||
descriptions.add(description);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an alias to the item. It will be added to any existing
|
||||
* aliases in that language.
|
||||
*
|
||||
* @param alias
|
||||
* the alias to add
|
||||
*/
|
||||
public void addAlias(MonolingualTextValue alias) {
|
||||
aliases.add(alias);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the list of updated labels
|
||||
*/
|
||||
public Set<MonolingualTextValue> getLabels() {
|
||||
return labels;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the list of updated descriptions
|
||||
*/
|
||||
public Set<MonolingualTextValue> getDescriptions() {
|
||||
return descriptions;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the list of updated aliases
|
||||
*/
|
||||
public Set<MonolingualTextValue> getAliases() {
|
||||
return aliases;
|
||||
public ItemUpdate merge(ItemUpdate other) {
|
||||
Validate.isTrue(qid.equals(other.getItemId()));
|
||||
Set<Statement> newAddedStatements = new HashSet<>(addedStatements);
|
||||
newAddedStatements.addAll(other.getAddedStatements());
|
||||
Set<Statement> newDeletedStatements = new HashSet<>(deletedStatements);
|
||||
newDeletedStatements.addAll(other.getDeletedStatements());
|
||||
Set<MonolingualTextValue> newLabels = new HashSet<>(labels);
|
||||
newLabels.addAll(other.getLabels());
|
||||
Set<MonolingualTextValue> newDescriptions = new HashSet<>(descriptions);
|
||||
newDescriptions.addAll(other.getDescriptions());
|
||||
Set<MonolingualTextValue> newAliases = new HashSet<>(aliases);
|
||||
newAliases.addAll(other.getDescriptions());
|
||||
return new ItemUpdate(
|
||||
qid, newAddedStatements, newDeletedStatements,
|
||||
newLabels, newDescriptions, newAliases);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -224,7 +190,7 @@ public class ItemUpdate {
|
||||
* @return a map from item ids to merged ItemUpdate for that id
|
||||
*/
|
||||
public static Map<EntityIdValue, ItemUpdate> groupBySubject(List<ItemUpdate> itemDocuments) {
|
||||
Map<EntityIdValue, ItemUpdate> map = new HashMap<EntityIdValue, ItemUpdate>();
|
||||
Map<EntityIdValue, ItemUpdate> map = new HashMap<>();
|
||||
for(ItemUpdate update : itemDocuments) {
|
||||
if (update.isNull()) {
|
||||
continue;
|
||||
@ -233,7 +199,7 @@ public class ItemUpdate {
|
||||
ItemIdValue qid = update.getItemId();
|
||||
if (map.containsKey(qid)) {
|
||||
ItemUpdate oldUpdate = map.get(qid);
|
||||
oldUpdate.merge(update);
|
||||
map.put(qid, oldUpdate.merge(update));
|
||||
} else {
|
||||
map.put(qid, update);
|
||||
}
|
||||
@ -241,30 +207,6 @@ public class ItemUpdate {
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* This should only be used when creating a new item.
|
||||
* This ensures that we never add an alias without adding
|
||||
* a label in the same language.
|
||||
*/
|
||||
public void normalizeLabelsAndAliases() {
|
||||
// Ensure that we are only adding aliases with labels
|
||||
Set<String> labelLanguages = labels.stream()
|
||||
.map(l -> l.getLanguageCode())
|
||||
.collect(Collectors.toSet());
|
||||
System.out.println(labelLanguages);
|
||||
|
||||
Set<MonolingualTextValue> filteredAliases = new HashSet<>();
|
||||
for(MonolingualTextValue alias : aliases) {
|
||||
if(!labelLanguages.contains(alias.getLanguageCode())) {
|
||||
labelLanguages.add(alias.getLanguageCode());
|
||||
labels.add(alias);
|
||||
} else {
|
||||
filteredAliases.add(alias);
|
||||
}
|
||||
}
|
||||
aliases = filteredAliases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this update about a new item?
|
||||
*/
|
||||
@ -272,6 +214,31 @@ public class ItemUpdate {
|
||||
return "Q0".equals(getItemId().getId());
|
||||
}
|
||||
|
||||
/**
|
||||
* This should only be used when creating a new item.
|
||||
* This ensures that we never add an alias without adding
|
||||
* a label in the same language.
|
||||
*/
|
||||
public ItemUpdate normalizeLabelsAndAliases() {
|
||||
// Ensure that we are only adding aliases with labels
|
||||
Set<String> labelLanguages = labels.stream()
|
||||
.map(l -> l.getLanguageCode())
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
Set<MonolingualTextValue> filteredAliases = new HashSet<>();
|
||||
Set<MonolingualTextValue> newLabels = new HashSet<>(labels);
|
||||
for(MonolingualTextValue alias : aliases) {
|
||||
if(!labelLanguages.contains(alias.getLanguageCode())) {
|
||||
labelLanguages.add(alias.getLanguageCode());
|
||||
newLabels.add(alias);
|
||||
} else {
|
||||
filteredAliases.add(alias);
|
||||
}
|
||||
}
|
||||
return new ItemUpdate(qid, addedStatements, deletedStatements,
|
||||
newLabels, descriptions, filteredAliases);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if(other == null || !ItemUpdate.class.isInstance(other)) {
|
||||
@ -310,4 +277,5 @@ public class ItemUpdate {
|
||||
builder.append("\n>");
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,143 @@
|
||||
package org.openrefine.wikidata.updates;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.jsoup.helper.Validate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
|
||||
|
||||
/**
|
||||
* Constructs a {@link ItemUpdate} incrementally.
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class ItemUpdateBuilder {
|
||||
private ItemIdValue qid;
|
||||
private Set<Statement> addedStatements;
|
||||
private Set<Statement> deletedStatements;
|
||||
private Set<MonolingualTextValue> labels;
|
||||
private Set<MonolingualTextValue> descriptions;
|
||||
private Set<MonolingualTextValue> aliases;
|
||||
private boolean built;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param qid
|
||||
* the subject of the document. It can be a reconciled item value for new items.
|
||||
*/
|
||||
public ItemUpdateBuilder(ItemIdValue qid) {
|
||||
Validate.notNull(qid);
|
||||
this.qid = qid;
|
||||
this.addedStatements = new HashSet<>();
|
||||
this.deletedStatements = new HashSet<Statement>();
|
||||
this.labels = new HashSet<MonolingualTextValue>();
|
||||
this.descriptions = new HashSet<MonolingualTextValue>();
|
||||
this.aliases = new HashSet<MonolingualTextValue>();
|
||||
this.built = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a statement for insertion. If it matches an existing
|
||||
* statement, it will update the statement instead.
|
||||
*
|
||||
* @param statement
|
||||
* the statement to add or update
|
||||
*/
|
||||
public ItemUpdateBuilder addStatement(Statement statement) {
|
||||
Validate.isTrue(!built, "ItemUpdate has already been built");
|
||||
addedStatements.add(statement);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a statement for deletion. If no such statement exists,
|
||||
* nothing will be deleted.
|
||||
*
|
||||
* @param statement
|
||||
* the statement to delete
|
||||
*/
|
||||
public ItemUpdateBuilder deleteStatement(Statement statement) {
|
||||
Validate.isTrue(!built, "ItemUpdate has already been built");
|
||||
deletedStatements.add(statement);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a list of statement, as in {@link addStatement}.
|
||||
*
|
||||
* @param statements
|
||||
* the statements to add
|
||||
*/
|
||||
public ItemUpdateBuilder addStatements(Set<Statement> statements) {
|
||||
Validate.isTrue(!built, "ItemUpdate has already been built");
|
||||
addedStatements.addAll(statements);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a list of statements, as in {@link deleteStatement}.
|
||||
*
|
||||
* @param statements
|
||||
* the statements to delete
|
||||
*/
|
||||
public ItemUpdateBuilder deleteStatements(Set<Statement> statements) {
|
||||
Validate.isTrue(!built, "ItemUpdate has already been built");
|
||||
deletedStatements.addAll(statements);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a label to the item. It will override any
|
||||
* existing label in this language.
|
||||
*
|
||||
* @param label
|
||||
* the label to add
|
||||
*/
|
||||
public ItemUpdateBuilder addLabel(MonolingualTextValue label) {
|
||||
Validate.isTrue(!built, "ItemUpdate has already been built");
|
||||
labels.add(label);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a description to the item. It will override any existing
|
||||
* description in this language.
|
||||
*
|
||||
* @param description
|
||||
* the description to add
|
||||
*/
|
||||
public ItemUpdateBuilder addDescription(MonolingualTextValue description) {
|
||||
Validate.isTrue(!built, "ItemUpdate has already been built");
|
||||
descriptions.add(description);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an alias to the item. It will be added to any existing
|
||||
* aliases in that language.
|
||||
*
|
||||
* @param alias
|
||||
* the alias to add
|
||||
*/
|
||||
public ItemUpdateBuilder addAlias(MonolingualTextValue alias) {
|
||||
Validate.isTrue(!built, "ItemUpdate has already been built");
|
||||
aliases.add(alias);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs the {@link ItemUpdate}.
|
||||
* @return
|
||||
*/
|
||||
public ItemUpdate build() {
|
||||
built = true;
|
||||
return new ItemUpdate(qid, addedStatements, deletedStatements,
|
||||
labels, descriptions, aliases);
|
||||
}
|
||||
|
||||
}
|
@ -3,6 +3,8 @@ package org.openrefine.wikidata.schema;
|
||||
import java.util.Collections;
|
||||
|
||||
import org.openrefine.wikidata.testing.JacksonSerializationTest;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
@ -39,9 +41,7 @@ public class WbItemDocumentExprTest extends WbExpressionTest<ItemUpdate> {
|
||||
@Test
|
||||
public void testEvaluate() {
|
||||
setRow(recon("Q3434"), "2010-07-23", "3.898,4.389", "my alias", recon("Q23"));
|
||||
ItemUpdate result = new ItemUpdate(subject);
|
||||
result.addAlias(alias);
|
||||
result.addStatement(fullStatement);
|
||||
ItemUpdate result = new ItemUpdateBuilder(subject).addAlias(alias).addStatement(fullStatement).build();
|
||||
evaluatesTo(result, expr);
|
||||
}
|
||||
|
||||
@ -54,16 +54,14 @@ public class WbItemDocumentExprTest extends WbExpressionTest<ItemUpdate> {
|
||||
@Test
|
||||
public void testStatementSkipped() {
|
||||
setRow(recon("Q3434"), "2010-07-23", "3.898,invalid4.389", "my alias", recon("Q23"));
|
||||
ItemUpdate result = new ItemUpdate(subject);
|
||||
result.addAlias(alias);
|
||||
ItemUpdate result = new ItemUpdateBuilder(subject).addAlias(alias).build();
|
||||
evaluatesTo(result, expr);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAliasSkipped() {
|
||||
setRow(recon("Q3434"), "2010-07-23", "3.898,4.389", "", recon("Q23"));
|
||||
ItemUpdate result = new ItemUpdate(subject);
|
||||
result.addStatement(fullStatement);
|
||||
ItemUpdate result = new ItemUpdateBuilder(subject).addStatement(fullStatement).build();
|
||||
evaluatesTo(result, expr);
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@ import java.util.Collections;
|
||||
|
||||
import org.openrefine.wikidata.testing.JacksonSerializationTest;
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
@ -25,38 +26,38 @@ public class WbNameDescExprTest extends WbExpressionTest<MonolingualTextValue> {
|
||||
public void testContributeToLabel() {
|
||||
WbNameDescExpr labelExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.LABEL,
|
||||
TestingDataGenerator.getTestMonolingualExpr("fr", "français", "le croissant magnifique"));
|
||||
ItemUpdate update = new ItemUpdate(subject);
|
||||
ItemUpdateBuilder update = new ItemUpdateBuilder(subject);
|
||||
labelExpr.contributeTo(update, ctxt);
|
||||
assertEquals(Collections.singleton(Datamodel.makeMonolingualTextValue("le croissant magnifique", "fr")),
|
||||
update.getLabels());
|
||||
update.build().getLabels());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testContributeToDescription() {
|
||||
WbNameDescExpr descriptionExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.DESCRIPTION,
|
||||
TestingDataGenerator.getTestMonolingualExpr("de", "Deutsch", "wunderschön"));
|
||||
ItemUpdate update = new ItemUpdate(subject);
|
||||
ItemUpdateBuilder update = new ItemUpdateBuilder(subject);
|
||||
descriptionExpr.contributeTo(update, ctxt);
|
||||
assertEquals(Collections.singleton(Datamodel.makeMonolingualTextValue("wunderschön", "de")),
|
||||
update.getDescriptions());
|
||||
update.build().getDescriptions());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testContributeToAlias() {
|
||||
WbNameDescExpr aliasExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.ALIAS,
|
||||
TestingDataGenerator.getTestMonolingualExpr("en", "English", "snack"));
|
||||
ItemUpdate update = new ItemUpdate(subject);
|
||||
ItemUpdateBuilder update = new ItemUpdateBuilder(subject);
|
||||
aliasExpr.contributeTo(update, ctxt);
|
||||
assertEquals(Collections.singleton(Datamodel.makeMonolingualTextValue("snack", "en")),
|
||||
update.getAliases());
|
||||
update.build().getAliases());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipped() {
|
||||
ItemUpdate update = new ItemUpdate(subject);
|
||||
ItemUpdateBuilder update = new ItemUpdateBuilder(subject);
|
||||
setRow("");
|
||||
expr.contributeTo(update, ctxt);
|
||||
assertEquals(new ItemUpdate(subject), update);
|
||||
assertEquals(new ItemUpdateBuilder(subject).build(), update.build());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -16,6 +16,8 @@ import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||
@ -101,11 +103,9 @@ public class WikibaseSchemaTest extends RefineTest {
|
||||
Engine engine = new Engine(project);
|
||||
List<ItemUpdate> updates = schema.evaluate(project, engine);
|
||||
List<ItemUpdate> expected = new ArrayList<>();
|
||||
ItemUpdate update1 = new ItemUpdate(qid1);
|
||||
update1.addStatement(statement1);
|
||||
ItemUpdate update1 = new ItemUpdateBuilder(qid1).addStatement(statement1).build();
|
||||
expected.add(update1);
|
||||
ItemUpdate update2 = new ItemUpdate(qid2);
|
||||
update2.addStatement(statement2);
|
||||
ItemUpdate update2 = new ItemUpdateBuilder(qid2).addStatement(statement2).build();
|
||||
expected.add(update2);
|
||||
assertEquals(expected, updates);
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
package org.openrefine.wikidata.schema;
|
||||
package org.openrefine.wikidata.updates;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
@ -14,6 +14,7 @@ import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||
@ -31,7 +32,6 @@ public class ItemUpdateTest {
|
||||
private ItemIdValue newSubject = TestingDataGenerator.makeNewItemIdValue(1234L, "new item");
|
||||
private ItemIdValue sameNewSubject = TestingDataGenerator.makeNewItemIdValue(1234L, "other new item");
|
||||
private ItemIdValue matchedSubject = TestingDataGenerator.makeMatchedItemIdValue("Q78", "well known item");
|
||||
private ItemUpdate update = new ItemUpdate(existingSubject);
|
||||
|
||||
private PropertyIdValue pid1 = Datamodel.makeWikidataPropertyIdValue("P348");
|
||||
private PropertyIdValue pid2 = Datamodel.makeWikidataPropertyIdValue("P52");
|
||||
@ -55,26 +55,29 @@ public class ItemUpdateTest {
|
||||
|
||||
@Test(expectedExceptions=IllegalArgumentException.class)
|
||||
public void testCreateWithoutSubject() {
|
||||
new ItemUpdate(null);
|
||||
new ItemUpdateBuilder(null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIsNull() {
|
||||
ItemUpdate update = new ItemUpdateBuilder(existingSubject).build();
|
||||
assertTrue(update.isNull());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIsNew() {
|
||||
ItemUpdate newUpdate = new ItemUpdate(newSubject);
|
||||
ItemUpdate newUpdate = new ItemUpdateBuilder(newSubject).build();
|
||||
assertTrue(newUpdate.isNew());
|
||||
ItemUpdate update = new ItemUpdateBuilder(existingSubject).build();
|
||||
assertFalse(update.isNew());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddStatements() {
|
||||
ItemUpdate update = new ItemUpdate(existingSubject);
|
||||
update.addStatement(statement1);
|
||||
update.addStatement(statement2);
|
||||
ItemUpdate update = new ItemUpdateBuilder(existingSubject)
|
||||
.addStatement(statement1)
|
||||
.addStatement(statement2)
|
||||
.build();
|
||||
assertEquals(Arrays.asList(statement1, statement2).stream().collect(Collectors.toSet()),
|
||||
update.getAddedStatements());
|
||||
assertEquals(statementGroups, update.getAddedStatementGroups().stream().collect(Collectors.toSet()));
|
||||
@ -82,39 +85,36 @@ public class ItemUpdateTest {
|
||||
|
||||
@Test
|
||||
public void testDeleteStatements() {
|
||||
ItemUpdate update = new ItemUpdate(existingSubject);
|
||||
update.deleteStatement(statement1);
|
||||
update.deleteStatement(statement2);
|
||||
ItemUpdate update = new ItemUpdateBuilder(existingSubject)
|
||||
.deleteStatement(statement1)
|
||||
.deleteStatement(statement2)
|
||||
.build();
|
||||
assertEquals(Arrays.asList(statement1, statement2).stream().collect(Collectors.toSet()),
|
||||
update.getDeletedStatements());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMerge() {
|
||||
ItemUpdate updateA = new ItemUpdate(existingSubject);
|
||||
updateA.addStatement(statement1);
|
||||
ItemUpdate updateB = new ItemUpdate(existingSubject);
|
||||
updateB.addStatement(statement2);
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(existingSubject).addStatement(statement1).build();
|
||||
ItemUpdate updateB = new ItemUpdateBuilder(existingSubject).addStatement(statement2).build();
|
||||
assertNotEquals(updateA, updateB);
|
||||
updateA.merge(updateB);
|
||||
ItemUpdate merged = updateA.merge(updateB);
|
||||
assertEquals(statementGroups,
|
||||
updateA.getAddedStatementGroups().stream().collect(Collectors.toSet()));
|
||||
merged.getAddedStatementGroups().stream().collect(Collectors.toSet()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupBySubject() {
|
||||
ItemUpdate updateA = new ItemUpdate(newSubject);
|
||||
updateA.addStatement(statement1);
|
||||
ItemUpdate updateB = new ItemUpdate(sameNewSubject);
|
||||
updateB.addStatement(statement2);
|
||||
ItemUpdate updateC = new ItemUpdate(existingSubject);
|
||||
updateC.addLabel(label);
|
||||
ItemUpdate updateD = new ItemUpdate(matchedSubject);
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(newSubject).addStatement(statement1).build();
|
||||
ItemUpdate updateB = new ItemUpdateBuilder(sameNewSubject).addStatement(statement2).build();
|
||||
ItemUpdate updateC = new ItemUpdateBuilder(existingSubject).addLabel(label).build();
|
||||
ItemUpdate updateD = new ItemUpdateBuilder(matchedSubject).build();
|
||||
Map<EntityIdValue, ItemUpdate> grouped = ItemUpdate.groupBySubject(
|
||||
Arrays.asList(updateA, updateB, updateC, updateD));
|
||||
ItemUpdate mergedUpdate = new ItemUpdate(newSubject);
|
||||
mergedUpdate.addStatement(statement1);
|
||||
mergedUpdate.addStatement(statement2);
|
||||
ItemUpdate mergedUpdate = new ItemUpdateBuilder(newSubject)
|
||||
.addStatement(statement1)
|
||||
.addStatement(statement2)
|
||||
.build();
|
||||
Map<EntityIdValue, ItemUpdate> expected = new HashMap<>();
|
||||
expected.put(newSubject, mergedUpdate);
|
||||
expected.put(existingSubject, updateC);
|
||||
@ -125,15 +125,17 @@ public class ItemUpdateTest {
|
||||
public void testNormalizeTerms() {
|
||||
MonolingualTextValue aliasEn = Datamodel.makeMonolingualTextValue("alias", "en");
|
||||
MonolingualTextValue aliasFr = Datamodel.makeMonolingualTextValue("coucou", "fr");
|
||||
ItemUpdate updateA = new ItemUpdate(newSubject);
|
||||
updateA.addLabel(label);
|
||||
updateA.addAlias(aliasEn);
|
||||
updateA.addAlias(aliasFr);
|
||||
updateA.normalizeLabelsAndAliases();
|
||||
ItemUpdate expectedUpdate = new ItemUpdate(newSubject);
|
||||
expectedUpdate.addLabel(label);
|
||||
expectedUpdate.addAlias(aliasEn);
|
||||
expectedUpdate.addLabel(aliasFr);
|
||||
assertEquals(expectedUpdate, updateA);
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(newSubject)
|
||||
.addLabel(label)
|
||||
.addAlias(aliasEn)
|
||||
.addAlias(aliasFr)
|
||||
.build();
|
||||
ItemUpdate normalized = updateA.normalizeLabelsAndAliases();
|
||||
ItemUpdate expectedUpdate = new ItemUpdateBuilder(newSubject)
|
||||
.addLabel(label)
|
||||
.addAlias(aliasEn)
|
||||
.addLabel(aliasFr)
|
||||
.build();
|
||||
assertEquals(expectedUpdate, normalized);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user