From 05839b1bbf3fa715928a56cc86f9877041bbda41 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Wed, 12 Jun 2019 18:59:10 +0100 Subject: [PATCH] Add support for conditional label and term updates, for issue #2063. --- .../wikidata/editing/EditBatchProcessor.java | 11 +- .../wikidata/editing/ReconEntityRewriter.java | 5 +- .../exporters/QuickStatementsExporter.java | 2 + .../qa/scrutinizers/NewItemScrutinizer.java | 4 +- .../wikidata/schema/WbNameDescExpr.java | 20 ++-- .../wikidata/updates/ItemUpdate.java | 108 ++++++++++++++---- .../wikidata/updates/ItemUpdateBuilder.java | 58 +++++++--- .../QuickStatementsUpdateScheduler.java | 6 +- .../scheduler/WikibaseAPIUpdateScheduler.java | 8 +- .../editing/EditBatchProcessorTest.java | 2 +- .../editing/ReconEntityRewriterTest.java | 8 +- .../QuickStatementsExporterTest.java | 19 ++- .../scrutinizers/NewItemScrutinizerTest.java | 8 +- .../qa/scrutinizers/ValueScrutinizerTest.java | 2 +- .../schema/WbItemDocumentExprTest.java | 2 +- .../wikidata/schema/WbNameDescExprTest.java | 12 +- .../wikidata/updates/ItemUpdateTest.java | 90 ++++++++++++++- .../scheduler/UpdateSchedulerTest.java | 6 +- 18 files changed, 290 insertions(+), 81 deletions(-) diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java index e9822ee23..7ab8524a4 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java @@ -142,8 +142,15 @@ public class EditBatchProcessor { } else { // Existing item ItemDocument currentDocument = (ItemDocument) currentDocs.get(update.getItemId().getId()); - editor.updateTermsStatements(currentDocument, update.getLabels().stream().collect(Collectors.toList()), - update.getDescriptions().stream().collect(Collectors.toList()), + List labels = update.getLabels().stream().collect(Collectors.toList()); + labels.addAll(update.getLabelsIfNew().stream() + .filter(label -> !currentDocument.getLabels().containsKey(label.getLanguageCode())).collect(Collectors.toList())); + List descriptions = update.getDescriptions().stream().collect(Collectors.toList()); + descriptions.addAll(update.getDescriptionsIfNew().stream() + .filter(desc -> !currentDocument.getDescriptions().containsKey(desc.getLanguageCode())).collect(Collectors.toList())); + editor.updateTermsStatements(currentDocument, + labels, + descriptions, update.getAliases().stream().collect(Collectors.toList()), new ArrayList(), update.getAddedStatements().stream().collect(Collectors.toList()), diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java index 36938a3ee..195862ff6 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java @@ -90,13 +90,16 @@ public class ReconEntityRewriter extends DatamodelConverter { public ItemUpdate rewrite(ItemUpdate update) { Set labels = update.getLabels().stream().map(l -> copy(l)).collect(Collectors.toSet()); + Set labelsIfNew = update.getLabelsIfNew().stream().map(l -> copy(l)).collect(Collectors.toSet()); Set descriptions = update.getDescriptions().stream().map(l -> copy(l)) .collect(Collectors.toSet()); + Set descriptionsIfNew = update.getDescriptionsIfNew().stream().map(l -> copy(l)) + .collect(Collectors.toSet()); Set aliases = update.getAliases().stream().map(l -> copy(l)).collect(Collectors.toSet()); List addedStatements = update.getAddedStatements().stream().map(l -> copy(l)) .collect(Collectors.toList()); Set deletedStatements = update.getDeletedStatements().stream().map(l -> copy(l)) .collect(Collectors.toSet()); - return new ItemUpdate(update.getItemId(), addedStatements, deletedStatements, labels, descriptions, aliases); + return new ItemUpdate(update.getItemId(), addedStatements, deletedStatements, labels, labelsIfNew, descriptions, descriptionsIfNew, aliases); } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/exporters/QuickStatementsExporter.java b/extensions/wikidata/src/org/openrefine/wikidata/exporters/QuickStatementsExporter.java index 6bc650bf8..4a6fdf1ae 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/exporters/QuickStatementsExporter.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/exporters/QuickStatementsExporter.java @@ -131,7 +131,9 @@ public class QuickStatementsExporter implements WriterExporter { } translateNameDescr(qid, item.getLabels(), "L", item.getItemId(), writer); + translateNameDescr(qid, item.getLabelsIfNew(), "L", item.getItemId(), writer); translateNameDescr(qid, item.getDescriptions(), "D", item.getItemId(), writer); + translateNameDescr(qid, item.getDescriptionsIfNew(), "D", item.getItemId(), writer); translateNameDescr(qid, item.getAliases(), "A", item.getItemId(), writer); for (Statement s : item.getAddedStatements()) { diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java index 335e00caf..9110b0031 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java @@ -45,13 +45,13 @@ public class NewItemScrutinizer extends EditScrutinizer { if (update.isNew()) { info(newItemType); - if (update.getLabels().isEmpty() && update.getAliases().isEmpty()) { + if (update.getLabels().isEmpty() && update.getLabelsIfNew().isEmpty() && update.getAliases().isEmpty()) { QAWarning issue = new QAWarning(noLabelType, null, QAWarning.Severity.CRITICAL, 1); issue.setProperty("example_entity", update.getItemId()); addIssue(issue); } - if (update.getDescriptions().isEmpty()) { + if (update.getDescriptions().isEmpty() && update.getDescriptionsIfNew().isEmpty()) { QAWarning issue = new QAWarning(noDescType, null, QAWarning.Severity.WARNING, 1); issue.setProperty("example_entity", update.getItemId()); addIssue(issue); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbNameDescExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbNameDescExpr.java index 3e4a0c5dc..065ec2139 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbNameDescExpr.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbNameDescExpr.java @@ -43,15 +43,15 @@ import com.fasterxml.jackson.annotation.JsonProperty; @JsonIgnoreProperties(ignoreUnknown = true) public class WbNameDescExpr { - enum NameDescrType { - LABEL, DESCRIPTION, ALIAS, + enum NameDescType { + LABEL, LABEL_IF_NEW, DESCRIPTION, DESCRIPTION_IF_NEW, ALIAS, } - private NameDescrType type; + private NameDescType type; private WbMonolingualExpr value; @JsonCreator - public WbNameDescExpr(@JsonProperty("name_type") NameDescrType type, + public WbNameDescExpr(@JsonProperty("name_type") NameDescType type, @JsonProperty("value") WbMonolingualExpr value) { Validate.notNull(type); this.type = type; @@ -72,11 +72,17 @@ public class WbNameDescExpr { MonolingualTextValue val = getValue().evaluate(ctxt); switch (getType()) { case LABEL: - item.addLabel(val); + item.addLabel(val, true); break; + case LABEL_IF_NEW: + item.addLabel(val, false); + break; case DESCRIPTION: - item.addDescription(val); + item.addDescription(val, true); break; + case DESCRIPTION_IF_NEW: + item.addDescription(val, false); + break; case ALIAS: item.addAlias(val); break; @@ -87,7 +93,7 @@ public class WbNameDescExpr { } @JsonProperty("name_type") - public NameDescrType getType() { + public NameDescType getType() { return type; } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java index 21763fb18..dab87d97b 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java @@ -61,7 +61,9 @@ public class ItemUpdate { private final List addedStatements; private final Set deletedStatements; private final Map labels; + private final Map labelsIfNew; private final Map descriptions; + private final Map descriptionsIfNew; private final Map> aliases; /** @@ -76,9 +78,13 @@ public class ItemUpdate { * @param deletedStatements * the statements to remove from the item * @param labels - * the labels to add on the item + * the labels to add on the item, overriding any existing one in that language + * @param labelsIfNew + * the labels to add on the item, only if no label for that language exists * @param descriptions - * the descriptions to add on the item + * the descriptions to add on the item, overriding any existing one in that language + * @param descriptionsIfNew + * the descriptions to add on the item, only if no description for that language exists * @param aliases * the aliases to add on the item. In theory their order should * matter but in practice people rarely rely on the order of aliases @@ -89,7 +95,9 @@ public class ItemUpdate { @JsonProperty("addedStatements") List addedStatements, @JsonProperty("deletedStatements") Set deletedStatements, @JsonProperty("labels") Set labels, + @JsonProperty("labelsIfNew") Set labelsIfNew, @JsonProperty("descriptions") Set descriptions, + @JsonProperty("descriptionsIfNew") Set descriptionsIfNew, @JsonProperty("addedAliases") Set aliases) { Validate.notNull(qid); this.qid = qid; @@ -101,8 +109,12 @@ public class ItemUpdate { deletedStatements = Collections.emptySet(); } this.deletedStatements = deletedStatements; - this.labels = constructTermMap(labels != null ? labels : Collections.emptyList()); - this.descriptions = constructTermMap(descriptions != null ? descriptions : Collections.emptyList()); + this.labels = new HashMap<>(); + this.labelsIfNew = new HashMap<>(); + mergeSingleTermMaps(this.labels, this.labelsIfNew, labels, labelsIfNew); + this.descriptions = new HashMap<>(); + this.descriptionsIfNew = new HashMap<>(); + mergeSingleTermMaps(this.descriptions, this.descriptionsIfNew, descriptions, descriptionsIfNew); this.aliases = constructTermListMap(aliases != null ? aliases : Collections.emptyList()); } @@ -119,7 +131,13 @@ public class ItemUpdate { * @param deletedStatements * the statements to delete * @param labels - * the labels to add + * the labels to add on the item, overriding any existing one in that language + * @param labelsIfNew + * the labels to add on the item, only if no label for that language exists + * @param descriptions + * the descriptions to add on the item, overriding any existing one in that language + * @param descriptionsIfNew + * the descriptions to add on the item, only if no description for that language exists * @param descriptions * the descriptions to add * @param aliases @@ -130,13 +148,17 @@ public class ItemUpdate { List addedStatements, Set deletedStatements, Map labels, + Map labelsIfNew, Map descriptions, + Map descriptionsIfNew, Map> aliases) { this.qid = qid; this.addedStatements = addedStatements; this.deletedStatements = deletedStatements; this.labels = labels; + this.labelsIfNew = labelsIfNew; this.descriptions = descriptions; + this.descriptionsIfNew = descriptionsIfNew; this.aliases = aliases; } @@ -168,20 +190,36 @@ public class ItemUpdate { } /** - * @return the list of updated labels + * @return the list of updated labels, overriding existing ones */ @JsonProperty("labels") public Set getLabels() { return labels.values().stream().collect(Collectors.toSet()); } + + /** + * @return the list of updated labels, only added if new + */ + @JsonProperty("labelsIfNew") + public Set getLabelsIfNew() { + return labelsIfNew.values().stream().collect(Collectors.toSet()); + } /** - * @return the list of updated descriptions + * @return the list of updated descriptions, overriding existing ones */ @JsonProperty("descriptions") public Set getDescriptions() { return descriptions.values().stream().collect(Collectors.toSet()); } + + /** + * @return the list of updated descriptions, only added if new + */ + @JsonProperty("descriptionsIfNew") + public Set getDescriptionsIfNew() { + return descriptionsIfNew.values().stream().collect(Collectors.toSet()); + } /** * @return the list of updated aliases @@ -228,13 +266,11 @@ public class ItemUpdate { Set newDeletedStatements = new HashSet<>(deletedStatements); newDeletedStatements.addAll(other.getDeletedStatements()); Map newLabels = new HashMap<>(labels); - for(MonolingualTextValue otherLabel : other.getLabels()) { - newLabels.put(otherLabel.getLanguageCode(), otherLabel); - } + Map newLabelsIfNew = new HashMap<>(labelsIfNew); + mergeSingleTermMaps(newLabels, newLabelsIfNew, other.getLabels(), other.getLabelsIfNew()); Map newDescriptions = new HashMap<>(descriptions); - for(MonolingualTextValue otherDescription : other.getDescriptions()) { - newDescriptions.put(otherDescription.getLanguageCode(), otherDescription); - } + Map newDescriptionsIfNew = new HashMap<>(descriptionsIfNew); + mergeSingleTermMaps(newDescriptions, newDescriptionsIfNew, other.getDescriptions(), other.getDescriptionsIfNew()); Map> newAliases = new HashMap<>(aliases); for(MonolingualTextValue alias : other.getAliases()) { List aliases = newAliases.get(alias.getLanguageCode()); @@ -246,8 +282,8 @@ public class ItemUpdate { aliases.add(alias); } } - return new ItemUpdate(qid, newAddedStatements, newDeletedStatements, newLabels, newDescriptions, newAliases); - } + return new ItemUpdate(qid, newAddedStatements, newDeletedStatements, newLabels, newLabelsIfNew, newDescriptions, newDescriptionsIfNew, newAliases); + } /** * Group added statements in StatementGroups: useful if the item is new. @@ -309,16 +345,20 @@ public class ItemUpdate { public ItemUpdate normalizeLabelsAndAliases() { // Ensure that we are only adding aliases with labels Set filteredAliases = new HashSet<>(); - Map newLabels = new HashMap<>(labels); + Map newLabels = new HashMap<>(labelsIfNew); + newLabels.putAll(labels); for (MonolingualTextValue alias : getAliases()) { - if (!labels.containsKey(alias.getLanguageCode())) { + if (!newLabels.containsKey(alias.getLanguageCode())) { newLabels.put(alias.getLanguageCode(), alias); } else { filteredAliases.add(alias); } } + Map newDescriptions = new HashMap<>(descriptionsIfNew); + newDescriptions.putAll(descriptions); return new ItemUpdate(qid, addedStatements, deletedStatements, - newLabels, descriptions, constructTermListMap(filteredAliases)); + newLabels, Collections.emptyMap(), newDescriptions, Collections.emptyMap(), + constructTermListMap(filteredAliases)); } @Override @@ -372,9 +412,35 @@ public class ItemUpdate { return builder.toString(); } - protected Map constructTermMap(Collection mltvs) { - return mltvs.stream() - .collect(Collectors.toMap(MonolingualTextValue::getLanguageCode, Function.identity())); + /** + * Helper function to merge dictionaries of terms to override or provide. + * @param currentTerms + * current map of terms to override + * @param currentTermsIfNew + * current map of terms to provide if not already there + * @param newTerms + * new terms to override + * @param newTermsIfNew + * new terms to provide if not already there + */ + private static void mergeSingleTermMaps( + Map currentTerms, + Map currentTermsIfNew, + Set newTerms, + Set newTermsIfNew) { + for(MonolingualTextValue otherLabel : newTerms) { + String languageCode = otherLabel.getLanguageCode(); + currentTerms.put(languageCode, otherLabel); + if (currentTermsIfNew.containsKey(languageCode)) { + currentTermsIfNew.remove(languageCode); + } + } + for(MonolingualTextValue otherLabel : newTermsIfNew) { + String languageCode = otherLabel.getLanguageCode(); + if (!currentTermsIfNew.containsKey(languageCode) && !currentTerms.containsKey(languageCode)) { + currentTermsIfNew.put(languageCode, otherLabel); + } + } } protected Map> constructTermListMap(Collection mltvs) { diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdateBuilder.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdateBuilder.java index 1556370c0..9516fe4c4 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdateBuilder.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdateBuilder.java @@ -45,7 +45,9 @@ public class ItemUpdateBuilder { private List addedStatements; private Set deletedStatements; private Set labels; + private Set labelsIfNew; private Set descriptions; + private Set descriptionsIfNew; private Set aliases; private boolean built; @@ -62,7 +64,9 @@ public class ItemUpdateBuilder { this.addedStatements = new ArrayList<>(); this.deletedStatements = new HashSet(); this.labels = new HashSet(); + this.labelsIfNew = new HashSet(); this.descriptions = new HashSet(); + this.descriptionsIfNew = new HashSet(); this.aliases = new HashSet(); this.built = false; } @@ -118,54 +122,74 @@ public class ItemUpdateBuilder { } /** - * Adds a label to the item. It will override any existing label in this - * language. + * Adds a label to the item. * * @param label * the label to add + * @param override + * whether the label should be added even if there is already a label in that language */ - public ItemUpdateBuilder addLabel(MonolingualTextValue label) { + public ItemUpdateBuilder addLabel(MonolingualTextValue label, boolean override) { Validate.isTrue(!built, "ItemUpdate has already been built"); - labels.add(label); + if (override) { + labels.add(label); + } else { + labelsIfNew.add(label); + } return this; } /** - * Adds a list of labels to the item. It will override any existing label in - * each language. + * Adds a list of labels to the item. * * @param labels * the labels to add + * @param override + * whether the label should be added even if there is already a label in that language */ - public ItemUpdateBuilder addLabels(Set labels) { + public ItemUpdateBuilder addLabels(Set labels, boolean override) { Validate.isTrue(!built, "ItemUpdate has already been built"); - this.labels.addAll(labels); + if (override) { + this.labels.addAll(labels); + } else { + labelsIfNew.addAll(labels); + } return this; } /** - * Adds a description to the item. It will override any existing description in - * this language. + * Adds a description to the item. * * @param description * the description to add + * @param override + * whether the description should be added even if there is already a description in that language */ - public ItemUpdateBuilder addDescription(MonolingualTextValue description) { + public ItemUpdateBuilder addDescription(MonolingualTextValue description, boolean override) { Validate.isTrue(!built, "ItemUpdate has already been built"); - descriptions.add(description); + if (override) { + descriptions.add(description); + } else { + descriptionsIfNew.add(description); + } return this; } /** - * Adds a list of descriptions to the item. It will override any existing - * description in each language. + * Adds a list of descriptions to the item. * * @param descriptions * the descriptions to add + * @param override + * whether the description should be added even if there is already a description in that language */ - public ItemUpdateBuilder addDescriptions(Set descriptions) { + public ItemUpdateBuilder addDescriptions(Set descriptions, boolean override) { Validate.isTrue(!built, "ItemUpdate has already been built"); - this.descriptions.addAll(descriptions); + if (override) { + this.descriptions.addAll(descriptions); + } else { + descriptionsIfNew.addAll(descriptions); + } return this; } @@ -202,7 +226,7 @@ public class ItemUpdateBuilder { */ public ItemUpdate build() { built = true; - return new ItemUpdate(qid, addedStatements, deletedStatements, labels, descriptions, aliases); + return new ItemUpdate(qid, addedStatements, deletedStatements, labels, labelsIfNew, descriptions, descriptionsIfNew, aliases); } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/QuickStatementsUpdateScheduler.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/QuickStatementsUpdateScheduler.java index 4640daa61..f6dedb3d9 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/QuickStatementsUpdateScheduler.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/QuickStatementsUpdateScheduler.java @@ -65,7 +65,11 @@ public class QuickStatementsUpdateScheduler implements UpdateScheduler { protected void splitUpdate(ItemUpdate update) throws ImpossibleSchedulingException { ItemUpdateBuilder remainingUpdateBuilder = new ItemUpdateBuilder(update.getItemId()) - .addLabels(update.getLabels()).addDescriptions(update.getDescriptions()).addAliases(update.getAliases()) + .addLabels(update.getLabels(), true) + .addLabels(update.getLabelsIfNew(), false) + .addDescriptions(update.getDescriptions(), true) + .addDescriptions(update.getDescriptionsIfNew(), false) + .addAliases(update.getAliases()) .deleteStatements(update.getDeletedStatements()); Map referencingUpdates = new HashMap<>(); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/WikibaseAPIUpdateScheduler.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/WikibaseAPIUpdateScheduler.java index f12936fd8..d4a9b9eb3 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/WikibaseAPIUpdateScheduler.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/WikibaseAPIUpdateScheduler.java @@ -98,8 +98,12 @@ public class WikibaseAPIUpdateScheduler implements UpdateScheduler { * @param update */ protected void splitUpdate(ItemUpdate update) { - ItemUpdateBuilder pointerFreeBuilder = new ItemUpdateBuilder(update.getItemId()).addLabels(update.getLabels()) - .addDescriptions(update.getDescriptions()).addAliases(update.getAliases()) + ItemUpdateBuilder pointerFreeBuilder = new ItemUpdateBuilder(update.getItemId()) + .addLabels(update.getLabels(), true) + .addLabels(update.getLabelsIfNew(), false) + .addDescriptions(update.getDescriptions(), true) + .addDescriptions(update.getDescriptionsIfNew(), false) + .addAliases(update.getAliases()) .deleteStatements(update.getDeletedStatements()); ItemUpdateBuilder pointerFullBuilder = new ItemUpdateBuilder(update.getItemId()); diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/EditBatchProcessorTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/EditBatchProcessorTest.java index 2540698f2..94fa52c3e 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/EditBatchProcessorTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/EditBatchProcessorTest.java @@ -120,7 +120,7 @@ public class EditBatchProcessorTest extends RefineTest { List qids = ids.stream().map(e -> Datamodel.makeWikidataItemIdValue(e)) .collect(Collectors.toList()); List batch = qids.stream() - .map(qid -> new ItemUpdateBuilder(qid).addDescription(description).build()) + .map(qid -> new ItemUpdateBuilder(qid).addDescription(description, true).build()) .collect(Collectors.toList()); int batchSize = 50; diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java index cbf50596f..1b1e90266 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/editing/ReconEntityRewriterTest.java @@ -73,15 +73,15 @@ public class ReconEntityRewriterTest { ItemUpdate update = new ItemUpdateBuilder(subject) .addStatement(TestingData.generateStatement(subject, TestingData.newIdB)) .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId)) - .addLabel(Datamodel.makeMonolingualTextValue("label", "de")) - .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de")) + .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true) + .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); ItemUpdate rewritten = rewriter.rewrite(update); ItemUpdate expected = new ItemUpdateBuilder(subject) .addStatement(TestingData.generateStatement(subject, newlyCreated)) .deleteStatement(TestingData.generateStatement(subject, TestingData.existingId)) - .addLabel(Datamodel.makeMonolingualTextValue("label", "de")) - .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de")) + .addLabel(Datamodel.makeMonolingualTextValue("label", "de"), true) + .addDescription(Datamodel.makeMonolingualTextValue("beschreibung", "de"), false) .addAlias(Datamodel.makeMonolingualTextValue("darstellung", "de")).build(); assertEquals(expected, rewritten); } diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/exporters/QuickStatementsExporterTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/exporters/QuickStatementsExporterTest.java index 7c04de840..800dfb816 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/exporters/QuickStatementsExporterTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/exporters/QuickStatementsExporterTest.java @@ -91,9 +91,24 @@ public class QuickStatementsExporterTest extends RefineTest { @Test public void testNameDesc() throws IOException { + /** + * Adding labels and description without overriding is not supported by QS, so + * we fall back on adding them with overriding. + */ + ItemUpdate update = new ItemUpdateBuilder(qid1) + .addLabel(Datamodel.makeMonolingualTextValue("some label", "en"), true) + .addDescription(Datamodel.makeMonolingualTextValue("some description", "en"), true) + .build(); + + assertEquals("Q1377\tLen\t\"some label\"\n" + "Q1377\tDen\t\"some description\"\n", export(update)); + } + + @Test + public void testOptionalNameDesc() + throws IOException { ItemUpdate update = new ItemUpdateBuilder(newIdA) - .addLabel(Datamodel.makeMonolingualTextValue("my new item", "en")) - .addDescription(Datamodel.makeMonolingualTextValue("isn't it awesome?", "en")) + .addLabel(Datamodel.makeMonolingualTextValue("my new item", "en"), false) + .addDescription(Datamodel.makeMonolingualTextValue("isn't it awesome?", "en"), false) .addAlias(Datamodel.makeMonolingualTextValue("fabitem", "en")).build(); assertEquals("CREATE\n" + "LAST\tLen\t\"my new item\"\n" + "LAST\tDen\t\"isn't it awesome?\"\n" diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizerTest.java index 333fc78af..37ecc9115 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizerTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizerTest.java @@ -65,8 +65,8 @@ public class NewItemScrutinizerTest extends ScrutinizerTest { public void testGoodNewItem() { ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) - .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr")) - .addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en")).addStatement(p31Statement) + .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"), false) + .addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en"), true).addStatement(p31Statement) .build(); scrutinize(update); assertWarningsRaised(NewItemScrutinizer.newItemType); @@ -75,8 +75,8 @@ public class NewItemScrutinizerTest extends ScrutinizerTest { @Test public void testDeletedStatements() { ItemUpdate update = new ItemUpdateBuilder(TestingData.newIdA) - .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr")) - .addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en")).addStatement(p31Statement) + .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"), false) + .addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en"), true).addStatement(p31Statement) .deleteStatement(TestingData.generateStatement(TestingData.newIdA, TestingData.matchedId)).build(); scrutinize(update); assertWarningsRaised(NewItemScrutinizer.newItemType, NewItemScrutinizer.deletedStatementsType); diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java index bba04c5c6..9a5856437 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java @@ -43,6 +43,6 @@ public abstract class ValueScrutinizerTest extends SnakScrutinizerTest { } public void scrutinizeLabel(MonolingualTextValue text) { - scrutinize(new ItemUpdateBuilder(TestingData.existingId).addLabel(text).build()); + scrutinize(new ItemUpdateBuilder(TestingData.existingId).addLabel(text, true).build()); } } diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbItemDocumentExprTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbItemDocumentExprTest.java index 2da9cfb23..dc8187f13 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbItemDocumentExprTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbItemDocumentExprTest.java @@ -45,7 +45,7 @@ public class WbItemDocumentExprTest extends WbExpressionTest { public WbItemDocumentExprTest() { WbStatementGroupExprTest sgt = new WbStatementGroupExprTest(); - WbNameDescExpr nde = new WbNameDescExpr(WbNameDescExpr.NameDescrType.ALIAS, + WbNameDescExpr nde = new WbNameDescExpr(WbNameDescExpr.NameDescType.ALIAS, new WbMonolingualExpr(new WbLanguageConstant("en", "English"), new WbStringVariable("column D"))); WbItemVariable subjectExpr = new WbItemVariable("column E"); expr = new WbItemDocumentExpr(subjectExpr, Collections.singletonList(nde), Collections.singletonList(sgt.expr)); diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbNameDescExprTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbNameDescExprTest.java index cd4f0fba0..ef72d5c6e 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbNameDescExprTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbNameDescExprTest.java @@ -38,7 +38,7 @@ import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; public class WbNameDescExprTest extends WbExpressionTest { private ItemIdValue subject = Datamodel.makeWikidataItemIdValue("Q56"); - public WbNameDescExpr expr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.ALIAS, + public WbNameDescExpr expr = new WbNameDescExpr(WbNameDescExpr.NameDescType.ALIAS, new WbMonolingualExpr(new WbLanguageConstant("en", "English"), new WbStringVariable("column A"))); public String jsonRepresentation = "{\"name_type\":\"ALIAS\",\"value\":{\"type\":\"wbmonolingualexpr\",\"language\":" @@ -47,7 +47,7 @@ public class WbNameDescExprTest extends WbExpressionTest { @Test public void testContributeToLabel() { - WbNameDescExpr labelExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.LABEL, + WbNameDescExpr labelExpr = new WbNameDescExpr(WbNameDescExpr.NameDescType.LABEL, TestingData.getTestMonolingualExpr("fr", "français", "le croissant magnifique")); ItemUpdateBuilder update = new ItemUpdateBuilder(subject); labelExpr.contributeTo(update, ctxt); @@ -57,7 +57,7 @@ public class WbNameDescExprTest extends WbExpressionTest { @Test public void testContributeToDescription() { - WbNameDescExpr descriptionExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.DESCRIPTION, + WbNameDescExpr descriptionExpr = new WbNameDescExpr(WbNameDescExpr.NameDescType.DESCRIPTION, TestingData.getTestMonolingualExpr("de", "Deutsch", "wunderschön")); ItemUpdateBuilder update = new ItemUpdateBuilder(subject); descriptionExpr.contributeTo(update, ctxt); @@ -67,7 +67,7 @@ public class WbNameDescExprTest extends WbExpressionTest { @Test public void testContributeToAlias() { - WbNameDescExpr aliasExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.ALIAS, + WbNameDescExpr aliasExpr = new WbNameDescExpr(WbNameDescExpr.NameDescType.ALIAS, TestingData.getTestMonolingualExpr("en", "English", "snack")); ItemUpdateBuilder update = new ItemUpdateBuilder(subject); aliasExpr.contributeTo(update, ctxt); @@ -86,8 +86,8 @@ public class WbNameDescExprTest extends WbExpressionTest { @Test public void testGetters() { WbMonolingualExpr monolingualExpr = TestingData.getTestMonolingualExpr("en", "English", "not sure what"); - WbNameDescExpr aliasExpr = new WbNameDescExpr(WbNameDescExpr.NameDescrType.ALIAS, monolingualExpr); - assertEquals(WbNameDescExpr.NameDescrType.ALIAS, aliasExpr.getType()); + WbNameDescExpr aliasExpr = new WbNameDescExpr(WbNameDescExpr.NameDescType.ALIAS, monolingualExpr); + assertEquals(WbNameDescExpr.NameDescType.ALIAS, aliasExpr.getType()); assertEquals(monolingualExpr, aliasExpr.getValue()); } diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/ItemUpdateTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/ItemUpdateTest.java index 469f801af..a5773f57e 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/ItemUpdateTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/ItemUpdateTest.java @@ -132,7 +132,7 @@ public class ItemUpdateTest { public void testGroupBySubject() { ItemUpdate updateA = new ItemUpdateBuilder(newSubject).addStatement(statement1).build(); ItemUpdate updateB = new ItemUpdateBuilder(sameNewSubject).addStatement(statement2).build(); - ItemUpdate updateC = new ItemUpdateBuilder(existingSubject).addLabel(label).build(); + ItemUpdate updateC = new ItemUpdateBuilder(existingSubject).addLabel(label, true).build(); ItemUpdate updateD = new ItemUpdateBuilder(matchedSubject).build(); Map grouped = ItemUpdate .groupBySubject(Arrays.asList(updateA, updateB, updateC, updateD)); @@ -148,12 +148,12 @@ public class ItemUpdateTest { public void testNormalizeTerms() { MonolingualTextValue aliasEn = Datamodel.makeMonolingualTextValue("alias", "en"); MonolingualTextValue aliasFr = Datamodel.makeMonolingualTextValue("coucou", "fr"); - ItemUpdate updateA = new ItemUpdateBuilder(newSubject).addLabel(label).addAlias(aliasEn).addAlias(aliasFr) + ItemUpdate updateA = new ItemUpdateBuilder(newSubject).addLabel(label, true).addAlias(aliasEn).addAlias(aliasFr) .build(); assertFalse(updateA.isNull()); ItemUpdate normalized = updateA.normalizeLabelsAndAliases(); - ItemUpdate expectedUpdate = new ItemUpdateBuilder(newSubject).addLabel(label).addAlias(aliasEn) - .addLabel(aliasFr).build(); + ItemUpdate expectedUpdate = new ItemUpdateBuilder(newSubject).addLabel(label, true).addAlias(aliasEn) + .addLabel(aliasFr, true).build(); assertEquals(expectedUpdate, normalized); } @@ -161,9 +161,87 @@ public class ItemUpdateTest { public void testMergeLabels() { MonolingualTextValue label1 = Datamodel.makeMonolingualTextValue("first label", "en"); MonolingualTextValue label2 = Datamodel.makeMonolingualTextValue("second label", "en"); - ItemUpdate update1 = new ItemUpdateBuilder(existingSubject).addLabel(label1).build(); - ItemUpdate update2 = new ItemUpdateBuilder(existingSubject).addLabel(label2).build(); + ItemUpdate update1 = new ItemUpdateBuilder(existingSubject).addLabel(label1, true).build(); + ItemUpdate update2 = new ItemUpdateBuilder(existingSubject).addLabel(label2, true).build(); ItemUpdate merged = update1.merge(update2); assertEquals(Collections.singleton(label2), merged.getLabels()); } + + @Test + public void testMergeLabelsIfNew() { + MonolingualTextValue label1 = Datamodel.makeMonolingualTextValue("first label", "en"); + MonolingualTextValue label2 = Datamodel.makeMonolingualTextValue("second label", "en"); + ItemUpdate update1 = new ItemUpdateBuilder(existingSubject).addLabel(label1, false).build(); + ItemUpdate update2 = new ItemUpdateBuilder(existingSubject).addLabel(label2, false).build(); + ItemUpdate merged = update1.merge(update2); + assertEquals(Collections.singleton(label1), merged.getLabelsIfNew()); + assertEquals(Collections.emptySet(), merged.getLabels()); + } + + @Test + public void testMergeLabelsIfNewOverriding() { + MonolingualTextValue label1 = Datamodel.makeMonolingualTextValue("first label", "en"); + MonolingualTextValue label2 = Datamodel.makeMonolingualTextValue("second label", "en"); + ItemUpdate update1 = new ItemUpdateBuilder(existingSubject).addLabel(label1, true).build(); + ItemUpdate update2 = new ItemUpdateBuilder(existingSubject).addLabel(label2, false).build(); + ItemUpdate merged = update1.merge(update2); + assertEquals(Collections.singleton(label1), merged.getLabels()); + assertEquals(Collections.emptySet(), merged.getLabelsIfNew()); + } + + @Test + public void testMergeLabelsIfNewOverriding2() { + MonolingualTextValue label1 = Datamodel.makeMonolingualTextValue("first label", "en"); + MonolingualTextValue label2 = Datamodel.makeMonolingualTextValue("second label", "en"); + ItemUpdate update1 = new ItemUpdateBuilder(existingSubject).addLabel(label1, false).build(); + ItemUpdate update2 = new ItemUpdateBuilder(existingSubject).addLabel(label2, true).build(); + ItemUpdate merged = update1.merge(update2); + assertEquals(Collections.singleton(label2), merged.getLabels()); + assertEquals(Collections.emptySet(), merged.getLabelsIfNew()); + } + + @Test + public void testMergeDescriptionsIfNew() { + MonolingualTextValue description1 = Datamodel.makeMonolingualTextValue("first description", "en"); + MonolingualTextValue description2 = Datamodel.makeMonolingualTextValue("second description", "en"); + ItemUpdate update1 = new ItemUpdateBuilder(existingSubject).addDescription(description1, false).build(); + ItemUpdate update2 = new ItemUpdateBuilder(existingSubject).addDescription(description2, false).build(); + ItemUpdate merged = update1.merge(update2); + assertEquals(Collections.singleton(description1), merged.getDescriptionsIfNew()); + assertEquals(Collections.emptySet(), merged.getDescriptions()); + } + + @Test + public void testMergeDescriptionsIfNewOverriding() { + MonolingualTextValue description1 = Datamodel.makeMonolingualTextValue("first description", "en"); + MonolingualTextValue description2 = Datamodel.makeMonolingualTextValue("second description", "en"); + ItemUpdate update1 = new ItemUpdateBuilder(existingSubject).addDescription(description1, true).build(); + ItemUpdate update2 = new ItemUpdateBuilder(existingSubject).addDescription(description2, false).build(); + ItemUpdate merged = update1.merge(update2); + assertEquals(Collections.singleton(description1), merged.getDescriptions()); + assertEquals(Collections.emptySet(), merged.getDescriptionsIfNew()); + } + + @Test + public void testMergeDescriptionsIfNewOverriding2() { + MonolingualTextValue description1 = Datamodel.makeMonolingualTextValue("first description", "en"); + MonolingualTextValue description2 = Datamodel.makeMonolingualTextValue("second description", "en"); + ItemUpdate update1 = new ItemUpdateBuilder(existingSubject).addDescription(description1, false).build(); + ItemUpdate update2 = new ItemUpdateBuilder(existingSubject).addDescription(description2, true).build(); + ItemUpdate merged = update1.merge(update2); + assertEquals(Collections.singleton(description2), merged.getDescriptions()); + assertEquals(Collections.emptySet(), merged.getDescriptionsIfNew()); + } + + @Test + public void testConstructOverridingLabels() { + MonolingualTextValue label1 = Datamodel.makeMonolingualTextValue("first label", "en"); + MonolingualTextValue label2 = Datamodel.makeMonolingualTextValue("second label", "en"); + ItemUpdate update = new ItemUpdateBuilder(existingSubject) + .addLabel(label1, false) + .addLabel(label2, true) + .build(); + assertEquals(Collections.singleton(label2), update.getLabels()); + assertEquals(Collections.emptySet(), update.getLabelsIfNew()); + } } diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/scheduler/UpdateSchedulerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/scheduler/UpdateSchedulerTest.java index 4f1828981..5c68f1374 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/scheduler/UpdateSchedulerTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/scheduler/UpdateSchedulerTest.java @@ -87,7 +87,7 @@ public abstract class UpdateSchedulerTest { throws ImpossibleSchedulingException { ItemUpdate update1 = new ItemUpdateBuilder(existingIdA).addStatement(sAtoB).build(); ItemUpdate update2 = new ItemUpdateBuilder(existingIdA) - .addLabel(Datamodel.makeMonolingualTextValue("hello", "fr")).addStatement(sAtoB).build(); + .addLabel(Datamodel.makeMonolingualTextValue("hello", "fr"), true).addStatement(sAtoB).build(); ItemUpdate merged = update1.merge(update2); assertEquals(Collections.singletonList(merged), schedule(update1, update2)); } @@ -95,9 +95,9 @@ public abstract class UpdateSchedulerTest { @Test public void testMergeNew() throws ImpossibleSchedulingException { - ItemUpdate update1 = new ItemUpdateBuilder(newIdA).addLabel(Datamodel.makeMonolingualTextValue("hello", "fr")) + ItemUpdate update1 = new ItemUpdateBuilder(newIdA).addLabel(Datamodel.makeMonolingualTextValue("hello", "fr"), true) .addStatement(sNewAtoB).build(); - ItemUpdate update2 = new ItemUpdateBuilder(newIdA).addLabel(Datamodel.makeMonolingualTextValue("hello", "fr")) + ItemUpdate update2 = new ItemUpdateBuilder(newIdA).addLabel(Datamodel.makeMonolingualTextValue("hello", "fr"), true) .build(); ItemUpdate merged = update1.merge(update2); assertEquals(Collections.singletonList(merged), schedule(update1, update2));