From 29a757dc57d0cc4b246eac004f9edf2729a26881 Mon Sep 17 00:00:00 2001 From: Ekta Mishra Date: Sun, 31 May 2020 18:54:51 +0530 Subject: [PATCH] Added support for Multi-value Constraint in Wikidata extension (#2629) --- .../wikidata/module/langs/translation-en.json | 4 ++ .../wikidata/qa/ConstraintFetcher.java | 9 +++- .../openrefine/wikidata/qa/EditInspector.java | 11 ++-- .../qa/WikidataConstraintFetcher.java | 29 +++++------ .../scrutinizers/MultiValueScrutinizer.java | 50 +++++++++++++++++++ .../wikidata/qa/MockConstraintFetcher.java | 15 ++++-- .../MultiValueScrutinizerTest.java | 46 +++++++++++++++++ 7 files changed, 136 insertions(+), 28 deletions(-) create mode 100644 extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/MultiValueScrutinizer.java create mode 100644 extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/MultiValueScrutinizerTest.java diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index 834988c73..aa2f69bcb 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -126,6 +126,10 @@ "warnings-messages/no-edit-generated/body": "There might be something wrong with your schema.", "warnings-messages/no-issue-detected/title": "No issue was detected in your edits.", "warnings-messages/no-issue-detected/body": "Note that OpenRefine cannot detect all the types of problems Wikidata edits can have.", + "warnings-messages/multi-valued-property-is-required-for-new-item/title": "{property_entity} should have more than one statement on new items.", + "warnings-messages/multi-valued-property-is-required-for-new-item/body": "This property is expected to have more than one statement on each item but it has a single statement, for instance on {example_entity}.", + "warnings-messages/multi-valued-property-is-required-for-existing-item/title": "{property_entity} should have more than one statement 
on existing items.", + "warnings-messages/multi-valued-property-is-required-for-existing-item/body": "This property is expected to have more than one statement on each item but it has a single statement, for instance on {example_entity}. If the item already has statements with this property in Wikidata, then this warning can be ignored.", "warnings-messages/ignored-qualifiers/title": "Some qualifiers were ignored.", "warnings-messages/ignored-qualifiers/body": "Qualifier values could not be parsed, so they will not be added to the corresponding statements.", "warnings-messages/ignored-references/title": "Some references were ignored.", diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java index 48fd291d9..534086444 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java @@ -23,12 +23,12 @@ ******************************************************************************/ package org.openrefine.wikidata.qa; -import java.util.Set; - import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import org.wikidata.wdtk.datamodel.interfaces.Value; +import java.util.Set; + /** * An object that fetches constraints about properties. * @@ -115,6 +115,11 @@ public interface ConstraintFetcher { */ boolean hasDistinctValues(PropertyIdValue pid); + /** + * Is this property expected to have more than one value per item? + */ + boolean hasMultiValue(PropertyIdValue pid); + /** * Can statements using this property have uncertainty bounds? 
*/ diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java index 7165ffaef..2122a7c27 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java @@ -23,17 +23,17 @@ ******************************************************************************/ package org.openrefine.wikidata.qa; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - import org.openrefine.wikidata.qa.scrutinizers.*; import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler; import org.openrefine.wikidata.utils.EntityCache; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + /** * Runs a collection of edit scrutinizers on an edit batch. 
* @@ -69,6 +69,7 @@ public class EditInspector { register(new CalendarScrutinizer()); register(new CommonDescriptionScrutinizer()); register(new EnglishDescriptionScrutinizer()); + register(new MultiValueScrutinizer()); } /** diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java index bd752f437..a3718e966 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java @@ -23,26 +23,16 @@ ******************************************************************************/ package org.openrefine.wikidata.qa; +import org.openrefine.wikidata.utils.EntityCache; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.interfaces.*; + import java.util.ArrayList; import java.util.List; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; -import org.openrefine.wikidata.utils.EntityCache; -import org.wikidata.wdtk.datamodel.helpers.Datamodel; -import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; -import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; -import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument; -import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; -import org.wikidata.wdtk.datamodel.interfaces.Snak; -import org.wikidata.wdtk.datamodel.interfaces.SnakGroup; -import org.wikidata.wdtk.datamodel.interfaces.Statement; -import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; -import org.wikidata.wdtk.datamodel.interfaces.StatementRank; -import org.wikidata.wdtk.datamodel.interfaces.StringValue; -import org.wikidata.wdtk.datamodel.interfaces.Value; - /** * This class provides an abstraction over the way constraint definitions are * stored in Wikidata. 
@@ -86,7 +76,9 @@ public class WikidataConstraintFetcher implements ConstraintFetcher { public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404"; public static String SINGLE_BEST_VALUE_CONSTRAINT_QID = "Q52060874"; public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410"; - + + public static String MULTI_VALUE_CONSTRAINT_QID = "Q21510857"; + public static String NO_BOUNDS_CONSTRAINT_QID = "Q51723761"; public static String INTEGER_VALUED_CONSTRAINT_QID = "Q52848401"; @@ -208,7 +200,12 @@ public class WikidataConstraintFetcher implements ConstraintFetcher { public boolean hasDistinctValues(PropertyIdValue pid) { return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null; } - + + @Override + public boolean hasMultiValue(PropertyIdValue pid) { + return getSingleConstraint(pid, MULTI_VALUE_CONSTRAINT_QID) != null; + } + @Override public boolean isSymmetric(PropertyIdValue pid) { return getSingleConstraint(pid, SYMMETRIC_CONSTRAINT_QID) != null; diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/MultiValueScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/MultiValueScrutinizer.java new file mode 100644 index 000000000..32b5de81d --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/MultiValueScrutinizer.java @@ -0,0 +1,50 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.openrefine.wikidata.qa.QAWarning; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; +import org.wikidata.wdtk.datamodel.interfaces.Statement; + +import java.util.HashMap; +import java.util.Map; + +public class MultiValueScrutinizer extends EditScrutinizer { + + public static final String new_type = "multi-valued-property-is-required-for-new-item"; + public static final String existing_type = "multi-valued-property-is-required-for-existing-item"; + + @Override + public void scrutinize(ItemUpdate update) { + 
Map<PropertyIdValue, Integer> propertyCount = new HashMap<>(); + + for (Statement statement : update.getAddedStatements()) { + PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId(); + if (propertyCount.containsKey(pid)) { + propertyCount.put(pid, propertyCount.get(pid) + 1); + } else if (_fetcher.hasMultiValue(pid)) { + propertyCount.put(pid, 1); + } + } + + if (update.isNew()) { + for (PropertyIdValue pid : propertyCount.keySet()) { + if (propertyCount.get(pid) == 1) { + QAWarning issue = new QAWarning(new_type, pid.getId(), QAWarning.Severity.WARNING, 1); + issue.setProperty("property_entity", pid); + issue.setProperty("example_entity", update.getItemId()); + addIssue(issue); + } + } + } else { + for (PropertyIdValue pid : propertyCount.keySet()) { + if (propertyCount.get(pid) == 1) { + QAWarning issue = new QAWarning(existing_type, pid.getId(), QAWarning.Severity.INFO, 1); + issue.setProperty("property_entity", pid); + issue.setProperty("example_entity", update.getItemId()); + addIssue(issue); + } + } + } + + } +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java index 174053737..88b5d5dc7 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java @@ -23,16 +23,16 @@ ******************************************************************************/ package org.openrefine.wikidata.qa; -import java.util.Arrays; -import java.util.Collections; -import java.util.Set; -import java.util.stream.Collectors; - import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import org.wikidata.wdtk.datamodel.interfaces.Value; +import java.util.Arrays; +import java.util.Collections; +import java.util.Set; +import 
java.util.stream.Collectors; + public class MockConstraintFetcher implements ConstraintFetcher { public static PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350"); @@ -116,6 +116,11 @@ public class MockConstraintFetcher implements ConstraintFetcher { return true; } + @Override + public boolean hasMultiValue(PropertyIdValue pid) { + return true; + } + @Override public boolean isSymmetric(PropertyIdValue pid) { return pid.equals(symmetricPid); diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/MultiValueScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/MultiValueScrutinizerTest.java new file mode 100644 index 000000000..152c49a21 --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/MultiValueScrutinizerTest.java @@ -0,0 +1,46 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.openrefine.wikidata.testing.TestingData; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.openrefine.wikidata.updates.ItemUpdateBuilder; +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; + +public class MultiValueScrutinizerTest extends ScrutinizerTest { + + @Override + public EditScrutinizer getScrutinizer() { + return new MultiValueScrutinizer(); + } + + @Test + public void testNoIssue() { + ItemIdValue idA = TestingData.existingId; + ItemIdValue idB = TestingData.matchedId; + ItemUpdate update = new ItemUpdateBuilder(idA).addStatement(TestingData.generateStatement(idA, idB)) + .addStatement(TestingData.generateStatement(idA, idB)).build(); + scrutinize(update); + assertNoWarningRaised(); + } + + @Test + public void testNewItemTrigger() { + ItemIdValue idA = TestingData.newIdA; + ItemIdValue idB = TestingData.newIdB; + ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(TestingData.generateStatement(idA, idB)).build(); + ItemUpdate updateB = new 
ItemUpdateBuilder(idB).addStatement(TestingData.generateStatement(idB, idB)).build(); + scrutinize(updateA, updateB); + assertWarningsRaised(MultiValueScrutinizer.new_type); + } + + @Test + public void testExistingItemTrigger() { + ItemIdValue idA = TestingData.existingId; + ItemIdValue idB = TestingData.matchedId; + ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(TestingData.generateStatement(idA, idB)).build(); + ItemUpdate updateB = new ItemUpdateBuilder(idB).addStatement(TestingData.generateStatement(idB, idB)).build(); + scrutinize(updateA, updateB); + assertWarningsRaised(MultiValueScrutinizer.existing_type); + } + +}