From 1638d68e352a89c3481d661b0cc950e667447da2 Mon Sep 17 00:00:00 2001
From: Ekta Mishra
Date: Sun, 7 Jun 2020 00:18:14 +0530
Subject: [PATCH] Added difference-within-range Scrutinizer (#2634)

---
Review notes (text between the "---" line and the first diff is not part of
the commit message):
- The line structure of the patch and the generic type arguments of the Java
  hunks have been restored.
- DifferenceWithinRangeScrutinizer: the day difference is now computed with
  getDay(); it was previously computed from getMonth() a second time.
- MockConstraintFetcher.hasDiffWithinRange only answers true for
  differenceWithinRangePid, like the other mock methods.
- A regression test covers an upper date earlier in the same month.
- Hunk sizes and the diffstat were updated accordingly; the "index" lines
  still carry the blob ids of the original commit.

 .../wikidata/module/langs/translation-en.json |  2 +
 .../wikidata/qa/ConstraintFetcher.java        | 35 +++++++
 .../openrefine/wikidata/qa/EditInspector.java |  1 +
 .../qa/WikidataConstraintFetcher.java         | 97 +++++++++++++++++++
 .../DifferenceWithinRangeScrutinizer.java     | 86 ++++++++++++++++
 .../wikidata/qa/MockConstraintFetcher.java    | 36 +++++++
 .../DifferenceWithinScrutinizerTest.java      | 79 +++++++++++++++
 7 files changed, 336 insertions(+)
 create mode 100644 extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DifferenceWithinRangeScrutinizer.java
 create mode 100644 extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DifferenceWithinScrutinizerTest.java

diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json
index aa2f69bcb..58e9b4ae5 100644
--- a/extensions/wikidata/module/langs/translation-en.json
+++ b/extensions/wikidata/module/langs/translation-en.json
@@ -120,6 +120,8 @@
     "warnings-messages/disallowed-qualifiers/body": "Statements using {statement_property_entity} such as the one on {example_item_entity} should not have a {disallowed_property_entity} qualifier as they are incompatible.",
     "warnings-messages/single-valued-property-added-more-than-once/title": "{property_entity} added more than once on the same item.",
     "warnings-messages/single-valued-property-added-more-than-once/body": "This property is expected to be used at most once on each item but has been added multiple times on the same item, for instance on {example_entity}.",
+    "warnings-messages/difference-of-the-properties-is-not-within-the-specified-range/title": "Inconsistent {source_entity} and {target_entity}",
+    "warnings-messages/difference-of-the-properties-is-not-within-the-specified-range/body": "The difference between {source_entity} and {target_entity} is expected to be within the range [{min_value}, {max_value}], but this fails on items such as {example_entity}.",
     "warnings-messages/identical-values-for-distinct-valued-property/title": "Identical values for {property_entity}",
     "warnings-messages/identical-values-for-distinct-valued-property/body": "This property should have distinct values, but the same value was found on {item1_entity} and {item2_entity} for instance.",
     "warnings-messages/no-edit-generated/title": "No edit was generated.",
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java
index 534086444..4ccc36ecd 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java
@@ -25,6 +25,7 @@ package org.openrefine.wikidata.qa;
 
 import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
 import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
 import org.wikidata.wdtk.datamodel.interfaces.Value;
 
 import java.util.Set;
@@ -139,4 +140,38 @@ public interface ConstraintFetcher {
      * Can this property be used on items?
      */
     boolean usableOnItems(PropertyIdValue pid);
+
+    /**
+     * Retrieves the lower bound of the range
+     * required in difference-within-range constraint
+     *
+     * @param pid the property carrying the constraint
+     * @return the minimum value, or null if none is defined
+     */
+    QuantityValue getMinimumValue(PropertyIdValue pid);
+
+    /**
+     * Retrieves the upper bound of the range
+     * required in difference-within-range constraint
+     *
+     * @param pid the property carrying the constraint
+     * @return the maximum value, or null if none is defined
+     */
+    QuantityValue getMaximumValue(PropertyIdValue pid);
+
+    /**
+     * Retrieves the lower value property for calculating the difference
+     * required in difference-within-range constraint
+     *
+     * @param pid
+     *            the property to calculate difference with
+     * @return the pid of the lower bound property, or null if none is defined
+     */
+    PropertyIdValue getLowerPropertyId(PropertyIdValue pid);
+
+    /**
+     * Is this property expected to have a value whose difference
+     * with its lower bound property should be in a range?
+     */
+    boolean hasDiffWithinRange(PropertyIdValue pid);
 }
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
index 2122a7c27..5e3ce15ea 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
@@ -70,6 +70,7 @@ public class EditInspector {
         register(new CommonDescriptionScrutinizer());
         register(new EnglishDescriptionScrutinizer());
         register(new MultiValueScrutinizer());
+        register(new DifferenceWithinRangeScrutinizer());
     }
 
     /**
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java
index a3718e966..340623990 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java
@@ -25,6 +25,7 @@ package org.openrefine.wikidata.qa;
 
 import org.openrefine.wikidata.utils.EntityCache;
 import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.implementation.QuantityValueImpl;
 import org.wikidata.wdtk.datamodel.interfaces.*;
 
 import java.util.ArrayList;
@@ -79,6 +80,11 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
 
     public static String MULTI_VALUE_CONSTRAINT_QID = "Q21510857";
 
+    public static String DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID = "Q21510854";
+    public static String DIFFERENCE_WITHIN_RANGE_CONSTRAINT_PID = "P2306";
+    public static String MINIMUM_VALUE_PID = "P2313";
+    public static String MAXIMUM_VALUE_PID = "P2312";
+
     public static String NO_BOUNDS_CONSTRAINT_QID = "Q51723761";
     public static String INTEGER_VALUED_CONSTRAINT_QID = "Q52848401";
 
@@ -337,4 +343,95 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
         }
         return results;
     }
+
+    /**
+     * Collects the quantity values of all snaks that use the given
+     * property in the supplied snak groups.
+     *
+     * @param groups
+     *            the snak groups to search (callers in this class pass the result of getSingleConstraint)
+     * @param pid
+     *            the id of the property whose snaks should be read
+     * @return the values found, in the order they are encountered
+     */
+    protected List<QuantityValue> getValues(List<SnakGroup> groups, String pid) {
+        List<QuantityValue> results = new ArrayList<>();
+        for (SnakGroup group : groups) {
+            if (group.getProperty().getId().equals(pid)) {
+                for (Snak snak : group.getSnaks()) {
+                    results.add((QuantityValueImpl) snak.getValue());
+                }
+            }
+        }
+        return results;
+    }
+
+    /**
+     * Is this property expected to have a value whose difference
+     * with its lower bound property should be in a range?
+     */
+    @Override
+    public boolean hasDiffWithinRange(PropertyIdValue pid) {
+        return getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID) != null;
+    }
+
+    /**
+     * Retrieves the lower value property for calculating the difference
+     * required in difference-within-range constraint
+     *
+     * @param pid
+     *            the property to calculate difference with
+     * @return the pid of the lower bound property, or null if none is defined
+     */
+    @Override
+    public PropertyIdValue getLowerPropertyId(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID);
+        if (specs != null) {
+            List<Value> lowerValueProperty = findValues(specs, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_PID);
+            if (!lowerValueProperty.isEmpty()) {
+                return (PropertyIdValue) lowerValueProperty.get(0);
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Retrieves the lower bound of the range
+     * required in difference-within-range constraint
+     *
+     * @param pid the property carrying the constraint
+     * @return the minimum value, or null if none is defined
+     */
+    @Override
+    public QuantityValue getMinimumValue(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID);
+        if (specs != null) {
+            List<QuantityValue> minValue = getValues(specs, MINIMUM_VALUE_PID);
+            if (!minValue.isEmpty()) {
+                return minValue.get(0);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Retrieves the upper bound of the range
+     * required in difference-within-range constraint
+     *
+     * @param pid the property carrying the constraint
+     * @return the maximum value, or null if none is defined
+     */
+    @Override
+    public QuantityValue getMaximumValue(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID);
+        if (specs != null) {
+            List<QuantityValue> maxValue = getValues(specs, MAXIMUM_VALUE_PID);
+            if (!maxValue.isEmpty()) {
+                return maxValue.get(0);
+            }
+        }
+        return null;
+    }
+
 }
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DifferenceWithinRangeScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DifferenceWithinRangeScrutinizer.java
new file mode 100644
index 000000000..66c076045
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DifferenceWithinRangeScrutinizer.java
@@ -0,0 +1,86 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.wikidata.wdtk.datamodel.interfaces.*;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Scrutinizer for the "difference within range" constraint. When a
+ * property with this constraint and the property it is compared against
+ * are both added to an item with time values, a warning is raised if the
+ * difference between the two dates falls outside the allowed range.
+ */
+public class DifferenceWithinRangeScrutinizer extends EditScrutinizer {
+
+    public static final String type = "difference-of-the-properties-is-not-within-the-specified-range";
+
+    @Override
+    public void scrutinize(ItemUpdate update) {
+        // Main value of the added statements, by property. Only one value is
+        // kept per property: a later statement replaces an earlier one.
+        Map<PropertyIdValue, Value> propertyIdValueValueMap = new HashMap<>();
+        for (Statement statement : update.getAddedStatements()) {
+            PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
+            Value value = statement.getClaim().getMainSnak().getValue();
+            propertyIdValueValueMap.put(pid, value);
+        }
+
+        for (PropertyIdValue propertyId : propertyIdValueValueMap.keySet()) {
+            if (!_fetcher.hasDiffWithinRange(propertyId)) {
+                continue;
+            }
+            PropertyIdValue lowerPropertyId = _fetcher.getLowerPropertyId(propertyId);
+            QuantityValue minRangeValue = _fetcher.getMinimumValue(propertyId);
+            QuantityValue maxRangeValue = _fetcher.getMaximumValue(propertyId);
+
+            Value startingValue = propertyIdValueValueMap.get(lowerPropertyId);
+            Value endingValue = propertyIdValueValueMap.get(propertyId);
+            // nothing to check unless both properties were added with a time value
+            if (!(startingValue instanceof TimeValue) || !(endingValue instanceof TimeValue)) {
+                continue;
+            }
+            TimeValue lowerDate = (TimeValue) startingValue;
+            TimeValue upperDate = (TimeValue) endingValue;
+
+            long differenceInYears = upperDate.getYear() - lowerDate.getYear();
+            long differenceInMonths = upperDate.getMonth() - lowerDate.getMonth();
+            long differenceInDays = upperDate.getDay() - lowerDate.getDay();
+
+            // The bounds are compared with the difference between the year numbers.
+            // Within the same year, a negative month difference (or, within the same
+            // month, a negative day difference) is also reported as below the minimum.
+            boolean belowMinimum = minRangeValue != null
+                    && (differenceInYears < minRangeValue.getNumericValue().longValue()
+                            || differenceInYears == 0 && differenceInMonths < 0
+                            || differenceInYears == 0 && differenceInMonths == 0 && differenceInDays < 0);
+            boolean aboveMaximum = maxRangeValue != null
+                    && differenceInYears > maxRangeValue.getNumericValue().longValue();
+
+            if (belowMinimum) {
+                raiseIssue(update, lowerPropertyId, propertyId, minRangeValue, maxRangeValue);
+            }
+            if (aboveMaximum) {
+                raiseIssue(update, lowerPropertyId, propertyId, minRangeValue, maxRangeValue);
+            }
+        }
+    }
+
+    /**
+     * Adds a warning stating that the difference between the two properties
+     * is out of range on the updated item. A bound that is not defined is
+     * recorded as a null "min_value" / "max_value".
+     */
+    private void raiseIssue(ItemUpdate update, PropertyIdValue lowerPropertyId, PropertyIdValue propertyId,
+            QuantityValue minRangeValue, QuantityValue maxRangeValue) {
+        QAWarning issue = new QAWarning(type, propertyId.getId(), QAWarning.Severity.WARNING, 1);
+        issue.setProperty("source_entity", lowerPropertyId);
+        issue.setProperty("target_entity", propertyId);
+        issue.setProperty("min_value", minRangeValue == null ? null : minRangeValue.getNumericValue());
+        issue.setProperty("max_value", maxRangeValue == null ? null : maxRangeValue.getNumericValue());
+        issue.setProperty("example_entity", update.getItemId());
+        addIssue(issue);
+    }
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java
index 88b5d5dc7..3b4c314bb 100644
--- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java
@@ -26,8 +26,10 @@ package org.openrefine.wikidata.qa;
 import org.wikidata.wdtk.datamodel.helpers.Datamodel;
 import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
 import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
 import org.wikidata.wdtk.datamodel.interfaces.Value;
 
+import java.math.BigDecimal;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Set;
@@ -59,6 +61,11 @@ public class MockConstraintFetcher implements ConstraintFetcher {
 
     public static PropertyIdValue propertyOnlyPid = Datamodel.makeWikidataPropertyIdValue("P372");
 
+    public static PropertyIdValue differenceWithinRangePid = Datamodel.makeWikidataPropertyIdValue("P570");
+    public static PropertyIdValue lowerBoundPid = Datamodel.makeWikidataPropertyIdValue("P569");
+    public static QuantityValue minValuePid = Datamodel.makeQuantityValue(new BigDecimal(0));
+    public static QuantityValue maxValuePid = Datamodel.makeQuantityValue(new BigDecimal(150));
+
     @Override
     public String getFormatRegex(PropertyIdValue pid) {
         return "[1-9]\\d+";
@@ -166,4 +173,33 @@ public class MockConstraintFetcher implements ConstraintFetcher {
     public boolean usableOnItems(PropertyIdValue pid) {
         return !propertyOnlyPid.equals(pid);
     }
+
+    @Override
+    public QuantityValue getMinimumValue(PropertyIdValue pid) {
+        if (differenceWithinRangePid.equals(pid)) {
+            return minValuePid;
+        }
+        return null;
+    }
+
+    @Override
+    public QuantityValue getMaximumValue(PropertyIdValue pid) {
+        if (differenceWithinRangePid.equals(pid)) {
+            return maxValuePid;
+        }
+        return null;
+    }
+
+    @Override
+    public PropertyIdValue getLowerPropertyId(PropertyIdValue pid) {
+        if (differenceWithinRangePid.equals(pid)) {
+            return lowerBoundPid;
+        }
+        return null;
+    }
+
+    @Override
+    public boolean hasDiffWithinRange(PropertyIdValue pid) {
+        return differenceWithinRangePid.equals(pid);
+    }
 }
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DifferenceWithinScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DifferenceWithinScrutinizerTest.java
new file mode 100644
index 000000000..2ffb6be09
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DifferenceWithinScrutinizerTest.java
@@ -0,0 +1,79 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.qa.MockConstraintFetcher;
+import org.openrefine.wikidata.testing.TestingData;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.testng.annotations.Test;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.implementation.StatementImpl;
+import org.wikidata.wdtk.datamodel.implementation.TimeValueImpl;
+import org.wikidata.wdtk.datamodel.interfaces.*;
+
+public class DifferenceWithinScrutinizerTest extends ScrutinizerTest {
+    @Override
+    public EditScrutinizer getScrutinizer() {
+        return new DifferenceWithinRangeScrutinizer();
+    }
+
+    @Test
+    public void testTrigger() {
+        ItemIdValue idA = TestingData.existingId;
+        PropertyIdValue lowerBoundPid = MockConstraintFetcher.lowerBoundPid;
+        PropertyIdValue upperBoundPid = MockConstraintFetcher.differenceWithinRangePid;
+
+        TimeValue lowerYear = new TimeValueImpl(1800, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
+        TimeValue upperYear = new TimeValueImpl(2020, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
+
+        ValueSnak value1 = Datamodel.makeValueSnak(lowerBoundPid, lowerYear);
+        ValueSnak value2 = Datamodel.makeValueSnak(upperBoundPid, upperYear);
+
+        Statement statement1 = new StatementImpl("P569", value1,idA);
+        Statement statement2 = new StatementImpl("P570", value2,idA);
+
+        ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement1).addStatement(statement2).build();
+        scrutinize(updateA);
+        assertWarningsRaised(DifferenceWithinRangeScrutinizer.type);
+    }
+
+    @Test
+    public void testNoIssue() {
+        ItemIdValue idA = TestingData.existingId;
+        PropertyIdValue lowerBoundPid = MockConstraintFetcher.lowerBoundPid;
+        PropertyIdValue upperBoundPid = MockConstraintFetcher.differenceWithinRangePid;
+
+        TimeValue lowerYear = new TimeValueImpl(2000, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
+        TimeValue upperYear = new TimeValueImpl(2020, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
+
+        ValueSnak value1 = Datamodel.makeValueSnak(lowerBoundPid, lowerYear);
+        ValueSnak value2 = Datamodel.makeValueSnak(upperBoundPid, upperYear);
+
+        Statement statement1 = new StatementImpl("P569", value1,idA);
+        Statement statement2 = new StatementImpl("P570", value2,idA);
+
+        ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement1).addStatement(statement2).build();
+        scrutinize(updateA);
+        assertNoWarningRaised();
+    }
+
+    @Test
+    public void testUpperDateEarlierInSameMonth() {
+        ItemIdValue idA = TestingData.existingId;
+        PropertyIdValue lowerBoundPid = MockConstraintFetcher.lowerBoundPid;
+        PropertyIdValue upperBoundPid = MockConstraintFetcher.differenceWithinRangePid;
+
+        // same year and month, but the upper date is five days before the lower one
+        TimeValue lowerDate = new TimeValueImpl(2020, (byte)10, (byte)20, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
+        TimeValue upperDate = new TimeValueImpl(2020, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
+
+        ValueSnak value1 = Datamodel.makeValueSnak(lowerBoundPid, lowerDate);
+        ValueSnak value2 = Datamodel.makeValueSnak(upperBoundPid, upperDate);
+
+        Statement statement1 = new StatementImpl("P569", value1,idA);
+        Statement statement2 = new StatementImpl("P570", value2,idA);
+
+        ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement1).addStatement(statement2).build();
+        scrutinize(updateA);
+        assertWarningsRaised(DifferenceWithinRangeScrutinizer.type);
+    }
+}