Added difference-within-range Scrutinizer (#2634)

This commit is contained in:
Ekta Mishra 2020-06-07 00:18:14 +05:30 committed by GitHub
parent 1c4243ea57
commit 1638d68e35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 295 additions and 0 deletions

View File

@ -120,6 +120,8 @@
"warnings-messages/disallowed-qualifiers/body": "Statements using {statement_property_entity} such as the one on {example_item_entity} should not have a {disallowed_property_entity} qualifier as they are incompatible.",
"warnings-messages/single-valued-property-added-more-than-once/title": "{property_entity} added more than once on the same item.",
"warnings-messages/single-valued-property-added-more-than-once/body": "This property is expected to be used at most once on each item but has been added multiple times on the same item, for instance on {example_entity}.",
"warnings-messages/difference-of-the-properties-is-not-within-the-specified-range/title": "Inconsistent {source_entity} and {target_entity}",
"warnings-messages/difference-of-the-properties-is-not-within-the-specified-range/body": "The difference between {source_entity} and {target_entity} is expected to be within the range [{min_value}, {max_value}], but this fails on items such as {example_entity}.",
"warnings-messages/identical-values-for-distinct-valued-property/title": "Identical values for {property_entity}",
"warnings-messages/identical-values-for-distinct-valued-property/body": "This property should have distinct values, but the same value was found on {item1_entity} and {item2_entity} for instance.",
"warnings-messages/no-edit-generated/title": "No edit was generated.",

View File

@ -25,6 +25,7 @@ package org.openrefine.wikidata.qa;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import java.util.Set;
@ -139,4 +140,38 @@ public interface ConstraintFetcher {
* Can this property be used on items?
*/
boolean usableOnItems(PropertyIdValue pid);
/**
 * Retrieves the lower bound of the range required by the
 * difference-within-range constraint defined on a property.
 *
 * @param pid
 *            the property on which the constraint is defined
 * @return the minimum value of the allowed range; callers should
 *         null-check the result, since a bound may be unavailable
 */
QuantityValue getMinimumValue(PropertyIdValue pid);
/**
 * Retrieves the upper bound of the range required by the
 * difference-within-range constraint defined on a property.
 *
 * @param pid
 *            the property on which the constraint is defined
 * @return the maximum value of the allowed range; callers should
 *         null-check the result, since a bound may be unavailable
 */
QuantityValue getMaximumValue(PropertyIdValue pid);
/**
 * Retrieves the lower value property used for calculating the difference
 * required by the difference-within-range constraint.
 *
 * @param pid
 *            the property to calculate the difference with
 * @return the pid of the lower bound property
 */
PropertyIdValue getLowerPropertyId(PropertyIdValue pid);
/**
 * Is this property expected to have a value whose difference
 * with its lower bound property should be in a range?
 *
 * @param pid
 *            the property to check
 * @return true when a difference-within-range constraint applies to the property
 */
boolean hasDiffWithinRange(PropertyIdValue pid);
}

View File

@ -70,6 +70,7 @@ public class EditInspector {
register(new CommonDescriptionScrutinizer());
register(new EnglishDescriptionScrutinizer());
register(new MultiValueScrutinizer());
register(new DifferenceWithinRangeScrutinizer());
}
/**

View File

@ -25,6 +25,7 @@ package org.openrefine.wikidata.qa;
import org.openrefine.wikidata.utils.EntityCache;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.implementation.QuantityValueImpl;
import org.wikidata.wdtk.datamodel.interfaces.*;
import java.util.ArrayList;
@ -79,6 +80,11 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
public static String MULTI_VALUE_CONSTRAINT_QID = "Q21510857";
// Constraint type looked up via getSingleConstraint for the difference-within-range check.
public static String DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID = "Q21510854";
// Qualifier read by getLowerPropertyId to find the property to compute the difference with.
public static String DIFFERENCE_WITHIN_RANGE_CONSTRAINT_PID = "P2306";
// Qualifier read by getMinimumValue for the lower bound of the allowed range.
public static String MINIMUM_VALUE_PID = "P2313";
// Qualifier read by getMaximumValue for the upper bound of the allowed range.
public static String MAXIMUM_VALUE_PID = "P2312";
public static String NO_BOUNDS_CONSTRAINT_QID = "Q51723761";
public static String INTEGER_VALUED_CONSTRAINT_QID = "Q52848401";
@ -337,4 +343,84 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
}
return results;
}
/**
 * Collects the quantity values held by the snaks of every group whose
 * property id matches the given one.
 *
 * Snaks whose value is not a {@link QuantityValue} (including snaks
 * without any value) are skipped instead of triggering a
 * ClassCastException or inserting a null into the result.
 *
 * @param groups
 *            the snak groups to search
 * @param pid
 *            the id of the property whose values should be returned
 * @return the matching quantity values, in encounter order (possibly empty)
 */
protected List<QuantityValue> getValues(List<SnakGroup> groups, String pid) {
    List<QuantityValue> results = new ArrayList<>();
    for (SnakGroup group : groups) {
        if (!group.getProperty().getId().equals(pid)) {
            continue;
        }
        for (Snak snak : group.getSnaks()) {
            Value value = snak.getValue();
            // Check against the interface rather than casting to the concrete
            // implementation class, so other implementations are accepted and
            // non-quantity values are ignored.
            if (value instanceof QuantityValue) {
                results.add((QuantityValue) value);
            }
        }
    }
    return results;
}
/**
 * Tells whether a difference-within-range constraint is defined on the
 * property, i.e. whether its value is expected to differ from the value
 * of its lower bound property by an amount lying in a given range.
 */
@Override
public boolean hasDiffWithinRange(PropertyIdValue pid) {
    List<SnakGroup> constraint = getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID);
    return constraint != null;
}
/**
 * Looks up the property against which the difference is computed for the
 * difference-within-range constraint.
 *
 * @param pid
 *            the property carrying the constraint
 * @return the pid of the lower bound property, or null when the
 *         constraint is absent or does not name such a property
 */
@Override
public PropertyIdValue getLowerPropertyId(PropertyIdValue pid) {
    List<SnakGroup> constraint = getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID);
    if (constraint == null) {
        return null;
    }
    List<Value> candidates = findValues(constraint, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_PID);
    // Only the first declared property is used.
    return candidates.isEmpty() ? null : (PropertyIdValue) candidates.get(0);
}
/**
 * Looks up the lower bound of the range declared by the
 * difference-within-range constraint.
 *
 * @param pid
 *            the property carrying the constraint
 * @return the minimum value, or null when the constraint is absent or
 *         declares no minimum
 */
@Override
public QuantityValue getMinimumValue(PropertyIdValue pid) {
    List<SnakGroup> constraint = getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID);
    if (constraint == null) {
        return null;
    }
    List<QuantityValue> bounds = getValues(constraint, MINIMUM_VALUE_PID);
    // Only the first declared bound is used.
    return bounds.isEmpty() ? null : bounds.get(0);
}
/**
 * Looks up the upper bound of the range declared by the
 * difference-within-range constraint.
 *
 * @param pid
 *            the property carrying the constraint
 * @return the maximum value, or null when the constraint is absent or
 *         declares no maximum
 */
@Override
public QuantityValue getMaximumValue(PropertyIdValue pid) {
    List<SnakGroup> constraint = getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID);
    if (constraint == null) {
        return null;
    }
    List<QuantityValue> bounds = getValues(constraint, MAXIMUM_VALUE_PID);
    // Only the first declared bound is used.
    return bounds.isEmpty() ? null : bounds.get(0);
}
}

View File

@ -0,0 +1,77 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.*;
import java.util.HashMap;
import java.util.Map;
/**
 * Scrutinizer for the difference-within-range constraint: for every
 * property added on an item that carries this constraint, the difference
 * between its time value and the time value of the associated lower bound
 * property (when both are added in the same update) is compared, in years,
 * against the range declared by the constraint.
 */
public class DifferenceWithinRangeScrutinizer extends EditScrutinizer {

    public static final String type = "difference-of-the-properties-is-not-within-the-specified-range";

    /**
     * Raises a warning for each constrained property of the update whose
     * difference with its lower bound property falls outside the range.
     *
     * @param update
     *            the item update to inspect
     */
    @Override
    public void scrutinize(ItemUpdate update) {
        // When a property is added several times, only the last value is kept.
        Map<PropertyIdValue, Value> propertyIdValueValueMap = new HashMap<>();
        for (Statement statement : update.getAddedStatements()) {
            PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
            Value value = statement.getClaim().getMainSnak().getValue();
            propertyIdValueValueMap.put(pid, value);
        }

        for (Map.Entry<PropertyIdValue, Value> entry : propertyIdValueValueMap.entrySet()) {
            PropertyIdValue propertyId = entry.getKey();
            if (!_fetcher.hasDiffWithinRange(propertyId)) {
                continue;
            }

            PropertyIdValue lowerPropertyId = _fetcher.getLowerPropertyId(propertyId);
            QuantityValue minRangeValue = _fetcher.getMinimumValue(propertyId);
            QuantityValue maxRangeValue = _fetcher.getMaximumValue(propertyId);

            // The check only applies when the lower bound property is part of the same update.
            if (!propertyIdValueValueMap.containsKey(lowerPropertyId)) {
                continue;
            }

            Value startingValue = propertyIdValueValueMap.get(lowerPropertyId);
            Value endingValue = entry.getValue();
            // Only differences between dates are supported.
            if (!(startingValue instanceof TimeValue && endingValue instanceof TimeValue)) {
                continue;
            }

            TimeValue lowerDate = (TimeValue) startingValue;
            TimeValue upperDate = (TimeValue) endingValue;

            long differenceInYears = upperDate.getYear() - lowerDate.getYear();
            long differenceInMonths = upperDate.getMonth() - lowerDate.getMonth();
            // Days must be compared with getDay(): using getMonth() here made the
            // day-level comparison below impossible to trigger.
            long differenceInDays = upperDate.getDay() - lowerDate.getDay();

            // Within the same year, an earlier month (or an earlier day of the
            // same month) means the upper date precedes the lower one.
            boolean upperPrecedesLowerWithinYear = differenceInYears == 0
                    && (differenceInMonths < 0 || (differenceInMonths == 0 && differenceInDays < 0));

            if (minRangeValue != null
                    && (differenceInYears < minRangeValue.getNumericValue().longValue()
                            || upperPrecedesLowerWithinYear)) {
                addRangeIssue(update, lowerPropertyId, propertyId, minRangeValue, maxRangeValue);
            }

            if (maxRangeValue != null && differenceInYears > maxRangeValue.getNumericValue().longValue()) {
                addRangeIssue(update, lowerPropertyId, propertyId, minRangeValue, maxRangeValue);
            }
        }
    }

    /**
     * Builds and registers the warning reporting a difference outside of
     * the allowed range; a missing bound is reported as null.
     */
    private void addRangeIssue(ItemUpdate update, PropertyIdValue lowerPropertyId, PropertyIdValue propertyId,
            QuantityValue minRangeValue, QuantityValue maxRangeValue) {
        QAWarning issue = new QAWarning(type, propertyId.getId(), QAWarning.Severity.WARNING, 1);
        issue.setProperty("source_entity", lowerPropertyId);
        issue.setProperty("target_entity", propertyId);
        issue.setProperty("min_value", minRangeValue != null ? minRangeValue.getNumericValue() : null);
        issue.setProperty("max_value", maxRangeValue != null ? maxRangeValue.getNumericValue() : null);
        issue.setProperty("example_entity", update.getItemId());
        addIssue(issue);
    }
}

View File

@ -26,8 +26,10 @@ package org.openrefine.wikidata.qa;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import java.math.BigDecimal;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
@ -59,6 +61,11 @@ public class MockConstraintFetcher implements ConstraintFetcher {
public static PropertyIdValue propertyOnlyPid = Datamodel.makeWikidataPropertyIdValue("P372");
// Property for which this mock reports difference-within-range bounds.
public static PropertyIdValue differenceWithinRangePid = Datamodel.makeWikidataPropertyIdValue("P570");
// Property returned by getLowerPropertyId for differenceWithinRangePid.
public static PropertyIdValue lowerBoundPid = Datamodel.makeWikidataPropertyIdValue("P569");
// Despite the "Pid" suffix, the next two fields are quantities (range bounds), not property ids.
public static QuantityValue minValuePid = Datamodel.makeQuantityValue(new BigDecimal(0));
public static QuantityValue maxValuePid = Datamodel.makeQuantityValue(new BigDecimal(150));
@Override
public String getFormatRegex(PropertyIdValue pid) {
return "[1-9]\\d+";
@ -166,4 +173,33 @@ public class MockConstraintFetcher implements ConstraintFetcher {
public boolean usableOnItems(PropertyIdValue pid) {
return !propertyOnlyPid.equals(pid);
}
// Mock lower bound: only differenceWithinRangePid has one.
@Override
public QuantityValue getMinimumValue(PropertyIdValue pid) {
    return differenceWithinRangePid.equals(pid) ? minValuePid : null;
}
// Mock upper bound: only differenceWithinRangePid has one.
@Override
public QuantityValue getMaximumValue(PropertyIdValue pid) {
    return differenceWithinRangePid.equals(pid) ? maxValuePid : null;
}
// Mock lower bound property: only differenceWithinRangePid is paired with one.
@Override
public PropertyIdValue getLowerPropertyId(PropertyIdValue pid) {
    return differenceWithinRangePid.equals(pid) ? lowerBoundPid : null;
}
// Only differenceWithinRangePid carries the mock constraint, consistently
// with getMinimumValue, getMaximumValue and getLowerPropertyId, which
// return data for that property alone.
@Override
public boolean hasDiffWithinRange(PropertyIdValue pid) {
    return differenceWithinRangePid.equals(pid);
}
}

View File

@ -0,0 +1,58 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.qa.MockConstraintFetcher;
import org.openrefine.wikidata.testing.TestingData;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.implementation.StatementImpl;
import org.wikidata.wdtk.datamodel.implementation.TimeValueImpl;
import org.wikidata.wdtk.datamodel.interfaces.*;
/**
 * Tests for {@link DifferenceWithinRangeScrutinizer}, run against the
 * bounds supplied by {@link MockConstraintFetcher}.
 */
public class DifferenceWithinScrutinizerTest extends ScrutinizerTest {

    @Override
    public EditScrutinizer getScrutinizer() {
        return new DifferenceWithinRangeScrutinizer();
    }

    /** A gap of 220 years exceeds the mock maximum and must be reported. */
    @Test
    public void testTrigger() {
        scrutinize(buildUpdate(1800, 2020));
        assertWarningsRaised(DifferenceWithinRangeScrutinizer.type);
    }

    /** A gap of 20 years lies inside the mock range and must not be reported. */
    @Test
    public void testNoIssue() {
        scrutinize(buildUpdate(2000, 2020));
        assertNoWarningRaised();
    }

    /**
     * Builds an update on the existing test item which adds one statement
     * for the lower bound property and one for the constrained property,
     * dated in the given years.
     */
    private static ItemUpdate buildUpdate(long startYear, long endYear) {
        ItemIdValue subject = TestingData.existingId;
        ValueSnak startSnak = Datamodel.makeValueSnak(MockConstraintFetcher.lowerBoundPid, october15th(startYear));
        ValueSnak endSnak = Datamodel.makeValueSnak(MockConstraintFetcher.differenceWithinRangePid,
                october15th(endYear));
        Statement startStatement = new StatementImpl("P569", startSnak, subject);
        Statement endStatement = new StatementImpl("P570", endSnak, subject);
        return new ItemUpdateBuilder(subject)
                .addStatement(startStatement)
                .addStatement(endStatement)
                .build();
    }

    /** Creates the time value for October 15th of the given year. */
    private static TimeValue october15th(long year) {
        return new TimeValueImpl(year, (byte) 10, (byte) 15, (byte) 0, (byte) 0, (byte) 0, (byte) 11, 0, 0, 0,
                TimeValue.CM_GREGORIAN_PRO);
    }
}