Added difference-within-range Scrutinizer (#2634)
This commit is contained in:
parent
1c4243ea57
commit
1638d68e35
@ -120,6 +120,8 @@
|
||||
"warnings-messages/disallowed-qualifiers/body": "Statements using {statement_property_entity} such as the one on {example_item_entity} should not have a {disallowed_property_entity} qualifier as they are incompatible.",
|
||||
"warnings-messages/single-valued-property-added-more-than-once/title": "{property_entity} added more than once on the same item.",
|
||||
"warnings-messages/single-valued-property-added-more-than-once/body": "This property is expected to be used at most once on each item but has been added multiple times on the same item, for instance on {example_entity}.",
|
||||
"warnings-messages/difference-of-the-properties-is-not-within-the-specified-range/title": "Inconsistent {source_entity} and {target_entity}",
|
||||
"warnings-messages/difference-of-the-properties-is-not-within-the-specified-range/body": "The difference between {source_entity} and {target_entity} is expected to be within the range [{min_value}, {max_value}], but this fails on items such as {example_entity}.",
|
||||
"warnings-messages/identical-values-for-distinct-valued-property/title": "Identical values for {property_entity}",
|
||||
"warnings-messages/identical-values-for-distinct-valued-property/body": "This property should have distinct values, but the same value was found on {item1_entity} and {item2_entity} for instance.",
|
||||
"warnings-messages/no-edit-generated/title": "No edit was generated.",
|
||||
|
@ -25,6 +25,7 @@ package org.openrefine.wikidata.qa;
|
||||
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
import java.util.Set;
|
||||
@ -139,4 +140,38 @@ public interface ConstraintFetcher {
|
||||
* Can this property be used on items?
|
||||
*/
|
||||
boolean usableOnItems(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Retrieves the lower bound of the range
|
||||
* required in difference-within-range constraint
|
||||
*
|
||||
* @param pid the property on which the difference-within-range constraint is declared
|
||||
* @return minimum value
|
||||
*/
|
||||
QuantityValue getMinimumValue(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Retrieves the upper bound of the range
|
||||
* required in difference-within-range constraint
|
||||
*
|
||||
* @param pid the property on which the difference-within-range constraint is declared
|
||||
* @return maximum value
|
||||
*/
|
||||
QuantityValue getMaximumValue(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Retrieves the lower value property for calculating the difference
|
||||
* required in difference-within-range constraint
|
||||
*
|
||||
* @param pid
|
||||
* the property to calculate difference with
|
||||
* @return the pid of the lower bound property
|
||||
*/
|
||||
PropertyIdValue getLowerPropertyId(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Is this property expected to have a value whose difference
|
||||
* with its lower bound property should be in a range?
|
||||
*/
|
||||
boolean hasDiffWithinRange(PropertyIdValue pid);
|
||||
}
|
||||
|
@ -70,6 +70,7 @@ public class EditInspector {
|
||||
register(new CommonDescriptionScrutinizer());
|
||||
register(new EnglishDescriptionScrutinizer());
|
||||
register(new MultiValueScrutinizer());
|
||||
register(new DifferenceWithinRangeScrutinizer());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -25,6 +25,7 @@ package org.openrefine.wikidata.qa;
|
||||
|
||||
import org.openrefine.wikidata.utils.EntityCache;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.implementation.QuantityValueImpl;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -79,6 +80,11 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
|
||||
|
||||
public static String MULTI_VALUE_CONSTRAINT_QID = "Q21510857";
|
||||
|
||||
public static String DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID = "Q21510854";
|
||||
public static String DIFFERENCE_WITHIN_RANGE_CONSTRAINT_PID = "P2306";
|
||||
public static String MINIMUM_VALUE_PID = "P2313";
|
||||
public static String MAXIMUM_VALUE_PID = "P2312";
|
||||
|
||||
public static String NO_BOUNDS_CONSTRAINT_QID = "Q51723761";
|
||||
public static String INTEGER_VALUED_CONSTRAINT_QID = "Q52848401";
|
||||
|
||||
@ -337,4 +343,84 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
protected List<QuantityValue> getValues(List<SnakGroup> groups, String pid) {
|
||||
List<QuantityValue> results = new ArrayList<>();
|
||||
for (SnakGroup group : groups) {
|
||||
if (group.getProperty().getId().equals(pid)) {
|
||||
for (Snak snak : group.getSnaks())
|
||||
results.add((QuantityValueImpl) snak.getValue());
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this property expected to have a value whose difference
|
||||
* with its lower bound property should be in a range?
|
||||
*/
|
||||
@Override
|
||||
public boolean hasDiffWithinRange(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the lower value property for calculating the difference
|
||||
* required in difference-within-range constraint
|
||||
*
|
||||
* @param pid:
|
||||
* the property to calculate difference with
|
||||
* @return the pid of the lower bound property
|
||||
*/
|
||||
@Override
|
||||
public PropertyIdValue getLowerPropertyId(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID);
|
||||
if (specs != null) {
|
||||
List<Value> lowerValueProperty = findValues(specs, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_PID);
|
||||
if (!lowerValueProperty.isEmpty()) {
|
||||
return (PropertyIdValue) lowerValueProperty.get(0);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the lower bound of the range
|
||||
* required in difference-within-range constraint
|
||||
*
|
||||
* @param pid
|
||||
* @return minimum value
|
||||
*/
|
||||
@Override
|
||||
public QuantityValue getMinimumValue(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID);
|
||||
if (specs != null) {
|
||||
List<QuantityValue> minValue = getValues(specs, MINIMUM_VALUE_PID);
|
||||
if (!minValue.isEmpty()) {
|
||||
return minValue.get(0);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the upper bound of the range
|
||||
* required in difference-within-range constraint
|
||||
*
|
||||
* @param pid
|
||||
* @return maximum value
|
||||
*/
|
||||
@Override
|
||||
public QuantityValue getMaximumValue(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, DIFFERENCE_WITHIN_RANGE_CONSTRAINT_QID);
|
||||
if (specs != null) {
|
||||
List<QuantityValue> maxValue = getValues(specs, MAXIMUM_VALUE_PID);
|
||||
if (!maxValue.isEmpty()) {
|
||||
return maxValue.get(0);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,77 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.*;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class DifferenceWithinRangeScrutinizer extends EditScrutinizer {
|
||||
|
||||
public static final String type = "difference-of-the-properties-is-not-within-the-specified-range";
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update) {
|
||||
Map<PropertyIdValue, Value> propertyIdValueValueMap = new HashMap<>();
|
||||
for (Statement statement : update.getAddedStatements()){
|
||||
PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
|
||||
Value value = statement.getClaim().getMainSnak().getValue();
|
||||
propertyIdValueValueMap.put(pid, value);
|
||||
}
|
||||
|
||||
for(PropertyIdValue propertyId : propertyIdValueValueMap.keySet()){
|
||||
if (_fetcher.hasDiffWithinRange(propertyId)){
|
||||
PropertyIdValue lowerPropertyId = _fetcher.getLowerPropertyId(propertyId);
|
||||
QuantityValue minRangeValue = _fetcher.getMinimumValue(propertyId);
|
||||
QuantityValue maxRangeValue = _fetcher.getMaximumValue(propertyId);
|
||||
|
||||
if (propertyIdValueValueMap.containsKey(lowerPropertyId)){
|
||||
Value startingValue = propertyIdValueValueMap.get(lowerPropertyId);
|
||||
Value endingValue = propertyIdValueValueMap.get(propertyId);
|
||||
|
||||
if (startingValue instanceof TimeValue && endingValue instanceof TimeValue){
|
||||
TimeValue lowerDate = (TimeValue)startingValue;
|
||||
TimeValue upperDate = (TimeValue)endingValue;
|
||||
|
||||
long differenceInYears = upperDate.getYear() - lowerDate.getYear();
|
||||
long differenceInMonths = upperDate.getMonth() - lowerDate.getMonth();
|
||||
long differenceInDays = upperDate.getMonth() - lowerDate.getMonth();
|
||||
|
||||
if (minRangeValue != null && (differenceInYears < minRangeValue.getNumericValue().longValue()
|
||||
|| differenceInYears == 0 && differenceInMonths < 0
|
||||
|| differenceInYears == 0 && differenceInMonths == 0 && differenceInDays < 0)){
|
||||
QAWarning issue = new QAWarning(type, propertyId.getId(), QAWarning.Severity.WARNING, 1);
|
||||
issue.setProperty("source_entity", lowerPropertyId);
|
||||
issue.setProperty("target_entity", propertyId);
|
||||
issue.setProperty("min_value", minRangeValue.getNumericValue());
|
||||
if (maxRangeValue != null) {
|
||||
issue.setProperty("max_value", maxRangeValue.getNumericValue());
|
||||
} else {
|
||||
issue.setProperty("max_value", null);
|
||||
}
|
||||
issue.setProperty("example_entity", update.getItemId());
|
||||
addIssue(issue);
|
||||
}
|
||||
|
||||
if (maxRangeValue != null && differenceInYears > maxRangeValue.getNumericValue().longValue()){
|
||||
QAWarning issue = new QAWarning(type, propertyId.getId(), QAWarning.Severity.WARNING, 1);
|
||||
issue.setProperty("source_entity", lowerPropertyId);
|
||||
issue.setProperty("target_entity", propertyId);
|
||||
if (minRangeValue != null) {
|
||||
issue.setProperty("min_value", minRangeValue.getNumericValue());
|
||||
} else {
|
||||
issue.setProperty("min_value", null);
|
||||
}
|
||||
issue.setProperty("max_value", maxRangeValue.getNumericValue());
|
||||
issue.setProperty("example_entity", update.getItemId());
|
||||
addIssue(issue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -26,8 +26,10 @@ package org.openrefine.wikidata.qa;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
@ -59,6 +61,11 @@ public class MockConstraintFetcher implements ConstraintFetcher {
|
||||
|
||||
public static PropertyIdValue propertyOnlyPid = Datamodel.makeWikidataPropertyIdValue("P372");
|
||||
|
||||
public static PropertyIdValue differenceWithinRangePid = Datamodel.makeWikidataPropertyIdValue("P570");
|
||||
public static PropertyIdValue lowerBoundPid = Datamodel.makeWikidataPropertyIdValue("P569");
|
||||
public static QuantityValue minValuePid = Datamodel.makeQuantityValue(new BigDecimal(0));
|
||||
public static QuantityValue maxValuePid = Datamodel.makeQuantityValue(new BigDecimal(150));
|
||||
|
||||
@Override
|
||||
public String getFormatRegex(PropertyIdValue pid) {
|
||||
return "[1-9]\\d+";
|
||||
@ -166,4 +173,33 @@ public class MockConstraintFetcher implements ConstraintFetcher {
|
||||
public boolean usableOnItems(PropertyIdValue pid) {
|
||||
return !propertyOnlyPid.equals(pid);
|
||||
}
|
||||
|
||||
@Override
|
||||
public QuantityValue getMinimumValue(PropertyIdValue pid) {
|
||||
if (differenceWithinRangePid.equals(pid)) {
|
||||
return minValuePid;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public QuantityValue getMaximumValue(PropertyIdValue pid) {
|
||||
if (differenceWithinRangePid.equals(pid)) {
|
||||
return maxValuePid;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PropertyIdValue getLowerPropertyId(PropertyIdValue pid) {
|
||||
if (differenceWithinRangePid.equals(pid)){
|
||||
return lowerBoundPid;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasDiffWithinRange(PropertyIdValue pid) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,58 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.qa.MockConstraintFetcher;
|
||||
import org.openrefine.wikidata.testing.TestingData;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.implementation.StatementImpl;
|
||||
import org.wikidata.wdtk.datamodel.implementation.TimeValueImpl;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.*;
|
||||
|
||||
public class DifferenceWithinScrutinizerTest extends ScrutinizerTest{
|
||||
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new DifferenceWithinRangeScrutinizer();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTrigger() {
|
||||
ItemIdValue idA = TestingData.existingId;
|
||||
PropertyIdValue lowerBoundPid = MockConstraintFetcher.lowerBoundPid;
|
||||
PropertyIdValue upperBoundPid = MockConstraintFetcher.differenceWithinRangePid;
|
||||
|
||||
TimeValue lowerYear = new TimeValueImpl(1800, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
|
||||
TimeValue upperYear = new TimeValueImpl(2020, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
|
||||
|
||||
ValueSnak value1 = Datamodel.makeValueSnak(lowerBoundPid, lowerYear);
|
||||
ValueSnak value2 = Datamodel.makeValueSnak(upperBoundPid, upperYear);
|
||||
|
||||
Statement statement1 = new StatementImpl("P569", value1,idA);
|
||||
Statement statement2 = new StatementImpl("P570", value2,idA);
|
||||
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement1).addStatement(statement2).build();
|
||||
scrutinize(updateA);
|
||||
assertWarningsRaised(DifferenceWithinRangeScrutinizer.type);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoIssue() {
|
||||
ItemIdValue idA = TestingData.existingId;
|
||||
PropertyIdValue lowerBoundPid = MockConstraintFetcher.lowerBoundPid;
|
||||
PropertyIdValue upperBoundPid = MockConstraintFetcher.differenceWithinRangePid;
|
||||
|
||||
TimeValue lowerYear = new TimeValueImpl(2000, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
|
||||
TimeValue upperYear = new TimeValueImpl(2020, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
|
||||
|
||||
ValueSnak value1 = Datamodel.makeValueSnak(lowerBoundPid, lowerYear);
|
||||
ValueSnak value2 = Datamodel.makeValueSnak(upperBoundPid, upperYear);
|
||||
|
||||
Statement statement1 = new StatementImpl("P569", value1,idA);
|
||||
Statement statement2 = new StatementImpl("P570", value2,idA);
|
||||
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement1).addStatement(statement2).build();
|
||||
scrutinize(updateA);
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user