Add support for quantity-related constraints in Wikidata QA

This commit is contained in:
Antonin Delpeuch 2018-06-10 08:17:24 +01:00
parent 34cddb61a2
commit 9cb4a46277
8 changed files with 344 additions and 5 deletions

View File

@ -25,7 +25,9 @@ package org.openrefine.wikidata.qa;
import java.util.Set;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
/**
* An object that fetches constraints about properties.
@ -53,6 +55,11 @@ public interface ConstraintFetcher {
* @return the pid of the inverse property
*/
PropertyIdValue getInversePid(PropertyIdValue pid);
/**
* Is this property supposed to be symmetric (its own inverse)?
*/
boolean isSymmetric(PropertyIdValue pid);
/**
* Is this property for values only?
@ -80,6 +87,18 @@ public interface ConstraintFetcher {
* (null if any)
*/
Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid);
/**
* Get the set of allowed values for this property (null if no such constraint).
* This set may contain null if one of the allowed values in novalue or somevalue.
*/
Set<Value> allowedValues(PropertyIdValue pid);
/**
* Get the set of disallowed values for this property (null if no such constraint).
* This set may contain null if one of the allowed values in novalue or somevalue.
*/
Set<Value> disallowedValues(PropertyIdValue pid);
/**
* Is this property expected to have at most one value per item?
@ -91,4 +110,18 @@ public interface ConstraintFetcher {
*/
boolean hasDistinctValues(PropertyIdValue pid);
/**
* Can statements using this property have uncertainty bounds?
*/
boolean boundsAllowed(PropertyIdValue pid);
/**
* Is this property expected to have integer values only?
*/
boolean integerValued(PropertyIdValue pid);
/**
* Returns the allowed units for this property. If empty, no unit is allowed. If null, any unit is allowed.
*/
Set<ItemIdValue> allowedUnits(PropertyIdValue pid);
}

View File

@ -35,6 +35,7 @@ import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.NoEditsMadeScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.QualifierCompatibilityScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.QuantityScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.RestrictedPositionScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.SingleValueScrutinizer;
@ -73,6 +74,7 @@ public class EditInspector {
register(new DistinctValuesScrutinizer());
register(new NoEditsMadeScrutinizer());
register(new WhitespaceScrutinizer());
register(new QuantityScrutinizer());
}
/**

View File

@ -24,6 +24,7 @@
package org.openrefine.wikidata.qa;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
@ -31,6 +32,7 @@ import java.util.stream.Stream;
import org.openrefine.wikidata.utils.EntityCache;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
@ -56,6 +58,7 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
public static String INVERSE_CONSTRAINT_QID = "Q21510855";
public static String INVERSE_PROPERTY_PID = "P2306";
public static String SYMMETRIC_CONSTRAINT_QID = "Q21510862";
public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958";
@ -68,9 +71,22 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856";
public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306";
public static String ALLOWED_VALUES_CONSTRAINT_QID = "Q21510859";
public static String ALLOWED_VALUES_CONSTRAINT_PID = "P2305";
public static String DISALLOWED_VALUES_CONSTRAINT_QID = "Q52558054";
public static String DISALLOWED_VALUES_CONSTRAINT_PID = "P2305";
public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
public static String NO_BOUNDS_CONSTRAINT_QID = "Q51723761";
public static String INTEGER_VALUED_CONSTRAINT_QID = "Q52848401";
public static String ALLOWED_UNITS_CONSTRAINT_QID = "Q21514353";
public static String ALLOWED_UNITS_CONSTRAINT_PID = "P2305";
// The following constraints still need to be implemented:
@ -122,7 +138,10 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
if (specs != null) {
List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID);
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
return properties.stream()
.filter(e -> e != null)
.map(e -> (PropertyIdValue) e)
.collect(Collectors.toSet());
}
return null;
}
@ -133,7 +152,10 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
if (specs != null) {
List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID);
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
return properties.stream()
.filter(e -> e != null)
.map(e -> (PropertyIdValue) e)
.collect(Collectors.toSet());
}
return null;
}
@ -147,6 +169,58 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
public boolean hasDistinctValues(PropertyIdValue pid) {
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
}
@Override
public boolean isSymmetric(PropertyIdValue pid) {
return getSingleConstraint(pid, SYMMETRIC_CONSTRAINT_QID) != null;
}
@Override
public Set<Value> allowedValues(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_VALUES_CONSTRAINT_QID);
if (specs != null) {
List<Value> properties = findValues(specs, ALLOWED_VALUES_CONSTRAINT_PID);
return properties.stream().collect(Collectors.toSet());
}
return null;
}
@Override
public Set<Value> disallowedValues(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, DISALLOWED_VALUES_CONSTRAINT_QID);
if (specs != null) {
List<Value> properties = findValues(specs, DISALLOWED_VALUES_CONSTRAINT_PID);
return properties.stream().collect(Collectors.toSet());
}
return null;
}
@Override
public boolean boundsAllowed(PropertyIdValue pid) {
return getSingleConstraint(pid, NO_BOUNDS_CONSTRAINT_QID) == null;
}
@Override
public boolean integerValued(PropertyIdValue pid) {
return getSingleConstraint(pid, INTEGER_VALUED_CONSTRAINT_QID) != null;
}
@Override
public Set<ItemIdValue> allowedUnits(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_UNITS_CONSTRAINT_QID);
if (specs != null) {
List<Value> properties = findValues(specs, ALLOWED_UNITS_CONSTRAINT_PID);
if (properties.contains(null)) {
return Collections.emptySet();
} else {
return properties.stream().map(e -> (ItemIdValue) e).collect(Collectors.toSet());
}
}
return null;
}
/**
* Returns a single constraint for a particular type and a property, or null if
@ -193,7 +267,9 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
if (group != null) {
return group.getStatements();
return group.getStatements().stream()
.filter(s -> s.getValue() != null && s.getValue() instanceof EntityIdValue)
.collect(Collectors.toList());
} else {
return new ArrayList<Statement>();
}

View File

@ -0,0 +1,68 @@
package org.openrefine.wikidata.qa.scrutinizers;
import java.util.Set;
import java.util.stream.Collectors;
import org.openrefine.wikidata.qa.QAWarning;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
/**
* Scrutinizer checking for units and bounds in quantities.
*
* @author Antonin Delpeuch
*
*/
public class QuantityScrutinizer extends SnakScrutinizer {
public static final String boundsDisallowedType = "bounds-disallowed";
public static final String integerConstraintType = "values-should-be-integers";
public static final String invalidUnitType = "invalid-unit";
public static final String noUnitProvidedType = "no-unit-provided";
@Override
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
if (QuantityValue.class.isInstance(snak.getValue()) && added) {
PropertyIdValue pid = snak.getPropertyId();
QuantityValue value = (QuantityValue)snak.getValue();
if(!_fetcher.boundsAllowed(pid) && (value.getUpperBound() != null || value.getLowerBound() != null)) {
QAWarning issue = new QAWarning(boundsDisallowedType, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
issue.setProperty("property_entity", pid);
issue.setProperty("example_value", value.getNumericValue().toString());
issue.setProperty("example_item_entity", entityId);
addIssue(issue);
}
if(_fetcher.integerValued(pid) && value.getNumericValue().scale() > 0) {
QAWarning issue = new QAWarning(integerConstraintType, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
issue.setProperty("property_entity", pid);
issue.setProperty("example_value", value.getNumericValue().toString());
issue.setProperty("example_item_entity", entityId);
addIssue(issue);
}
Set<ItemIdValue> allowedUnits = _fetcher.allowedUnits(pid);
String currentUnit = null;
if (value.getUnit() != null && value.getUnit() != "") {
currentUnit = value.getUnit();
}
if(allowedUnits != null &&
!allowedUnits.stream().map(u -> u != null ? u.getIri() : null)
.collect(Collectors.toSet()).contains(currentUnit)) {
String issueType = currentUnit == null ? noUnitProvidedType : invalidUnitType;
QAWarning issue = new QAWarning(issueType, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
issue.setProperty("property_entity", pid);
issue.setProperty("example_value", value.getNumericValue().toString());
issue.setProperty("example_item_entity", entityId);
if (currentUnit != null) {
issue.setProperty("unit_entity", currentUnit);
}
addIssue(issue);
}
}
}
}

View File

@ -63,7 +63,7 @@ public class WbItemVariable extends WbVariableExpr<ItemIdValue> {
throws SkipSchemaExpressionException {
if (cell.recon != null
&& (Judgment.Matched.equals(cell.recon.judgment) || Judgment.New.equals(cell.recon.judgment))) {
if (!cell.recon.identifierSpace.equals(Datamodel.SITE_WIKIDATA)) {
if (cell.recon.identifierSpace == null || !cell.recon.identifierSpace.equals(Datamodel.SITE_WIKIDATA)) {
QAWarning warning = new QAWarning("invalid-identifier-space", null, QAWarning.Severity.INFO, 1);
warning.setProperty("example_cell", cell.value.toString());
ctxt.addWarning(warning);

View File

@ -29,18 +29,32 @@ import java.util.Set;
import java.util.stream.Collectors;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
public class MockConstraintFetcher implements ConstraintFetcher {
public static PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350");
public static PropertyIdValue inversePid = Datamodel.makeWikidataPropertyIdValue("P57");
public static PropertyIdValue symmetricPid = Datamodel.makeWikidataPropertyIdValue("P783");
public static PropertyIdValue allowedQualifierPid = Datamodel.makeWikidataPropertyIdValue("P34");
public static PropertyIdValue mandatoryQualifierPid = Datamodel.makeWikidataPropertyIdValue("P97");
public static PropertyIdValue mainSnakPid = Datamodel.makeWikidataPropertyIdValue("P1234");
public static PropertyIdValue qualifierPid = Datamodel.makeWikidataPropertyIdValue("P987");
public static PropertyIdValue referencePid = Datamodel.makeWikidataPropertyIdValue("P384");
public static PropertyIdValue restrictedValuesPid = Datamodel.makeWikidataPropertyIdValue("P8121");
public static ItemIdValue particularValue1 = Datamodel.makeWikidataItemIdValue("Q389");
public static ItemIdValue particularValue2 = Datamodel.makeWikidataItemIdValue("Q378");
public static PropertyIdValue allowedUnitsPid = Datamodel.makeWikidataPropertyIdValue("P34787");
public static ItemIdValue allowedUnit = Datamodel.makeWikidataItemIdValue("Q7887");
public static PropertyIdValue noUnitsPid = Datamodel.makeWikidataPropertyIdValue("P334211");
public static PropertyIdValue noBoundsPid = Datamodel.makeWikidataPropertyIdValue("P8932");
public static PropertyIdValue integerPid = Datamodel.makeWikidataPropertyIdValue("P389");
@Override
public String getFormatRegex(PropertyIdValue pid) {
@ -94,4 +108,44 @@ public class MockConstraintFetcher implements ConstraintFetcher {
return true;
}
@Override
public boolean isSymmetric(PropertyIdValue pid) {
return pid.equals(symmetricPid);
}
@Override
public Set<Value> allowedValues(PropertyIdValue pid) {
if (restrictedValuesPid.equals(pid)) {
return Collections.singleton(particularValue1);
}
return null;
}
@Override
public Set<Value> disallowedValues(PropertyIdValue pid) {
if (restrictedValuesPid.equals(pid)) {
return Collections.singleton(particularValue2);
}
return null;
}
@Override
public boolean boundsAllowed(PropertyIdValue pid) {
return !noBoundsPid.equals(pid);
}
@Override
public boolean integerValued(PropertyIdValue pid) {
return integerPid.equals(pid);
}
@Override
public Set<ItemIdValue> allowedUnits(PropertyIdValue pid) {
if(allowedUnitsPid.equals(pid)) {
return Collections.singleton(allowedUnit);
} else if(noUnitsPid.equals(pid)) {
return Collections.singleton(null);
}
return null;
}
}

View File

@ -0,0 +1,102 @@
package org.openrefine.wikidata.qa.scrutinizers;
import java.math.BigDecimal;
import org.openrefine.wikidata.qa.MockConstraintFetcher;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
public class QuantityScrutinizerTest extends ValueScrutinizerTest{
private QuantityValue exactValue = Datamodel.makeQuantityValue(
new BigDecimal("1.234"));
private QuantityValue integerValue = Datamodel.makeQuantityValue(
new BigDecimal("132"));
private QuantityValue trailingZeros = Datamodel.makeQuantityValue(
new BigDecimal("132.00"));
private QuantityValue valueWithBounds = Datamodel.makeQuantityValue(
new BigDecimal("1.234"),
new BigDecimal("1.200"),
new BigDecimal("1.545"));
private QuantityValue wrongUnitValue = Datamodel.makeQuantityValue(
new BigDecimal("1.234"), "Q346721");
private QuantityValue goodUnitValue = Datamodel.makeQuantityValue(
new BigDecimal("1.234"), MockConstraintFetcher.allowedUnit.getIri());
@Override
public EditScrutinizer getScrutinizer() {
return new QuantityScrutinizer();
}
@Test
public void testBoundsAllowed() {
scrutinize(valueWithBounds);
assertNoWarningRaised();
}
@Test
public void testBoundsDisallowed() {
scrutinize(MockConstraintFetcher.noBoundsPid, valueWithBounds);
assertWarningsRaised(QuantityScrutinizer.boundsDisallowedType);
}
@Test
public void testFractionalAllowed() {
scrutinize(exactValue);
assertNoWarningRaised();
}
@Test
public void testFractionalDisallowed() {
scrutinize(MockConstraintFetcher.integerPid, exactValue);
assertWarningsRaised(QuantityScrutinizer.integerConstraintType);
}
@Test
public void testTrailingZeros() {
scrutinize(MockConstraintFetcher.integerPid, trailingZeros);
assertWarningsRaised(QuantityScrutinizer.integerConstraintType);
}
@Test
public void testInteger() {
scrutinize(MockConstraintFetcher.integerPid, integerValue);
assertNoWarningRaised();
}
@Test
public void testUnitReqired() {
scrutinize(MockConstraintFetcher.allowedUnitsPid, integerValue);
assertWarningsRaised(QuantityScrutinizer.noUnitProvidedType);
}
@Test
public void testWrongUnit() {
scrutinize(MockConstraintFetcher.allowedUnitsPid, wrongUnitValue);
assertWarningsRaised(QuantityScrutinizer.invalidUnitType);
}
@Test
public void testGoodUnit() {
scrutinize(MockConstraintFetcher.allowedUnitsPid, goodUnitValue);
assertNoWarningRaised();
}
@Test
public void testUnitForbidden() {
scrutinize(MockConstraintFetcher.noUnitsPid, goodUnitValue);
assertWarningsRaised(QuantityScrutinizer.invalidUnitType);
}
@Test
public void testNoUnit() {
scrutinize(MockConstraintFetcher.noUnitsPid, integerValue);
assertNoWarningRaised();
}
}

View File

@ -35,7 +35,11 @@ public abstract class ValueScrutinizerTest extends SnakScrutinizerTest {
public static final PropertyIdValue defaultPid = Datamodel.makeWikidataPropertyIdValue("P328");
public void scrutinize(Value value) {
scrutinize(Datamodel.makeValueSnak(defaultPid, value));
scrutinize(defaultPid, value);
}
public void scrutinize(PropertyIdValue pid, Value value) {
scrutinize(Datamodel.makeValueSnak(pid, value));
}
public void scrutinizeLabel(MonolingualTextValue text) {