From 9cb4a462775784c0866c1cd008a14ba4c38de543 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sun, 10 Jun 2018 08:17:24 +0100 Subject: [PATCH] Add support for quantity-related constraints in Wikidata QA --- .../wikidata/qa/ConstraintFetcher.java | 33 ++++++ .../openrefine/wikidata/qa/EditInspector.java | 2 + .../qa/WikidataConstraintFetcher.java | 82 +++++++++++++- .../qa/scrutinizers/QuantityScrutinizer.java | 68 ++++++++++++ .../wikidata/schema/WbItemVariable.java | 2 +- .../wikidata/qa/MockConstraintFetcher.java | 54 ++++++++++ .../scrutinizers/QuantityScrutinizerTest.java | 102 ++++++++++++++++++ .../qa/scrutinizers/ValueScrutinizerTest.java | 6 +- 8 files changed, 344 insertions(+), 5 deletions(-) create mode 100644 extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QuantityScrutinizer.java create mode 100644 extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/QuantityScrutinizerTest.java diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java index 9933bb02d..a0f5402ee 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java @@ -25,7 +25,9 @@ package org.openrefine.wikidata.qa; import java.util.Set; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; +import org.wikidata.wdtk.datamodel.interfaces.Value; /** * An object that fetches constraints about properties. @@ -53,6 +55,11 @@ public interface ConstraintFetcher { * @return the pid of the inverse property */ PropertyIdValue getInversePid(PropertyIdValue pid); + + /** + * Is this property supposed to be symmetric (its own inverse)? + */ + boolean isSymmetric(PropertyIdValue pid); /** * Is this property for values only? @@ -80,6 +87,18 @@ public interface ConstraintFetcher { * (null if any) */ Set mandatoryQualifiers(PropertyIdValue pid); + + /** + * Get the set of allowed values for this property (null if no such constraint). + * This set may contain null if one of the allowed values in novalue or somevalue. + */ + Set allowedValues(PropertyIdValue pid); + + /** + * Get the set of disallowed values for this property (null if no such constraint). + * This set may contain null if one of the allowed values in novalue or somevalue. + */ + Set disallowedValues(PropertyIdValue pid); /** * Is this property expected to have at most one value per item? @@ -91,4 +110,18 @@ public interface ConstraintFetcher { */ boolean hasDistinctValues(PropertyIdValue pid); + /** + * Can statements using this property have uncertainty bounds? + */ + boolean boundsAllowed(PropertyIdValue pid); + + /** + * Is this property expected to have integer values only? + */ + boolean integerValued(PropertyIdValue pid); + + /** + * Returns the allowed units for this property. If empty, no unit is allowed. If null, any unit is allowed. + */ + Set allowedUnits(PropertyIdValue pid); } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java index 83bfa37e9..0a61f2f50 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java @@ -35,6 +35,7 @@ import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.NoEditsMadeScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.QualifierCompatibilityScrutinizer; +import org.openrefine.wikidata.qa.scrutinizers.QuantityScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.RestrictedPositionScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.SingleValueScrutinizer; @@ -73,6 +74,7 @@ public class EditInspector { register(new DistinctValuesScrutinizer()); register(new NoEditsMadeScrutinizer()); register(new WhitespaceScrutinizer()); + register(new QuantityScrutinizer()); } /** diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java index 8b301a530..29f9337e4 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java @@ -24,6 +24,7 @@ package org.openrefine.wikidata.qa; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -31,6 +32,7 @@ import java.util.stream.Stream; import org.openrefine.wikidata.utils.EntityCache; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import org.wikidata.wdtk.datamodel.interfaces.Snak; @@ -56,6 +58,7 @@ public class WikidataConstraintFetcher implements ConstraintFetcher { public static String INVERSE_CONSTRAINT_QID = "Q21510855"; public static String INVERSE_PROPERTY_PID = "P2306"; + public static String SYMMETRIC_CONSTRAINT_QID = "Q21510862"; public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958"; @@ -68,9 +71,22 @@ public class WikidataConstraintFetcher implements ConstraintFetcher { public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856"; public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306"; + + public static String ALLOWED_VALUES_CONSTRAINT_QID = "Q21510859"; + public static String ALLOWED_VALUES_CONSTRAINT_PID = "P2305"; + + public static String DISALLOWED_VALUES_CONSTRAINT_QID = "Q52558054"; + public static String DISALLOWED_VALUES_CONSTRAINT_PID = "P2305"; public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404"; public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410"; + + public static String NO_BOUNDS_CONSTRAINT_QID = "Q51723761"; + public static String INTEGER_VALUED_CONSTRAINT_QID = "Q52848401"; + + public static String ALLOWED_UNITS_CONSTRAINT_QID = "Q21514353"; + public static String ALLOWED_UNITS_CONSTRAINT_PID = "P2305"; + // The following constraints still need to be implemented: @@ -122,7 +138,10 @@ public class WikidataConstraintFetcher implements ConstraintFetcher { if (specs != null) { List properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID); - return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet()); + return properties.stream() + .filter(e -> e != null) + .map(e -> (PropertyIdValue) e) + .collect(Collectors.toSet()); } return null; } @@ -133,7 +152,10 @@ public class WikidataConstraintFetcher implements ConstraintFetcher { if (specs != null) { List properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID); - return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet()); + return properties.stream() + .filter(e -> e != null) + .map(e -> (PropertyIdValue) e) + .collect(Collectors.toSet()); } return null; } @@ -147,6 +169,58 @@ public class WikidataConstraintFetcher implements ConstraintFetcher { public boolean hasDistinctValues(PropertyIdValue pid) { return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null; } + + @Override + public boolean isSymmetric(PropertyIdValue pid) { + return getSingleConstraint(pid, SYMMETRIC_CONSTRAINT_QID) != null; + } + + @Override + public Set allowedValues(PropertyIdValue pid) { + List specs = getSingleConstraint(pid, ALLOWED_VALUES_CONSTRAINT_QID); + + if (specs != null) { + List properties = findValues(specs, ALLOWED_VALUES_CONSTRAINT_PID); + return properties.stream().collect(Collectors.toSet()); + } + return null; + } + + @Override + public Set disallowedValues(PropertyIdValue pid) { + List specs = getSingleConstraint(pid, DISALLOWED_VALUES_CONSTRAINT_QID); + + if (specs != null) { + List properties = findValues(specs, DISALLOWED_VALUES_CONSTRAINT_PID); + return properties.stream().collect(Collectors.toSet()); + } + return null; + } + + @Override + public boolean boundsAllowed(PropertyIdValue pid) { + return getSingleConstraint(pid, NO_BOUNDS_CONSTRAINT_QID) == null; + } + + @Override + public boolean integerValued(PropertyIdValue pid) { + return getSingleConstraint(pid, INTEGER_VALUED_CONSTRAINT_QID) != null; + } + + @Override + public Set allowedUnits(PropertyIdValue pid) { + List specs = getSingleConstraint(pid, ALLOWED_UNITS_CONSTRAINT_QID); + + if (specs != null) { + List properties = findValues(specs, ALLOWED_UNITS_CONSTRAINT_PID); + if (properties.contains(null)) { + return Collections.emptySet(); + } else { + return properties.stream().map(e -> (ItemIdValue) e).collect(Collectors.toSet()); + } + } + return null; + } /** * Returns a single constraint for a particular type and a property, or null if @@ -193,7 +267,9 @@ public class WikidataConstraintFetcher implements ConstraintFetcher { PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid); StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID); if (group != null) { - return group.getStatements(); + return group.getStatements().stream() + .filter(s -> s.getValue() != null && s.getValue() instanceof EntityIdValue) + .collect(Collectors.toList()); } else { return new ArrayList(); } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QuantityScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QuantityScrutinizer.java new file mode 100644 index 000000000..a32f3e2a2 --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QuantityScrutinizer.java @@ -0,0 +1,68 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import java.util.Set; +import java.util.stream.Collectors; + +import org.openrefine.wikidata.qa.QAWarning; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; +import org.wikidata.wdtk.datamodel.interfaces.QuantityValue; +import org.wikidata.wdtk.datamodel.interfaces.Snak; + +/** + * Scrutinizer checking for units and bounds in quantities. + * + * @author Antonin Delpeuch + * + */ +public class QuantityScrutinizer extends SnakScrutinizer { + + public static final String boundsDisallowedType = "bounds-disallowed"; + public static final String integerConstraintType = "values-should-be-integers"; + public static final String invalidUnitType = "invalid-unit"; + public static final String noUnitProvidedType = "no-unit-provided"; + + @Override + public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) { + if (QuantityValue.class.isInstance(snak.getValue()) && added) { + PropertyIdValue pid = snak.getPropertyId(); + QuantityValue value = (QuantityValue)snak.getValue(); + + if(!_fetcher.boundsAllowed(pid) && (value.getUpperBound() != null || value.getLowerBound() != null)) { + QAWarning issue = new QAWarning(boundsDisallowedType, pid.getId(), QAWarning.Severity.IMPORTANT, 1); + issue.setProperty("property_entity", pid); + issue.setProperty("example_value", value.getNumericValue().toString()); + issue.setProperty("example_item_entity", entityId); + addIssue(issue); + } + if(_fetcher.integerValued(pid) && value.getNumericValue().scale() > 0) { + QAWarning issue = new QAWarning(integerConstraintType, pid.getId(), QAWarning.Severity.IMPORTANT, 1); + issue.setProperty("property_entity", pid); + issue.setProperty("example_value", value.getNumericValue().toString()); + issue.setProperty("example_item_entity", entityId); + addIssue(issue); + } + Set allowedUnits = _fetcher.allowedUnits(pid); + String currentUnit = null; + if (value.getUnit() != null && value.getUnit() != "") { + currentUnit = value.getUnit(); + } + if(allowedUnits != null && + !allowedUnits.stream().map(u -> u != null ? u.getIri() : null) + .collect(Collectors.toSet()).contains(currentUnit)) { + String issueType = currentUnit == null ? noUnitProvidedType : invalidUnitType; + QAWarning issue = new QAWarning(issueType, pid.getId(), QAWarning.Severity.IMPORTANT, 1); + issue.setProperty("property_entity", pid); + issue.setProperty("example_value", value.getNumericValue().toString()); + issue.setProperty("example_item_entity", entityId); + if (currentUnit != null) { + issue.setProperty("unit_entity", currentUnit); + } + addIssue(issue); + } + } + } + +} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemVariable.java index 00c634580..af7d2fd72 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemVariable.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemVariable.java @@ -63,7 +63,7 @@ public class WbItemVariable extends WbVariableExpr { throws SkipSchemaExpressionException { if (cell.recon != null && (Judgment.Matched.equals(cell.recon.judgment) || Judgment.New.equals(cell.recon.judgment))) { - if (!cell.recon.identifierSpace.equals(Datamodel.SITE_WIKIDATA)) { + if (cell.recon.identifierSpace == null || !cell.recon.identifierSpace.equals(Datamodel.SITE_WIKIDATA)) { QAWarning warning = new QAWarning("invalid-identifier-space", null, QAWarning.Severity.INFO, 1); warning.setProperty("example_cell", cell.value.toString()); ctxt.addWarning(warning); diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java index b13c7ccff..ee2824611 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java @@ -29,18 +29,32 @@ import java.util.Set; import java.util.stream.Collectors; import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; +import org.wikidata.wdtk.datamodel.interfaces.Value; public class MockConstraintFetcher implements ConstraintFetcher { public static PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350"); public static PropertyIdValue inversePid = Datamodel.makeWikidataPropertyIdValue("P57"); + public static PropertyIdValue symmetricPid = Datamodel.makeWikidataPropertyIdValue("P783"); public static PropertyIdValue allowedQualifierPid = Datamodel.makeWikidataPropertyIdValue("P34"); public static PropertyIdValue mandatoryQualifierPid = Datamodel.makeWikidataPropertyIdValue("P97"); public static PropertyIdValue mainSnakPid = Datamodel.makeWikidataPropertyIdValue("P1234"); public static PropertyIdValue qualifierPid = Datamodel.makeWikidataPropertyIdValue("P987"); public static PropertyIdValue referencePid = Datamodel.makeWikidataPropertyIdValue("P384"); + + public static PropertyIdValue restrictedValuesPid = Datamodel.makeWikidataPropertyIdValue("P8121"); + public static ItemIdValue particularValue1 = Datamodel.makeWikidataItemIdValue("Q389"); + public static ItemIdValue particularValue2 = Datamodel.makeWikidataItemIdValue("Q378"); + + public static PropertyIdValue allowedUnitsPid = Datamodel.makeWikidataPropertyIdValue("P34787"); + public static ItemIdValue allowedUnit = Datamodel.makeWikidataItemIdValue("Q7887"); + public static PropertyIdValue noUnitsPid = Datamodel.makeWikidataPropertyIdValue("P334211"); + + public static PropertyIdValue noBoundsPid = Datamodel.makeWikidataPropertyIdValue("P8932"); + public static PropertyIdValue integerPid = Datamodel.makeWikidataPropertyIdValue("P389"); @Override public String getFormatRegex(PropertyIdValue pid) { @@ -94,4 +108,44 @@ public class MockConstraintFetcher implements ConstraintFetcher { return true; } + @Override + public boolean isSymmetric(PropertyIdValue pid) { + return pid.equals(symmetricPid); + } + + @Override + public Set allowedValues(PropertyIdValue pid) { + if (restrictedValuesPid.equals(pid)) { + return Collections.singleton(particularValue1); + } + return null; + } + + @Override + public Set disallowedValues(PropertyIdValue pid) { + if (restrictedValuesPid.equals(pid)) { + return Collections.singleton(particularValue2); + } + return null; + } + + @Override + public boolean boundsAllowed(PropertyIdValue pid) { + return !noBoundsPid.equals(pid); + } + + @Override + public boolean integerValued(PropertyIdValue pid) { + return integerPid.equals(pid); + } + + @Override + public Set allowedUnits(PropertyIdValue pid) { + if(allowedUnitsPid.equals(pid)) { + return Collections.singleton(allowedUnit); + } else if(noUnitsPid.equals(pid)) { + return Collections.singleton(null); + } + return null; + } } diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/QuantityScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/QuantityScrutinizerTest.java new file mode 100644 index 000000000..f0dbb0acf --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/QuantityScrutinizerTest.java @@ -0,0 +1,102 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import java.math.BigDecimal; + +import org.openrefine.wikidata.qa.MockConstraintFetcher; +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.interfaces.QuantityValue; + +public class QuantityScrutinizerTest extends ValueScrutinizerTest{ + + private QuantityValue exactValue = Datamodel.makeQuantityValue( + new BigDecimal("1.234")); + + private QuantityValue integerValue = Datamodel.makeQuantityValue( + new BigDecimal("132")); + + private QuantityValue trailingZeros = Datamodel.makeQuantityValue( + new BigDecimal("132.00")); + + private QuantityValue valueWithBounds = Datamodel.makeQuantityValue( + new BigDecimal("1.234"), + new BigDecimal("1.200"), + new BigDecimal("1.545")); + + private QuantityValue wrongUnitValue = Datamodel.makeQuantityValue( + new BigDecimal("1.234"), "Q346721"); + + private QuantityValue goodUnitValue = Datamodel.makeQuantityValue( + new BigDecimal("1.234"), MockConstraintFetcher.allowedUnit.getIri()); + + @Override + public EditScrutinizer getScrutinizer() { + return new QuantityScrutinizer(); + } + + @Test + public void testBoundsAllowed() { + scrutinize(valueWithBounds); + assertNoWarningRaised(); + } + + @Test + public void testBoundsDisallowed() { + scrutinize(MockConstraintFetcher.noBoundsPid, valueWithBounds); + assertWarningsRaised(QuantityScrutinizer.boundsDisallowedType); + } + + @Test + public void testFractionalAllowed() { + scrutinize(exactValue); + assertNoWarningRaised(); + } + + @Test + public void testFractionalDisallowed() { + scrutinize(MockConstraintFetcher.integerPid, exactValue); + assertWarningsRaised(QuantityScrutinizer.integerConstraintType); + } + + @Test + public void testTrailingZeros() { + scrutinize(MockConstraintFetcher.integerPid, trailingZeros); + assertWarningsRaised(QuantityScrutinizer.integerConstraintType); + } + + @Test + public void testInteger() { + scrutinize(MockConstraintFetcher.integerPid, integerValue); + assertNoWarningRaised(); + } + + @Test + public void testUnitReqired() { + scrutinize(MockConstraintFetcher.allowedUnitsPid, integerValue); + assertWarningsRaised(QuantityScrutinizer.noUnitProvidedType); + } + + @Test + public void testWrongUnit() { + scrutinize(MockConstraintFetcher.allowedUnitsPid, wrongUnitValue); + assertWarningsRaised(QuantityScrutinizer.invalidUnitType); + } + + @Test + public void testGoodUnit() { + scrutinize(MockConstraintFetcher.allowedUnitsPid, goodUnitValue); + assertNoWarningRaised(); + } + + @Test + public void testUnitForbidden() { + scrutinize(MockConstraintFetcher.noUnitsPid, goodUnitValue); + assertWarningsRaised(QuantityScrutinizer.invalidUnitType); + } + + @Test + public void testNoUnit() { + scrutinize(MockConstraintFetcher.noUnitsPid, integerValue); + assertNoWarningRaised(); + } +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java index 21ec3fdb3..bba04c5c6 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java @@ -35,7 +35,11 @@ public abstract class ValueScrutinizerTest extends SnakScrutinizerTest { public static final PropertyIdValue defaultPid = Datamodel.makeWikidataPropertyIdValue("P328"); public void scrutinize(Value value) { - scrutinize(Datamodel.makeValueSnak(defaultPid, value)); + scrutinize(defaultPid, value); + } + + public void scrutinize(PropertyIdValue pid, Value value) { + scrutinize(Datamodel.makeValueSnak(pid, value)); } public void scrutinizeLabel(MonolingualTextValue text) {