diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index 66e4c4ab5..6b097070b 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -93,6 +93,10 @@ "title": "{property_entity} added more than once on the same item.", "body": "This property is expected to be used at most once on each item but has been added multiple times on the same item, for instance on {example_entity}." }, + "identical-values-for-distinct-valued-property": { + "title": "Identical values for {property_entity}", + "body": "This property should have distinct values." + }, "no-edit-generated": { "title": "No edit was generated.", "body": "There might be something wrong with your schema." diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java index 9d1d35c40..181c75a6d 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java @@ -5,6 +5,7 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer; @@ -40,6 +41,7 @@ public class EditInspector { register(new RestrictedPositionScrutinizer()); register(new QualifierCompatibilityScrutinizer()); register(new SingleValueScrutinizer()); + register(new DistinctValuesScrutinizer()); register(new NoEditsMadeScrutinizer()); } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java 
b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java
new file mode 100644
index 000000000..27a26080d
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java
@@ -0,0 +1,77 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+/**
+ * A scrutinizer that checks for properties using the same value
+ * on more than one statement.
+ *
+ * Values are remembered per property for the lifetime of this instance. The
+ * entity holding the statement is not consulted, so a value repeated on the
+ * same item is reported just like a value repeated across different items.
+ *
+ * @author antonin
+ *
+ */
+public class DistinctValuesScrutinizer extends StatementScrutinizer {
+
+    /** Main snak values seen so far, grouped by the property they were used with. */
+    private final Map<PropertyIdValue, Set<Value>> _seenValues;
+
+    /**
+     * Creates a scrutinizer that has not recorded any value yet.
+     */
+    public DistinctValuesScrutinizer() {
+        _seenValues = new HashMap<>();
+    }
+
+    /**
+     * Records the main snak value of the statement and reports an
+     * "identical-values-for-distinct-valued-property" warning when the same
+     * value was already recorded for the same property.
+     *
+     * @param statement
+     *            the statement whose main snak is inspected
+     * @param entityId
+     *            not consulted by this check
+     * @param added
+     *            not consulted by this check
+     */
+    @Override
+    public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
+        PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
+        if (!_fetcher.hasDistinctValues(pid)) {
+            // Nothing to track unless the fetcher reports the property as distinct-valued.
+            return;
+        }
+
+        Value mainSnakValue = statement.getClaim().getMainSnak().getValue();
+        Set<Value> seen = _seenValues.computeIfAbsent(pid, key -> new HashSet<>());
+
+        // NOTE(review): getValue() may return null for snaks without a concrete
+        // value - confirm against WDTK. A HashSet stores null like any other
+        // element, so such snaks would be reported as duplicates of each other.
+        // Set.add returns false when the value was already present, which
+        // both detects the duplicate and records first occurrences in one step.
+        if (!seen.add(mainSnakValue)) {
+            QAWarning issue = new QAWarning(
+                    "identical-values-for-distinct-valued-property",
+                    pid.getId(),
+                    QAWarning.Severity.IMPORTANT,
+                    1);
+            issue.setProperty("property_entity", pid);
+            // TODO also report the items on which the property is duplicated
+            addIssue(issue);
+        }
+    }
+
+}