Add distinct values scrutinizer

This commit is contained in:
Antonin Delpeuch 2018-01-10 18:50:52 +00:00
parent 7518d194f0
commit 5d9aa9cae5
3 changed files with 60 additions and 0 deletions

View File

@ -93,6 +93,10 @@
"title": "{property_entity} added more than once on the same item.", "title": "{property_entity} added more than once on the same item.",
"body": "This property is expected to be used at most once on each item but has been added multiple times on the same item, for instance on {example_entity}." "body": "This property is expected to be used at most once on each item but has been added multiple times on the same item, for instance on {example_entity}."
}, },
"identical-values-for-distinct-valued-property": {
"title": "Identical values for {property_entity}",
"body": "This property should have distinct values."
},
"no-edit-generated": { "no-edit-generated": {
"title": "No edit was generated.", "title": "No edit was generated.",
"body": "There might be something wrong with your schema." "body": "There might be something wrong with your schema."

View File

@ -5,6 +5,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
@ -40,6 +41,7 @@ public class EditInspector {
register(new RestrictedPositionScrutinizer()); register(new RestrictedPositionScrutinizer());
register(new QualifierCompatibilityScrutinizer()); register(new QualifierCompatibilityScrutinizer());
register(new SingleValueScrutinizer()); register(new SingleValueScrutinizer());
register(new DistinctValuesScrutinizer());
register(new NoEditsMadeScrutinizer()); register(new NoEditsMadeScrutinizer());
} }

View File

@ -0,0 +1,54 @@
package org.openrefine.wikidata.qa.scrutinizers;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.openrefine.wikidata.qa.QAWarning;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.Value;
/**
 * A scrutinizer that checks for properties using the same value
 * more than once (typically on different items) although the
 * property is expected to have distinct values.
 *
 * Values are remembered per property for the lifetime of this
 * instance, so every statement passed to
 * {@link #scrutinize(Statement, EntityIdValue, boolean)} is compared
 * against all the statements seen before it.
 *
 * @author antonin
 *
 */
public class DistinctValuesScrutinizer extends StatementScrutinizer {

    /** Main snak values encountered so far, grouped by property. */
    private final Map<PropertyIdValue, Set<Value>> _seenValues;

    /**
     * Creates a scrutinizer which has not seen any value yet.
     */
    public DistinctValuesScrutinizer() {
        _seenValues = new HashMap<>();
    }

    /**
     * Records the main value of the statement and raises an
     * "identical-values-for-distinct-valued-property" warning if the
     * same value was already recorded for the same property.
     *
     * @param statement
     *            the statement to inspect
     * @param entityId
     *            the entity holding the statement (not used yet, see the
     *            TODO below)
     * @param added
     *            whether the statement is being added; statements for
     *            which this is false are ignored
     */
    @Override
    public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
        // Only statements being added can introduce a duplicate: taking
        // the others into account would flag a value which is merely
        // moved from one item to another.
        // NOTE(review): assumes added == false means the statement is
        // being removed - confirm against StatementScrutinizer.
        if (!added) {
            return;
        }

        PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
        if (!_fetcher.hasDistinctValues(pid)) {
            return;
        }

        Value mainSnakValue = statement.getClaim().getMainSnak().getValue();
        // A snak without an actual value (presumably "no value" or
        // "unknown value") cannot be compared meaningfully: two such
        // statements are not identical values.
        if (mainSnakValue == null) {
            return;
        }

        Set<Value> seen = _seenValues.computeIfAbsent(pid, key -> new HashSet<>());
        // Set.add returns false when the value was already present.
        if (!seen.add(mainSnakValue)) {
            QAWarning issue = new QAWarning(
                    "identical-values-for-distinct-valued-property",
                    pid.getId(),
                    QAWarning.Severity.IMPORTANT,
                    1);
            issue.setProperty("property_entity", pid);
            // TODO also report the items on which the property is duplicated
            addIssue(issue);
        }
    }
}