Add distinct values scrutinizer
This commit is contained in:
parent
7518d194f0
commit
5d9aa9cae5
@ -93,6 +93,10 @@
|
||||
"title": "{property_entity} added more than once on the same item.",
|
||||
"body": "This property is expected to be used at most once on each item but has been added multiple times on the same item, for instance on {example_entity}."
|
||||
},
|
||||
"identical-values-for-distinct-valued-property": {
|
||||
"title": "Identical values for {property_entity}",
|
||||
"body": "This property should have distinct values."
|
||||
},
|
||||
"no-edit-generated": {
|
||||
"title": "No edit was generated.",
|
||||
"body": "There might be something wrong with your schema."
|
||||
|
@ -5,6 +5,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
||||
@ -40,6 +41,7 @@ public class EditInspector {
|
||||
register(new RestrictedPositionScrutinizer());
|
||||
register(new QualifierCompatibilityScrutinizer());
|
||||
register(new SingleValueScrutinizer());
|
||||
register(new DistinctValuesScrutinizer());
|
||||
register(new NoEditsMadeScrutinizer());
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,54 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
/**
|
||||
* A scrutinizer that checks for properties using the same value
|
||||
* on different items.
|
||||
*
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
public class DistinctValuesScrutinizer extends StatementScrutinizer {
|
||||
|
||||
private Map<PropertyIdValue, Set<Value>> _seenValues;
|
||||
|
||||
public DistinctValuesScrutinizer() {
|
||||
_seenValues = new HashMap<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
|
||||
PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
|
||||
if (_fetcher.hasDistinctValues(pid)) {
|
||||
Value mainSnakValue = statement.getClaim().getMainSnak().getValue();
|
||||
Set<Value> seen = _seenValues.get(pid);
|
||||
if (seen == null) {
|
||||
seen = new HashSet<Value>();
|
||||
_seenValues.put(pid, seen);
|
||||
}
|
||||
if (seen.contains(mainSnakValue)) {
|
||||
QAWarning issue = new QAWarning(
|
||||
"identical-values-for-distinct-valued-property",
|
||||
pid.getId(),
|
||||
QAWarning.Severity.IMPORTANT,
|
||||
1);
|
||||
issue.setProperty("property_entity", pid);
|
||||
// TODO also report the items on which the property is duplicated
|
||||
addIssue(issue);
|
||||
} else {
|
||||
seen.add(mainSnakValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user