Add distinct values scrutinizer
This commit is contained in:
parent
7518d194f0
commit
5d9aa9cae5
@ -93,6 +93,10 @@
|
|||||||
"title": "{property_entity} added more than once on the same item.",
|
"title": "{property_entity} added more than once on the same item.",
|
||||||
"body": "This property is expected to be used at most once on each item but has been added multiple times on the same item, for instance on {example_entity}."
|
"body": "This property is expected to be used at most once on each item but has been added multiple times on the same item, for instance on {example_entity}."
|
||||||
},
|
},
|
||||||
|
"identical-values-for-distinct-valued-property": {
|
||||||
|
"title": "Identical values for {property_entity}",
|
||||||
|
"body": "This property should have distinct values."
|
||||||
|
},
|
||||||
"no-edit-generated": {
|
"no-edit-generated": {
|
||||||
"title": "No edit was generated.",
|
"title": "No edit was generated.",
|
||||||
"body": "There might be something wrong with your schema."
|
"body": "There might be something wrong with your schema."
|
||||||
|
@ -5,6 +5,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
||||||
@ -40,6 +41,7 @@ public class EditInspector {
|
|||||||
register(new RestrictedPositionScrutinizer());
|
register(new RestrictedPositionScrutinizer());
|
||||||
register(new QualifierCompatibilityScrutinizer());
|
register(new QualifierCompatibilityScrutinizer());
|
||||||
register(new SingleValueScrutinizer());
|
register(new SingleValueScrutinizer());
|
||||||
|
register(new DistinctValuesScrutinizer());
|
||||||
register(new NoEditsMadeScrutinizer());
|
register(new NoEditsMadeScrutinizer());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,54 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.QAWarning;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A scrutinizer that checks for properties using the same value
|
||||||
|
* on different items.
|
||||||
|
*
|
||||||
|
* @author antonin
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class DistinctValuesScrutinizer extends StatementScrutinizer {
|
||||||
|
|
||||||
|
private Map<PropertyIdValue, Set<Value>> _seenValues;
|
||||||
|
|
||||||
|
public DistinctValuesScrutinizer() {
|
||||||
|
_seenValues = new HashMap<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
|
||||||
|
PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
|
||||||
|
if (_fetcher.hasDistinctValues(pid)) {
|
||||||
|
Value mainSnakValue = statement.getClaim().getMainSnak().getValue();
|
||||||
|
Set<Value> seen = _seenValues.get(pid);
|
||||||
|
if (seen == null) {
|
||||||
|
seen = new HashSet<Value>();
|
||||||
|
_seenValues.put(pid, seen);
|
||||||
|
}
|
||||||
|
if (seen.contains(mainSnakValue)) {
|
||||||
|
QAWarning issue = new QAWarning(
|
||||||
|
"identical-values-for-distinct-valued-property",
|
||||||
|
pid.getId(),
|
||||||
|
QAWarning.Severity.IMPORTANT,
|
||||||
|
1);
|
||||||
|
issue.setProperty("property_entity", pid);
|
||||||
|
// TODO also report the items on which the property is duplicated
|
||||||
|
addIssue(issue);
|
||||||
|
} else {
|
||||||
|
seen.add(mainSnakValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user