Add inverse constraint scrutinizer

This commit is contained in:
Antonin Delpeuch 2018-01-09 10:27:19 +00:00
parent 0d5875b35b
commit 508e9d22ad
3 changed files with 91 additions and 0 deletions

View File

@ -45,6 +45,10 @@
"remove-statements-with-invalid-format": {
"title": "Statements with invalid format will be removed.",
"body": "If these statements currently exist on Wikidata, this will solve constraint violations."
},
"missing-inverse-statements": {
"title": "Inverse statements will not be added.",
"body": "Some of the properties that you are using require inverse statements. You should add them in your schema."
}
}
}

View File

@ -26,6 +26,7 @@ public class EditInspector {
// Register all known scrutinizers here
register(new NewItemScrutinizer());
register(new FormatConstraintScrutinizer());
register(new InverseConstraintScrutinizer());
}
/**

View File

@ -0,0 +1,86 @@
package org.openrefine.wikidata.qa.scrutinizers;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.Value;
/**
 * Scrutinizer that looks for added item-valued statements whose property has
 * an inverse property, and raises the "missing-inverse-statements" issue for
 * every such link that is not matched by a reciprocal statement recorded by
 * this same scrutinizer.
 *
 * Links are accumulated in {@link #scrutinize} and only compared once
 * {@link #batchIsFinished()} is called.
 */
public class InverseConstraintScrutinizer extends StatementScrutinizer {

    /** Source of the inverse property ids. */
    private ConstraintFetcher _fetcher;
    /** Cache: property id -> id of its inverse property (null when the fetcher returned none). */
    private Map<String, String> _inverse;
    /** Recorded links: property id -> (subject entity -> set of target entities). */
    private Map<String, Map<EntityIdValue, Set<EntityIdValue> >> _statements;

    public InverseConstraintScrutinizer() {
        _fetcher = new ConstraintFetcher();
        _inverse = new HashMap<>();
        _statements = new HashMap<>();
    }

    /**
     * Returns the id of the inverse property of {@code pid}, asking the
     * fetcher only the first time a given property is seen.
     *
     * @param pid the property id to look up
     * @return the inverse property id, or null if the fetcher reported none
     */
    protected String getInverseConstraint(String pid) {
        // Already resolved (possibly to null): answer from the cache.
        if (_inverse.containsKey(pid)) {
            return _inverse.get(pid);
        }

        String fetchedInverse = _fetcher.getInversePid(pid);
        Map<EntityIdValue, Set<EntityIdValue>> linksForPid =
                new HashMap<EntityIdValue, Set<EntityIdValue>>();
        _inverse.put(pid, fetchedInverse);
        _statements.put(pid, linksForPid);

        // The inverse constraints are not guaranteed to be consistent on
        // Wikidata, so the reverse direction is registered here (without
        // querying the fetcher) unless that property is already known.
        boolean reverseStillUnknown = fetchedInverse != null && !_inverse.containsKey(fetchedInverse);
        if (reverseStillUnknown) {
            Map<EntityIdValue, Set<EntityIdValue>> linksForInverse =
                    new HashMap<EntityIdValue, Set<EntityIdValue>>();
            _inverse.put(fetchedInverse, pid);
            _statements.put(fetchedInverse, linksForInverse);
        }
        return fetchedInverse;
    }

    /**
     * Records the link (entityId -> main value) of an added statement when
     * its main value is an item id and its property has an inverse.
     *
     * @param statement the statement being inspected
     * @param entityId  the entity the statement is attached to
     * @param added     true if the statement is added; other statements are ignored
     */
    @Override
    public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
        if (!added) {
            return; // TODO support for deleted statements
        }

        Value mainValue = statement.getClaim().getMainSnak().getValue();
        if (!(mainValue instanceof ItemIdValue)) {
            // Only links between items are tracked.
            return;
        }

        String pid = statement.getClaim().getMainSnak().getPropertyId().getId();
        if (getInverseConstraint(pid) == null) {
            // No inverse property: nothing to verify for this statement.
            return;
        }

        Map<EntityIdValue, Set<EntityIdValue>> linksForProperty = _statements.get(pid);
        Set<EntityIdValue> targets = linksForProperty.get(entityId);
        if (targets == null) {
            targets = new HashSet<>();
            linksForProperty.put(entityId, targets);
        }
        targets.add((EntityIdValue) mainValue);
    }

    /**
     * Compares the recorded links of every cached (property, inverse) pair
     * and raises "missing-inverse-statements" once for each link that has no
     * reciprocal link recorded under the inverse property.
     */
    @Override
    public void batchIsFinished() {
        // Each cached pair is visited in the direction key -> value.
        for (Entry<String, String> pair : _inverse.entrySet()) {
            Map<EntityIdValue, Set<EntityIdValue>> forwardLinks = _statements.get(pair.getKey());
            Map<EntityIdValue, Set<EntityIdValue>> backwardLinks = _statements.get(pair.getValue());

            for (Entry<EntityIdValue, Set<EntityIdValue>> outgoing : forwardLinks.entrySet()) {
                EntityIdValue subject = outgoing.getKey();
                for (EntityIdValue target : outgoing.getValue()) {
                    // The target must link back to the subject through the inverse property.
                    Set<EntityIdValue> linksFromTarget = backwardLinks.get(target);
                    boolean reciprocated = linksFromTarget != null && linksFromTarget.contains(subject);
                    if (!reciprocated) {
                        important("missing-inverse-statements");
                    }
                }
            }
        }
    }
}