Add self-referential scrutinizer
This commit is contained in:
parent
d347e5091f
commit
665585add9
@ -49,6 +49,10 @@
|
||||
"missing-inverse-statements": {
|
||||
"title": "Inverse statements will not be added.",
|
||||
"body": "Some of the properties that you are using require inverse statements. You should add them in your schema."
|
||||
},
|
||||
"self-referential-statements": {
|
||||
"title": "Some statements are self-referential.",
|
||||
"body": "While not forbidden, self-referential statements are generally suspicious. You could have reconciliation issues."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
|
||||
/**
|
||||
@ -27,6 +28,7 @@ public class EditInspector {
|
||||
register(new NewItemScrutinizer());
|
||||
register(new FormatConstraintScrutinizer());
|
||||
register(new InverseConstraintScrutinizer());
|
||||
register(new SelfReferentialScrutinizer());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -5,10 +5,17 @@ import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||
|
||||
|
||||
/**
|
||||
* A scrutinizer that detects incorrect formats in text values
|
||||
* (mostly identifiers).
|
||||
*
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
||||
|
||||
private Map<String, Pattern> _patterns;
|
||||
@ -41,7 +48,7 @@ public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void scrutinize(Snak snak, boolean added) {
|
||||
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
|
||||
if(StringValue.class.isInstance(snak.getValue())) {
|
||||
String value = ((StringValue) snak.getValue()).getString();
|
||||
String pid = snak.getPropertyId().getId();
|
||||
|
@ -12,7 +12,13 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
|
||||
/**
|
||||
* A scrutinizer that checks for missing inverse statements in
|
||||
* edit batches.
|
||||
*
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
public class InverseConstraintScrutinizer extends StatementScrutinizer {
|
||||
|
||||
private ConstraintFetcher _fetcher;
|
||||
|
@ -0,0 +1,22 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
|
||||
/**
|
||||
* A scrutinizer that checks for self-referential statements.
|
||||
* These statements are flagged by Wikibase as suspicious.
|
||||
*
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
public class SelfReferentialScrutinizer extends SnakScrutinizer {
|
||||
|
||||
@Override
|
||||
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
|
||||
if (entityId.equals(snak.getValue())) {
|
||||
warning("self-referential-statements");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -7,28 +7,41 @@ import org.wikidata.wdtk.datamodel.interfaces.Reference;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
|
||||
/**
|
||||
* A scrutinizer that inspects snaks individually, no matter whether they
|
||||
* appear as main snaks, qualifiers or references.
|
||||
*
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
public abstract class SnakScrutinizer extends StatementScrutinizer {
|
||||
|
||||
public abstract void scrutinize(Snak snak, boolean added);
|
||||
/**
|
||||
* This is the method that subclasses should override to implement their checks.
|
||||
* @param snak the snak to inspect
|
||||
* @param entityId the item on which it is going to (dis)appear
|
||||
* @param added whether this snak is going to be added or deleted
|
||||
*/
|
||||
public abstract void scrutinize(Snak snak, EntityIdValue entityId, boolean added);
|
||||
|
||||
@Override
|
||||
public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
|
||||
// Main snak
|
||||
scrutinize(statement.getClaim().getMainSnak(), added);
|
||||
scrutinize(statement.getClaim().getMainSnak(), entityId, added);
|
||||
|
||||
// Qualifiers
|
||||
scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), added);
|
||||
scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), entityId, added);
|
||||
|
||||
// References
|
||||
for(Reference ref : statement.getReferences()) {
|
||||
scrutinizeSnakSet(ref.getAllSnaks(), added);
|
||||
scrutinizeSnakSet(ref.getAllSnaks(), entityId, added);
|
||||
}
|
||||
}
|
||||
|
||||
private void scrutinizeSnakSet(Iterator<Snak> snaks, boolean added) {
|
||||
private void scrutinizeSnakSet(Iterator<Snak> snaks, EntityIdValue entityId, boolean added) {
|
||||
while(snaks.hasNext()) {
|
||||
Snak snak = snaks.next();
|
||||
scrutinize(snak, added);
|
||||
scrutinize(snak, entityId, added);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user