Add self-referential scrutinizer

This commit is contained in:
Antonin Delpeuch 2018-01-09 10:48:11 +00:00
parent d347e5091f
commit 665585add9
6 changed files with 63 additions and 9 deletions

View File

@ -49,6 +49,10 @@
"missing-inverse-statements": {
"title": "Inverse statements will not be added.",
"body": "Some of the properties that you are using require inverse statements. You should add them in your schema."
},
"self-referential-statements": {
"title": "Some statements are self-referential.",
"body": "While not forbidden, self-referential statements are generally suspicious. You could have reconciliation issues."
}
}
}

View File

@ -8,6 +8,7 @@ import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer;
import org.openrefine.wikidata.schema.ItemUpdate;
/**
@ -27,6 +28,7 @@ public class EditInspector {
register(new NewItemScrutinizer());
register(new FormatConstraintScrutinizer());
register(new InverseConstraintScrutinizer());
register(new SelfReferentialScrutinizer());
}
/**

View File

@ -5,10 +5,17 @@ import java.util.Map;
import java.util.regex.Pattern;
import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
/**
* A scrutinizer that detects incorrect formats in text values
* (mostly identifiers).
*
* @author antonin
*
*/
public class FormatConstraintScrutinizer extends SnakScrutinizer {
private Map<String, Pattern> _patterns;
@ -41,7 +48,7 @@ public class FormatConstraintScrutinizer extends SnakScrutinizer {
}
@Override
public void scrutinize(Snak snak, boolean added) {
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
if(StringValue.class.isInstance(snak.getValue())) {
String value = ((StringValue) snak.getValue()).getString();
String pid = snak.getPropertyId().getId();

View File

@ -12,7 +12,13 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.Value;
/**
* A scrutinizer that checks for missing inverse statements in
* edit batches.
*
* @author antonin
*
*/
public class InverseConstraintScrutinizer extends StatementScrutinizer {
private ConstraintFetcher _fetcher;

View File

@ -0,0 +1,22 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
/**
 * Scrutinizer that looks for self-referential statements, i.e. snaks whose
 * value is the very entity they are attached to. Wikibase flags such
 * statements as suspicious.
 *
 * @author antonin
 */
public class SelfReferentialScrutinizer extends SnakScrutinizer {

    /**
     * Raises the "self-referential-statements" warning when the value of the
     * snak is equal to the entity on which the snak appears.
     *
     * @param snak     the snak to inspect
     * @param entityId the entity on which the snak appears
     * @param added    not consulted by this check
     */
    @Override
    public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
        boolean pointsToItself = entityId.equals(snak.getValue());
        if (!pointsToItself) {
            return;
        }
        warning("self-referential-statements");
    }
}

View File

@ -7,28 +7,41 @@ import org.wikidata.wdtk.datamodel.interfaces.Reference;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
/**
* A scrutinizer that inspects snaks individually, no matter whether they
* appear as main snaks, qualifiers or references.
*
* Subclasses implement the snak-level {@code scrutinize} method; the
* statement-level {@code scrutinize} defined here forwards every snak of a
* statement to it.
*
* @author antonin
*
*/
public abstract class SnakScrutinizer extends StatementScrutinizer {
// NOTE(review): this listing appears to interleave pre-change lines (the
// two-argument calls/signatures without entityId) with their replacements
// (the three-argument forms) -- confirm against the committed file.
public abstract void scrutinize(Snak snak, boolean added);
/**
* This is the method that subclasses should override to implement their checks.
*
* @param snak the snak to inspect
* @param entityId the item on which it is going to (dis)appear
* @param added whether this snak is going to be added or deleted
*/
public abstract void scrutinize(Snak snak, EntityIdValue entityId, boolean added);
/**
* Inspects a statement by passing each of its snaks to the snak-level
* {@code scrutinize} method: first the main snak, then the qualifiers,
* then the snaks of every reference.
*
* @param statement the statement whose snaks are inspected
* @param entityId the entity passed along with each snak
* @param added the flag passed along with each snak
*/
@Override
public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
// Main snak
scrutinize(statement.getClaim().getMainSnak(), added);
scrutinize(statement.getClaim().getMainSnak(), entityId, added);
// Qualifiers
scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), added);
scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), entityId, added);
// References
for(Reference ref : statement.getReferences()) {
scrutinizeSnakSet(ref.getAllSnaks(), added);
scrutinizeSnakSet(ref.getAllSnaks(), entityId, added);
}
}
/**
* Drains the given iterator, handing each snak to the snak-level
* {@code scrutinize} method.
*/
private void scrutinizeSnakSet(Iterator<Snak> snaks, boolean added) {
private void scrutinizeSnakSet(Iterator<Snak> snaks, EntityIdValue entityId, boolean added) {
while(snaks.hasNext()) {
Snak snak = snaks.next();
scrutinize(snak, added);
scrutinize(snak, entityId, added);
}
}
}