Add self-referential scrutinizer
This commit is contained in:
parent
d347e5091f
commit
665585add9
@ -49,6 +49,10 @@
|
|||||||
"missing-inverse-statements": {
|
"missing-inverse-statements": {
|
||||||
"title": "Inverse statements will not be added.",
|
"title": "Inverse statements will not be added.",
|
||||||
"body": "Some of the properties that you are using require inverse statements. You should add them in your schema."
|
"body": "Some of the properties that you are using require inverse statements. You should add them in your schema."
|
||||||
|
},
|
||||||
|
"self-referential-statements": {
|
||||||
|
"title": "Some statements are self-referential.",
|
||||||
|
"body": "While not forbidden, self-referential statements are generally suspicious. You could have reconciliation issues."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,7 @@ import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
|||||||
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
||||||
|
import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer;
|
||||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -27,6 +28,7 @@ public class EditInspector {
|
|||||||
register(new NewItemScrutinizer());
|
register(new NewItemScrutinizer());
|
||||||
register(new FormatConstraintScrutinizer());
|
register(new FormatConstraintScrutinizer());
|
||||||
register(new InverseConstraintScrutinizer());
|
register(new InverseConstraintScrutinizer());
|
||||||
|
register(new SelfReferentialScrutinizer());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -5,10 +5,17 @@ import java.util.Map;
|
|||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A scrutinizer that detects incorrect formats in text values
|
||||||
|
* (mostly identifiers).
|
||||||
|
*
|
||||||
|
* @author antonin
|
||||||
|
*
|
||||||
|
*/
|
||||||
public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
||||||
|
|
||||||
private Map<String, Pattern> _patterns;
|
private Map<String, Pattern> _patterns;
|
||||||
@ -41,7 +48,7 @@ public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(Snak snak, boolean added) {
|
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
|
||||||
if(StringValue.class.isInstance(snak.getValue())) {
|
if(StringValue.class.isInstance(snak.getValue())) {
|
||||||
String value = ((StringValue) snak.getValue()).getString();
|
String value = ((StringValue) snak.getValue()).getString();
|
||||||
String pid = snak.getPropertyId().getId();
|
String pid = snak.getPropertyId().getId();
|
||||||
|
@ -12,7 +12,13 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
|||||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A scrutinizer that checks for missing inverse statements in
|
||||||
|
* edit batches.
|
||||||
|
*
|
||||||
|
* @author antonin
|
||||||
|
*
|
||||||
|
*/
|
||||||
public class InverseConstraintScrutinizer extends StatementScrutinizer {
|
public class InverseConstraintScrutinizer extends StatementScrutinizer {
|
||||||
|
|
||||||
private ConstraintFetcher _fetcher;
|
private ConstraintFetcher _fetcher;
|
||||||
|
@ -0,0 +1,22 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A scrutinizer that checks for self-referential statements.
|
||||||
|
* These statements are flagged by Wikibase as suspicious.
|
||||||
|
*
|
||||||
|
* @author antonin
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class SelfReferentialScrutinizer extends SnakScrutinizer {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
|
||||||
|
if (entityId.equals(snak.getValue())) {
|
||||||
|
warning("self-referential-statements");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -7,28 +7,41 @@ import org.wikidata.wdtk.datamodel.interfaces.Reference;
|
|||||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A scrutinizer that inspects snaks individually, no matter whether they
|
||||||
|
* appear as main snaks, qualifiers or references.
|
||||||
|
*
|
||||||
|
* @author antonin
|
||||||
|
*
|
||||||
|
*/
|
||||||
public abstract class SnakScrutinizer extends StatementScrutinizer {
|
public abstract class SnakScrutinizer extends StatementScrutinizer {
|
||||||
|
|
||||||
public abstract void scrutinize(Snak snak, boolean added);
|
/**
|
||||||
|
* This is the method that subclasses should override to implement their checks.
|
||||||
|
* @param snak the snak to inspect
|
||||||
|
* @param entityId the item on which it is going to (dis)appear
|
||||||
|
* @param added whether this snak is going to be added or deleted
|
||||||
|
*/
|
||||||
|
public abstract void scrutinize(Snak snak, EntityIdValue entityId, boolean added);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
|
public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
|
||||||
// Main snak
|
// Main snak
|
||||||
scrutinize(statement.getClaim().getMainSnak(), added);
|
scrutinize(statement.getClaim().getMainSnak(), entityId, added);
|
||||||
|
|
||||||
// Qualifiers
|
// Qualifiers
|
||||||
scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), added);
|
scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), entityId, added);
|
||||||
|
|
||||||
// References
|
// References
|
||||||
for(Reference ref : statement.getReferences()) {
|
for(Reference ref : statement.getReferences()) {
|
||||||
scrutinizeSnakSet(ref.getAllSnaks(), added);
|
scrutinizeSnakSet(ref.getAllSnaks(), entityId, added);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void scrutinizeSnakSet(Iterator<Snak> snaks, boolean added) {
|
private void scrutinizeSnakSet(Iterator<Snak> snaks, EntityIdValue entityId, boolean added) {
|
||||||
while(snaks.hasNext()) {
|
while(snaks.hasNext()) {
|
||||||
Snak snak = snaks.next();
|
Snak snak = snaks.next();
|
||||||
scrutinize(snak, added);
|
scrutinize(snak, entityId, added);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user