From 665585add9b0e2b72cae93bb224d79eaa910a2e2 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 9 Jan 2018 10:48:11 +0000 Subject: [PATCH] Add self-referential scrutinizer --- .../wikidata/module/langs/translation-en.json | 4 +++ .../openrefine/wikidata/qa/EditInspector.java | 2 ++ .../FormatConstraintScrutinizer.java | 11 ++++++-- .../InverseConstraintScrutinizer.java | 8 +++++- .../SelfReferentialScrutinizer.java | 22 ++++++++++++++++ .../qa/scrutinizers/SnakScrutinizer.java | 25 ++++++++++++++----- 6 files changed, 63 insertions(+), 9 deletions(-) create mode 100644 extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index 005f631ef..4b1272da4 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -49,6 +49,10 @@ "missing-inverse-statements": { "title": "Inverse statements will not be added.", "body": "Some of the properties that you are using require inverse statements. You should add them in your schema." + }, + "self-referential-statements": { + "title": "Some statements are self-referential.", + "body": "While not forbidden, self-referential statements are generally suspicious. You could have reconciliation issues." 
} } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java index e2f61acc9..e3fae40b2 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java @@ -8,6 +8,7 @@ import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer; +import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer; import org.openrefine.wikidata.schema.ItemUpdate; /** @@ -27,6 +28,7 @@ public class EditInspector { register(new NewItemScrutinizer()); register(new FormatConstraintScrutinizer()); register(new InverseConstraintScrutinizer()); + register(new SelfReferentialScrutinizer()); } /** diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatConstraintScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatConstraintScrutinizer.java index 66b6f8d51..d560f0cd4 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatConstraintScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatConstraintScrutinizer.java @@ -5,10 +5,17 @@ import java.util.Map; import java.util.regex.Pattern; import org.openrefine.wikidata.qa.ConstraintFetcher; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.Snak; import org.wikidata.wdtk.datamodel.interfaces.StringValue; - +/** + * A scrutinizer that detects incorrect formats in text values + * (mostly identifiers). 
+ * + * @author antonin + * + */ public class FormatConstraintScrutinizer extends SnakScrutinizer { private Map<String, Pattern> _patterns; @@ -41,7 +48,7 @@ public class FormatConstraintScrutinizer extends SnakScrutinizer { } @Override - public void scrutinize(Snak snak, boolean added) { + public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) { if(StringValue.class.isInstance(snak.getValue())) { String value = ((StringValue) snak.getValue()).getString(); String pid = snak.getPropertyId().getId(); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java index 0a3ad2c91..60ac66ee9 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java @@ -12,7 +12,13 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.Value; - +/** + * A scrutinizer that checks for missing inverse statements in + * edit batches. 
+ * + * @author antonin + * + */ public class InverseConstraintScrutinizer extends StatementScrutinizer { private ConstraintFetcher _fetcher; diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java new file mode 100644 index 000000000..5fc8f8b0d --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java @@ -0,0 +1,22 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import org.wikidata.wdtk.datamodel.interfaces.Snak; + +/** + * A scrutinizer that checks for self-referential statements. + * These statements are flagged by Wikibase as suspicious. + * + * @author antonin + * + */ +public class SelfReferentialScrutinizer extends SnakScrutinizer { + + @Override + public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) { + if (entityId.equals(snak.getValue())) { + warning("self-referential-statements"); + } + } + +} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java index cbd34a9ee..7cd1cb446 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java @@ -7,28 +7,41 @@ import org.wikidata.wdtk.datamodel.interfaces.Reference; import org.wikidata.wdtk.datamodel.interfaces.Snak; import org.wikidata.wdtk.datamodel.interfaces.Statement; +/** + * A scrutinizer that inspects snaks individually, no matter whether they + * appear as main snaks, qualifiers or references. 
+ * + * @author antonin + * + */ public abstract class SnakScrutinizer extends StatementScrutinizer { - public abstract void scrutinize(Snak snak, boolean added); + /** + * This is the method that subclasses should override to implement their checks. + * @param snak: the snak to inspect + * @param entityId: the item on which it is going to (dis)appear + * @param added: whether this snak is going to be added or deleted + */ + public abstract void scrutinize(Snak snak, EntityIdValue entityId, boolean added); @Override public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) { // Main snak - scrutinize(statement.getClaim().getMainSnak(), added); + scrutinize(statement.getClaim().getMainSnak(), entityId, added); // Qualifiers - scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), added); + scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), entityId, added); // References for(Reference ref : statement.getReferences()) { - scrutinizeSnakSet(ref.getAllSnaks(), added); + scrutinizeSnakSet(ref.getAllSnaks(), entityId, added); } } - private void scrutinizeSnakSet(Iterator<Snak> snaks, boolean added) { + private void scrutinizeSnakSet(Iterator<Snak> snaks, EntityIdValue entityId, boolean added) { while(snaks.hasNext()) { Snak snak = snaks.next(); - scrutinize(snak, added); + scrutinize(snak, entityId, added); } } }