From 30ce8680c597e06162e633688dac86e5becaa5b6 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Thu, 11 Jan 2018 11:34:00 +0000 Subject: [PATCH] Add architecture to emit warnings during evaluation too --- .../wikidata/module/langs/translation-en.json | 8 ++++ .../PreviewWikibaseSchemaCommand.java | 10 +++-- .../openrefine/wikidata/qa/EditInspector.java | 21 ++-------- .../wikidata/qa/QAWarningStore.java | 6 +-- .../wikidata/schema/ExpressionContext.java | 35 ++++++++++++++++- .../wikidata/schema/WbItemDocumentExpr.java | 8 +++- .../wikidata/schema/WbReferenceExpr.java | 14 +++++-- .../wikidata/schema/WbStatementExpr.java | 27 ++++++++++++- .../wikidata/schema/WbStatementGroupExpr.java | 12 +++++- .../wikidata/schema/WikibaseSchema.java | 38 +++++++++++++++++-- 10 files changed, 139 insertions(+), 40 deletions(-) diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json index 6b097070b..12686bb13 100644 --- a/extensions/wikidata/module/langs/translation-en.json +++ b/extensions/wikidata/module/langs/translation-en.json @@ -104,6 +104,14 @@ "no-issue-detected": { "title": "No issue was detected in your edits.", "body": "Note that OpenRefine cannot detect all the types of problems Wikidata edits can have." + }, + "ignored-qualifiers": { + "title": "Some qualifiers were ignored.", + "body": "Qualifier values could not be parsed, so they will not be added to the corresponding statements." + }, + "ignored-references": { + "title": "Some references were ignored.", + "body": "None of their statements could be parsed, so no reference was added." } } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java b/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java index 8c400e64c..707fbd526 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java @@ -53,6 +53,7 @@ import com.google.refine.commands.Command; import org.openrefine.wikidata.exporters.QuickStatementsExporter; import org.openrefine.wikidata.qa.EditInspector; import org.openrefine.wikidata.qa.QAWarning; +import org.openrefine.wikidata.qa.QAWarningStore; import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.schema.WikibaseSchema; import com.google.refine.model.Project; @@ -72,10 +73,11 @@ public class PreviewWikibaseSchemaCommand extends Command { String jsonString = request.getParameter("schema"); JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString); WikibaseSchema schema = WikibaseSchema.reconstruct(json); + QAWarningStore warningStore = new QAWarningStore(); // Evaluate project Engine engine = getEngine(request, project); - List editBatch = schema.evaluate(project, engine); + List editBatch = schema.evaluate(project, engine, warningStore); StringWriter sb = new StringWriter(2048); JSONWriter writer = new JSONWriter(sb, 32); @@ -85,11 +87,11 @@ public class PreviewWikibaseSchemaCommand extends Command { StringWriter stringWriter = new StringWriter(); // Inspect the edits and generate warnings - EditInspector inspector = new EditInspector(); + EditInspector inspector = new EditInspector(warningStore); inspector.inspect(editBatch); writer.key("warnings"); writer.array(); - for (QAWarning warning : inspector.getWarnings()) { + for (QAWarning warning : warningStore.getWarnings()) { warning.write(writer, new Properties()); } writer.endArray(); @@ -97,7 +99,7 @@ public class PreviewWikibaseSchemaCommand extends Command { // this is not the length of the warnings array written before, // but the total number of issues raised (before deduplication) writer.key("nb_warnings"); - writer.value(inspector.getTotalNumberOfWarnings()); + writer.value(warningStore.getNbWarnings()); // Export to QuickStatements QuickStatementsExporter exporter = new QuickStatementsExporter(); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java index 181c75a6d..0009dbeb7 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java @@ -28,9 +28,9 @@ public class EditInspector { private Map scrutinizers; private QAWarningStore warningStore; - public EditInspector() { - scrutinizers = new HashMap<>(); - warningStore = new QAWarningStore(); + public EditInspector(QAWarningStore warningStore) { + this.scrutinizers = new HashMap<>(); + this.warningStore = warningStore; // Register all known scrutinizers here register(new NewItemScrutinizer()); @@ -72,19 +72,4 @@ public class EditInspector { "no-issue-detected", null, QAWarning.Severity.INFO, 0)); } } - - /** - * Retrieve the warnings after inspection of the edits - * @return - */ - public List getWarnings() { - return warningStore.getWarnings(); - } - - /** - * Retrieve the number of warnings before deduplication - */ - public int getTotalNumberOfWarnings() { - return warningStore.getNbWarnings(); - } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarningStore.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarningStore.java index 508fe5c39..219003aa8 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarningStore.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarningStore.java @@ -46,7 +46,7 @@ public class QAWarningStore { * Returns the list of aggregated warnings, ordered by decreasing severity */ @JsonProperty("warnings") - List getWarnings() { + public List getWarnings() { List result = new ArrayList<>(map.values()); Collections.sort(result); return result; @@ -56,7 +56,7 @@ public class QAWarningStore { * Returns the maximum severity of the stored warnings (INFO if empty) */ @JsonProperty("max_severity") - QAWarning.Severity getMaxSeverity() { + public QAWarning.Severity getMaxSeverity() { return maxSeverity; } @@ -64,7 +64,7 @@ public class QAWarningStore { * Returns the total number of warnings */ @JsonProperty("nb_warnings") - int getNbWarnings() { + public int getNbWarnings() { return totalWarnings; } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/ExpressionContext.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/ExpressionContext.java index c66970332..471c50ee0 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/ExpressionContext.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/ExpressionContext.java @@ -1,21 +1,46 @@ package org.openrefine.wikidata.schema; +import org.openrefine.wikidata.qa.QAWarning; +import org.openrefine.wikidata.qa.QAWarningStore; + import com.google.refine.model.Cell; import com.google.refine.model.ColumnModel; import com.google.refine.model.Row; - +/** + * A class holding all the necessary information about + * the context in which a schema expression is evaluated. + * + * @author antonin + * + */ public class ExpressionContext { private String baseIRI; private int rowId; private Row row; private ColumnModel columnModel; + private QAWarningStore warningStore; - public ExpressionContext(String baseIRI, int rowId, Row row, ColumnModel columnModel) { + /** + * Builds an expression context to evaluate a schema on a row + * @param baseIRI: the siteIRI of the schema + * @param rowId: the id of the row currently visited + * @param row: the row itself + * @param columnModel: lets us access cells by column name + * @param warningStore: where to store the issues encountered when + * evaluating (can be set to null if these issues should be ignored) + */ + public ExpressionContext( + String baseIRI, + int rowId, + Row row, + ColumnModel columnModel, + QAWarningStore warningStore) { this.baseIRI = baseIRI; this.rowId = rowId; this.row = row; this.columnModel = columnModel; + this.warningStore = warningStore; } public String getBaseIRI() { @@ -34,4 +59,10 @@ public class ExpressionContext { public int getRowId() { return rowId; } + + public void addWarning(QAWarning warning) { + if (warningStore != null) { + warningStore.addWarning(warning); + } + } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemDocumentExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemDocumentExpr.java index 1c9fcbbe7..ed79d9f72 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemDocumentExpr.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemDocumentExpr.java @@ -31,8 +31,12 @@ public class WbItemDocumentExpr extends JacksonJsonizable { ItemIdValue subjectId = getSubject().evaluate(ctxt); ItemUpdate update = new ItemUpdate(subjectId); for(WbStatementGroupExpr expr : getStatementGroups()) { - for(Statement s : expr.evaluate(ctxt, subjectId).getStatements()) { - update.addStatement(s); + try { + for(Statement s : expr.evaluate(ctxt, subjectId).getStatements()) { + update.addStatement(s); + } + } catch (SkipSchemaExpressionException e) { + continue; } } for(WbNameDescExpr expr : getNameDescs()) { diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbReferenceExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbReferenceExpr.java index c8ac630dd..22e3fffc7 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbReferenceExpr.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbReferenceExpr.java @@ -27,10 +27,18 @@ public class WbReferenceExpr extends JacksonJsonizable { List snakGroups = new ArrayList(); for (WbSnakExpr expr : getSnaks()) { List snakList = new ArrayList(1); - snakList.add(expr.evaluate(ctxt)); - snakGroups.add(Datamodel.makeSnakGroup(snakList)); + try { + snakList.add(expr.evaluate(ctxt)); + snakGroups.add(Datamodel.makeSnakGroup(snakList)); + } catch (SkipSchemaExpressionException e) { + continue; + } + } + if (! snakGroups.isEmpty()) { + return Datamodel.makeReference(snakGroups); + } else { + throw new SkipSchemaExpressionException(); } - return Datamodel.makeReference(snakGroups); } public List getSnaks() { diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementExpr.java index e3570ceb8..070e636a5 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementExpr.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementExpr.java @@ -10,6 +10,7 @@ import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.json.JSONWriter; +import org.openrefine.wikidata.qa.QAWarning; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; import org.openrefine.wikidata.utils.JacksonJsonizable; import org.wikidata.wdtk.datamodel.helpers.Datamodel; @@ -60,7 +61,18 @@ public class WbStatementExpr extends JacksonJsonizable { // evaluate qualifiers List qualifiers = new ArrayList(getQualifiers().size()); for (WbSnakExpr qExpr : getQualifiers()) { - qualifiers.add(qExpr.evaluate(ctxt)); + try { + qualifiers.add(qExpr.evaluate(ctxt)); + } catch(SkipSchemaExpressionException e) { + QAWarning warning = new QAWarning( + "ignored-qualifiers", + null, + QAWarning.Severity.INFO, + 1); + warning.setProperty("example_entity", subject); + warning.setProperty("example_property_entity", mainSnak.getPropertyId()); + ctxt.addWarning(warning); + } } List groupedQualifiers = groupSnaks(qualifiers); Claim claim = Datamodel.makeClaim(subject, mainSnak, groupedQualifiers); @@ -68,7 +80,18 @@ public class WbStatementExpr extends JacksonJsonizable { // evaluate references List references = new ArrayList(); for (WbReferenceExpr rExpr : getReferences()) { - references.add(rExpr.evaluate(ctxt)); + try { + references.add(rExpr.evaluate(ctxt)); + } catch(SkipSchemaExpressionException e) { + QAWarning warning = new QAWarning( + "ignored-references", + null, + QAWarning.Severity.INFO, + 1); + warning.setProperty("example_entity", subject); + warning.setProperty("example_property_entity", mainSnak.getPropertyId()); + ctxt.addWarning(warning); + } } StatementRank rank = StatementRank.NORMAL; diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementGroupExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementGroupExpr.java index afb5b0d1f..7f446ec90 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementGroupExpr.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementGroupExpr.java @@ -32,9 +32,17 @@ public class WbStatementGroupExpr extends JacksonJsonizable { PropertyIdValue propertyId = propertyExpr.evaluate(ctxt); List statements = new ArrayList(statementExprs.size()); for(WbStatementExpr expr : statementExprs) { - statements.add(expr.evaluate(ctxt, subject, propertyId)); + try { + statements.add(expr.evaluate(ctxt, subject, propertyId)); + } catch (SkipSchemaExpressionException e) { + continue; + } + } + if (!statements.isEmpty()) { + return Datamodel.makeStatementGroup(statements); + } else { + throw new SkipSchemaExpressionException(); } - return Datamodel.makeStatementGroup(statements); } public WbPropExpr getProperty() { diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WikibaseSchema.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WikibaseSchema.java index e0a9803e5..607f62abc 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WikibaseSchema.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WikibaseSchema.java @@ -26,6 +26,7 @@ import com.google.refine.model.Project; import com.google.refine.model.Row; import org.openrefine.wikidata.schema.WbItemDocumentExpr; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; +import org.openrefine.wikidata.qa.QAWarningStore; import org.openrefine.wikidata.schema.ExpressionContext; import org.openrefine.wikidata.utils.JacksonJsonizable; @@ -89,17 +90,41 @@ public class WikibaseSchema implements OverlayModel { return result; } - public List evaluate(Project project, Engine engine) { + /** + * Evaluates the schema on a project, returning a list of ItemUpdates + * generated by the schema. + * + * Some warnings will be emitted in the warning store: those are only + * the ones that are generated at evaluation time (such as invalid formats + * for dates). Issues detected on candidate statements (such as constraint + * violations) are not included at this stage. + * + * @param project: the project on which the schema should be evaluated + * @param engine: the engine, which gives access to the current facets + * @param warningStore: a store in which issues will be emitted + * @return item updates are stored in their + * generating order (not merged yet). + */ + public List evaluate(Project project, Engine engine, QAWarningStore warningStore) { List result = new ArrayList(); FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, new EvaluatingRowVisitor(result)); + filteredRows.accept(project, new EvaluatingRowVisitor(result, warningStore)); return result; } + /** + * Same as above, ignoring any warnings. + */ + public List evaluate(Project project, Engine engine) { + return evaluate(project, engine, null); + } + protected class EvaluatingRowVisitor implements RowVisitor { private List result; - public EvaluatingRowVisitor(List result) { + private QAWarningStore warningStore; + public EvaluatingRowVisitor(List result, QAWarningStore warningStore) { this.result = result; + this.warningStore = warningStore; } @Override @@ -109,7 +134,12 @@ public class WikibaseSchema implements OverlayModel { @Override public boolean visit(Project project, int rowIndex, Row row) { - ExpressionContext ctxt = new ExpressionContext(baseUri, rowIndex, row, project.columnModel); + ExpressionContext ctxt = new ExpressionContext( + baseUri, + rowIndex, + row, + project.columnModel, + warningStore); result.addAll(evaluateItemDocuments(ctxt)); return false; }