Add architecture to emit warnings during evaluation too

This commit is contained in:
Antonin Delpeuch 2018-01-11 11:34:00 +00:00
parent 2687741263
commit 30ce8680c5
10 changed files with 139 additions and 40 deletions

View File

@ -104,6 +104,14 @@
"no-issue-detected": { "no-issue-detected": {
"title": "No issue was detected in your edits.", "title": "No issue was detected in your edits.",
"body": "Note that OpenRefine cannot detect all the types of problems Wikidata edits can have." "body": "Note that OpenRefine cannot detect all the types of problems Wikidata edits can have."
},
"ignored-qualifiers": {
"title": "Some qualifiers were ignored.",
"body": "Qualifier values could not be parsed, so they will not be added to the corresponding statements."
},
"ignored-references": {
"title": "Some references were ignored.",
"body": "None of their statements could be parsed, so no reference was added."
} }
} }
} }

View File

@ -53,6 +53,7 @@ import com.google.refine.commands.Command;
import org.openrefine.wikidata.exporters.QuickStatementsExporter; import org.openrefine.wikidata.exporters.QuickStatementsExporter;
import org.openrefine.wikidata.qa.EditInspector; import org.openrefine.wikidata.qa.EditInspector;
import org.openrefine.wikidata.qa.QAWarning; import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.qa.QAWarningStore;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.schema.ItemUpdate;
import org.openrefine.wikidata.schema.WikibaseSchema; import org.openrefine.wikidata.schema.WikibaseSchema;
import com.google.refine.model.Project; import com.google.refine.model.Project;
@ -72,10 +73,11 @@ public class PreviewWikibaseSchemaCommand extends Command {
String jsonString = request.getParameter("schema"); String jsonString = request.getParameter("schema");
JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString); JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString);
WikibaseSchema schema = WikibaseSchema.reconstruct(json); WikibaseSchema schema = WikibaseSchema.reconstruct(json);
QAWarningStore warningStore = new QAWarningStore();
// Evaluate project // Evaluate project
Engine engine = getEngine(request, project); Engine engine = getEngine(request, project);
List<ItemUpdate> editBatch = schema.evaluate(project, engine); List<ItemUpdate> editBatch = schema.evaluate(project, engine, warningStore);
StringWriter sb = new StringWriter(2048); StringWriter sb = new StringWriter(2048);
JSONWriter writer = new JSONWriter(sb, 32); JSONWriter writer = new JSONWriter(sb, 32);
@ -85,11 +87,11 @@ public class PreviewWikibaseSchemaCommand extends Command {
StringWriter stringWriter = new StringWriter(); StringWriter stringWriter = new StringWriter();
// Inspect the edits and generate warnings // Inspect the edits and generate warnings
EditInspector inspector = new EditInspector(); EditInspector inspector = new EditInspector(warningStore);
inspector.inspect(editBatch); inspector.inspect(editBatch);
writer.key("warnings"); writer.key("warnings");
writer.array(); writer.array();
for (QAWarning warning : inspector.getWarnings()) { for (QAWarning warning : warningStore.getWarnings()) {
warning.write(writer, new Properties()); warning.write(writer, new Properties());
} }
writer.endArray(); writer.endArray();
@ -97,7 +99,7 @@ public class PreviewWikibaseSchemaCommand extends Command {
// this is not the length of the warnings array written before, // this is not the length of the warnings array written before,
// but the total number of issues raised (before deduplication) // but the total number of issues raised (before deduplication)
writer.key("nb_warnings"); writer.key("nb_warnings");
writer.value(inspector.getTotalNumberOfWarnings()); writer.value(warningStore.getNbWarnings());
// Export to QuickStatements // Export to QuickStatements
QuickStatementsExporter exporter = new QuickStatementsExporter(); QuickStatementsExporter exporter = new QuickStatementsExporter();

View File

@ -28,9 +28,9 @@ public class EditInspector {
private Map<String, EditScrutinizer> scrutinizers; private Map<String, EditScrutinizer> scrutinizers;
private QAWarningStore warningStore; private QAWarningStore warningStore;
public EditInspector() { public EditInspector(QAWarningStore warningStore) {
scrutinizers = new HashMap<>(); this.scrutinizers = new HashMap<>();
warningStore = new QAWarningStore(); this.warningStore = warningStore;
// Register all known scrutinizers here // Register all known scrutinizers here
register(new NewItemScrutinizer()); register(new NewItemScrutinizer());
@ -72,19 +72,4 @@ public class EditInspector {
"no-issue-detected", null, QAWarning.Severity.INFO, 0)); "no-issue-detected", null, QAWarning.Severity.INFO, 0));
} }
} }
/**
* Retrieve the warnings after inspection of the edits
* @return
*/
public List<QAWarning> getWarnings() {
return warningStore.getWarnings();
}
/**
* Retrieve the number of warnings before deduplication
*/
public int getTotalNumberOfWarnings() {
return warningStore.getNbWarnings();
}
} }

View File

@ -46,7 +46,7 @@ public class QAWarningStore {
* Returns the list of aggregated warnings, ordered by decreasing severity * Returns the list of aggregated warnings, ordered by decreasing severity
*/ */
@JsonProperty("warnings") @JsonProperty("warnings")
List<QAWarning> getWarnings() { public List<QAWarning> getWarnings() {
List<QAWarning> result = new ArrayList<>(map.values()); List<QAWarning> result = new ArrayList<>(map.values());
Collections.sort(result); Collections.sort(result);
return result; return result;
@ -56,7 +56,7 @@ public class QAWarningStore {
* Returns the maximum severity of the stored warnings (INFO if empty) * Returns the maximum severity of the stored warnings (INFO if empty)
*/ */
@JsonProperty("max_severity") @JsonProperty("max_severity")
QAWarning.Severity getMaxSeverity() { public QAWarning.Severity getMaxSeverity() {
return maxSeverity; return maxSeverity;
} }
@ -64,7 +64,7 @@ public class QAWarningStore {
* Returns the total number of warnings * Returns the total number of warnings
*/ */
@JsonProperty("nb_warnings") @JsonProperty("nb_warnings")
int getNbWarnings() { public int getNbWarnings() {
return totalWarnings; return totalWarnings;
} }
} }

View File

@ -1,21 +1,46 @@
package org.openrefine.wikidata.schema; package org.openrefine.wikidata.schema;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.qa.QAWarningStore;
import com.google.refine.model.Cell; import com.google.refine.model.Cell;
import com.google.refine.model.ColumnModel; import com.google.refine.model.ColumnModel;
import com.google.refine.model.Row; import com.google.refine.model.Row;
/**
* A class holding all the necessary information about
* the context in which a schema expression is evaluated.
*
* @author antonin
*
*/
public class ExpressionContext { public class ExpressionContext {
private String baseIRI; private String baseIRI;
private int rowId; private int rowId;
private Row row; private Row row;
private ColumnModel columnModel; private ColumnModel columnModel;
private QAWarningStore warningStore;
public ExpressionContext(String baseIRI, int rowId, Row row, ColumnModel columnModel) { /**
* Builds an expression context to evaluate a schema on a row
* @param baseIRI: the siteIRI of the schema
* @param rowId: the id of the row currently visited
* @param row: the row itself
* @param columnModel: lets us access cells by column name
* @param warningStore: where to store the issues encountered when
* evaluating (can be set to null if these issues should be ignored)
*/
public ExpressionContext(
String baseIRI,
int rowId,
Row row,
ColumnModel columnModel,
QAWarningStore warningStore) {
this.baseIRI = baseIRI; this.baseIRI = baseIRI;
this.rowId = rowId; this.rowId = rowId;
this.row = row; this.row = row;
this.columnModel = columnModel; this.columnModel = columnModel;
this.warningStore = warningStore;
} }
public String getBaseIRI() { public String getBaseIRI() {
@ -34,4 +59,10 @@ public class ExpressionContext {
public int getRowId() { public int getRowId() {
return rowId; return rowId;
} }
/**
 * Records an issue raised while evaluating a schema expression.
 * The warning is dropped when this context has no warning store.
 *
 * @param warning the issue to hand over to the warning store
 */
public void addWarning(QAWarning warning) {
    if (warningStore == null) {
        // No store attached to this context: the issue is discarded.
        return;
    }
    warningStore.addWarning(warning);
}
} }

View File

@ -31,8 +31,12 @@ public class WbItemDocumentExpr extends JacksonJsonizable {
ItemIdValue subjectId = getSubject().evaluate(ctxt); ItemIdValue subjectId = getSubject().evaluate(ctxt);
ItemUpdate update = new ItemUpdate(subjectId); ItemUpdate update = new ItemUpdate(subjectId);
for(WbStatementGroupExpr expr : getStatementGroups()) { for(WbStatementGroupExpr expr : getStatementGroups()) {
for(Statement s : expr.evaluate(ctxt, subjectId).getStatements()) { try {
update.addStatement(s); for(Statement s : expr.evaluate(ctxt, subjectId).getStatements()) {
update.addStatement(s);
}
} catch (SkipSchemaExpressionException e) {
continue;
} }
} }
for(WbNameDescExpr expr : getNameDescs()) { for(WbNameDescExpr expr : getNameDescs()) {

View File

@ -27,10 +27,18 @@ public class WbReferenceExpr extends JacksonJsonizable {
List<SnakGroup> snakGroups = new ArrayList<SnakGroup>(); List<SnakGroup> snakGroups = new ArrayList<SnakGroup>();
for (WbSnakExpr expr : getSnaks()) { for (WbSnakExpr expr : getSnaks()) {
List<Snak> snakList = new ArrayList<Snak>(1); List<Snak> snakList = new ArrayList<Snak>(1);
snakList.add(expr.evaluate(ctxt)); try {
snakGroups.add(Datamodel.makeSnakGroup(snakList)); snakList.add(expr.evaluate(ctxt));
snakGroups.add(Datamodel.makeSnakGroup(snakList));
} catch (SkipSchemaExpressionException e) {
continue;
}
}
if (! snakGroups.isEmpty()) {
return Datamodel.makeReference(snakGroups);
} else {
throw new SkipSchemaExpressionException();
} }
return Datamodel.makeReference(snakGroups);
} }
public List<WbSnakExpr> getSnaks() { public List<WbSnakExpr> getSnaks() {

View File

@ -10,6 +10,7 @@ import org.json.JSONArray;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.json.JSONWriter; import org.json.JSONWriter;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.openrefine.wikidata.utils.JacksonJsonizable; import org.openrefine.wikidata.utils.JacksonJsonizable;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
@ -60,7 +61,18 @@ public class WbStatementExpr extends JacksonJsonizable {
// evaluate qualifiers // evaluate qualifiers
List<Snak> qualifiers = new ArrayList<Snak>(getQualifiers().size()); List<Snak> qualifiers = new ArrayList<Snak>(getQualifiers().size());
for (WbSnakExpr qExpr : getQualifiers()) { for (WbSnakExpr qExpr : getQualifiers()) {
qualifiers.add(qExpr.evaluate(ctxt)); try {
qualifiers.add(qExpr.evaluate(ctxt));
} catch(SkipSchemaExpressionException e) {
QAWarning warning = new QAWarning(
"ignored-qualifiers",
null,
QAWarning.Severity.INFO,
1);
warning.setProperty("example_entity", subject);
warning.setProperty("example_property_entity", mainSnak.getPropertyId());
ctxt.addWarning(warning);
}
} }
List<SnakGroup> groupedQualifiers = groupSnaks(qualifiers); List<SnakGroup> groupedQualifiers = groupSnaks(qualifiers);
Claim claim = Datamodel.makeClaim(subject, mainSnak, groupedQualifiers); Claim claim = Datamodel.makeClaim(subject, mainSnak, groupedQualifiers);
@ -68,7 +80,18 @@ public class WbStatementExpr extends JacksonJsonizable {
// evaluate references // evaluate references
List<Reference> references = new ArrayList<Reference>(); List<Reference> references = new ArrayList<Reference>();
for (WbReferenceExpr rExpr : getReferences()) { for (WbReferenceExpr rExpr : getReferences()) {
references.add(rExpr.evaluate(ctxt)); try {
references.add(rExpr.evaluate(ctxt));
} catch(SkipSchemaExpressionException e) {
QAWarning warning = new QAWarning(
"ignored-references",
null,
QAWarning.Severity.INFO,
1);
warning.setProperty("example_entity", subject);
warning.setProperty("example_property_entity", mainSnak.getPropertyId());
ctxt.addWarning(warning);
}
} }
StatementRank rank = StatementRank.NORMAL; StatementRank rank = StatementRank.NORMAL;

View File

@ -32,9 +32,17 @@ public class WbStatementGroupExpr extends JacksonJsonizable {
PropertyIdValue propertyId = propertyExpr.evaluate(ctxt); PropertyIdValue propertyId = propertyExpr.evaluate(ctxt);
List<Statement> statements = new ArrayList<Statement>(statementExprs.size()); List<Statement> statements = new ArrayList<Statement>(statementExprs.size());
for(WbStatementExpr expr : statementExprs) { for(WbStatementExpr expr : statementExprs) {
statements.add(expr.evaluate(ctxt, subject, propertyId)); try {
statements.add(expr.evaluate(ctxt, subject, propertyId));
} catch (SkipSchemaExpressionException e) {
continue;
}
}
if (!statements.isEmpty()) {
return Datamodel.makeStatementGroup(statements);
} else {
throw new SkipSchemaExpressionException();
} }
return Datamodel.makeStatementGroup(statements);
} }
public WbPropExpr getProperty() { public WbPropExpr getProperty() {

View File

@ -26,6 +26,7 @@ import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
import org.openrefine.wikidata.schema.WbItemDocumentExpr; import org.openrefine.wikidata.schema.WbItemDocumentExpr;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException; import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.openrefine.wikidata.qa.QAWarningStore;
import org.openrefine.wikidata.schema.ExpressionContext; import org.openrefine.wikidata.schema.ExpressionContext;
import org.openrefine.wikidata.utils.JacksonJsonizable; import org.openrefine.wikidata.utils.JacksonJsonizable;
@ -89,17 +90,41 @@ public class WikibaseSchema implements OverlayModel {
return result; return result;
} }
public List<ItemUpdate> evaluate(Project project, Engine engine) { /**
* Evaluates the schema on a project, returning a list of ItemUpdates
* generated by the schema.
*
* Some warnings will be emitted in the warning store: those are only
* the ones that are generated at evaluation time (such as invalid formats
* for dates). Issues detected on candidate statements (such as constraint
* violations) are not included at this stage.
*
* @param project: the project on which the schema should be evaluated
* @param engine: the engine, which gives access to the current facets
* @param warningStore: a store in which issues will be emitted
* @return the item updates, in the order in which they were
* generated (not merged yet).
*/
public List<ItemUpdate> evaluate(Project project, Engine engine, QAWarningStore warningStore) {
List<ItemUpdate> result = new ArrayList<ItemUpdate>(); List<ItemUpdate> result = new ArrayList<ItemUpdate>();
FilteredRows filteredRows = engine.getAllFilteredRows(); FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(project, new EvaluatingRowVisitor(result)); filteredRows.accept(project, new EvaluatingRowVisitor(result, warningStore));
return result; return result;
} }
/**
 * Evaluates the schema on a project without recording any warnings:
 * delegates to {@link #evaluate(Project, Engine, QAWarningStore)}
 * with a null warning store.
 *
 * @param project the project on which the schema should be evaluated
 * @param engine the engine handed over to the three-argument overload
 * @return the list returned by the three-argument overload
 */
public List<ItemUpdate> evaluate(Project project, Engine engine) {
    // A null store means that issues met during evaluation are ignored.
    QAWarningStore ignoredWarnings = null;
    return evaluate(project, engine, ignoredWarnings);
}
protected class EvaluatingRowVisitor implements RowVisitor { protected class EvaluatingRowVisitor implements RowVisitor {
private List<ItemUpdate> result; private List<ItemUpdate> result;
public EvaluatingRowVisitor(List<ItemUpdate> result) { private QAWarningStore warningStore;
public EvaluatingRowVisitor(List<ItemUpdate> result, QAWarningStore warningStore) {
this.result = result; this.result = result;
this.warningStore = warningStore;
} }
@Override @Override
@ -109,7 +134,12 @@ public class WikibaseSchema implements OverlayModel {
@Override @Override
public boolean visit(Project project, int rowIndex, Row row) { public boolean visit(Project project, int rowIndex, Row row) {
ExpressionContext ctxt = new ExpressionContext(baseUri, rowIndex, row, project.columnModel); ExpressionContext ctxt = new ExpressionContext(
baseUri,
rowIndex,
row,
project.columnModel,
warningStore);
result.addAll(evaluateItemDocuments(ctxt)); result.addAll(evaluateItemDocuments(ctxt));
return false; return false;
} }