Add architecture to emit warnings during evaluation too

This commit is contained in:
Antonin Delpeuch 2018-01-11 11:34:00 +00:00
parent 2687741263
commit 30ce8680c5
10 changed files with 139 additions and 40 deletions

View File

@ -104,6 +104,14 @@
"no-issue-detected": {
"title": "No issue was detected in your edits.",
"body": "Note that OpenRefine cannot detect all the types of problems Wikidata edits can have."
},
"ignored-qualifiers": {
"title": "Some qualifiers were ignored.",
"body": "Qualifier values could not be parsed, so they will not be added to the corresponding statements."
},
"ignored-references": {
"title": "Some references were ignored.",
"body": "None of their statements could be parsed, so no reference was added."
}
}
}

View File

@ -53,6 +53,7 @@ import com.google.refine.commands.Command;
import org.openrefine.wikidata.exporters.QuickStatementsExporter;
import org.openrefine.wikidata.qa.EditInspector;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.qa.QAWarningStore;
import org.openrefine.wikidata.schema.ItemUpdate;
import org.openrefine.wikidata.schema.WikibaseSchema;
import com.google.refine.model.Project;
@ -72,10 +73,11 @@ public class PreviewWikibaseSchemaCommand extends Command {
String jsonString = request.getParameter("schema");
JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString);
WikibaseSchema schema = WikibaseSchema.reconstruct(json);
QAWarningStore warningStore = new QAWarningStore();
// Evaluate project
Engine engine = getEngine(request, project);
List<ItemUpdate> editBatch = schema.evaluate(project, engine);
List<ItemUpdate> editBatch = schema.evaluate(project, engine, warningStore);
StringWriter sb = new StringWriter(2048);
JSONWriter writer = new JSONWriter(sb, 32);
@ -85,11 +87,11 @@ public class PreviewWikibaseSchemaCommand extends Command {
StringWriter stringWriter = new StringWriter();
// Inspect the edits and generate warnings
EditInspector inspector = new EditInspector();
EditInspector inspector = new EditInspector(warningStore);
inspector.inspect(editBatch);
writer.key("warnings");
writer.array();
for (QAWarning warning : inspector.getWarnings()) {
for (QAWarning warning : warningStore.getWarnings()) {
warning.write(writer, new Properties());
}
writer.endArray();
@ -97,7 +99,7 @@ public class PreviewWikibaseSchemaCommand extends Command {
// this is not the length of the warnings array written before,
// but the total number of issues raised (before deduplication)
writer.key("nb_warnings");
writer.value(inspector.getTotalNumberOfWarnings());
writer.value(warningStore.getNbWarnings());
// Export to QuickStatements
QuickStatementsExporter exporter = new QuickStatementsExporter();

View File

@ -28,9 +28,9 @@ public class EditInspector {
private Map<String, EditScrutinizer> scrutinizers;
private QAWarningStore warningStore;
public EditInspector() {
scrutinizers = new HashMap<>();
warningStore = new QAWarningStore();
public EditInspector(QAWarningStore warningStore) {
this.scrutinizers = new HashMap<>();
this.warningStore = warningStore;
// Register all known scrutinizers here
register(new NewItemScrutinizer());
@ -72,19 +72,4 @@ public class EditInspector {
"no-issue-detected", null, QAWarning.Severity.INFO, 0));
}
}
/**
* Retrieve the warnings after inspection of the edits
* @return
*/
public List<QAWarning> getWarnings() {
return warningStore.getWarnings();
}
/**
* Retrieve the number of warnings before deduplication
*/
public int getTotalNumberOfWarnings() {
return warningStore.getNbWarnings();
}
}

View File

@ -46,7 +46,7 @@ public class QAWarningStore {
* Returns the list of aggregated warnings, ordered by decreasing severity
*/
@JsonProperty("warnings")
List<QAWarning> getWarnings() {
public List<QAWarning> getWarnings() {
List<QAWarning> result = new ArrayList<>(map.values());
Collections.sort(result);
return result;
@ -56,7 +56,7 @@ public class QAWarningStore {
* Returns the maximum severity of the stored warnings (INFO if empty)
*/
@JsonProperty("max_severity")
QAWarning.Severity getMaxSeverity() {
public QAWarning.Severity getMaxSeverity() {
return maxSeverity;
}
@ -64,7 +64,7 @@ public class QAWarningStore {
* Returns the total number of warnings
*/
@JsonProperty("nb_warnings")
int getNbWarnings() {
public int getNbWarnings() {
return totalWarnings;
}
}

View File

@ -1,21 +1,46 @@
package org.openrefine.wikidata.schema;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.qa.QAWarningStore;
import com.google.refine.model.Cell;
import com.google.refine.model.ColumnModel;
import com.google.refine.model.Row;
/**
* A class holding all the necessary information about
* the context in which a schema expression is evaluated.
*
* @author antonin
*
*/
public class ExpressionContext {
private String baseIRI;
private int rowId;
private Row row;
private ColumnModel columnModel;
private QAWarningStore warningStore;
public ExpressionContext(String baseIRI, int rowId, Row row, ColumnModel columnModel) {
/**
* Builds an expression context to evaluate a schema on a row
* @param baseIRI: the siteIRI of the schema
* @param rowId: the id of the row currently visited
* @param row: the row itself
* @param columnModel: lets us access cells by column name
* @param warningStore: where to store the issues encountered when
* evaluating (can be set to null if these issues should be ignored)
*/
public ExpressionContext(
String baseIRI,
int rowId,
Row row,
ColumnModel columnModel,
QAWarningStore warningStore) {
this.baseIRI = baseIRI;
this.rowId = rowId;
this.row = row;
this.columnModel = columnModel;
this.warningStore = warningStore;
}
public String getBaseIRI() {
@ -34,4 +59,10 @@ public class ExpressionContext {
public int getRowId() {
return rowId;
}
public void addWarning(QAWarning warning) {
if (warningStore != null) {
warningStore.addWarning(warning);
}
}
}

View File

@ -31,8 +31,12 @@ public class WbItemDocumentExpr extends JacksonJsonizable {
ItemIdValue subjectId = getSubject().evaluate(ctxt);
ItemUpdate update = new ItemUpdate(subjectId);
for(WbStatementGroupExpr expr : getStatementGroups()) {
for(Statement s : expr.evaluate(ctxt, subjectId).getStatements()) {
update.addStatement(s);
try {
for(Statement s : expr.evaluate(ctxt, subjectId).getStatements()) {
update.addStatement(s);
}
} catch (SkipSchemaExpressionException e) {
continue;
}
}
for(WbNameDescExpr expr : getNameDescs()) {

View File

@ -27,10 +27,18 @@ public class WbReferenceExpr extends JacksonJsonizable {
List<SnakGroup> snakGroups = new ArrayList<SnakGroup>();
for (WbSnakExpr expr : getSnaks()) {
List<Snak> snakList = new ArrayList<Snak>(1);
snakList.add(expr.evaluate(ctxt));
snakGroups.add(Datamodel.makeSnakGroup(snakList));
try {
snakList.add(expr.evaluate(ctxt));
snakGroups.add(Datamodel.makeSnakGroup(snakList));
} catch (SkipSchemaExpressionException e) {
continue;
}
}
if (! snakGroups.isEmpty()) {
return Datamodel.makeReference(snakGroups);
} else {
throw new SkipSchemaExpressionException();
}
return Datamodel.makeReference(snakGroups);
}
public List<WbSnakExpr> getSnaks() {

View File

@ -10,6 +10,7 @@ import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.openrefine.wikidata.utils.JacksonJsonizable;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
@ -60,7 +61,18 @@ public class WbStatementExpr extends JacksonJsonizable {
// evaluate qualifiers
List<Snak> qualifiers = new ArrayList<Snak>(getQualifiers().size());
for (WbSnakExpr qExpr : getQualifiers()) {
qualifiers.add(qExpr.evaluate(ctxt));
try {
qualifiers.add(qExpr.evaluate(ctxt));
} catch(SkipSchemaExpressionException e) {
QAWarning warning = new QAWarning(
"ignored-qualifiers",
null,
QAWarning.Severity.INFO,
1);
warning.setProperty("example_entity", subject);
warning.setProperty("example_property_entity", mainSnak.getPropertyId());
ctxt.addWarning(warning);
}
}
List<SnakGroup> groupedQualifiers = groupSnaks(qualifiers);
Claim claim = Datamodel.makeClaim(subject, mainSnak, groupedQualifiers);
@ -68,7 +80,18 @@ public class WbStatementExpr extends JacksonJsonizable {
// evaluate references
List<Reference> references = new ArrayList<Reference>();
for (WbReferenceExpr rExpr : getReferences()) {
references.add(rExpr.evaluate(ctxt));
try {
references.add(rExpr.evaluate(ctxt));
} catch(SkipSchemaExpressionException e) {
QAWarning warning = new QAWarning(
"ignored-references",
null,
QAWarning.Severity.INFO,
1);
warning.setProperty("example_entity", subject);
warning.setProperty("example_property_entity", mainSnak.getPropertyId());
ctxt.addWarning(warning);
}
}
StatementRank rank = StatementRank.NORMAL;

View File

@ -32,9 +32,17 @@ public class WbStatementGroupExpr extends JacksonJsonizable {
PropertyIdValue propertyId = propertyExpr.evaluate(ctxt);
List<Statement> statements = new ArrayList<Statement>(statementExprs.size());
for(WbStatementExpr expr : statementExprs) {
statements.add(expr.evaluate(ctxt, subject, propertyId));
try {
statements.add(expr.evaluate(ctxt, subject, propertyId));
} catch (SkipSchemaExpressionException e) {
continue;
}
}
if (!statements.isEmpty()) {
return Datamodel.makeStatementGroup(statements);
} else {
throw new SkipSchemaExpressionException();
}
return Datamodel.makeStatementGroup(statements);
}
public WbPropExpr getProperty() {

View File

@ -26,6 +26,7 @@ import com.google.refine.model.Project;
import com.google.refine.model.Row;
import org.openrefine.wikidata.schema.WbItemDocumentExpr;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.openrefine.wikidata.qa.QAWarningStore;
import org.openrefine.wikidata.schema.ExpressionContext;
import org.openrefine.wikidata.utils.JacksonJsonizable;
@ -89,17 +90,41 @@ public class WikibaseSchema implements OverlayModel {
return result;
}
public List<ItemUpdate> evaluate(Project project, Engine engine) {
/**
* Evaluates the schema on a project, returning a list of ItemUpdates
* generated by the schema.
*
* Some warnings will be emitted in the warning store: those are only
* the ones that are generated at evaluation time (such as invalid formats
* for dates). Issues detected on candidate statements (such as constraint
* violations) are not included at this stage.
*
* @param project: the project on which the schema should be evaluated
* @param engine: the engine, which gives access to the current facets
* @param warningStore: a store in which issues will be emitted
* @return item updates are stored in their
* generating order (not merged yet).
*/
public List<ItemUpdate> evaluate(Project project, Engine engine, QAWarningStore warningStore) {
List<ItemUpdate> result = new ArrayList<ItemUpdate>();
FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(project, new EvaluatingRowVisitor(result));
filteredRows.accept(project, new EvaluatingRowVisitor(result, warningStore));
return result;
}
/**
* Same as above, ignoring any warnings.
*/
public List<ItemUpdate> evaluate(Project project, Engine engine) {
return evaluate(project, engine, null);
}
protected class EvaluatingRowVisitor implements RowVisitor {
private List<ItemUpdate> result;
public EvaluatingRowVisitor(List<ItemUpdate> result) {
private QAWarningStore warningStore;
public EvaluatingRowVisitor(List<ItemUpdate> result, QAWarningStore warningStore) {
this.result = result;
this.warningStore = warningStore;
}
@Override
@ -109,7 +134,12 @@ public class WikibaseSchema implements OverlayModel {
@Override
public boolean visit(Project project, int rowIndex, Row row) {
ExpressionContext ctxt = new ExpressionContext(baseUri, rowIndex, row, project.columnModel);
ExpressionContext ctxt = new ExpressionContext(
baseUri,
rowIndex,
row,
project.columnModel,
warningStore);
result.addAll(evaluateItemDocuments(ctxt));
return false;
}