Foundations of the edit inspector backend
This commit is contained in:
parent
38176189e4
commit
990a404c76
@ -37,6 +37,8 @@ import java.io.IOException;
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.StringReader;
|
||||
import java.io.StringWriter;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
@ -49,6 +51,9 @@ import com.google.refine.browsing.Engine;
|
||||
import com.google.refine.commands.Command;
|
||||
|
||||
import org.openrefine.wikidata.exporters.QuickStatementsExporter;
|
||||
import org.openrefine.wikidata.qa.EditInspector;
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.openrefine.wikidata.schema.WikibaseSchema;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
@ -68,20 +73,36 @@ public class PreviewWikibaseSchemaCommand extends Command {
|
||||
JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString);
|
||||
WikibaseSchema schema = WikibaseSchema.reconstruct(json);
|
||||
|
||||
// Evaluate project
|
||||
Engine engine = getEngine(request, project);
|
||||
List<ItemUpdate> editBatch = schema.evaluate(project, engine);
|
||||
|
||||
StringWriter sb = new StringWriter(2048);
|
||||
JSONWriter writer = new JSONWriter(sb, 32);
|
||||
writer.object();
|
||||
|
||||
{
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
QuickStatementsExporter exporter = new QuickStatementsExporter();
|
||||
Engine engine = getEngine(request, project);
|
||||
exporter.translateSchema(project, engine, schema, stringWriter);
|
||||
|
||||
// Inspect the edits and generate warnings
|
||||
EditInspector inspector = new EditInspector();
|
||||
inspector.inspect(editBatch);
|
||||
writer.key("warnings");
|
||||
writer.array();
|
||||
for (QAWarning warning : inspector.getWarnings()) {
|
||||
warning.write(writer, new Properties());
|
||||
}
|
||||
writer.endArray();
|
||||
|
||||
// Export to QuickStatements
|
||||
QuickStatementsExporter exporter = new QuickStatementsExporter();
|
||||
exporter.translateItemList(editBatch, stringWriter);
|
||||
|
||||
String fullQS = stringWriter.toString();
|
||||
stringWriter = new StringWriter();
|
||||
LineNumberReader reader = new LineNumberReader(new StringReader(fullQS));
|
||||
|
||||
// Only keep the first 50 lines
|
||||
int maxQSLinesForPreview = 50;
|
||||
reader.setLineNumber(0);
|
||||
String line = reader.readLine();
|
||||
|
@ -42,7 +42,6 @@ public class QuickStatementsExporter implements WriterExporter {
|
||||
return "text";
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void export(Project project, Properties options, Engine engine, Writer writer)
|
||||
throws IOException {
|
||||
@ -53,9 +52,21 @@ public class QuickStatementsExporter implements WriterExporter {
|
||||
translateSchema(project, engine, schema, writer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Exports a project and a schema to a QuickStatements file
|
||||
* @param project: the project to translate
|
||||
* @param engine: the engine used for evaluation of the edits
|
||||
* @param schema: the WikibaseSchema used for translation of tabular data to edits
|
||||
* @param writer: the writer to which the QS should be written
|
||||
* @throws IOException
|
||||
*/
|
||||
public void translateSchema(Project project, Engine engine, WikibaseSchema schema, Writer writer) throws IOException {
|
||||
List<ItemUpdate> items = schema.evaluate(project, engine);
|
||||
for (ItemUpdate item : items) {
|
||||
translateItemList(items, writer);
|
||||
}
|
||||
|
||||
public void translateItemList(List<ItemUpdate> editBatch, Writer writer) throws IOException {
|
||||
for (ItemUpdate item : editBatch) {
|
||||
translateItem(item, writer);
|
||||
}
|
||||
}
|
||||
@ -73,7 +84,7 @@ public class QuickStatementsExporter implements WriterExporter {
|
||||
|
||||
protected void translateItem(ItemUpdate item, Writer writer) throws IOException {
|
||||
String qid = item.getItemId().getId();
|
||||
if (item.getItemId().getId() == "Q0") {
|
||||
if (item.isNew()) {
|
||||
writer.write("CREATE\n");
|
||||
qid = "LAST";
|
||||
item.normalizeLabelsAndAliases();
|
||||
|
@ -0,0 +1,57 @@
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
|
||||
/**
|
||||
* Runs a collection of edit scrutinizers on an edit batch
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
public class EditInspector {
|
||||
private Map<String, EditScrutinizer> scrutinizers;
|
||||
private QAWarningStore warningStore;
|
||||
|
||||
public EditInspector() {
|
||||
scrutinizers = new HashMap<>();
|
||||
warningStore = new QAWarningStore();
|
||||
|
||||
// Register all known scrutinizers here
|
||||
register(new NewItemScrutinizer());
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a new scrutinizer to the inspector
|
||||
* @param scrutinizer
|
||||
*/
|
||||
public void register(EditScrutinizer scrutinizer) {
|
||||
String key = scrutinizer.getClass().getName();
|
||||
scrutinizers.put(key, scrutinizer);
|
||||
scrutinizer.setStore(warningStore);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Inspect a batch of edits with the registered scrutinizers
|
||||
* @param editBatch
|
||||
*/
|
||||
public void inspect(List<ItemUpdate> editBatch) {
|
||||
for(EditScrutinizer scrutinizer : scrutinizers.values()) {
|
||||
scrutinizer.scrutinize(editBatch);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the warnings after inspection of the edits
|
||||
* @return
|
||||
*/
|
||||
public List<QAWarning> getWarnings() {
|
||||
return warningStore.getWarnings();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import org.openrefine.wikidata.utils.JacksonJsonizable;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
/**
|
||||
* A class to represent a QA warning emited by the Wikidata schema
|
||||
* This could probably be reused at a broader scale, for instance for
|
||||
* Data Package validation.
|
||||
*
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning> {
|
||||
|
||||
public enum Severity {
|
||||
INFO, // We just report something to the user but it is probably fine
|
||||
WARNING, // Edits that look wrong but in some cases they are actually fine
|
||||
IMPORTANT, // There is almost surely something wrong about the edit but in rare cases we might want to allow it
|
||||
CRITICAL, // We should never edit if there is a critical issue
|
||||
}
|
||||
|
||||
/// The type of QA warning emitted
|
||||
private String type;
|
||||
// The key for aggregation of other QA warnings together - this specializes the id
|
||||
private String bucketId;
|
||||
// The severity of the issue
|
||||
private Severity severity;
|
||||
// The number of times this issue was found
|
||||
private int count;
|
||||
|
||||
@JsonCreator
|
||||
public QAWarning(
|
||||
@JsonProperty("type") String type,
|
||||
@JsonProperty("bucket_id") String bucketId,
|
||||
@JsonProperty("severity") Severity severity,
|
||||
@JsonProperty("count") int count) {
|
||||
this.type = type;
|
||||
this.bucketId = bucketId;
|
||||
this.severity = severity;
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the full key for aggregation of QA warnings
|
||||
* @return
|
||||
*/
|
||||
public String getAggregationId() {
|
||||
if (this.bucketId != null) {
|
||||
return this.type + "_" + this.bucketId;
|
||||
} else {
|
||||
return this.type;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregates another QA warning of the same aggregation id.
|
||||
* @param other
|
||||
*/
|
||||
public void aggregate(QAWarning other) {
|
||||
assert other.getAggregationId() == getAggregationId();
|
||||
this.count += other.getCount();
|
||||
if(this.severity.compareTo(other.getSeverity()) < 0) {
|
||||
this.severity = other.getSeverity();
|
||||
}
|
||||
}
|
||||
|
||||
@JsonProperty("type")
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
@JsonProperty("bucketId")
|
||||
public String getBucketId() {
|
||||
return bucketId;
|
||||
}
|
||||
|
||||
@JsonProperty("severity")
|
||||
public Severity getSeverity() {
|
||||
return severity;
|
||||
}
|
||||
|
||||
@JsonProperty("count")
|
||||
public int getCount() {
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Warnings are sorted by decreasing severity.
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(QAWarning other) {
|
||||
return - severity.compareTo(other.getSeverity());
|
||||
}
|
||||
}
|
@ -0,0 +1,60 @@
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
/**
|
||||
* A store for QA warnings which aggregates them by type.
|
||||
* @author antonin
|
||||
*/
|
||||
public class QAWarningStore {
|
||||
|
||||
private Map<String, QAWarning> map;
|
||||
private QAWarning.Severity maxSeverity;
|
||||
|
||||
public QAWarningStore() {
|
||||
this.map = new HashMap<>();
|
||||
this.maxSeverity = QAWarning.Severity.INFO;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores a warning, aggregating it with any existing
|
||||
* @param warning
|
||||
*/
|
||||
public void addWarning(QAWarning warning) {
|
||||
String aggregationKey = warning.getAggregationId();
|
||||
QAWarning.Severity severity = warning.getSeverity();
|
||||
if (severity.compareTo(maxSeverity) > 0) {
|
||||
maxSeverity = severity;
|
||||
}
|
||||
if (map.containsKey(aggregationKey)) {
|
||||
QAWarning existing = map.get(aggregationKey);
|
||||
existing.aggregate(warning);
|
||||
} else {
|
||||
map.put(aggregationKey, warning);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the list of aggregated warnings, ordered by decreasing severity
|
||||
*/
|
||||
@JsonProperty("warnings")
|
||||
List<QAWarning> getWarnings() {
|
||||
List<QAWarning> result = new ArrayList<>(map.values());
|
||||
Collections.sort(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum severity of the stored warnings (INFO if empty)
|
||||
*/
|
||||
@JsonProperty("max_severity")
|
||||
QAWarning.Severity getMaxSeverity() {
|
||||
return maxSeverity;
|
||||
}
|
||||
}
|
@ -0,0 +1,59 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.qa.QAWarningStore;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
|
||||
/**
|
||||
* Interface for any class that
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
public abstract class EditScrutinizer {
|
||||
|
||||
private QAWarningStore store;
|
||||
|
||||
public void setStore(QAWarningStore store) {
|
||||
this.store = store;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the candidate edits and emits warnings in the store
|
||||
* @param edit: the list of ItemUpdates to scrutinize
|
||||
*/
|
||||
public abstract void scrutinize(List<ItemUpdate> edit);
|
||||
|
||||
/**
|
||||
* Helper to be used by subclasses to emit INFO warnings
|
||||
* @param warning
|
||||
*/
|
||||
protected void info(String type) {
|
||||
store.addWarning(new QAWarning(type, null, QAWarning.Severity.INFO, 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to be used by subclasses to emit warnings
|
||||
* @param warning
|
||||
*/
|
||||
protected void warning(String type) {
|
||||
store.addWarning(new QAWarning(type, null, QAWarning.Severity.WARNING, 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to be used by subclasses to emit important warnings
|
||||
* @param warning
|
||||
*/
|
||||
protected void important(String type) {
|
||||
store.addWarning(new QAWarning(type, null, QAWarning.Severity.IMPORTANT, 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to be used by subclasses to emit critical warnings
|
||||
* @param warning
|
||||
*/
|
||||
protected void critical(String type) {
|
||||
store.addWarning(new QAWarning(type, null, QAWarning.Severity.CRITICAL, 1));
|
||||
}
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
|
||||
public abstract class ItemEditScrutinizer extends EditScrutinizer {
|
||||
|
||||
@Override
|
||||
public void scrutinize(List<ItemUpdate> edit) {
|
||||
for(ItemUpdate update : edit) {
|
||||
if(!update.isNull()) {
|
||||
scrutinize(update);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public abstract void scrutinize(ItemUpdate update);
|
||||
}
|
@ -0,0 +1,44 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
|
||||
/**
|
||||
* A scrutinizer that inspects new items
|
||||
* @author antonin
|
||||
*/
|
||||
public class NewItemScrutinizer extends ItemEditScrutinizer {
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update) {
|
||||
if (update.isNew()) {
|
||||
info("new-item-created");
|
||||
|
||||
if (update.getLabels().isEmpty() && update.getAliases().isEmpty()) {
|
||||
important("new-item-without-labels-or-aliases");
|
||||
}
|
||||
|
||||
if (update.getDescriptions().isEmpty()) {
|
||||
warning("new-item-without-descriptions");
|
||||
}
|
||||
|
||||
if (! update.getDeletedStatements().isEmpty()) {
|
||||
warning("new-item-with-deleted-statements");
|
||||
}
|
||||
|
||||
// Try to find a "instance of" or "subclass of" claim
|
||||
boolean typeFound = false;
|
||||
for(StatementGroup group : update.getAddedStatementGroups()) {
|
||||
String pid = group.getProperty().getId();
|
||||
if ("P31".equals(pid) || "P279".equals(pid)) {
|
||||
typeFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!typeFound) {
|
||||
warning("new-item-without-P31-or-P279");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,53 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Reference;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
|
||||
public abstract class SnakScrutinizer extends StatementScrutinizer {
|
||||
|
||||
public abstract void scrutinizeAdded(Snak snak);
|
||||
|
||||
public abstract void scrutinizeDeleted(Snak snak);
|
||||
|
||||
@Override
|
||||
public void scrutinizeAdded(Statement statement) {
|
||||
// Main snak
|
||||
scrutinizeAdded(statement.getClaim().getMainSnak());
|
||||
|
||||
// Qualifiers
|
||||
scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), true);
|
||||
|
||||
// References
|
||||
for(Reference ref : statement.getReferences()) {
|
||||
scrutinizeSnakSet(ref.getAllSnaks(), true);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void scrutinizeDeleted(Statement statement) {
|
||||
// Main snak
|
||||
scrutinizeDeleted(statement.getClaim().getMainSnak());
|
||||
|
||||
// Qualifiers
|
||||
scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), false);
|
||||
|
||||
// References
|
||||
for(Reference ref : statement.getReferences()) {
|
||||
scrutinizeSnakSet(ref.getAllSnaks(), false);
|
||||
}
|
||||
}
|
||||
|
||||
private void scrutinizeSnakSet(Iterator<Snak> snaks, boolean add) {
|
||||
while(snaks.hasNext()) {
|
||||
Snak snak = snaks.next();
|
||||
if (add) {
|
||||
scrutinizeAdded(snak);
|
||||
} else {
|
||||
scrutinizeDeleted(snak);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
|
||||
public abstract class StatementGroupScrutinizer extends ItemEditScrutinizer {
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update) {
|
||||
for(StatementGroup statementGroup : update.getAddedStatementGroups()) {
|
||||
scrutinizeAdded(statementGroup);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public abstract void scrutinizeAdded(StatementGroup statementGroup);
|
||||
|
||||
public abstract void scrutinizeDeleted(StatementGroup statementGroup);
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
|
||||
public abstract class StatementScrutinizer extends ItemEditScrutinizer {
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update) {
|
||||
for(Statement statement : update.getAddedStatements()) {
|
||||
scrutinizeAdded(statement);
|
||||
}
|
||||
}
|
||||
|
||||
public abstract void scrutinizeAdded(Statement statement);
|
||||
|
||||
public abstract void scrutinizeDeleted(Statement statement);
|
||||
}
|
@ -180,4 +180,11 @@ public class ItemUpdate {
|
||||
}
|
||||
aliases = filteredAliases;
|
||||
}
|
||||
|
||||
/**
|
||||
* is this update about a new item?
|
||||
*/
|
||||
public boolean isNew() {
|
||||
return "Q0".equals(getItemId().getId());
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user