Initial support for creation of new items in Wikidata

This commit is contained in:
Antonin Delpeuch 2017-10-19 18:15:22 +01:00
parent 4076f088f3
commit 34568ca9b1
13 changed files with 388 additions and 44 deletions

View File

@ -8,10 +8,6 @@
</p> </p>
<div class="wikibase-perform-edits-area"> <div class="wikibase-perform-edits-area">
<p>You are logged in as <span bind="loggedInUsername"></span>.</p> <p>You are logged in as <span bind="loggedInUsername"></span>.</p>
<form bind="performEditsForm">
<input type="hidden" name="strategy" value="SNAK_QUALIFIERS" />
<input type="hidden" name="action" value="MERGE" />
</form>
<div class="wikibase-login-buttons"> <div class="wikibase-login-buttons">
<button class="button cancel-button" bind="cancelButton">Cancel</button> <button class="button cancel-button" bind="cancelButton">Cancel</button>
<button class="button button-primary" bind="performEditsButton">Perform edits</button> <button class="button button-primary" bind="performEditsButton">Perform edits</button>

View File

@ -22,8 +22,10 @@ PerformEditsDialog.launch = function(logged_in_username) {
"wikidata", "wikidata",
"perform-wikibase-edits", "perform-wikibase-edits",
{}, {},
elmts.performEditsForm.serialize(), { strategy : "SNAK_QUALIFIERS",
{}, action: "MERGE",
},
{ includeEngine: true, cellsChanged: true, columnStatsChanged: true },
{ onDone: { onDone:
function() { function() {
dismiss(); dismiss();

View File

@ -0,0 +1,33 @@
package org.openrefine.wikidata.editing;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
* A class to facilitate serialization of
* the map from cell positions to qids
*
* @author antonin
*
*/
public class CellCoordinates {
public int row;
public int col;
public CellCoordinates(int row, int col) {
this.row = row;
this.col = col;
}
public CellCoordinates(String serialized) {
String[] coords = serialized.split("_");
this.row = Integer.parseInt(coords[0]);
this.col = Integer.parseInt(coords[1]);
}
@JsonIgnore
public String toString() {
return String.format("%d_%d", row, col);
}
}

View File

@ -0,0 +1,18 @@
package org.openrefine.wikidata.editing;
import java.io.IOException;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.KeyDeserializer;
/**
 * Key deserializer turning the string form of a map key back into
 * a {@link CellCoordinates} instance, by delegating to its
 * String-based constructor.
 */
public class CellCoordinatesKeyDeserializer extends KeyDeserializer {

    /**
     * @param key
     *          the serialized key, as read from the JSON object
     * @param ctxt
     *          the deserialization context (not used)
     * @return the coordinates parsed from the key
     */
    @Override
    public Object deserializeKey(final String key, final DeserializationContext ctxt)
            throws IOException, JsonProcessingException {
        final CellCoordinates coordinates = new CellCoordinates(key);
        return coordinates;
    }
}

View File

@ -0,0 +1,104 @@
package org.openrefine.wikidata.editing;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import com.google.refine.model.Project;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconStats;
import org.openrefine.wikidata.schema.NewEntityIdValue;
/**
 * This keeps track of the new items that we
 * have created for each cell that was marked
 * as such.
 *
 * The library maps the coordinates of a cell to the Qid
 * allocated for it, and can write these Qids back to the
 * project's reconciliation data (or undo that).
 *
 * @author antonin
 *
 */
public class NewItemLibrary {

    private Map<CellCoordinates, String> map;

    public NewItemLibrary() {
        map = new HashMap<>();
    }

    /**
     * Retrieves the Qid allocated to a given new cell
     * @param id: the fake ItemId generated by the cell
     * @return the qid (or null if unallocated yet)
     */
    public String getQid(NewEntityIdValue id) {
        return map.get(fromNewEntityIdValue(id));
    }

    /**
     * Stores a Qid associated to a new cell
     * @param id : the fake EntityIdValue generated by the cell
     * @param qid : the associated Qid returned by Wikibase
     */
    public void setQid(NewEntityIdValue id, String qid) {
        map.put(fromNewEntityIdValue(id), qid);
    }

    /**
     * Changes the "new" reconciled cells to their allocated
     * qids for later use.
     *
     * Entries pointing to a row that does not exist, or to a cell
     * that is empty or not reconciled, are skipped instead of
     * failing, so that one stale entry does not prevent the other
     * cells from being updated.
     *
     * @param project: the project whose cells should be updated
     * @param reset: set to true to revert the operation (set cells to "new")
     */
    public void updateReconciledCells(Project project, boolean reset) {
        Set<Integer> impactedColumns = new HashSet<>();

        for (Map.Entry<CellCoordinates, String> entry : map.entrySet()) {
            CellCoordinates coords = entry.getKey();
            if (coords.row < 0 || coords.row >= project.rows.size()) {
                continue;
            }
            Cell cell = project.rows.get(coords.row).getCell(coords.col);
            if (cell == null || cell.recon == null) {
                continue;
            }
            Recon recon = cell.recon;
            // Enum constants are compared by identity, which also
            // tolerates a null judgment.
            if (recon.judgment == Recon.Judgment.New && !reset) {
                recon.judgment = Recon.Judgment.Matched;
                recon.match = new ReconCandidate(
                        entry.getValue(),
                        cell.value != null ? cell.value.toString() : "",
                        new String[0],
                        100);
            } else if (recon.judgment == Recon.Judgment.Matched && reset) {
                recon.judgment = Recon.Judgment.New;
                recon.match = null;
            }
            impactedColumns.add(coords.col);
        }

        // Update reconciliation statistics for impacted columns
        for (Integer colId : impactedColumns) {
            Column column = project.columnModel.getColumnByCellIndex(colId);
            if (column != null) {
                column.setReconStats(ReconStats.create(project, colId));
            }
        }
    }

    // TODO migrate NewEntityIdValue to use CellCoordinates directly
    private CellCoordinates fromNewEntityIdValue(NewEntityIdValue id) {
        return new CellCoordinates(id.getRowId(), id.getColId());
    }

    /**
     * Accessor, only meant to be used by Jackson
     * @return the underlying map
     */
    public Map<CellCoordinates, String> getQidMap() {
        return map;
    }

    /**
     * Accessor, only meant to be used by Jackson
     * @param newMap: the map to use from now on (null is treated
     *     as an empty map)
     */
    public void setQidMap(Map<CellCoordinates, String> newMap) {
        if (newMap == null) {
            map = new HashMap<CellCoordinates, String>();
        } else {
            map = newMap;
        }
    }
}

View File

@ -73,7 +73,7 @@ public class QuickStatementsExporter implements WriterExporter {
protected void translateItem(ItemUpdate item, Writer writer) throws IOException { protected void translateItem(ItemUpdate item, Writer writer) throws IOException {
String qid = item.getItemId().getId(); String qid = item.getItemId().getId();
if (item.getItemId().equals(ItemIdValue.NULL)) { if (item.getItemId().getId() == "Q0") {
writer.write("CREATE\n"); writer.write("CREATE\n");
qid = "LAST"; qid = "LAST";
} }

View File

@ -11,39 +11,40 @@ import java.util.Properties;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.json.JSONWriter; import org.json.JSONWriter;
import com.google.common.collect.Lists;
import org.openrefine.wikidata.editing.ConnectionManager; import org.openrefine.wikidata.editing.ConnectionManager;
import org.openrefine.wikidata.operations.SaveWikibaseSchemaOperation.WikibaseSchemaChange; import org.openrefine.wikidata.editing.NewItemLibrary;
import org.openrefine.wikidata.editing.CellCoordinates;
import org.openrefine.wikidata.editing.CellCoordinatesKeyDeserializer;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.schema.ItemUpdate;
import org.openrefine.wikidata.schema.NewEntityIdValue;
import org.openrefine.wikidata.schema.WikibaseSchema; import org.openrefine.wikidata.schema.WikibaseSchema;
import org.wikidata.wdtk.datamodel.helpers.EntityDocumentBuilder; import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl;
import org.wikidata.wdtk.datamodel.helpers.ItemDocumentBuilder; import org.wikidata.wdtk.datamodel.interfaces.DataObjectFactory;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemDocument; import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.util.WebResourceFetcherImpl; import org.wikidata.wdtk.util.WebResourceFetcherImpl;
import org.wikidata.wdtk.wikibaseapi.ApiConnection; import org.wikidata.wdtk.wikibaseapi.ApiConnection;
import org.wikidata.wdtk.wikibaseapi.LoginFailedException;
import org.wikidata.wdtk.wikibaseapi.StatementUpdate;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor; import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException; import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.SiteLink;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.KeyDeserializer;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.refine.browsing.Engine; import com.google.refine.browsing.Engine;
import com.google.refine.history.Change; import com.google.refine.history.Change;
import com.google.refine.history.HistoryEntry; import com.google.refine.history.HistoryEntry;
import com.google.refine.model.AbstractOperation; import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.model.changes.ReconChange;
import com.google.refine.operations.EngineDependentOperation; import com.google.refine.operations.EngineDependentOperation;
import com.google.refine.operations.OperationRegistry; import com.google.refine.operations.OperationRegistry;
import com.google.refine.operations.recon.ReconOperation;
import com.google.refine.process.LongRunningProcess; import com.google.refine.process.LongRunningProcess;
import com.google.refine.process.Process; import com.google.refine.process.Process;
import com.google.refine.util.ParsingUtilities;
import com.google.refine.util.Pool; import com.google.refine.util.Pool;
@ -113,27 +114,56 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
"#openrefine" "#openrefine"
); );
} }
static public class PerformWikibaseEditsChange implements Change { static public class PerformWikibaseEditsChange implements Change {
private NewItemLibrary newItemLibrary;
/**
 * @param library
 *          the library of new items whose cells are updated
 *          when this change is applied or reverted
 */
public PerformWikibaseEditsChange(NewItemLibrary library) {
    this.newItemLibrary = library;
}
@Override @Override
public void apply(Project project) { public void apply(Project project) {
// this does not do anything to the project (we don't re-run changes on Wikidata) // we don't re-run changes on Wikidata
newItemLibrary.updateReconciledCells(project, false);
} }
@Override @Override
public void revert(Project project) { public void revert(Project project) {
// this does not do anything (we don't revert changes on Wikidata either) // this does not do anything on Wikibase side -
// (we don't revert changes on Wikidata either)
newItemLibrary.updateReconciledCells(project, true);
} }
@Override @Override
public void save(Writer writer, Properties options) public void save(Writer writer, Properties options)
throws IOException { throws IOException {
if (newItemLibrary != null) {
writer.write("newItems=");
ObjectMapper mapper = new ObjectMapper();
writer.write(mapper.writeValueAsString(newItemLibrary)+"\n");
}
writer.write("/ec/\n"); // end of change writer.write("/ec/\n"); // end of change
} }
static public Change load(LineNumberReader reader, Pool pool) static public Change load(LineNumberReader reader, Pool pool)
throws Exception { throws Exception {
return new PerformWikibaseEditsChange(); NewItemLibrary library = new NewItemLibrary();
String line = null;
while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
int equal = line.indexOf('=');
CharSequence field = line.subSequence(0, equal);
String value = line.substring(equal + 1);
if ("newItems".equals(field)) {
ObjectMapper mapper = new ObjectMapper();
SimpleModule simpleModule = new SimpleModule();
simpleModule.addKeyDeserializer(CellCoordinates.class, new CellCoordinatesKeyDeserializer());
library = mapper.readValue(value, NewItemLibrary.class);
}
}
return new PerformWikibaseEditsChange(library);
} }
} }
@ -175,20 +205,7 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
List<ItemUpdate> itemDocuments = _schema.evaluate(_project, _engine); List<ItemUpdate> itemDocuments = _schema.evaluate(_project, _engine);
// Group statements by item // Group statements by item
Map<EntityIdValue, ItemUpdate> updates = new HashMap<EntityIdValue, ItemUpdate>(); Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(itemDocuments);
for(ItemUpdate update : itemDocuments) {
if (update.isNull()) {
continue;
}
ItemIdValue qid = update.getItemId();
if (updates.containsKey(qid)) {
ItemUpdate oldUpdate = updates.get(qid);
oldUpdate.merge(update);
} else {
updates.put(qid, update);
}
}
/** /**
* TODO: * TODO:
@ -197,12 +214,32 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
*/ */
// Perform edits // Perform edits
NewItemLibrary newItemLibrary = new NewItemLibrary();
DataObjectFactory factory = new DataObjectFactoryImpl();
int totalItemUpdates = updates.size(); int totalItemUpdates = updates.size();
int updatesDone = 0; int updatesDone = 0;
for(ItemUpdate update : updates.values()) { for(ItemUpdate update : updates.values()) {
try { try {
wbde.updateStatements(update.getItemId(), update.getAddedStatements(), update.getDeletedStatements(), _summary); // New item
if (update.getItemId().getId() == "Q0") {
NewEntityIdValue newCell = (NewEntityIdValue)update.getItemId();
ItemDocument itemDocument = factory.getItemDocument(
update.getItemId(),
update.getLabels(),
update.getDescriptions(),
update.getAliases(),
update.getAddedStatementGroups(),
new HashMap<String,SiteLink>(),
0L);
ItemDocument createdDoc = wbde.createItemDocument(itemDocument, _summary);
newItemLibrary.setQid(newCell, createdDoc.getItemId().getId());
} else {
// Existing item
wbde.updateStatements(update.getItemId(),
update.getAddedStatements(),
update.getDeletedStatements(), _summary);
}
} catch (MediaWikiApiErrorException e) { } catch (MediaWikiApiErrorException e) {
// TODO Auto-generated catch block // TODO Auto-generated catch block
e.printStackTrace(); e.printStackTrace();
@ -221,7 +258,7 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
_progress = 100; _progress = 100;
if (!_canceled) { if (!_canceled) {
Change change = new PerformWikibaseEditsChange(); Change change = new PerformWikibaseEditsChange(newItemLibrary);
HistoryEntry historyEntry = new HistoryEntry( HistoryEntry historyEntry = new HistoryEntry(
_historyEntryID, _historyEntryID,

View File

@ -7,11 +7,13 @@ import com.google.refine.model.Row;
public class ExpressionContext { public class ExpressionContext {
private String baseIRI; private String baseIRI;
private int rowId;
private Row row; private Row row;
private ColumnModel columnModel; private ColumnModel columnModel;
public ExpressionContext(String baseIRI, Row row, ColumnModel columnModel) { public ExpressionContext(String baseIRI, int rowId, Row row, ColumnModel columnModel) {
this.baseIRI = baseIRI; this.baseIRI = baseIRI;
this.rowId = rowId;
this.row = row; this.row = row;
this.columnModel = columnModel; this.columnModel = columnModel;
} }
@ -20,8 +22,16 @@ public class ExpressionContext {
return baseIRI; return baseIRI;
} }
/**
 * Looks up a column by its name in the column model and
 * returns its cell index.
 *
 * @param name
 *          the name of the column
 * @return the cell index of that column
 */
public int getCellIndexByName(String name) {
    final int cellIndex = columnModel.getColumnByName(name).getCellIndex();
    return cellIndex;
}
public Cell getCellByName(String name) { public Cell getCellByName(String name) {
int idx = columnModel.getColumnByName(name).getCellIndex(); int idx = getCellIndexByName(name);
return row.getCell(idx); return row.getCell(idx);
} }
/**
 * @return the row id stored in this context
 */
public int getRowId() {
    return this.rowId;
}
} }

View File

@ -2,10 +2,16 @@ package org.openrefine.wikidata.schema;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.HashMap;
import org.wikidata.wdtk.datamodel.implementation.StatementGroupImpl;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
/** /**
@ -103,4 +109,51 @@ public class ItemUpdate {
public List<MonolingualTextValue> getAliases() { public List<MonolingualTextValue> getAliases() {
return aliases; return aliases;
} }
/**
 * Buckets the added statements by the property of their main snak
 * and wraps each bucket in a StatementGroup. This is the shape
 * needed when the item is new.
 *
 * @return a grouped version of getAddedStatements()
 */
public List<StatementGroup> getAddedStatementGroups() {
    Map<PropertyIdValue, List<Statement>> byProperty = new HashMap<>();
    for (Statement added : getAddedStatements()) {
        PropertyIdValue pid = added.getClaim().getMainSnak().getPropertyId();
        List<Statement> bucket = byProperty.get(pid);
        if (bucket == null) {
            bucket = new ArrayList<Statement>();
            byProperty.put(pid, bucket);
        }
        bucket.add(added);
    }

    List<StatementGroup> groups = new ArrayList<>();
    for (List<Statement> bucket : byProperty.values()) {
        groups.add(new StatementGroupImpl(bucket));
    }
    return groups;
}
/**
 * Merges the ItemUpdates that share the same subject, so that one
 * single edit can be made per item. Null updates are ignored. The
 * first update seen for a subject is kept in the map and the later
 * ones are merged into it.
 *
 * @param itemDocuments
 *          the updates to group
 * @return a map from item ids to merged ItemUpdate for that id
 */
public static Map<EntityIdValue, ItemUpdate> groupBySubject(List<ItemUpdate> itemDocuments) {
    Map<EntityIdValue, ItemUpdate> bySubject = new HashMap<EntityIdValue, ItemUpdate>();
    for (ItemUpdate candidate : itemDocuments) {
        if (!candidate.isNull()) {
            ItemIdValue subject = candidate.getItemId();
            ItemUpdate existing = bySubject.get(subject);
            if (existing == null) {
                bySubject.put(subject, candidate);
            } else {
                existing.merge(candidate);
            }
        }
    }
    return bySubject;
}
} }

View File

@ -0,0 +1,90 @@
package org.openrefine.wikidata.schema;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
/**
 * Stand-in for the Qid of an item that does not exist yet.
 * It records the reconciled cell (row and column indices) it
 * was generated from, so that all the updates coming from one
 * cell marked as "new" designate the same item, and only one
 * item is created per such cell.
 *
 * @author antonin
 */
public class NewEntityIdValue implements ItemIdValue {

    private final int rowId;
    private final int colId;

    /**
     * Creates a new entity id corresponding to the
     * cell designated by the indices.
     *
     * @param rowId
     *          the index of the row for the cell
     * @param colId
     *          the index of the column for the cell
     */
    public NewEntityIdValue(int rowId, int colId) {
        this.rowId = rowId;
        this.colId = colId;
    }

    /**
     * @return the index of the row of the originating cell
     */
    public int getRowId() {
        return rowId;
    }

    /**
     * @return the index of the column of the originating cell
     */
    public int getColId() {
        return colId;
    }

    /**
     * Two placeholders are equal when they come from the same cell.
     * This matters when ItemUpdates are gathered by ItemId.
     */
    @Override
    public boolean equals(Object other) {
        if (!(other instanceof NewEntityIdValue)) {
            return false;
        }
        final NewEntityIdValue that = (NewEntityIdValue) other;
        if (rowId != that.getRowId()) {
            return false;
        }
        return colId == that.getColId();
    }

    @Override
    public int hashCode() {
        return 41 * (41 * 3 + rowId) + colId;
    }

    @Override
    public String getIri() {
        return getSiteIri() + getId();
    }

    @Override
    public <T> T accept(ValueVisitor<T> valueVisitor) {
        return valueVisitor.visit(this);
    }

    @Override
    public String getEntityType() {
        return ET_ITEM;
    }

    /**
     * @return the fixed placeholder id shared by all new items
     */
    @Override
    public String getId() {
        return "Q0";
    }

    @Override
    public String getSiteIri() {
        return EntityIdValue.SITE_LOCAL;
    }
}

View File

@ -44,7 +44,8 @@ public class WbItemVariable extends WbItemExpr {
ReconCandidate match = cell.recon.match; ReconCandidate match = cell.recon.match;
return Datamodel.makeItemIdValue(match.id, ctxt.getBaseIRI()); return Datamodel.makeItemIdValue(match.id, ctxt.getBaseIRI());
} else if (recon.judgment == Recon.Judgment.New) { } else if (recon.judgment == Recon.Judgment.New) {
return ItemIdValue.NULL; return new NewEntityIdValue(ctxt.getRowId(),
ctxt.getCellIndexByName(columnName));
} }
} }
throw new SkipStatementException(); throw new SkipStatementException();

View File

@ -31,7 +31,7 @@ public class WbNameDescExpr extends BiJsonizable {
throws JSONException { throws JSONException {
writer.key("name_type"); writer.key("name_type");
writer.value(_type.name()); writer.value(_type.name());
writer.value("value"); writer.key("value");
_value.write(writer, options); _value.write(writer, options);
} }

View File

@ -96,7 +96,7 @@ public class WikibaseSchema implements OverlayModel {
@Override @Override
public boolean visit(Project project, int rowIndex, Row row) { public boolean visit(Project project, int rowIndex, Row row) {
ExpressionContext ctxt = new ExpressionContext(baseUri, row, project.columnModel); ExpressionContext ctxt = new ExpressionContext(baseUri, rowIndex, row, project.columnModel);
result.addAll(evaluateItemDocuments(ctxt)); result.addAll(evaluateItemDocuments(ctxt));
return false; return false;
} }