Change architecture of custom EntityIdValues

This commit is contained in:
Antonin Delpeuch 2018-01-12 17:26:31 +00:00
parent 43f0348ee9
commit 69bfad6a47
12 changed files with 286 additions and 190 deletions

View File

@ -1,33 +0,0 @@
package org.openrefine.wikidata.editing;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
 * Position of a cell, given as a (row, column) pair of indices.
 *
 * This class facilitates the serialization of the map from cell
 * positions to qids: instances are used as keys of that map and are
 * written out in the form {@code "<row>_<col>"}.
 *
 * Two instances are equal when they designate the same row and column,
 * so that a freshly built instance can be used to look up an entry
 * stored under another instance with the same coordinates.
 *
 * The fields are public and mutable; they must not be modified while
 * the instance is used as a key in a hash-based collection.
 *
 * @author antonin
 *
 */
public class CellCoordinates {
    /** Index of the row of the cell. */
    public int row;
    /** Index of the column of the cell. */
    public int col;

    /**
     * Creates the coordinates of the cell at the given indices.
     *
     * @param row
     *            the index of the row of the cell
     * @param col
     *            the index of the column of the cell
     */
    public CellCoordinates(int row, int col) {
        this.row = row;
        this.col = col;
    }

    /**
     * Parses coordinates from their serialized form, as produced
     * by {@link #toString()}.
     *
     * @param serialized
     *            a string of the form {@code "<row>_<col>"}
     * @throws IllegalArgumentException
     *             if the string does not contain two parts separated by
     *             an underscore, or (as {@link NumberFormatException})
     *             if one of the parts is not an integer
     */
    public CellCoordinates(String serialized) {
        String[] coords = serialized.split("_");
        if (coords.length < 2) {
            // Without this check a missing separator surfaces as an
            // ArrayIndexOutOfBoundsException with no hint about the input.
            throw new IllegalArgumentException(
                    "Invalid cell coordinates: \"" + serialized + "\"");
        }
        this.row = Integer.parseInt(coords[0]);
        this.col = Integer.parseInt(coords[1]);
    }

    /**
     * Compares coordinates by value: same row and same column.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof CellCoordinates)) {
            return false;
        }
        CellCoordinates that = (CellCoordinates) other;
        return row == that.row && col == that.col;
    }

    /**
     * Hash code consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return 31 * row + col;
    }

    /**
     * @return the serialized form {@code "<row>_<col>"}
     */
    @JsonIgnore
    @Override
    public String toString() {
        return String.format("%d_%d", row, col);
    }
}

View File

@ -1,18 +0,0 @@
package org.openrefine.wikidata.editing;
import java.io.IOException;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.KeyDeserializer;
/**
 * Jackson key deserializer which turns a serialized map key
 * back into a {@link CellCoordinates} instance.
 */
public class CellCoordinatesKeyDeserializer extends KeyDeserializer {

    /**
     * Builds the coordinates designated by the given key.
     *
     * @param key
     *            the serialized key, passed to the String constructor
     *            of {@link CellCoordinates}
     * @param ctxt
     *            the deserialization context (unused)
     * @return the parsed {@link CellCoordinates}
     */
    @Override
    public Object deserializeKey(final String key, final DeserializationContext ctxt)
            throws IOException, JsonProcessingException {
        final CellCoordinates coordinates = new CellCoordinates(key);
        return coordinates;
    }
}

View File

@ -4,8 +4,6 @@ import java.util.Map;
import java.util.HashMap; import java.util.HashMap;
import java.util.Set; import java.util.Set;
import org.openrefine.wikidata.schema.entityvalues.NewEntityIdValue;
import java.util.HashSet; import java.util.HashSet;
import com.google.refine.model.Project; import com.google.refine.model.Project;
@ -14,18 +12,18 @@ import com.google.refine.model.Column;
import com.google.refine.model.Recon; import com.google.refine.model.Recon;
import com.google.refine.model.ReconCandidate; import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconStats; import com.google.refine.model.ReconStats;
import com.google.refine.model.Row;
/** /**
* This keeps track of the new items that we * This keeps track of the new items that we
* have created for each cell that was marked * have created for each internal reconciliation id.
* as such.
* *
* @author antonin * @author antonin
* *
*/ */
public class NewItemLibrary { public class NewItemLibrary {
private Map<CellCoordinates, String> map; private Map<Long, String> map;
public NewItemLibrary() { public NewItemLibrary() {
map = new HashMap<>(); map = new HashMap<>();
@ -36,17 +34,17 @@ public class NewItemLibrary {
* @param id: the fake ItemId generated by the cell * @param id: the fake ItemId generated by the cell
* @return the qid (or null if unallocated yet) * @return the qid (or null if unallocated yet)
*/ */
public String getQid(NewEntityIdValue id) { public String getQid(long id) {
return map.get(fromNewEntityIdValue(id)); return map.get(id);
} }
/** /**
* Stores a Qid associated to a new cell * Stores a Qid associated to a new cell
* @param id : the fake EntityIdValue generated by the cell * @param id : the internal reconciliation id of the new cell
* @param qid : the associated Qid returned by Wikibase * @param qid : the associated Qid returned by Wikibase
*/ */
public void setQid(NewEntityIdValue id, String qid) { public void setQid(long id, String qid) {
map.put(fromNewEntityIdValue(id), qid); map.put(id, qid);
} }
/** /**
@ -56,26 +54,39 @@ public class NewItemLibrary {
* @param reset: set to true to revert the operation (set cells to "new") * @param reset: set to true to revert the operation (set cells to "new")
*/ */
public void updateReconciledCells(Project project, boolean reset) { public void updateReconciledCells(Project project, boolean reset) {
Set<Integer> impactedColumns = new HashSet<>();
for(Map.Entry<CellCoordinates, String> entry : map.entrySet()) { Set<Integer> impactedColumns = new HashSet<>();
CellCoordinates coords = entry.getKey();
Cell cell = project.rows.get(coords.row).getCell(coords.col); /*
Recon recon = cell.recon; * Note that there is a slight violation of OpenRefine's model here:
if (recon.judgment.equals(Recon.Judgment.New) && !reset) { * if we reconcile multiple cells to the same new item, and then
recon.judgment = Recon.Judgment.Matched; * perform this operation on a subset of the corresponding rows,
recon.match = new ReconCandidate( * we are going to modify cells that are outside the facet (because
entry.getValue(), * they are reconciled to the same cell). But I think this is the
cell.value.toString(), * right thing to do.
new String[0], */
100);
} else if (recon.judgment.equals(Recon.Judgment.Matched) && reset) { for(Row row : project.rows) {
recon.judgment = Recon.Judgment.New; for(Cell cell : row.cells) {
recon.match = null; if (cell.recon == null) {
continue;
}
Recon recon = cell.recon;
if (Recon.Judgment.New.equals(recon.judgment) && !reset &&
map.containsKey(recon.id)) {
recon.judgment = Recon.Judgment.Matched;
recon.match = new ReconCandidate(
map.get(recon.id),
cell.value.toString(),
new String[0],
100);
} else if (Recon.Judgment.Matched.equals(recon.judgment) && reset &&
map.containsKey(recon.id)) {
recon.judgment = Recon.Judgment.New;
recon.match = null;
}
} }
impactedColumns.add(coords.col);
} }
// Update reconciliation statistics for impacted columns // Update reconciliation statistics for impacted columns
for(Integer colId : impactedColumns) { for(Integer colId : impactedColumns) {
Column column = project.columnModel.getColumnByCellIndex(colId); Column column = project.columnModel.getColumnByCellIndex(colId);
@ -83,23 +94,18 @@ public class NewItemLibrary {
} }
} }
// TODO migrate NewEntityIdValue to use CellCoordinates directly
private CellCoordinates fromNewEntityIdValue(NewEntityIdValue id) {
return new CellCoordinates(id.getRowId(), id.getColId());
}
/** /**
* Accessor, only meant to be used by Jackson * Getter, only meant to be used by Jackson
* @return the underlying map * @return the underlying map
*/ */
public Map<CellCoordinates, String> getQidMap() { public Map<Long, String> getQidMap() {
return map; return map;
} }
/** /**
* Accessor, only meant to be used by Jackson * Setter, only meant to be used by Jackson
*/ */
public void setQidMap(Map<CellCoordinates, String> newMap) { public void setQidMap(Map<Long, String> newMap) {
map = newMap; map = newMap;
} }
} }

View File

@ -15,16 +15,13 @@ import org.json.JSONWriter;
import org.openrefine.wikidata.editing.ConnectionManager; import org.openrefine.wikidata.editing.ConnectionManager;
import org.openrefine.wikidata.editing.NewItemLibrary; import org.openrefine.wikidata.editing.NewItemLibrary;
import org.openrefine.wikidata.editing.CellCoordinates;
import org.openrefine.wikidata.editing.CellCoordinatesKeyDeserializer;
import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.schema.ItemUpdate;
import org.openrefine.wikidata.schema.WikibaseSchema; import org.openrefine.wikidata.schema.WikibaseSchema;
import org.openrefine.wikidata.schema.entityvalues.NewEntityIdValue; import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl; import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl;
import org.wikidata.wdtk.datamodel.interfaces.DataObjectFactory; import org.wikidata.wdtk.datamodel.interfaces.DataObjectFactory;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemDocument; import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.util.WebResourceFetcherImpl; import org.wikidata.wdtk.util.WebResourceFetcherImpl;
import org.wikidata.wdtk.wikibaseapi.ApiConnection; import org.wikidata.wdtk.wikibaseapi.ApiConnection;
@ -32,11 +29,7 @@ import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException; import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
import org.wikidata.wdtk.datamodel.interfaces.SiteLink; import org.wikidata.wdtk.datamodel.interfaces.SiteLink;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.KeyDeserializer;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.refine.browsing.Engine; import com.google.refine.browsing.Engine;
import com.google.refine.history.Change; import com.google.refine.history.Change;
@ -170,8 +163,6 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
if ("newItems".equals(field)) { if ("newItems".equals(field)) {
ObjectMapper mapper = new ObjectMapper(); ObjectMapper mapper = new ObjectMapper();
SimpleModule simpleModule = new SimpleModule();
simpleModule.addKeyDeserializer(CellCoordinates.class, new CellCoordinatesKeyDeserializer());
library = mapper.readValue(value, NewItemLibrary.class); library = mapper.readValue(value, NewItemLibrary.class);
} }
} }
@ -234,7 +225,7 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
try { try {
// New item // New item
if (update.getItemId().getId() == "Q0") { if (update.getItemId().getId() == "Q0") {
NewEntityIdValue newCell = (NewEntityIdValue)update.getItemId(); ReconEntityIdValue newCell = (ReconEntityIdValue)update.getItemId();
update.normalizeLabelsAndAliases(); update.normalizeLabelsAndAliases();
@ -248,7 +239,7 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
0L); 0L);
ItemDocument createdDoc = wbde.createItemDocument(itemDocument, _summary); ItemDocument createdDoc = wbde.createItemDocument(itemDocument, _summary);
newItemLibrary.setQid(newCell, createdDoc.getItemId().getId()); newItemLibrary.setQid(newCell.getReconInternalId(), createdDoc.getItemId().getId());
} else { } else {
// Existing item // Existing item
wbde.updateTermsStatements(update.getItemId(), wbde.updateTermsStatements(update.getItemId(),

View File

@ -1,63 +0,0 @@
package org.openrefine.wikidata.schema.entityvalues;
/**
 * Stand-in for the Qid of an item that is yet to be created.
 *
 * The id is always "Q0"; the instance additionally records the
 * position of the reconciled cell it was generated from, so that
 * only one item is created per cell marked as "new".
 *
 * @author antonin
 */
public class NewEntityIdValue extends TermedItemIdValue {

    private final int rowId;
    private final int colId;

    /**
     * Creates a new entity id corresponding to the
     * cell designated by the indices.
     *
     * @param rowId
     *            the index of the row for the cell
     * @param colId
     *            the index of the column for the cell
     * @param siteIRI
     *            the site IRI, passed on to the superclass
     * @param label
     *            the value of the cell
     */
    public NewEntityIdValue(int rowId, int colId, String siteIRI, String label) {
        super("Q0", siteIRI, label);
        this.rowId = rowId;
        this.colId = colId;
    }

    /**
     * @return the index of the row of the originating cell
     */
    public int getRowId() {
        return rowId;
    }

    /**
     * @return the index of the column of the originating cell
     */
    public int getColId() {
        return colId;
    }

    /**
     * Two new entity ids are equal when they come from the same cell.
     * This matters when all ItemUpdates related to an ItemId
     * are gathered.
     */
    @Override
    public boolean equals(Object other) {
        // instanceof is false for null, so no separate null check is needed
        if (!(other instanceof NewEntityIdValue)) {
            return false;
        }
        final NewEntityIdValue that = (NewEntityIdValue) other;
        return rowId == that.getRowId() && colId == that.getColId();
    }

    /**
     * Hash of the cell position, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return 41 * (41 * 3 + rowId) + colId;
    }
}

View File

@ -0,0 +1,40 @@
package org.openrefine.wikidata.schema.entityvalues;
import java.util.List;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
/**
 * An entity id value that also comes with
 * a label and possibly types.
 *
 * The rationale behind this interface is that OpenRefine
 * already stores labels and types for the Wikidata items
 * it knows about (in the reconciliation data), so it is
 * worth keeping this data to avoid re-fetching it when
 * we need it.
 *
 * @author antonin
 *
 */
public interface PrefetchedEntityIdValue extends EntityIdValue {

    /**
     * Returns the label "as we got it": there is no guarantee
     * that it is current or that its language matches that of the user.
     * In general though, that should be the case if the user always uses
     * OpenRefine with the same language settings.
     *
     * @return the preferred label of the entity
     */
    String getLabel();

    /**
     * Returns the types of this entity, as they were originally
     * fetched from the reconciliation interface: they can diverge
     * from what is currently on the entity.
     *
     * An empty list should be returned when no types are known
     * for the entity.
     *
     * @return the list of types of the entity, possibly empty
     */
    List<String> getTypes();
}

View File

@ -0,0 +1,141 @@
package org.openrefine.wikidata.schema.entityvalues;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.wikidata.wdtk.datamodel.helpers.Hash;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
import com.google.refine.model.Recon;
/**
 * An EntityIdValue backed by the reconciliation data of a cell.
 *
 * Besides the id, it exposes the label and the types stored in the
 * matched reconciliation candidate. Keeping them lets us reuse the
 * label later on, and perform some constraint checks on the types,
 * without having to re-fetch them.
 *
 * When the recon is not matched, the value stands for a new entity:
 * the label is the cell value, no types are known and the id is a
 * placeholder ("Q0" for items, "P0" for properties).
 *
 * @author antonin
 *
 */
public abstract class ReconEntityIdValue implements PrefetchedEntityIdValue {

    private final Recon _recon;
    private final String _cellValue;

    /**
     * @param match
     *            the reconciliation data of the cell
     * @param cellValue
     *            the value of the cell, used as label when the
     *            recon is not matched
     */
    public ReconEntityIdValue(Recon match, String cellValue) {
        _recon = match;
        _cellValue = cellValue;
    }

    /**
     * @return true when the recon is judged as matched and
     *         actually holds a matched candidate
     */
    protected boolean isMatched() {
        return _recon.match != null && Recon.Judgment.Matched.equals(_recon.judgment);
    }

    /**
     * @return true whenever the recon is not matched
     */
    protected boolean isNew() {
        return !isMatched();
    }

    /**
     * @return the name of the matched candidate, or the cell value
     *         when the recon is not matched
     */
    @Override
    public String getLabel() {
        return isMatched() ? _recon.match.name : _cellValue;
    }

    /**
     * @return the types of the matched candidate (as a list view of
     *         its types array), or a new empty list when the recon
     *         is not matched
     */
    @Override
    public List<String> getTypes() {
        if (!isMatched()) {
            return new ArrayList<>();
        }
        return Arrays.asList(_recon.match.types);
    }

    @Override
    public abstract String getEntityType();

    /**
     * Returns the id of the reconciled entity: the id of the matched
     * candidate if there is one, otherwise the placeholder "Q0" for
     * items or "P0" for properties, and null for any other entity type.
     */
    @Override
    public String getId() {
        if (isMatched()) {
            return _recon.match.id;
        }
        final String entityType = getEntityType();
        if (ET_ITEM.equals(entityType)) {
            return "Q0";
        }
        if (ET_PROPERTY.equals(entityType)) {
            return "P0";
        }
        return null;
    }

    /**
     * @return the schema space of the underlying recon
     */
    @Override
    public String getSiteIri() {
        // NOTE(review): this uses the recon's schemaSpace as base IRI;
        // confirm that this is the intended space for entity ids.
        return _recon.schemaSpace;
    }

    /**
     * @return the concatenation of the site IRI and the id
     */
    @Override
    public String getIri() {
        return getSiteIri() + getId();
    }

    @Override
    public <T> T accept(ValueVisitor<T> valueVisitor) {
        return valueVisitor.visit(this);
    }

    /**
     * Equality check is important when we gather
     * all ItemUpdates related to an ItemId.
     *
     * Against another recon-backed value: a new and a matched value
     * are never equal, and two new values are equal when they share
     * the same internal recon id. In all other cases the IRIs are
     * compared. The label is ignored in the equality check.
     */
    @Override
    public boolean equals(Object other) {
        // instanceof is false for null, so this also rejects null
        if (!(other instanceof EntityIdValue)) {
            return false;
        }
        if (other instanceof ReconEntityIdValue) {
            final ReconEntityIdValue that = (ReconEntityIdValue) other;
            final boolean thisIsNew = isNew();
            if (thisIsNew != that.isNew()) {
                return false;
            }
            if (thisIsNew) {
                // This ensures compliance with OR's notion of new items
                // (it is possible that two cells are reconciled to the same
                // new item, in which case they share the same internal recon id).
                return getRecon().id == that.getRecon().id;
            }
        }
        return getIri().equals(((EntityIdValue) other).getIri());
    }

    /**
     * @return the internal id of the underlying recon
     */
    public long getReconInternalId() {
        return getRecon().id;
    }

    /**
     * Hashes matched values with the generic hash helper and new
     * values by their internal recon id (truncated to an int).
     */
    @Override
    public int hashCode() {
        // NOTE(review): a new value can compare equal (by IRI) to a
        // non-recon EntityIdValue while hashing differently — confirm
        // this case does not occur in hash-based collections.
        return isMatched() ? Hash.hashCode(this) : (int) _recon.id;
    }

    /**
     * @return the underlying recon
     */
    protected Recon getRecon() {
        return _recon;
    }
}

View File

@ -0,0 +1,18 @@
package org.openrefine.wikidata.schema.entityvalues;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import com.google.refine.model.Recon;
/**
 * A recon-backed entity id value designating an item.
 */
public class ReconItemIdValue extends ReconEntityIdValue implements ItemIdValue {

    /**
     * @param recon
     *            the reconciliation data of the cell
     * @param cellValue
     *            the value of the cell
     */
    public ReconItemIdValue(Recon recon, String cellValue) {
        super(recon, cellValue);
    }

    /**
     * @return the entity type constant for items
     */
    @Override
    public String getEntityType() {
        return ItemIdValue.ET_ITEM;
    }
}

View File

@ -0,0 +1,17 @@
package org.openrefine.wikidata.schema.entityvalues;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import com.google.refine.model.Recon;
/**
 * A recon-backed entity id value designating a property.
 */
public class ReconPropertyIdValue extends ReconEntityIdValue implements PropertyIdValue {

    /**
     * @param recon
     *            the reconciliation data of the cell
     * @param cellValue
     *            the value of the cell
     */
    public ReconPropertyIdValue(Recon recon, String cellValue) {
        super(recon, cellValue);
    }

    /**
     * @return the entity type constant for properties
     */
    @Override
    public String getEntityType() {
        return PropertyIdValue.ET_PROPERTY;
    }
}

View File

@ -1,49 +1,51 @@
package org.openrefine.wikidata.schema.entityvalues; package org.openrefine.wikidata.schema.entityvalues;
import java.util.ArrayList;
import java.util.List;
import org.wikidata.wdtk.datamodel.helpers.Hash; import org.wikidata.wdtk.datamodel.helpers.Hash;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor; import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
/** /**
* An EntityIdValue that holds not just the id but also * An EntityIdValue that we have obtained from a suggest widget
* the label as fetched by either the reconciliation interface * in the schema alignment dialog.
* or the suggester.
* *
* This label will be localized depending on the language chosen
* by the user for OpenRefine's interface. Storing it lets us
* reuse it later on without having to re-fetch it.
* @author antonin * @author antonin
* *
*/ */
public abstract class TermedEntityIdValue implements EntityIdValue { public abstract class SuggestedEntityIdValue implements PrefetchedEntityIdValue {
private String _id; private String _id;
private String _siteIRI; private String _siteIRI;
private String _label; private String _label;
public TermedEntityIdValue(String id, String siteIRI, String label) { public SuggestedEntityIdValue(String id, String siteIRI, String label) {
_id = id; _id = id;
_siteIRI = siteIRI; _siteIRI = siteIRI;
_label = label; _label = label;
} }
@Override
public abstract String getEntityType();
@Override @Override
public String getId() { public String getId() {
return _id; return _id;
} }
@Override @Override
public String getSiteIri() { public String getSiteIri() {
return _siteIRI; return _siteIRI;
} }
@Override
public String getLabel() { public String getLabel() {
return _label; return _label;
} }
@Override
public List<String> getTypes() {
return new ArrayList<>();
}
@Override @Override
public String getIri() { public String getIri() {
return getSiteIri() + getId(); return getSiteIri() + getId();
@ -53,23 +55,17 @@ public abstract class TermedEntityIdValue implements EntityIdValue {
public <T> T accept(ValueVisitor<T> valueVisitor) { public <T> T accept(ValueVisitor<T> valueVisitor) {
return valueVisitor.visit(this); return valueVisitor.visit(this);
} }
/**
* Equality check is important when we gather
* all ItemUpdates related to an ItemId.
*
* The label is ignored in the equality check.
*/
@Override @Override
public boolean equals(Object other) { public boolean equals(Object other) {
if (other == null || if (other == null ||
!EntityIdValue.class.isInstance(other)) { !EntityIdValue.class.isInstance(other)) {
return false; return false;
} }
final EntityIdValue otherNew = (EntityIdValue)other; final EntityIdValue otherNew = (EntityIdValue)other;
return getIri().equals(otherNew.getIri()); return getIri().equals(otherNew.getIri());
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Hash.hashCode(this); return Hash.hashCode(this);

View File

@ -2,9 +2,9 @@ package org.openrefine.wikidata.schema.entityvalues;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
public class TermedItemIdValue extends TermedEntityIdValue implements ItemIdValue { public class SuggestedItemIdValue extends SuggestedEntityIdValue implements ItemIdValue {
public TermedItemIdValue(String id, String siteIRI, String label) { public SuggestedItemIdValue(String id, String siteIRI, String label) {
super(id, siteIRI, label); super(id, siteIRI, label);
} }

View File

@ -2,9 +2,9 @@ package org.openrefine.wikidata.schema.entityvalues;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
public class TermedPropertyIdValue extends TermedEntityIdValue implements PropertyIdValue { public class SuggestedPropertyIdValue extends SuggestedEntityIdValue implements PropertyIdValue {
public TermedPropertyIdValue(String id, String siteIRI, String label) { public SuggestedPropertyIdValue(String id, String siteIRI, String label) {
super(id, siteIRI, label); super(id, siteIRI, label);
} }
@ -12,4 +12,5 @@ public class TermedPropertyIdValue extends TermedEntityIdValue implements Proper
public String getEntityType() { public String getEntityType() {
return ET_PROPERTY; return ET_PROPERTY;
} }
} }