diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/CellCoordinates.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/CellCoordinates.java deleted file mode 100644 index f64e69a1c..000000000 --- a/extensions/wikidata/src/org/openrefine/wikidata/editing/CellCoordinates.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.openrefine.wikidata.editing; - -import com.fasterxml.jackson.annotation.JsonIgnore; - - -/** - * A class to facilitate serialization of - * the map from cell positions to qids - * - * @author antonin - * - */ -public class CellCoordinates { - public int row; - public int col; - - public CellCoordinates(int row, int col) { - this.row = row; - this.col = col; - } - - public CellCoordinates(String serialized) { - String[] coords = serialized.split("_"); - this.row = Integer.parseInt(coords[0]); - this.col = Integer.parseInt(coords[1]); - } - - @JsonIgnore - public String toString() { - return String.format("%d_%d", row, col); - } -} - diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/CellCoordinatesKeyDeserializer.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/CellCoordinatesKeyDeserializer.java deleted file mode 100644 index 12235783b..000000000 --- a/extensions/wikidata/src/org/openrefine/wikidata/editing/CellCoordinatesKeyDeserializer.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.openrefine.wikidata.editing; - -import java.io.IOException; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.KeyDeserializer; - - -public class CellCoordinatesKeyDeserializer extends KeyDeserializer { - - @Override - public Object deserializeKey(final String key, final DeserializationContext ctxt ) throws IOException, JsonProcessingException - { - return new CellCoordinates(key); - } - -} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java index 0b57617c9..1a4a4d9da 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java @@ -4,8 +4,6 @@ import java.util.Map; import java.util.HashMap; import java.util.Set; -import org.openrefine.wikidata.schema.entityvalues.NewEntityIdValue; - import java.util.HashSet; import com.google.refine.model.Project; @@ -14,18 +12,18 @@ import com.google.refine.model.Column; import com.google.refine.model.Recon; import com.google.refine.model.ReconCandidate; import com.google.refine.model.ReconStats; +import com.google.refine.model.Row; /** * This keeps track of the new items that we - * have created for each cell that was marked - * as such. + * have created for each internal reconciliation id. * * @author antonin * */ public class NewItemLibrary { - private Map map; + private Map map; public NewItemLibrary() { map = new HashMap<>(); @@ -36,17 +34,17 @@ public class NewItemLibrary { * @param id: the fake ItemId generated by the cell * @return the qid (or null if unallocated yet) */ - public String getQid(NewEntityIdValue id) { - return map.get(fromNewEntityIdValue(id)); + public String getQid(long id) { + return map.get(id); } /** * Stores a Qid associated to a new cell - * @param id : the fake EntityIdValue generated by the cell + * @param id : the internal reconciliation id of the new cell * @param qid : the associated Qid returned by Wikibase */ - public void setQid(NewEntityIdValue id, String qid) { - map.put(fromNewEntityIdValue(id), qid); + public void setQid(long id, String qid) { + map.put(id, qid); } /** @@ -56,26 +54,39 @@ public class NewItemLibrary { * @param reset: set to true to revert the operation (set cells to "new") */ public void updateReconciledCells(Project project, boolean reset) { - Set impactedColumns = new HashSet<>(); - for(Map.Entry entry : map.entrySet()) { - CellCoordinates coords = entry.getKey(); - Cell cell = project.rows.get(coords.row).getCell(coords.col); - Recon recon = cell.recon; - if (recon.judgment.equals(Recon.Judgment.New) && !reset) { - recon.judgment = Recon.Judgment.Matched; - recon.match = new ReconCandidate( - entry.getValue(), - cell.value.toString(), - new String[0], - 100); - } else if (recon.judgment.equals(Recon.Judgment.Matched) && reset) { - recon.judgment = Recon.Judgment.New; - recon.match = null; + Set impactedColumns = new HashSet<>(); + + /* + * Note that there is a slight violation of OpenRefine's model here: + * if we reconcile multiple cells to the same new item, and then + * perform this operation on a subset of the corresponding rows, + * we are going to modify cells that are outside the facet (because + * they are reconciled to the same cell). But I think this is the + * right thing to do. + */ + + for(Row row : project.rows) { + for(Cell cell : row.cells) { + if (cell.recon == null) { + continue; + } + Recon recon = cell.recon; + if (Recon.Judgment.New.equals(recon.judgment) && !reset && + map.containsKey(recon.id)) { + recon.judgment = Recon.Judgment.Matched; + recon.match = new ReconCandidate( + map.get(recon.id), + cell.value.toString(), + new String[0], + 100); + } else if (Recon.Judgment.Matched.equals(recon.judgment) && reset && + map.containsKey(recon.id)) { + recon.judgment = Recon.Judgment.New; + recon.match = null; + } } - impactedColumns.add(coords.col); } - // Update reconciliation statistics for impacted columns for(Integer colId : impactedColumns) { Column column = project.columnModel.getColumnByCellIndex(colId); @@ -83,23 +94,18 @@ public class NewItemLibrary { } } - // TODO migrate NewEntityIdValue to use CellCoordinates directly - private CellCoordinates fromNewEntityIdValue(NewEntityIdValue id) { - return new CellCoordinates(id.getRowId(), id.getColId()); - } - /** - * Accessor, only meant to be used by Jackson + * Getter, only meant to be used by Jackson * @return the underlying map */ - public Map getQidMap() { + public Map getQidMap() { return map; } /** - * Accessor, only meant to be used by Jackson + * Setter, only meant to be used by Jackson */ - public void setQidMap(Map newMap) { + public void setQidMap(Map newMap) { map = newMap; } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java b/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java index 5ba5d6a39..5fefca255 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java @@ -15,16 +15,13 @@ import org.json.JSONWriter; import org.openrefine.wikidata.editing.ConnectionManager; import org.openrefine.wikidata.editing.NewItemLibrary; -import org.openrefine.wikidata.editing.CellCoordinates; -import org.openrefine.wikidata.editing.CellCoordinatesKeyDeserializer; import org.openrefine.wikidata.schema.ItemUpdate; import org.openrefine.wikidata.schema.WikibaseSchema; -import org.openrefine.wikidata.schema.entityvalues.NewEntityIdValue; +import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue; import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl; import org.wikidata.wdtk.datamodel.interfaces.DataObjectFactory; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemDocument; -import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue; import org.wikidata.wdtk.util.WebResourceFetcherImpl; import org.wikidata.wdtk.wikibaseapi.ApiConnection; @@ -32,11 +29,7 @@ import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor; import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException; import org.wikidata.wdtk.datamodel.interfaces.SiteLink; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.KeyDeserializer; import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.refine.browsing.Engine; import com.google.refine.history.Change; @@ -170,8 +163,6 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation { if ("newItems".equals(field)) { ObjectMapper mapper = new ObjectMapper(); - SimpleModule simpleModule = new SimpleModule(); - simpleModule.addKeyDeserializer(CellCoordinates.class, new CellCoordinatesKeyDeserializer()); library = mapper.readValue(value, NewItemLibrary.class); } } @@ -234,7 +225,7 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation { try { // New item if (update.getItemId().getId() == "Q0") { - NewEntityIdValue newCell = (NewEntityIdValue)update.getItemId(); + ReconEntityIdValue newCell = (ReconEntityIdValue)update.getItemId(); update.normalizeLabelsAndAliases(); @@ -248,7 +239,7 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation { 0L); ItemDocument createdDoc = wbde.createItemDocument(itemDocument, _summary); - newItemLibrary.setQid(newCell, createdDoc.getItemId().getId()); + newItemLibrary.setQid(newCell.getReconInternalId(), createdDoc.getItemId().getId()); } else { // Existing item wbde.updateTermsStatements(update.getItemId(), diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/NewEntityIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/NewEntityIdValue.java deleted file mode 100644 index 8f2a2fe69..000000000 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/NewEntityIdValue.java +++ /dev/null @@ -1,63 +0,0 @@ -package org.openrefine.wikidata.schema.entityvalues; - -/** - * A placeholder for the Qid of a new item, which - * also remembers from which reconciled cell it was - * generated. This allows us to make sure that we will - * create only one item per cell marked as "new". - * - * @author antonin - */ -public class NewEntityIdValue extends TermedItemIdValue { - - private final int rowId; - private final int colId; - - /** - * Creates a new entity id corresponding to the - * cell designated by the indices. - * - * @param rowId - * the index of the row for the cell - * @param colId - * the index of the column for the cell - * @param label - * the value of the cell - */ - public NewEntityIdValue(int rowId, int colId, String siteIRI, String label) { - super("Q0", siteIRI, label); - this.rowId = rowId; - this.colId = colId; - } - - public int getRowId() { - return rowId; - } - - public int getColId() { - return colId; - } - - /** - * Equality check is important when we gather - * all ItemUpdates related to an ItemId. - */ - @Override - public boolean equals(Object other) { - if (other == null || - !NewEntityIdValue.class.isInstance(other)) { - return false; - } - final NewEntityIdValue otherNew = (NewEntityIdValue)other; - return (rowId == otherNew.getRowId() && - colId == otherNew.getColId()); - } - - @Override - public int hashCode() { - int hash = 3; - hash = 41*hash + rowId; - hash = 41*hash + colId; - return hash; - } -} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/PrefetchedEntityIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/PrefetchedEntityIdValue.java new file mode 100644 index 000000000..37f9baf37 --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/PrefetchedEntityIdValue.java @@ -0,0 +1,40 @@ +package org.openrefine.wikidata.schema.entityvalues; + +import java.util.List; + +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; + +/** + * An entity id value that also comes with + * a label and possibly types. + * + * The rationale behind this classes is that OpenRefine + * already stores labels and types for the Wikidata items + * it knows about (in the reconciliation data), so it is + * worth keeping this data to avoid re-fetching it when + * we need it. + * + * @author antonin + * + */ +public interface PrefetchedEntityIdValue extends EntityIdValue { + + /** + * This should return the label "as we got it", with no guarantee + * that it is current or that its language matches that of the user. + * In general though, that should be the case if the user always uses + * OpenRefine with the same language settings. + * + * @return the preferred label of the entity + */ + public String getLabel(); + + /** + * Returns a list of types for this item. Again these are the types + * as they were originally fetched from the reconciliation interface: + * they can diverge from what is currently on the item. + * + * Empty lists should be returned for + */ + public List getTypes(); +} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconEntityIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconEntityIdValue.java new file mode 100644 index 000000000..fccc3ed11 --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconEntityIdValue.java @@ -0,0 +1,141 @@ +package org.openrefine.wikidata.schema.entityvalues; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.wikidata.wdtk.datamodel.helpers.Hash; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor; + +import com.google.refine.model.Recon; + +/** + * An EntityIdValue that holds not just the id but also + * the label as fetched by either the reconciliation interface + * or the suggester and its type, both stored as reconciliation + * candidates. + * + * This label will be localized depending on the language chosen + * by the user for OpenRefine's interface. Storing it lets us + * reuse it later on without having to re-fetch it. + * + * Storing the types also lets us perform some constraint checks + * without re-fetching the types of many items. + * @author antonin + * + */ +public abstract class ReconEntityIdValue implements PrefetchedEntityIdValue { + + private Recon _recon; + private String _cellValue; + + public ReconEntityIdValue(Recon match, String cellValue) { + _recon = match; + _cellValue = cellValue; + } + + protected boolean isMatched() { + return Recon.Judgment.Matched.equals(_recon.judgment) && _recon.match != null; + } + + protected boolean isNew() { + return !isMatched(); + } + + public String getLabel() { + if (isMatched()) { + return _recon.match.name; + } else { + return _cellValue; + } + } + + public List getTypes() { + if (isMatched()) { + return Arrays.asList(_recon.match.types); + } else { + return new ArrayList<>(); + } + } + + @Override + public abstract String getEntityType(); + + /** + * Returns the id of the reconciled item + */ + @Override + public String getId() { + if (isMatched()) { + return _recon.match.id; + } else if (ET_ITEM.equals(getEntityType())) { + return "Q0"; + } else if (ET_PROPERTY.equals(getEntityType())) { + return "P0"; + } + return null; + } + + @Override + public String getSiteIri() { + return _recon.schemaSpace; + } + + @Override + public String getIri() { + return getSiteIri() + getId(); + } + + @Override + public T accept(ValueVisitor valueVisitor) { + return valueVisitor.visit(this); + } + + /** + * Equality check is important when we gather + * all ItemUpdates related to an ItemId. + * + * The label is ignored in the equality check. + */ + @Override + public boolean equals(Object other) { + if (other == null || + !EntityIdValue.class.isInstance(other)) { + return false; + } + + if (ReconEntityIdValue.class.isInstance(other)) { + final ReconEntityIdValue reconOther = (ReconEntityIdValue)other; + + if (isNew() != reconOther.isNew()) { + return false; + } else if (isNew()) { + // This ensures compliance with OR's notion of new items + // (it is possible that two cells are reconciled to the same + // new item, in which case they share the same internal recon id). + return getRecon().id == reconOther.getRecon().id; + } + } + + final EntityIdValue otherNew = (EntityIdValue)other; + return getIri().equals(otherNew.getIri()); + } + + public long getReconInternalId() { + return getRecon().id; + } + + @Override + public int hashCode() { + if (isMatched()) { + return Hash.hashCode(this); + } else { + return (int) _recon.id; + } + } + + protected Recon getRecon() { + return _recon; + } +} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconItemIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconItemIdValue.java new file mode 100644 index 000000000..9f0388bad --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconItemIdValue.java @@ -0,0 +1,18 @@ +package org.openrefine.wikidata.schema.entityvalues; + +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; + +import com.google.refine.model.Recon; + +public class ReconItemIdValue extends ReconEntityIdValue implements ItemIdValue { + + public ReconItemIdValue(Recon recon, String cellValue) { + super(recon, cellValue); + } + + @Override + public String getEntityType() { + return ET_ITEM; + } + +} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconPropertyIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconPropertyIdValue.java new file mode 100644 index 000000000..434c1eb45 --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconPropertyIdValue.java @@ -0,0 +1,17 @@ +package org.openrefine.wikidata.schema.entityvalues; + +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; + +import com.google.refine.model.Recon; + +public class ReconPropertyIdValue extends ReconEntityIdValue implements PropertyIdValue { + + public ReconPropertyIdValue(Recon recon, String cellValue) { + super(recon, cellValue); + } + + @Override + public String getEntityType() { + return ET_PROPERTY; + } +} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/TermedEntityIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedEntityIdValue.java similarity index 60% rename from extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/TermedEntityIdValue.java rename to extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedEntityIdValue.java index 131ef7e88..4b06a72e6 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/TermedEntityIdValue.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedEntityIdValue.java @@ -1,49 +1,51 @@ package org.openrefine.wikidata.schema.entityvalues; +import java.util.ArrayList; +import java.util.List; + import org.wikidata.wdtk.datamodel.helpers.Hash; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor; /** - * An EntityIdValue that holds not just the id but also - * the label as fetched by either the reconciliation interface - * or the suggester. + * An EntityIdValue that we have obtained from a suggest widget + * in the schema alignment dialog. * - * This label will be localized depending on the language chosen - * by the user for OpenRefine's interface. Storing it lets us - * reuse it later on without having to re-fetch it. * @author antonin * */ -public abstract class TermedEntityIdValue implements EntityIdValue { +public abstract class SuggestedEntityIdValue implements PrefetchedEntityIdValue { private String _id; private String _siteIRI; private String _label; - - public TermedEntityIdValue(String id, String siteIRI, String label) { + + public SuggestedEntityIdValue(String id, String siteIRI, String label) { _id = id; _siteIRI = siteIRI; _label = label; } - @Override - public abstract String getEntityType(); - @Override public String getId() { return _id; } - + @Override public String getSiteIri() { return _siteIRI; } + @Override public String getLabel() { return _label; } - + + @Override + public List getTypes() { + return new ArrayList<>(); + } + @Override public String getIri() { return getSiteIri() + getId(); @@ -53,23 +55,17 @@ public abstract class TermedEntityIdValue implements EntityIdValue { public T accept(ValueVisitor valueVisitor) { return valueVisitor.visit(this); } - - /** - * Equality check is important when we gather - * all ItemUpdates related to an ItemId. - * - * The label is ignored in the equality check. - */ + @Override public boolean equals(Object other) { if (other == null || - !EntityIdValue.class.isInstance(other)) { - return false; + !EntityIdValue.class.isInstance(other)) { + return false; } final EntityIdValue otherNew = (EntityIdValue)other; return getIri().equals(otherNew.getIri()); } - + @Override public int hashCode() { return Hash.hashCode(this); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/TermedItemIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedItemIdValue.java similarity index 59% rename from extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/TermedItemIdValue.java rename to extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedItemIdValue.java index f11926fd3..d33d60a9f 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/TermedItemIdValue.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedItemIdValue.java @@ -2,9 +2,9 @@ package org.openrefine.wikidata.schema.entityvalues; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; -public class TermedItemIdValue extends TermedEntityIdValue implements ItemIdValue { +public class SuggestedItemIdValue extends SuggestedEntityIdValue implements ItemIdValue { - public TermedItemIdValue(String id, String siteIRI, String label) { + public SuggestedItemIdValue(String id, String siteIRI, String label) { super(id, siteIRI, label); } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/TermedPropertyIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedPropertyIdValue.java similarity index 58% rename from extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/TermedPropertyIdValue.java rename to extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedPropertyIdValue.java index 0fe6d4468..ef072dc3c 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/TermedPropertyIdValue.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedPropertyIdValue.java @@ -2,9 +2,9 @@ package org.openrefine.wikidata.schema.entityvalues; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; -public class TermedPropertyIdValue extends TermedEntityIdValue implements PropertyIdValue { +public class SuggestedPropertyIdValue extends SuggestedEntityIdValue implements PropertyIdValue { - public TermedPropertyIdValue(String id, String siteIRI, String label) { + public SuggestedPropertyIdValue(String id, String siteIRI, String label) { super(id, siteIRI, label); } @@ -12,4 +12,5 @@ public class TermedPropertyIdValue extends TermedEntityIdValue implements Proper public String getEntityType() { return ET_PROPERTY; } + }