Merge pull request #1754 from OpenRefine/issue1596
Directly use column as reconciliation results
This commit is contained in:
commit
d2edffcef4
@ -0,0 +1,31 @@
|
|||||||
|
package com.google.refine.commands.recon;
|
||||||
|
|
||||||
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.refine.browsing.EngineConfig;
|
||||||
|
import com.google.refine.commands.EngineDependentCommand;
|
||||||
|
import com.google.refine.model.AbstractOperation;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.model.ReconCandidate;
|
||||||
|
import com.google.refine.model.recon.StandardReconConfig;
|
||||||
|
import com.google.refine.operations.recon.ReconMatchSpecificTopicOperation;
|
||||||
|
import com.google.refine.operations.recon.ReconUseValuesAsIdentifiersOperation;
|
||||||
|
|
||||||
|
public class ReconUseValuesAsIdentifiersCommand extends EngineDependentCommand {
|
||||||
|
@Override
|
||||||
|
protected AbstractOperation createOperation(Project project,
|
||||||
|
HttpServletRequest request, EngineConfig engineConfig) throws Exception {
|
||||||
|
|
||||||
|
String columnName = request.getParameter("columnName");
|
||||||
|
|
||||||
|
return new ReconUseValuesAsIdentifiersOperation(
|
||||||
|
engineConfig,
|
||||||
|
columnName,
|
||||||
|
request.getParameter("service"),
|
||||||
|
request.getParameter("identifierSpace"),
|
||||||
|
request.getParameter("schemaSpace")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,157 @@
|
|||||||
|
package com.google.refine.operations.recon;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
|
import com.google.refine.browsing.EngineConfig;
|
||||||
|
import com.google.refine.browsing.RowVisitor;
|
||||||
|
import com.google.refine.expr.ExpressionUtils;
|
||||||
|
import com.google.refine.history.Change;
|
||||||
|
import com.google.refine.model.Cell;
|
||||||
|
import com.google.refine.model.Column;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.model.Recon;
|
||||||
|
import com.google.refine.model.Recon.Judgment;
|
||||||
|
import com.google.refine.model.ReconCandidate;
|
||||||
|
import com.google.refine.model.Row;
|
||||||
|
import com.google.refine.model.changes.CellChange;
|
||||||
|
import com.google.refine.model.changes.ReconChange;
|
||||||
|
import com.google.refine.model.recon.StandardReconConfig;
|
||||||
|
import com.google.refine.operations.EngineDependentMassCellOperation;
|
||||||
|
import com.google.refine.operations.OperationRegistry;
|
||||||
|
|
||||||
|
public class ReconUseValuesAsIdentifiersOperation extends EngineDependentMassCellOperation {
|
||||||
|
|
||||||
|
@JsonProperty("identifierSpace")
|
||||||
|
protected String identifierSpace;
|
||||||
|
@JsonProperty("schemaSpace")
|
||||||
|
protected String schemaSpace;
|
||||||
|
@JsonProperty("service")
|
||||||
|
protected String service;
|
||||||
|
|
||||||
|
@JsonIgnore
|
||||||
|
protected StandardReconConfig reconConfig;
|
||||||
|
|
||||||
|
public ReconUseValuesAsIdentifiersOperation(
|
||||||
|
EngineConfig engineConfig,
|
||||||
|
String columnName,
|
||||||
|
String service,
|
||||||
|
String identifierSpace,
|
||||||
|
String schemaSpace) {
|
||||||
|
super(engineConfig, columnName, false);
|
||||||
|
this.service = service;
|
||||||
|
this.identifierSpace = identifierSpace;
|
||||||
|
this.schemaSpace = schemaSpace;
|
||||||
|
this.reconConfig = new StandardReconConfig(service, identifierSpace, schemaSpace, null, null, true, Collections.emptyList());
|
||||||
|
}
|
||||||
|
|
||||||
|
static public ReconUseValuesAsIdentifiersOperation reconstruct(JSONObject obj) throws Exception {
|
||||||
|
JSONObject engineConfig = obj.getJSONObject("engineConfig");
|
||||||
|
return new ReconUseValuesAsIdentifiersOperation(
|
||||||
|
EngineConfig.reconstruct(engineConfig),
|
||||||
|
obj.getString("columnName"),
|
||||||
|
obj.getString("service"),
|
||||||
|
obj.getString("identifierSpace"),
|
||||||
|
obj.getString("schemaSpace")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
writer.object();
|
||||||
|
writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
|
||||||
|
writer.key("description"); writer.value(getBriefDescription(null));
|
||||||
|
writer.key("engineConfig"); getEngineConfig().write(writer, options);
|
||||||
|
writer.key("columnName"); writer.value(_columnName);
|
||||||
|
writer.key("service"); writer.value(service);
|
||||||
|
writer.key("schemaSpace"); writer.value(schemaSpace);
|
||||||
|
writer.key("identifierSpace"); writer.value(identifierSpace);
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getBriefDescription(Project project) {
|
||||||
|
return "Use values as reconciliation identifiers in column " + _columnName;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected RowVisitor createRowVisitor(Project project, List<CellChange> cellChanges, long historyEntryID)
|
||||||
|
throws Exception {
|
||||||
|
Column column = project.columnModel.getColumnByName(_columnName);
|
||||||
|
|
||||||
|
return new RowVisitor() {
|
||||||
|
int cellIndex;
|
||||||
|
List<CellChange> cellChanges;
|
||||||
|
long historyEntryID;
|
||||||
|
|
||||||
|
public RowVisitor init(int cellIndex, List<CellChange> cellChanges, long historyEntryID) {
|
||||||
|
this.cellIndex = cellIndex;
|
||||||
|
this.cellChanges = cellChanges;
|
||||||
|
this.historyEntryID = historyEntryID;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void start(Project project) {
|
||||||
|
// nothing to do
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void end(Project project) {
|
||||||
|
// nothing to do
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean visit(Project project, int rowIndex, Row row) {
|
||||||
|
Cell cell = row.getCell(cellIndex);
|
||||||
|
if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) {
|
||||||
|
String id = cell.value.toString();
|
||||||
|
|
||||||
|
ReconCandidate match = new ReconCandidate(id, id, new String[0], 100);
|
||||||
|
Recon newRecon = reconConfig.createNewRecon(historyEntryID);
|
||||||
|
newRecon.match = match;
|
||||||
|
newRecon.candidates = Collections.singletonList(match);
|
||||||
|
newRecon.matchRank = -1;
|
||||||
|
newRecon.judgment = Judgment.Matched;
|
||||||
|
newRecon.judgmentAction = "mass";
|
||||||
|
newRecon.judgmentBatchSize = 1;
|
||||||
|
|
||||||
|
Cell newCell = new Cell(
|
||||||
|
cell.value,
|
||||||
|
newRecon
|
||||||
|
);
|
||||||
|
|
||||||
|
CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
|
||||||
|
cellChanges.add(cellChange);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}.init(column.getCellIndex(), cellChanges, historyEntryID);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String createDescription(Column column, List<CellChange> cellChanges) {
|
||||||
|
return "Use values as reconciliation identifiers for "+ cellChanges.size() +
|
||||||
|
" cells in column " + column.getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Change createChange(Project project, Column column, List<CellChange> cellChanges) {
|
||||||
|
return new ReconChange(
|
||||||
|
cellChanges,
|
||||||
|
_columnName,
|
||||||
|
reconConfig,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,58 @@
|
|||||||
|
package com.google.refine.tests.operations.recon;
|
||||||
|
|
||||||
|
import static org.testng.Assert.assertEquals;
|
||||||
|
import static org.testng.Assert.assertNull;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.testng.annotations.BeforeSuite;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.model.recon.StandardReconConfig;
|
||||||
|
import com.google.refine.operations.OperationRegistry;
|
||||||
|
import com.google.refine.operations.recon.ReconUseValuesAsIdentifiersOperation;
|
||||||
|
import com.google.refine.tests.RefineTest;
|
||||||
|
import com.google.refine.tests.util.TestUtils;
|
||||||
|
|
||||||
|
|
||||||
|
public class ReconUseValuesAsIdsOperation extends RefineTest {
|
||||||
|
String json = "{"
|
||||||
|
+ "\"op\":\"core/recon-use-values-as-identifiers\","
|
||||||
|
+ "\"description\":\"Use values as reconciliation identifiers in column ids\","
|
||||||
|
+ "\"columnName\":\"ids\","
|
||||||
|
+ "\"engineConfig\":{\"mode\":\"row-based\",\"facets\":[]},"
|
||||||
|
+ "\"service\":\"http://localhost:8080/api\","
|
||||||
|
+ "\"identifierSpace\":\"http://test.org/entities\","
|
||||||
|
+ "\"schemaSpace\":\"http://test.org/schema\""
|
||||||
|
+ "}";
|
||||||
|
|
||||||
|
@BeforeSuite
|
||||||
|
public void registerOperation() {
|
||||||
|
OperationRegistry.registerOperation(getCoreModule(), "recon-use-values-as-identifiers", ReconUseValuesAsIdentifiersOperation.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void serializeReconUseValuesAsIdentifiersOperation() throws JSONException, Exception {
|
||||||
|
TestUtils.isSerializedTo(ReconUseValuesAsIdentifiersOperation.reconstruct(new JSONObject(json)), json);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUseValuesAsIds() throws JSONException, Exception {
|
||||||
|
Project project = createCSVProject("ids,v\n"
|
||||||
|
+ "Q343,hello\n"
|
||||||
|
+ ",world\n"
|
||||||
|
+ "Q31,test");
|
||||||
|
ReconUseValuesAsIdentifiersOperation op = ReconUseValuesAsIdentifiersOperation.reconstruct(new JSONObject(json));
|
||||||
|
op.createProcess(project, new Properties()).performImmediate();
|
||||||
|
|
||||||
|
assertEquals("Q343", project.rows.get(0).cells.get(0).recon.match.id);
|
||||||
|
assertEquals("http://test.org/entities", project.rows.get(0).cells.get(0).recon.identifierSpace);
|
||||||
|
assertNull(project.rows.get(1).cells.get(0));
|
||||||
|
assertEquals("Q31", project.rows.get(2).cells.get(0).recon.match.id);
|
||||||
|
assertEquals(2, project.columnModel.columns.get(0).getReconStats().matchedTopics);
|
||||||
|
assertEquals("http://test.org/schema", ((StandardReconConfig)project.columnModel.columns.get(0).getReconConfig()).schemaSpace);
|
||||||
|
}
|
||||||
|
}
|
@ -127,6 +127,7 @@ function registerCommands() {
|
|||||||
RS.registerCommand(module, "recon-clear-one-cell", new Packages.com.google.refine.commands.recon.ReconClearOneCellCommand());
|
RS.registerCommand(module, "recon-clear-one-cell", new Packages.com.google.refine.commands.recon.ReconClearOneCellCommand());
|
||||||
RS.registerCommand(module, "recon-clear-similar-cells", new Packages.com.google.refine.commands.recon.ReconClearSimilarCellsCommand());
|
RS.registerCommand(module, "recon-clear-similar-cells", new Packages.com.google.refine.commands.recon.ReconClearSimilarCellsCommand());
|
||||||
RS.registerCommand(module, "recon-copy-across-columns", new Packages.com.google.refine.commands.recon.ReconCopyAcrossColumnsCommand());
|
RS.registerCommand(module, "recon-copy-across-columns", new Packages.com.google.refine.commands.recon.ReconCopyAcrossColumnsCommand());
|
||||||
|
RS.registerCommand(module, "recon-use-values-as-identifiers", new Packages.com.google.refine.commands.recon.ReconUseValuesAsIdentifiersCommand());
|
||||||
RS.registerCommand(module, "preview-extend-data", new Packages.com.google.refine.commands.recon.PreviewExtendDataCommand());
|
RS.registerCommand(module, "preview-extend-data", new Packages.com.google.refine.commands.recon.PreviewExtendDataCommand());
|
||||||
RS.registerCommand(module, "extend-data", new Packages.com.google.refine.commands.recon.ExtendDataCommand());
|
RS.registerCommand(module, "extend-data", new Packages.com.google.refine.commands.recon.ExtendDataCommand());
|
||||||
|
|
||||||
@ -190,6 +191,7 @@ function registerOperations() {
|
|||||||
OR.registerOperation(module, "recon-clear-similar-cells", Packages.com.google.refine.operations.recon.ReconClearSimilarCellsOperation);
|
OR.registerOperation(module, "recon-clear-similar-cells", Packages.com.google.refine.operations.recon.ReconClearSimilarCellsOperation);
|
||||||
OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation);
|
OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation);
|
||||||
OR.registerOperation(module, "extend-reconciled-data", Packages.com.google.refine.operations.recon.ExtendDataOperation);
|
OR.registerOperation(module, "extend-reconciled-data", Packages.com.google.refine.operations.recon.ExtendDataOperation);
|
||||||
|
OR.registerOperation(module, "recon-use-values-as-identifiers", Packages.com.google.refine.operations.recon.ReconUseValuesAsIdentifiersOperation);
|
||||||
}
|
}
|
||||||
|
|
||||||
function registerImporting() {
|
function registerImporting() {
|
||||||
|
@ -676,7 +676,11 @@
|
|||||||
"rows": "rows",
|
"rows": "rows",
|
||||||
"records": "records",
|
"records": "records",
|
||||||
"show": "Show",
|
"show": "Show",
|
||||||
"hide": "Hide"
|
"hide": "Hide",
|
||||||
|
"use-values-as-identifiers": "Use values as identifiers",
|
||||||
|
"use-values-as-identifiers2": "Mark cells as reconciled with their values as identifiers",
|
||||||
|
"choose-reconciliation-service": "Choose a reconciliation service",
|
||||||
|
"choose-reconciliation-service-alert": "Please choose a reconciliation service first."
|
||||||
},
|
},
|
||||||
"core-buttons": {
|
"core-buttons": {
|
||||||
"cancel": "Cancel",
|
"cancel": "Cancel",
|
||||||
|
@ -126,6 +126,60 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
|||||||
input.focus().data("suggest").textchange();
|
input.focus().data("suggest").textchange();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Opens a dialog that lets the user pick one of the registered reconciliation
// services, then posts the "recon-use-values-as-identifiers" process for the
// current column using that service's identifier and schema spaces.
var doUseValuesAsIdentifiers = function() {
  var frame = DialogSystem.createDialog();
  frame.width("400px");

  $('<div></div>').addClass("dialog-header").text($.i18n._('core-views')["use-values-as-identifiers"]).appendTo(frame);
  var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
  var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);

  $('<p></p>').text($.i18n._('core-views')["choose-reconciliation-service"]).appendTo(body);
  var select = $('<select></select>').appendTo(body);
  var services = ReconciliationManager.getAllServices();
  for (var i = 0; i < services.length; i++) {
    var service = services[i];
    $('<option></option>').attr('value', service.url)
      .text(service.name)
      .appendTo(select);
  }

  $('<button class="button"></button>').text($.i18n._('core-buttons')["cancel"]).click(function() {
    DialogSystem.dismissUntil(level - 1);
  }).appendTo(footer);

  $('<button class="button"></button>').html($.i18n._('core-buttons')["ok"]).click(function() {
    // Look up the identifier and schema spaces of the selected service by URL.
    var service = select.val();
    var identifierSpace = null;
    var schemaSpace = null;
    for (var i = 0; i < services.length; i++) {
      if (services[i].url === service) {
        identifierSpace = services[i].identifierSpace;
        schemaSpace = services[i].schemaSpace;
      }
    }

    if (identifierSpace === null) {
      // No usable service selected: warn and keep the dialog open so the
      // user can pick a service or cancel, instead of closing it on them.
      alert($.i18n._('core-views')["choose-reconciliation-service-alert"]);
      return;
    }

    Refine.postCoreProcess(
      "recon-use-values-as-identifiers",
      {
        columnName: column.name,
        service: service,
        identifierSpace: identifierSpace,
        schemaSpace: schemaSpace
      },
      null,
      { cellsChanged: true, columnStatsChanged: true }
    );
    DialogSystem.dismissUntil(level - 1);
  }).appendTo(footer);

  // `level` is read only inside the click handlers, which run after this assignment.
  var level = DialogSystem.showDialog(frame);
};
|
||||||
|
|
||||||
|
|
||||||
var doCopyAcrossColumns = function() {
|
var doCopyAcrossColumns = function() {
|
||||||
var frame = $(DOM.loadHTML("core", "scripts/views/data-table/copy-recon-across-columns-dialog.html"));
|
var frame = $(DOM.loadHTML("core", "scripts/views/data-table/copy-recon-across-columns-dialog.html"));
|
||||||
var elmts = DOM.bind(frame);
|
var elmts = DOM.bind(frame);
|
||||||
@ -402,6 +456,12 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
|||||||
label: $.i18n._('core-views')["copy-recon"],
|
label: $.i18n._('core-views')["copy-recon"],
|
||||||
tooltip: $.i18n._('core-views')["copy-recon2"],
|
tooltip: $.i18n._('core-views')["copy-recon2"],
|
||||||
click: doCopyAcrossColumns
|
click: doCopyAcrossColumns
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: "core/use-values-as-identifiers",
|
||||||
|
label: $.i18n._('core-views')["use-values-as-identifiers"],
|
||||||
|
tooltip: $.i18n._('core-views')['use-values-as-identifiers2'],
|
||||||
|
click: doUseValuesAsIdentifiers
|
||||||
}
|
}
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
Loading…
Reference in New Issue
Block a user