diff --git a/main/src/com/google/refine/commands/recon/ReconUseValuesAsIdentifiersCommand.java b/main/src/com/google/refine/commands/recon/ReconUseValuesAsIdentifiersCommand.java
new file mode 100644
index 000000000..cece2bddb
--- /dev/null
+++ b/main/src/com/google/refine/commands/recon/ReconUseValuesAsIdentifiersCommand.java
@@ -0,0 +1,44 @@
+package com.google.refine.commands.recon;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.json.JSONObject;
+
+import com.google.refine.browsing.EngineConfig;
+import com.google.refine.commands.EngineDependentCommand;
+import com.google.refine.model.AbstractOperation;
+import com.google.refine.model.Project;
+import com.google.refine.model.ReconCandidate;
+import com.google.refine.model.recon.StandardReconConfig;
+import com.google.refine.operations.recon.ReconMatchSpecificTopicOperation;
+import com.google.refine.operations.recon.ReconUseValuesAsIdentifiersOperation;
+
+/**
+ * Command that turns an HTTP request into a
+ * {@link ReconUseValuesAsIdentifiersOperation}.
+ */
+public class ReconUseValuesAsIdentifiersCommand extends EngineDependentCommand {
+
+    /**
+     * Builds the operation from the {@code columnName}, {@code service},
+     * {@code identifierSpace} and {@code schemaSpace} request parameters.
+     * Parameters are forwarded as-is; a missing parameter reaches the
+     * operation as {@code null}.
+     *
+     * @param project      the current project (not used by this command)
+     * @param request      the request carrying the operation parameters
+     * @param engineConfig the engine configuration handed to the operation
+     * @return the operation to run
+     */
+    @Override
+    protected AbstractOperation createOperation(Project project,
+            HttpServletRequest request, EngineConfig engineConfig) throws Exception {
+        return new ReconUseValuesAsIdentifiersOperation(
+            engineConfig,
+            request.getParameter("columnName"),
+            request.getParameter("service"),
+            request.getParameter("identifierSpace"),
+            request.getParameter("schemaSpace")
+        );
+    }
+}
diff --git a/main/src/com/google/refine/operations/recon/ReconUseValuesAsIdentifiersOperation.java b/main/src/com/google/refine/operations/recon/ReconUseValuesAsIdentifiersOperation.java
new file mode 100644
index 000000000..0e6f65d92
--- /dev/null
+++ b/main/src/com/google/refine/operations/recon/ReconUseValuesAsIdentifiersOperation.java
@@ -0,0 +1,183 @@
+package com.google.refine.operations.recon;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import com.google.refine.browsing.EngineConfig;
+import com.google.refine.browsing.RowVisitor;
+import com.google.refine.expr.ExpressionUtils;
+import com.google.refine.history.Change;
+import com.google.refine.model.Cell;
+import com.google.refine.model.Column;
+import com.google.refine.model.Project;
+import com.google.refine.model.Recon;
+import com.google.refine.model.Recon.Judgment;
+import com.google.refine.model.ReconCandidate;
+import com.google.refine.model.Row;
+import com.google.refine.model.changes.CellChange;
+import com.google.refine.model.changes.ReconChange;
+import com.google.refine.model.recon.StandardReconConfig;
+import com.google.refine.operations.EngineDependentMassCellOperation;
+import com.google.refine.operations.OperationRegistry;
+
+/**
+ * Operation that marks each visited non-blank cell of a column as matched,
+ * using the cell's own value as the identifier of the matched candidate.
+ */
+public class ReconUseValuesAsIdentifiersOperation extends EngineDependentMassCellOperation {
+
+    @JsonProperty("identifierSpace")
+    protected String identifierSpace;
+    @JsonProperty("schemaSpace")
+    protected String schemaSpace;
+    @JsonProperty("service")
+    protected String service;
+
+    /** Reconciliation configuration built from the three settings above. */
+    @JsonIgnore
+    protected StandardReconConfig reconConfig;
+
+    /**
+     * @param engineConfig    engine configuration passed to the superclass
+     * @param columnName      name of the column whose values become identifiers
+     * @param service         reconciliation service recorded in the recon config
+     * @param identifierSpace identifier space recorded in the recon config
+     * @param schemaSpace     schema space recorded in the recon config
+     */
+    public ReconUseValuesAsIdentifiersOperation(
+            EngineConfig engineConfig,
+            String columnName,
+            String service,
+            String identifierSpace,
+            String schemaSpace) {
+        super(engineConfig, columnName, false);
+        this.service = service;
+        this.identifierSpace = identifierSpace;
+        this.schemaSpace = schemaSpace;
+        this.reconConfig = new StandardReconConfig(service, identifierSpace, schemaSpace, null, null, true, Collections.emptyList());
+    }
+
+    /**
+     * Rebuilds the operation from its JSON form.
+     *
+     * @param obj JSON object holding "engineConfig", "columnName", "service",
+     *            "identifierSpace" and "schemaSpace"
+     * @return the reconstructed operation
+     * @throws Exception if a key is missing or the engine configuration cannot be rebuilt
+     */
+    static public ReconUseValuesAsIdentifiersOperation reconstruct(JSONObject obj) throws Exception {
+        JSONObject engineConfig = obj.getJSONObject("engineConfig");
+        return new ReconUseValuesAsIdentifiersOperation(
+            EngineConfig.reconstruct(engineConfig),
+            obj.getString("columnName"),
+            obj.getString("service"),
+            obj.getString("identifierSpace"),
+            obj.getString("schemaSpace")
+        );
+    }
+
+    /** Serializes the operation with the same keys {@link #reconstruct(JSONObject)} reads. */
+    @Override
+    public void write(JSONWriter writer, Properties options)
+            throws JSONException {
+        writer.object();
+        writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
+        writer.key("description"); writer.value(getBriefDescription(null));
+        writer.key("engineConfig"); getEngineConfig().write(writer, options);
+        writer.key("columnName"); writer.value(_columnName);
+        writer.key("service"); writer.value(service);
+        writer.key("schemaSpace"); writer.value(schemaSpace);
+        writer.key("identifierSpace"); writer.value(identifierSpace);
+        writer.endObject();
+    }
+
+    @Override
+    public String getBriefDescription(Project project) {
+        return "Use values as reconciliation identifiers in column " + _columnName;
+    }
+
+    /**
+     * Creates the visitor that records one {@link CellChange} per non-blank
+     * cell of the column, attaching a matched {@link Recon} to the cell.
+     */
+    @Override
+    protected RowVisitor createRowVisitor(Project project, List<CellChange> cellChanges, long historyEntryID)
+            throws Exception {
+        Column column = project.columnModel.getColumnByName(_columnName);
+
+        return new RowVisitor() {
+            int cellIndex;
+            List<CellChange> cellChanges;
+            long historyEntryID;
+
+            public RowVisitor init(int cellIndex, List<CellChange> cellChanges, long historyEntryID) {
+                this.cellIndex = cellIndex;
+                this.cellChanges = cellChanges;
+                this.historyEntryID = historyEntryID;
+                return this;
+            }
+
+            @Override
+            public void start(Project project) {
+                // nothing to do
+            }
+
+            @Override
+            public void end(Project project) {
+                // nothing to do
+            }
+
+            @Override
+            public boolean visit(Project project, int rowIndex, Row row) {
+                Cell cell = row.getCell(cellIndex);
+                if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) {
+                    String id = cell.value.toString();
+
+                    // The cell value is passed as both string arguments of the candidate.
+                    ReconCandidate match = new ReconCandidate(id, id, new String[0], 100);
+                    Recon newRecon = reconConfig.createNewRecon(historyEntryID);
+                    newRecon.match = match;
+                    newRecon.candidates = Collections.singletonList(match);
+                    newRecon.matchRank = -1;
+                    newRecon.judgment = Judgment.Matched;
+                    newRecon.judgmentAction = "mass";
+                    newRecon.judgmentBatchSize = 1;
+
+                    Cell newCell = new Cell(
+                        cell.value,
+                        newRecon
+                    );
+
+                    CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
+                    cellChanges.add(cellChange);
+                }
+                return false;
+            }
+        }.init(column.getCellIndex(), cellChanges, historyEntryID);
+    }
+
+    @Override
+    protected String createDescription(Column column, List<CellChange> cellChanges) {
+        return "Use values as reconciliation identifiers for "+ cellChanges.size() +
+            " cells in column " + column.getName();
+    }
+
+    @Override
+    protected Change createChange(Project project, Column column, List<CellChange> cellChanges) {
+        return new ReconChange(
+            cellChanges,
+            _columnName,
+            reconConfig,
+            null
+        );
+    }
+
+}
diff --git a/main/tests/server/src/com/google/refine/tests/operations/recon/ReconUseValuesAsIdsOperation.java b/main/tests/server/src/com/google/refine/tests/operations/recon/ReconUseValuesAsIdsOperation.java
new file mode 100644
index 000000000..2b11c86a3
--- /dev/null
+++ b/main/tests/server/src/com/google/refine/tests/operations/recon/ReconUseValuesAsIdsOperation.java
@@ -0,0 +1,63 @@
+package com.google.refine.tests.operations.recon;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.testng.annotations.BeforeSuite;
+import org.testng.annotations.Test;
+
+import com.google.refine.model.Project;
+import com.google.refine.model.recon.StandardReconConfig;
+import com.google.refine.operations.OperationRegistry;
+import com.google.refine.operations.recon.ReconUseValuesAsIdentifiersOperation;
+import com.google.refine.tests.RefineTest;
+import com.google.refine.tests.util.TestUtils;
+
+
+/**
+ * Tests serialization of {@link ReconUseValuesAsIdentifiersOperation} and its
+ * effect on a small project.
+ */
+public class ReconUseValuesAsIdsOperation extends RefineTest {
+    String json = "{"
+    + "\"op\":\"core/recon-use-values-as-identifiers\","
+    + "\"description\":\"Use values as reconciliation identifiers in column ids\","
+    + "\"columnName\":\"ids\","
+    + "\"engineConfig\":{\"mode\":\"row-based\",\"facets\":[]},"
+    + "\"service\":\"http://localhost:8080/api\","
+    + "\"identifierSpace\":\"http://test.org/entities\","
+    + "\"schemaSpace\":\"http://test.org/schema\""
+    + "}";
+
+    @BeforeSuite
+    public void registerOperation() {
+        OperationRegistry.registerOperation(getCoreModule(), "recon-use-values-as-identifiers", ReconUseValuesAsIdentifiersOperation.class);
+    }
+
+    @Test
+    public void serializeReconUseValuesAsIdentifiersOperation() throws JSONException, Exception {
+        TestUtils.isSerializedTo(ReconUseValuesAsIdentifiersOperation.reconstruct(new JSONObject(json)), json);
+    }
+
+    @Test
+    public void testUseValuesAsIds() throws JSONException, Exception {
+        Project project = createCSVProject("ids,v\n"
+                + "Q343,hello\n"
+                + ",world\n"
+                + "Q31,test");
+        ReconUseValuesAsIdentifiersOperation op = ReconUseValuesAsIdentifiersOperation.reconstruct(new JSONObject(json));
+        op.createProcess(project, new Properties()).performImmediate();
+
+        // TestNG's assertEquals takes (actual, expected).
+        assertEquals(project.rows.get(0).cells.get(0).recon.match.id, "Q343");
+        assertEquals(project.rows.get(0).cells.get(0).recon.identifierSpace, "http://test.org/entities");
+        assertNull(project.rows.get(1).cells.get(0));
+        assertEquals(project.rows.get(2).cells.get(0).recon.match.id, "Q31");
+        assertEquals(project.columnModel.columns.get(0).getReconStats().matchedTopics, 2);
+        assertEquals(((StandardReconConfig)project.columnModel.columns.get(0).getReconConfig()).schemaSpace, "http://test.org/schema");
+    }
+}
diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js
index 44c58d272..6229bb789 100644
--- a/main/webapp/modules/core/MOD-INF/controller.js
+++ b/main/webapp/modules/core/MOD-INF/controller.js
@@ -127,6 +127,7 @@ function registerCommands() {
   RS.registerCommand(module, "recon-clear-one-cell", new Packages.com.google.refine.commands.recon.ReconClearOneCellCommand());
   RS.registerCommand(module, "recon-clear-similar-cells", new Packages.com.google.refine.commands.recon.ReconClearSimilarCellsCommand());
   RS.registerCommand(module, "recon-copy-across-columns", new Packages.com.google.refine.commands.recon.ReconCopyAcrossColumnsCommand());
+  RS.registerCommand(module, "recon-use-values-as-identifiers", new Packages.com.google.refine.commands.recon.ReconUseValuesAsIdentifiersCommand());
 
   RS.registerCommand(module, "preview-extend-data", new Packages.com.google.refine.commands.recon.PreviewExtendDataCommand());
   RS.registerCommand(module, "extend-data", new Packages.com.google.refine.commands.recon.ExtendDataCommand());
@@ -190,6 +191,7 @@ function registerOperations() {
   OR.registerOperation(module, "recon-clear-similar-cells", Packages.com.google.refine.operations.recon.ReconClearSimilarCellsOperation);
   OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation);
   OR.registerOperation(module, "extend-reconciled-data", Packages.com.google.refine.operations.recon.ExtendDataOperation);
+  OR.registerOperation(module, "recon-use-values-as-identifiers", Packages.com.google.refine.operations.recon.ReconUseValuesAsIdentifiersOperation);
 }
 
 function registerImporting() {
diff --git a/main/webapp/modules/core/langs/translation-en.json b/main/webapp/modules/core/langs/translation-en.json
index bbf06ac57..db82ff4db 100644
--- a/main/webapp/modules/core/langs/translation-en.json
+++ b/main/webapp/modules/core/langs/translation-en.json
@@ -676,7 +676,11 @@
     "rows": "rows",
     "records": "records",
     "show": "Show",
-    "hide": "Hide"
+    "hide": "Hide",
+    "use-values-as-identifiers": "Use values as identifiers",
+    "use-values-as-identifiers2": "Mark cells as reconciled with their values as identifiers",
+    "choose-reconciliation-service": "Choose a reconciliation service",
+    "choose-reconciliation-service-alert": "Please choose a reconciliation service first."
   },
   "core-buttons": {
     "cancel": "Cancel",
diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-reconcile.js b/main/webapp/modules/core/scripts/views/data-table/menu-reconcile.js
index 3c20e1d57..399b6309c 100644
--- a/main/webapp/modules/core/scripts/views/data-table/menu-reconcile.js
+++ b/main/webapp/modules/core/scripts/views/data-table/menu-reconcile.js
@@ -126,6 +126,61 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
     input.focus().data("suggest").textchange();
   };
 
+  // Asks for a reconciliation service, then posts the
+  // "recon-use-values-as-identifiers" process for this column.
+  var doUseValuesAsIdentifiers = function() {
+    var frame = DialogSystem.createDialog();
+    frame.width("400px");
+
+    var header = $('<div></div>').addClass("dialog-header").text($.i18n._('core-views')["use-values-as-identifiers"]).appendTo(frame);
+    var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
+    var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);
+
+    $('<p></p>').text($.i18n._('core-views')["choose-reconciliation-service"]).appendTo(body);
+    var select = $('<select></select>').appendTo(body);
+    var services = ReconciliationManager.getAllServices();
+    for (var i = 0; i < services.length; i++) {
+      var service = services[i];
+      $('<option></option>').attr('value', service.url)
+        .text(service.name)
+        .appendTo(select);
+    }
+
+    $('<button class="button"></button>').text($.i18n._('core-buttons')["cancel"]).click(function() {
+      DialogSystem.dismissUntil(level - 1);
+    }).appendTo(footer);
+    $('<button class="button"></button>').html($.i18n._('core-buttons')["ok"]).click(function() {
+      var service = select.val();
+      var identifierSpace = null;
+      var schemaSpace = null;
+      for (var i = 0; i < services.length; i++) {
+        if (services[i].url === service) {
+          identifierSpace = services[i].identifierSpace;
+          schemaSpace = services[i].schemaSpace;
+        }
+      }
+      if (identifierSpace === null) {
+        // Keep the dialog open so the user can pick a service or cancel.
+        alert($.i18n._('core-views')["choose-reconciliation-service-alert"]);
+        return;
+      }
+      Refine.postCoreProcess(
+        "recon-use-values-as-identifiers",
+        {
+          columnName: column.name,
+          service: service,
+          identifierSpace: identifierSpace,
+          schemaSpace: schemaSpace
+        },
+        null,
+        { cellsChanged: true, columnStatsChanged: true }
+      );
+      DialogSystem.dismissUntil(level - 1);
+    }).appendTo(footer);
+
+    var level = DialogSystem.showDialog(frame);
+  };
+
   var doCopyAcrossColumns = function() {
     var frame = $(DOM.loadHTML("core", "scripts/views/data-table/copy-recon-across-columns-dialog.html"));
     var elmts = DOM.bind(frame);
@@ -402,6 +457,12 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
       label: $.i18n._('core-views')["copy-recon"],
       tooltip: $.i18n._('core-views')["copy-recon2"],
       click: doCopyAcrossColumns
+    },
+    {
+      id: "core/use-values-as-identifiers",
+      label: $.i18n._('core-views')["use-values-as-identifiers"],
+      tooltip: $.i18n._('core-views')['use-values-as-identifiers2'],
+      click: doUseValuesAsIdentifiers
     }
   ]);
 });