diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs
index 3de14ea66..aac53c648 100644
--- a/.settings/org.eclipse.core.resources.prefs
+++ b/.settings/org.eclipse.core.resources.prefs
@@ -1,3 +1,4 @@
-#Fri Apr 08 10:38:11 EDT 2011
-eclipse.preferences.version=1
-encoding/=UTF-8
+#Fri Oct 28 16:27:56 CEST 2011
+eclipse.preferences.version=1
+encoding//main/src/com/google/refine/clustering/binning/Metaphone3.java=UTF-8
+encoding/=UTF-8
diff --git a/main/src/com/google/refine/commands/cell/KeyValueColumnizeCommand.java b/main/src/com/google/refine/commands/cell/KeyValueColumnizeCommand.java
new file mode 100644
index 000000000..f941e5e8a
--- /dev/null
+++ b/main/src/com/google/refine/commands/cell/KeyValueColumnizeCommand.java
@@ -0,0 +1,84 @@
+/*
+
+Copyright 2011, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+package com.google.refine.commands.cell;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import com.google.refine.commands.Command;
+import com.google.refine.model.AbstractOperation;
+import com.google.refine.model.Project;
+import com.google.refine.operations.cell.KeyValueColumnizeOperation;
+import com.google.refine.process.Process;
+
+/**
+ * Command that builds a {@link KeyValueColumnizeOperation} from the request
+ * parameters "keyColumnName", "valueColumnName" and "noteColumnName" and
+ * creates a process from it for the requested project.
+ */
+public class KeyValueColumnizeCommand extends Command {
+    /**
+     * Creates the operation and hands its process to
+     * performProcessAndRespond. Any exception raised on the way is passed
+     * to respondException.
+     */
+    @Override
+    public void doPost(HttpServletRequest request, HttpServletResponse response)
+            throws ServletException, IOException {
+
+        try {
+            Project project = getProject(request);
+
+            String keyColumnName = request.getParameter("keyColumnName");
+            String valueColumnName = request.getParameter("valueColumnName");
+            String noteColumnName = request.getParameter("noteColumnName");
+            if (noteColumnName != null && noteColumnName.length() == 0) {
+                // An empty parameter is treated as "no note column", so the
+                // operation is not asked to use a column with an empty name.
+                noteColumnName = null;
+            }
+
+            AbstractOperation op = new KeyValueColumnizeOperation(
+                keyColumnName, valueColumnName, noteColumnName);
+
+            Process process = op.createProcess(project, new Properties());
+
+            performProcessAndRespond(request, response, project, process);
+        } catch (Exception e) {
+            respondException(response, e);
+        }
+    }
+}
diff --git a/main/src/com/google/refine/model/ColumnModel.java b/main/src/com/google/refine/model/ColumnModel.java
index 3ff14f5df..1fa3069fb 100644
--- a/main/src/com/google/refine/model/ColumnModel.java
+++ 
b/main/src/com/google/refine/model/ColumnModel.java
@@ -110,14 +110,22 @@ public class ColumnModel implements Jsonizable {
     }
 
     synchronized public void addColumn(int index, Column column, boolean avoidNameCollision) throws ModelException {
-        String baseName = column.getName();
+        String name = column.getName();
 
-        if (_nameToColumn.containsKey(baseName)) {
+        if (_nameToColumn.containsKey(name)) {
             if (!avoidNameCollision) {
                 throw new ModelException("Duplicated column name");
+            } else {
+                name = getUnduplicatedColumnName(name);
+                column.setName(name);
             }
         }
 
+        columns.add(index < 0 ? columns.size() : index, column);
+        _nameToColumn.put(name, column); // so the next call can check
+    }
+
+    synchronized public String getUnduplicatedColumnName(String baseName) {
         String name = baseName;
         int i = 1;
         while (true) {
@@ -128,10 +136,7 @@ public class ColumnModel implements Jsonizable {
                 break;
             }
         }
-
-        column.setName(name);
-        columns.add(index < 0 ? columns.size() : index, column);
-        _nameToColumn.put(name, column); // so the next call can check
+        return name;
     }
 
     synchronized public Column getColumnByName(String name) {
diff --git a/main/src/com/google/refine/operations/cell/KeyValueColumnizeOperation.java b/main/src/com/google/refine/operations/cell/KeyValueColumnizeOperation.java
new file mode 100644
index 000000000..9da4bc1c8
--- /dev/null
+++ b/main/src/com/google/refine/operations/cell/KeyValueColumnizeOperation.java
@@ -0,0 +1,264 @@
+/*
+
+Copyright 2011, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+package com.google.refine.operations.cell;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+import com.google.refine.expr.ExpressionUtils;
+import com.google.refine.history.HistoryEntry;
+import com.google.refine.model.AbstractOperation;
+import com.google.refine.model.Cell;
+import com.google.refine.model.Column;
+import com.google.refine.model.Project;
+import com.google.refine.model.Row;
+import com.google.refine.model.changes.MassRowColumnChange;
+import com.google.refine.operations.OperationRegistry;
+import com.google.refine.util.JSONUtilities;
+
+/**
+ * Operation that turns the distinct values of a key column into new columns
+ * and fills them from a value column, optionally carrying a note column along.
+ * Input rows whose remaining cells are equal are folded into one output row.
+ */
+public class KeyValueColumnizeOperation extends AbstractOperation {
+    final protected String _keyColumnName;
+    final protected String _valueColumnName;
+    final protected String _noteColumnName; // null when no note column is used
+
+    /**
+     * Rebuilds the operation from its JSON form; the note column name is read
+     * with a null default.
+     */
+    static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
+        return new KeyValueColumnizeOperation(
+            obj.getString("keyColumnName"),
+            obj.getString("valueColumnName"),
+            JSONUtilities.getString(obj, "noteColumnName", null)
+        );
+    }
+
+    /**
+     * @param keyColumnName   column whose values become the new column names
+     * @param valueColumnName column whose cells fill the new columns
+     * @param noteColumnName  optional note column; null or empty means none
+     */
+    public KeyValueColumnizeOperation(
+        String keyColumnName,
+        String valueColumnName,
+        String noteColumnName
+    ) {
+        _keyColumnName = keyColumnName;
+        _valueColumnName = valueColumnName;
+        // An empty name is treated as absent so the description never ends
+        // with "with note column " followed by nothing.
+        _noteColumnName = (noteColumnName == null || noteColumnName.length() == 0)
+            ? null : noteColumnName;
+    }
+
+    @Override
+    public void write(JSONWriter writer, Properties options)
+            throws JSONException {
+
+        writer.object();
+        writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
+        // Same text as getBriefDescription, kept in one place.
+        writer.key("description"); writer.value(getBriefDescription(null));
+        writer.key("keyColumnName"); writer.value(_keyColumnName);
+        writer.key("valueColumnName"); writer.value(_valueColumnName);
+        writer.key("noteColumnName"); writer.value(_noteColumnName);
+        writer.endObject();
+    }
+
+    @Override
+    protected String getBriefDescription(Project project) {
+        return "Columnize by key column " +
+            _keyColumnName + " and value column " + _valueColumnName +
+            (_noteColumnName != null ? (" with note column " + _noteColumnName) : "");
+    }
+
+    /**
+     * Builds the new column list and row list and wraps them in a
+     * {@link MassRowColumnChange}. Rows with a blank key or a blank value
+     * are skipped.
+     *
+     * @throws Exception if the key or value column lookup yields no column
+     */
+    @Override
+    protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
+        int keyColumnIndex = project.columnModel.getColumnIndexByName(_keyColumnName);
+        int valueColumnIndex = project.columnModel.getColumnIndexByName(_valueColumnName);
+        int noteColumnIndex = _noteColumnName == null ? -1 :
+            project.columnModel.getColumnIndexByName(_noteColumnName);
+        Column keyColumn = project.columnModel.getColumnByName(_keyColumnName);
+        Column valueColumn = project.columnModel.getColumnByName(_valueColumnName);
+        Column noteColumn = _noteColumnName == null ? null :
+            project.columnModel.getColumnByName(_noteColumnName);
+
+        // Report an unknown column by name instead of failing later with a
+        // NullPointerException when the column is dereferenced.
+        if (keyColumn == null) {
+            throw new Exception("No key column named " + _keyColumnName);
+        }
+        if (valueColumn == null) {
+            throw new Exception("No value column named " + _valueColumnName);
+        }
+
+        List<Column> unchangedColumns = new ArrayList<Column>();
+        List<Column> oldColumns = project.columnModel.columns;
+        for (int i = 0; i < oldColumns.size(); i++) {
+            if (i != keyColumnIndex &&
+                i != valueColumnIndex &&
+                i != noteColumnIndex) {
+                unchangedColumns.add(oldColumns.get(i));
+            }
+        }
+
+        List<Column> newColumns = new ArrayList<Column>();
+        List<Column> newNoteColumns = new ArrayList<Column>();
+        Map<String, Column> keyValueToColumn = new HashMap<String, Column>();
+        Map<String, Column> keyValueToNoteColumn = new HashMap<String, Column>();
+        Map<String, Row> groupByCellValuesToRow = new HashMap<String, Row>();
+
+        List<Row> newRows = new ArrayList<Row>();
+        List<Row> oldRows = project.rows;
+        for (int r = 0; r < oldRows.size(); r++) {
+            Row oldRow = oldRows.get(r);
+
+            Object value = oldRow.getCellValue(valueColumn.getCellIndex());
+            Object key = oldRow.getCellValue(keyColumn.getCellIndex());
+            if (!ExpressionUtils.isNonBlankData(value) ||
+                !ExpressionUtils.isNonBlankData(key)) {
+                continue; // TODO: ignore this row entirely?
+            }
+
+            String keyString = key.toString();
+            Column newColumn = keyValueToColumn.get(keyString);
+            if (newColumn == null) {
+                // Allocate new column
+                newColumn = new Column(
+                    project.columnModel.allocateNewCellIndex(),
+                    project.columnModel.getUnduplicatedColumnName(keyString));
+                keyValueToColumn.put(keyString, newColumn);
+                newColumns.add(newColumn);
+            }
+
+            String unchangedCellValues = groupKey(oldRow, unchangedColumns);
+
+            Row reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
+            if (reusableRow == null ||
+                reusableRow.getCellValue(newColumn.getCellIndex()) != null) {
+                // Either this group has no row yet, or its row already holds a
+                // value for this key: start a new row instead of overwriting.
+                reusableRow = new Row(newColumn.getCellIndex() + 1);
+
+                for (int c = 0; c < unchangedColumns.size(); c++) {
+                    Column unchangedColumn = unchangedColumns.get(c);
+                    int cellIndex = unchangedColumn.getCellIndex();
+
+                    reusableRow.setCell(cellIndex, oldRow.getCell(cellIndex));
+                }
+
+                groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
+                newRows.add(reusableRow);
+            }
+
+            reusableRow.setCell(
+                newColumn.getCellIndex(),
+                oldRow.getCell(valueColumn.getCellIndex()));
+
+            if (noteColumn != null) {
+                Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
+                if (ExpressionUtils.isNonBlankData(noteValue)) {
+                    Column newNoteColumn = keyValueToNoteColumn.get(keyString);
+                    if (newNoteColumn == null) {
+                        // Allocate new column
+                        newNoteColumn = new Column(
+                            project.columnModel.allocateNewCellIndex(),
+                            project.columnModel.getUnduplicatedColumnName(
+                                noteColumn.getName() + " : " + keyString));
+                        keyValueToNoteColumn.put(keyString, newNoteColumn);
+                        newNoteColumns.add(newNoteColumn);
+                    }
+
+                    int newNoteCellIndex = newNoteColumn.getCellIndex();
+                    Object existingNewNoteValue = reusableRow.getCellValue(newNoteCellIndex);
+                    if (ExpressionUtils.isNonBlankData(existingNewNoteValue)) {
+                        Cell concatenatedNoteCell = new Cell(
+                            existingNewNoteValue.toString() + ";" + noteValue.toString(), null);
+                        reusableRow.setCell(newNoteCellIndex, concatenatedNoteCell);
+                    } else {
+                        reusableRow.setCell(newNoteCellIndex, oldRow.getCell(noteColumn.getCellIndex()));
+                    }
+                }
+            }
+        }
+
+        unchangedColumns.addAll(newColumns);
+        unchangedColumns.addAll(newNoteColumns);
+
+        return new HistoryEntry(
+            historyEntryID,
+            project,
+            getBriefDescription(null),
+            this,
+            new MassRowColumnChange(unchangedColumns, newRows)
+        );
+    }
+
+    /**
+     * Joins the string forms of the row's cells in the given columns with a
+     * NUL separator; a null cell value contributes an empty string.
+     */
+    private static String groupKey(Row row, List<Column> columns) {
+        StringBuilder sb = new StringBuilder();
+        for (int c = 0; c < columns.size(); c++) {
+            Object cellValue = row.getCellValue(columns.get(c).getCellIndex());
+            if (c > 0) {
+                sb.append('\0');
+            }
+            if (cellValue != null) {
+                sb.append(cellValue.toString());
+            }
+        }
+        return sb.toString();
+    }
+}
diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js
index f442eb136..0d903744c 100644
--- a/main/webapp/modules/core/MOD-INF/controller.js
+++ b/main/webapp/modules/core/MOD-INF/controller.js
@@ -96,6 +96,7 @@ function registerCommands() {
     RS.registerCommand(module, "blank-down", new 
Packages.com.google.refine.commands.cell.BlankDownCommand()); RS.registerCommand(module, "transpose-columns-into-rows", new Packages.com.google.refine.commands.cell.TransposeColumnsIntoRowsCommand()); RS.registerCommand(module, "transpose-rows-into-columns", new Packages.com.google.refine.commands.cell.TransposeRowsIntoColumnsCommand()); + RS.registerCommand(module, "key-value-columnize", new Packages.com.google.refine.commands.cell.KeyValueColumnizeCommand()); RS.registerCommand(module, "add-column", new Packages.com.google.refine.commands.column.AddColumnCommand()); RS.registerCommand(module, "add-column-by-fetching-urls", new Packages.com.google.refine.commands.column.AddColumnByFetchingURLsCommand()); @@ -150,6 +151,7 @@ function registerOperations() { OR.registerOperation(module, "blank-down", Packages.com.google.refine.operations.cell.BlankDownOperation); OR.registerOperation(module, "transpose-columns-into-rows", Packages.com.google.refine.operations.cell.TransposeColumnsIntoRowsOperation); OR.registerOperation(module, "transpose-rows-into-columns", Packages.com.google.refine.operations.cell.TransposeRowsIntoColumnsOperation); + OR.registerOperation(module, "key-value-columnize", Packages.com.google.refine.operations.cell.KeyValueColumnizeOperation); OR.registerOperation(module, "column-addition", Packages.com.google.refine.operations.column.ColumnAdditionOperation); OR.registerOperation(module, "column-removal", Packages.com.google.refine.operations.column.ColumnRemovalOperation); diff --git a/main/webapp/modules/core/scripts/views/data-table/key-value-columnize.html b/main/webapp/modules/core/scripts/views/data-table/key-value-columnize.html new file mode 100644 index 000000000..2877fc2c5 --- /dev/null +++ b/main/webapp/modules/core/scripts/views/data-table/key-value-columnize.html @@ -0,0 +1,23 @@ +
+
+
Columnize by Key/Value Columns
+
+
+ + + + + + + + + + +
Key Column | Value Column | Note Column (optional)
+
+ +
+
\ No newline at end of file diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js index 9980b8671..8c8348ddf 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js @@ -241,8 +241,6 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { var dialog = $(DOM.loadHTML("core", "scripts/views/data-table/transpose-columns-into-rows.html")); var elmts = DOM.bind(dialog); - elmts.dialogHeader.text('Transpose Cells Across Columns into Rows'); - var level = DialogSystem.showDialog(dialog); var dismiss = function() { DialogSystem.dismissUntil(level - 1); @@ -333,17 +331,86 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { } } }; + + var doKeyValueColumnize = function() { + var dialog = $(DOM.loadHTML("core", "scripts/views/data-table/key-value-columnize.html")); + + var elmts = DOM.bind(dialog); + var level = DialogSystem.showDialog(dialog); + var dismiss = function() { + DialogSystem.dismissUntil(level - 1); + }; + + var columns = theProject.columnModel.columns; + + elmts.cancelButton.click(function() { dismiss(); }); + elmts.okButton.click(function() { + var config = { + keyColumnName: elmts.keyColumnSelect[0].value, + valueColumnName: elmts.valueColumnSelect[0].value, + noteColumnName: elmts.noteColumnSelect[0].value + }; + if (config.keyColumnName == null || + config.valueColumnName == null || + config.keyColumnName == config.valueColumnName) { + alert('Please select one key column and one value column that are different from one another.'); + return; + } + + var noteColumnName = elmts.noteColumnSelect[0].value; + if (noteColumnName != null) { + if (noteColumnName == config.keyColumnName || + noteColumnName == config.valueColumnName) { + alert('If specified, the note column cannot be the same as the key column or the value 
column.'); + return; + } + config.noteColumnName = noteColumnName; + } + + Refine.postCoreProcess( + "key-value-columnize", + config, + null, + { modelsChanged: true } + ); + dismiss(); + }); + + var valueColumnIndex = -1; + for (var i = 0; i < columns.length; i++) { + var column2 = columns[i]; + + var keyOption = $('