Added initial implementation of Key/value Columnize operation and command.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2356 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2011-11-04 21:00:32 +00:00
parent a7c81880a8
commit 2f6b635f66
8 changed files with 407 additions and 14 deletions

View File

@ -1,3 +1,4 @@
#Fri Apr 08 10:38:11 EDT 2011
eclipse.preferences.version=1
encoding/<project>=UTF-8
#Fri Oct 28 16:27:56 CEST 2011
eclipse.preferences.version=1
encoding//main/src/com/google/refine/clustering/binning/Metaphone3.java=UTF-8
encoding/<project>=UTF-8

View File

@ -0,0 +1,71 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.commands.cell;
import java.io.IOException;
import java.util.Properties;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import com.google.refine.commands.Command;
import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Project;
import com.google.refine.operations.cell.KeyValueColumnizeOperation;
import com.google.refine.process.Process;
/**
 * Command that turns a key column / value column pair (plus an optional note
 * column) into separate columns by running a {@link KeyValueColumnizeOperation}
 * on the project named in the request.
 */
public class KeyValueColumnizeCommand extends Command {

    /**
     * Reads the <code>keyColumnName</code>, <code>valueColumnName</code> and
     * optional <code>noteColumnName</code> request parameters, builds the
     * operation from them and performs the resulting process on the project.
     * Any exception raised along the way is reported through
     * <code>respondException</code> instead of being propagated.
     *
     * @param request  the request carrying the project and the column names
     * @param response the response the outcome (or the error) is written to
     */
    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {

        try {
            Project project = getProject(request);

            String keyColumnName = request.getParameter("keyColumnName");
            String valueColumnName = request.getParameter("valueColumnName");
            String noteColumnName = request.getParameter("noteColumnName");

            // A list box with nothing selected submits an empty string, not an
            // absent parameter. The note column is optional, so an empty name
            // means "no note column" rather than a column called "".
            if (noteColumnName != null && noteColumnName.length() == 0) {
                noteColumnName = null;
            }

            AbstractOperation op = new KeyValueColumnizeOperation(
                    keyColumnName, valueColumnName, noteColumnName);

            Process process = op.createProcess(project, new Properties());

            performProcessAndRespond(request, response, project, process);
        } catch (Exception e) {
            respondException(response, e);
        }
    }
}

View File

@ -110,14 +110,22 @@ public class ColumnModel implements Jsonizable {
}
synchronized public void addColumn(int index, Column column, boolean avoidNameCollision) throws ModelException {
String baseName = column.getName();
String name = column.getName();
if (_nameToColumn.containsKey(baseName)) {
if (_nameToColumn.containsKey(name)) {
if (!avoidNameCollision) {
throw new ModelException("Duplicated column name");
} else {
name = getUnduplicatedColumnName(name);
column.setName(name);
}
}
columns.add(index < 0 ? columns.size() : index, column);
_nameToColumn.put(name, column); // so the next call can check
}
synchronized public String getUnduplicatedColumnName(String baseName) {
String name = baseName;
int i = 1;
while (true) {
@ -128,10 +136,7 @@ public class ColumnModel implements Jsonizable {
break;
}
}
column.setName(name);
columns.add(index < 0 ? columns.size() : index, column);
_nameToColumn.put(name, column); // so the next call can check
return name;
}
synchronized public Column getColumnByName(String name) {

View File

@ -0,0 +1,224 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.operations.cell;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.history.HistoryEntry;
import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.model.changes.MassRowColumnChange;
import com.google.refine.operations.OperationRegistry;
import com.google.refine.util.JSONUtilities;
/**
 * Operation that "columnizes" a table holding key/value pairs in two columns.
 * <p>
 * Every distinct key found in the key column becomes a new column, and the
 * matching value cell is moved under it. Input rows that agree on all the
 * other ("unchanged") columns are folded into the same output row. When a
 * note column is given, its non-blank cells are moved into per-key columns
 * named <code>&lt;note column name&gt; : &lt;key&gt;</code>; several notes
 * landing in the same output cell are joined with <code>;</code>.
 */
public class KeyValueColumnizeOperation extends AbstractOperation {
    final protected String _keyColumnName;
    final protected String _valueColumnName;
    /** Name of the optional note column, or null when there is none. */
    final protected String _noteColumnName;

    /**
     * Rebuilds the operation from the JSON produced by
     * {@link #write(JSONWriter, Properties)}.
     *
     * @param project not used by this operation
     * @param obj     JSON object holding "keyColumnName", "valueColumnName"
     *                and optionally "noteColumnName"
     */
    static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
        return new KeyValueColumnizeOperation(
            obj.getString("keyColumnName"),
            obj.getString("valueColumnName"),
            JSONUtilities.getString(obj, "noteColumnName", null)
        );
    }

    /**
     * @param keyColumnName   name of the column whose cell values become column names
     * @param valueColumnName name of the column whose cells are moved under those columns
     * @param noteColumnName  name of the optional note column; null or the
     *                        empty string both mean "no note column"
     */
    public KeyValueColumnizeOperation(
        String keyColumnName,
        String valueColumnName,
        String noteColumnName
    ) {
        _keyColumnName = keyColumnName;
        _valueColumnName = valueColumnName;
        // An unselected list box in the dialog yields "", which must not be
        // mistaken for the name of a real column (it would also leak into the
        // description as "... with note column ").
        _noteColumnName =
            (noteColumnName == null || noteColumnName.length() == 0) ? null : noteColumnName;
    }

    /**
     * Serializes the operation: its registered name, a human-readable
     * description and the three column names.
     */
    @Override
    public void write(JSONWriter writer, Properties options)
            throws JSONException {

        writer.object();
        writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
        writer.key("description"); writer.value(describe());
        writer.key("keyColumnName"); writer.value(_keyColumnName);
        writer.key("valueColumnName"); writer.value(_valueColumnName);
        writer.key("noteColumnName"); writer.value(_noteColumnName);
        writer.endObject();
    }

    @Override
    protected String getBriefDescription(Project project) {
        return describe();
    }

    /** Builds the description shared by the JSON form and the history entry. */
    private String describe() {
        return "Columnize by key column " +
            _keyColumnName + " and value column " + _valueColumnName +
            (_noteColumnName != null ? (" with note column " + _noteColumnName) : "");
    }

    /**
     * Computes the new column list and the new rows and wraps them in a
     * history entry; the project itself is not modified here apart from
     * allocating cell indexes for the new columns.
     *
     * @throws IllegalArgumentException if the key or the value column does not exist
     */
    @Override
    protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
        Column keyColumn = project.columnModel.getColumnByName(_keyColumnName);
        if (keyColumn == null) {
            // Fail with a readable message instead of a NullPointerException below.
            throw new IllegalArgumentException("No key column named " + _keyColumnName);
        }
        Column valueColumn = project.columnModel.getColumnByName(_valueColumnName);
        if (valueColumn == null) {
            throw new IllegalArgumentException("No value column named " + _valueColumnName);
        }
        // An unknown note column name resolves to null here and is then
        // treated exactly like "no note column".
        Column noteColumn = _noteColumnName == null ? null :
            project.columnModel.getColumnByName(_noteColumnName);

        int keyColumnIndex = project.columnModel.getColumnIndexByName(_keyColumnName);
        int valueColumnIndex = project.columnModel.getColumnIndexByName(_valueColumnName);
        int noteColumnIndex = _noteColumnName == null ? -1 :
            project.columnModel.getColumnIndexByName(_noteColumnName);

        // Every column other than key, value and note is carried over as is.
        List<Column> unchangedColumns = new ArrayList<Column>();
        List<Column> oldColumns = project.columnModel.columns;
        for (int i = 0; i < oldColumns.size(); i++) {
            if (i != keyColumnIndex &&
                i != valueColumnIndex &&
                i != noteColumnIndex) {
                unchangedColumns.add(oldColumns.get(i));
            }
        }

        List<Column> newColumns = new ArrayList<Column>();
        List<Column> newNoteColumns = new ArrayList<Column>();
        Map<String, Column> keyValueToColumn = new HashMap<String, Column>();
        Map<String, Column> keyValueToNoteColumn = new HashMap<String, Column>();
        // Latest output row for each combination of unchanged cell values.
        Map<String, Row> groupByCellValuesToRow = new HashMap<String, Row>();

        List<Row> newRows = new ArrayList<Row>();
        List<Row> oldRows = project.rows;
        for (int r = 0; r < oldRows.size(); r++) {
            Row oldRow = oldRows.get(r);

            Object value = oldRow.getCellValue(valueColumn.getCellIndex());
            Object key = oldRow.getCellValue(keyColumn.getCellIndex());
            if (!ExpressionUtils.isNonBlankData(value) ||
                !ExpressionUtils.isNonBlankData(key)) {
                continue; // TODO: ignore this row entirely?
            }

            String keyString = key.toString();
            Column newColumn = keyValueToColumn.get(keyString);
            if (newColumn == null) {
                // First time this key is seen: allocate its column.
                newColumn = new Column(
                    project.columnModel.allocateNewCellIndex(),
                    project.columnModel.getUnduplicatedColumnName(keyString));

                keyValueToColumn.put(keyString, newColumn);
                newColumns.add(newColumn);
            }

            String unchangedCellValues = buildGroupKey(oldRow, unchangedColumns);

            Row reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
            // Start a fresh output row when the group has none yet, or when
            // the current one already holds a value for this key. The check
            // must look at the key's new column: output rows never carry the
            // old value column, so testing that cell would never fire and a
            // repeated key would overwrite the earlier value.
            if (reusableRow == null ||
                reusableRow.getCellValue(newColumn.getCellIndex()) != null) {
                reusableRow = new Row(newColumn.getCellIndex() + 1);

                for (int c = 0; c < unchangedColumns.size(); c++) {
                    int cellIndex = unchangedColumns.get(c).getCellIndex();
                    reusableRow.setCell(cellIndex, oldRow.getCell(cellIndex));
                }

                groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
                newRows.add(reusableRow);
            }

            reusableRow.setCell(
                newColumn.getCellIndex(),
                oldRow.getCell(valueColumn.getCellIndex()));

            if (noteColumn != null) {
                Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
                if (ExpressionUtils.isNonBlankData(noteValue)) {
                    Column newNoteColumn = keyValueToNoteColumn.get(keyString);
                    if (newNoteColumn == null) {
                        // First note for this key: allocate its note column.
                        newNoteColumn = new Column(
                            project.columnModel.allocateNewCellIndex(),
                            project.columnModel.getUnduplicatedColumnName(
                                noteColumn.getName() + " : " + keyString));

                        keyValueToNoteColumn.put(keyString, newNoteColumn);
                        newNoteColumns.add(newNoteColumn);
                    }

                    int newNoteCellIndex = newNoteColumn.getCellIndex();
                    Object existingNewNoteValue = reusableRow.getCellValue(newNoteCellIndex);
                    if (ExpressionUtils.isNonBlankData(existingNewNoteValue)) {
                        // Keep the earlier note and append the new one.
                        Cell concatenatedNoteCell = new Cell(
                            existingNewNoteValue.toString() + ";" + noteValue.toString(), null);
                        reusableRow.setCell(newNoteCellIndex, concatenatedNoteCell);
                    } else {
                        reusableRow.setCell(newNoteCellIndex, oldRow.getCell(noteColumn.getCellIndex()));
                    }
                }
            }
        }

        // Final column order: carried-over columns, key columns, note columns.
        unchangedColumns.addAll(newColumns);
        unchangedColumns.addAll(newNoteColumns);

        return new HistoryEntry(
            historyEntryID,
            project,
            getBriefDescription(null),
            this,
            new MassRowColumnChange(unchangedColumns, newRows)
        );
    }

    /**
     * Builds the string used to group input rows: the text of the row's cells
     * in the given columns, separated by NUL characters. A null cell value
     * contributes an empty string.
     */
    private static String buildGroupKey(Row row, List<Column> columns) {
        StringBuilder sb = new StringBuilder();
        for (int c = 0; c < columns.size(); c++) {
            Object cellValue = row.getCellValue(columns.get(c).getCellIndex());
            if (c > 0) {
                sb.append('\0');
            }
            if (cellValue != null) {
                sb.append(cellValue.toString());
            }
        }
        return sb.toString();
    }
}

View File

@ -96,6 +96,7 @@ function registerCommands() {
RS.registerCommand(module, "blank-down", new Packages.com.google.refine.commands.cell.BlankDownCommand());
RS.registerCommand(module, "transpose-columns-into-rows", new Packages.com.google.refine.commands.cell.TransposeColumnsIntoRowsCommand());
RS.registerCommand(module, "transpose-rows-into-columns", new Packages.com.google.refine.commands.cell.TransposeRowsIntoColumnsCommand());
RS.registerCommand(module, "key-value-columnize", new Packages.com.google.refine.commands.cell.KeyValueColumnizeCommand());
RS.registerCommand(module, "add-column", new Packages.com.google.refine.commands.column.AddColumnCommand());
RS.registerCommand(module, "add-column-by-fetching-urls", new Packages.com.google.refine.commands.column.AddColumnByFetchingURLsCommand());
@ -150,6 +151,7 @@ function registerOperations() {
OR.registerOperation(module, "blank-down", Packages.com.google.refine.operations.cell.BlankDownOperation);
OR.registerOperation(module, "transpose-columns-into-rows", Packages.com.google.refine.operations.cell.TransposeColumnsIntoRowsOperation);
OR.registerOperation(module, "transpose-rows-into-columns", Packages.com.google.refine.operations.cell.TransposeRowsIntoColumnsOperation);
OR.registerOperation(module, "key-value-columnize", Packages.com.google.refine.operations.cell.KeyValueColumnizeOperation);
OR.registerOperation(module, "column-addition", Packages.com.google.refine.operations.column.ColumnAdditionOperation);
OR.registerOperation(module, "column-removal", Packages.com.google.refine.operations.column.ColumnRemovalOperation);

View File

@ -0,0 +1,23 @@
<div class="dialog-frame" style="width: 600px;">
<!--
  Dialog for the "Columnize by Key/Value Columns" command.
  The elements carrying a bind attribute are looked up by name from the
  dialog script (keyColumnSelect, valueColumnSelect, noteColumnSelect,
  okButton, cancelButton). The three list boxes are intentionally empty
  here: the script appends one option per project column to each of them.
  This comment lives inside the root element so that the fragment still
  consists of a single root node.
-->
<div class="dialog-border">
<div class="dialog-header" bind="dialogHeader">Columnize by Key/Value Columns</div>
<div class="dialog-body" bind="dialogBody">
<div class="grid-layout layout-normal layout-full grid-layout-for-ui"><table>
<tr>
<td>Key Column</td>
<td>Value Column</td>
<td>Note Column (optional)</td>
</tr>
<tr>
<td><select bind="keyColumnSelect" size="15" style="width: 100%;"></select></td>
<td><select bind="valueColumnSelect" size="15" style="width: 100%;"></select></td>
<td><select bind="noteColumnSelect" size="15" style="width: 100%;"></select></td>
</tr>
</table></div>
</div>
<div class="dialog-footer" bind="dialogFooter">
<button class="button" bind="okButton">OK</button>
<button class="button" bind="cancelButton">Cancel</button>
</div>
</div>
</div>

View File

@ -241,8 +241,6 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
var dialog = $(DOM.loadHTML("core", "scripts/views/data-table/transpose-columns-into-rows.html"));
var elmts = DOM.bind(dialog);
elmts.dialogHeader.text('Transpose Cells Across Columns into Rows');
var level = DialogSystem.showDialog(dialog);
var dismiss = function() {
DialogSystem.dismissUntil(level - 1);
@ -333,17 +331,86 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
}
}
};
/*
 * Shows the key/value columnize dialog, pre-selecting the column this menu
 * belongs to as the key column and the column right after it as the value
 * column. On OK the choices are validated and posted as the
 * "key-value-columnize" process; the optional note column is only sent when
 * one is actually selected.
 */
var doKeyValueColumnize = function() {
  var dialog = $(DOM.loadHTML("core", "scripts/views/data-table/key-value-columnize.html"));

  var elmts = DOM.bind(dialog);

  var level = DialogSystem.showDialog(dialog);
  var dismiss = function() {
    DialogSystem.dismissUntil(level - 1);
  };

  var columns = theProject.columnModel.columns;

  elmts.cancelButton.click(function() { dismiss(); });
  elmts.okButton.click(function() {
    var config = {
      keyColumnName: elmts.keyColumnSelect[0].value,
      valueColumnName: elmts.valueColumnSelect[0].value
    };
    // A <select> with nothing selected reports "" (never null), so test for
    // emptiness; comparing against null would let a missing choice through.
    if (!config.keyColumnName ||
        !config.valueColumnName ||
        config.keyColumnName == config.valueColumnName) {
      alert('Please select one key column and one value column that are different from one another.');
      return;
    }

    var noteColumnName = elmts.noteColumnSelect[0].value;
    if (noteColumnName) {
      if (noteColumnName == config.keyColumnName ||
          noteColumnName == config.valueColumnName) {
        alert('If specified, the note column cannot be the same as the key column or the value column.');
        return;
      }
      // Only send the parameter when a note column was really chosen, so the
      // server does not receive an empty column name.
      config.noteColumnName = noteColumnName;
    }

    Refine.postCoreProcess(
      "key-value-columnize",
      config,
      null,
      { modelsChanged: true }
    );
    dismiss();
  });

  // Index of the column to pre-select as value column: the one following the
  // column this menu was opened on (stays -1 until that column is reached).
  var valueColumnIndex = -1;
  for (var i = 0; i < columns.length; i++) {
    var column2 = columns[i];

    var keyOption = $('<option>').attr("value", column2.name).text(column2.name).appendTo(elmts.keyColumnSelect);
    if (column2.name == column.name) {
      keyOption.attr("selected", "true");
      valueColumnIndex = i + 1;
    }

    var valueOption = $('<option>').attr("value", column2.name).text(column2.name).appendTo(elmts.valueColumnSelect);
    if (i === valueColumnIndex) {
      valueOption.attr("selected", "true");
    }

    $('<option>').attr("value", column2.name).text(column2.name).appendTo(elmts.noteColumnSelect);
  }
};
MenuSystem.appendTo(menu, [ "core/transpose" ], [
{
id: "core/transpose-columns-into-rows",
label: "Cells across columns into rows...",
label: "Transpose cells across columns into rows...",
click: doTransposeColumnsIntoRows
},
{
id: "core/transpose-rows-into-columns",
label: "Cells in rows into columns...",
label: "Transpose cells in rows into columns...",
click: doTransposeRowsIntoColumns
},
{},
{
id: "core/key-value-columnize",
label: "Columnize by key/value columns...",
click: doKeyValueColumnize
}
]
);

View File

@ -1,6 +1,6 @@
<div class="dialog-frame" style="width: 600px;">
<div class="dialog-border">
<div class="dialog-header" bind="dialogHeader"></div>
<div class="dialog-header" bind="dialogHeader">Transpose Cells Across Columns into Rows</div>
<div class="dialog-body" bind="dialogBody">
<div class="grid-layout layout-normal layout-full grid-layout-for-ui"><table>
<tr>