FIXED - task 529: Add support for key/value transpose with only two columns as well as repeating key fields in a single record.

http://code.google.com/p/google-refine/issues/detail?id=529

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2574 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Tom Morris 2012-10-05 23:31:25 +00:00
parent ffe674729c
commit ca2e959957

View File

@ -129,17 +129,43 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
List<Row> newRows = new ArrayList<Row>();
List<Row> oldRows = project.rows;
Row reusableRow = null;
List<Row> currentRows = new ArrayList<Row>();
String recordKey = null; // key which indicates the start of a record
if (unchangedColumns.isEmpty()) {
reusableRow = new Row(1);
newRows.add(reusableRow);
currentRows.clear();
currentRows.add(reusableRow);
}
for (int r = 0; r < oldRows.size(); r++) {
Row oldRow = oldRows.get(r);
Object value = oldRow.getCellValue(valueColumn.getCellIndex());
Object key = oldRow.getCellValue(keyColumn.getCellIndex());
if (!ExpressionUtils.isNonBlankData(value) ||
!ExpressionUtils.isNonBlankData(key)) {
continue; // TODO: ignore this row entirely?
if (!ExpressionUtils.isNonBlankData(key)) {
if (unchangedColumns.isEmpty()) {
// For degenerate 2 column case (plus optional note column),
// start a new row when we hit a blank line
reusableRow = new Row(newColumns.size());
newRows.add(reusableRow);
currentRows.clear();
currentRows.add(reusableRow);
} else {
// Copy rows with no key
newRows.add(buildNewRow(unchangedColumns, oldRow, unchangedColumns.size()));
}
continue;
}
String keyString = key.toString();
// Start a new row on our beginning of record key
if (keyString.equals(recordKey)) {
reusableRow = new Row(newColumns.size());
newRows.add(reusableRow);
currentRows.clear();
currentRows.add(reusableRow);
}
Column newColumn = keyValueToColumn.get(keyString);
if (newColumn == null) {
// Allocate new column
@ -148,40 +174,50 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
project.columnModel.getUnduplicatedColumnName(keyString));
keyValueToColumn.put(keyString, newColumn);
newColumns.add(newColumn);
}
StringBuffer sb = new StringBuffer();
for (int c = 0; c < unchangedColumns.size(); c++) {
Column unchangedColumn = unchangedColumns.get(c);
Object cellValue = oldRow.getCellValue(unchangedColumn.getCellIndex());
if (c > 0) {
sb.append('\0');
}
if (cellValue != null) {
sb.append(cellValue.toString());
// We assume first key encountered is the beginning of record key
// TODO: make customizable?
if (recordKey == null) {
recordKey = keyString;
}
}
String unchangedCellValues = sb.toString();
Row reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
if (reusableRow == null ||
reusableRow.getCellValue(valueColumn.getCellIndex()) != null) {
reusableRow = new Row(newColumn.getCellIndex() + 1);
/*
* NOTE: If we have additional columns, we currently merge all rows that
* have identical values in those columns and then add our new columns.
*/
if (unchangedColumns.size() > 0) {
StringBuffer sb = new StringBuffer();
for (int c = 0; c < unchangedColumns.size(); c++) {
Column unchangedColumn = unchangedColumns.get(c);
int cellIndex = unchangedColumn.getCellIndex();
reusableRow.setCell(cellIndex, oldRow.getCell(cellIndex));
Object cellValue = oldRow.getCellValue(unchangedColumn.getCellIndex());
if (c > 0) {
sb.append('\0');
}
if (cellValue != null) {
sb.append(cellValue.toString());
}
}
String unchangedCellValues = sb.toString();
reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
if (reusableRow == null ||
reusableRow.getCellValue(valueColumn.getCellIndex()) != null) {
reusableRow = buildNewRow(unchangedColumns, oldRow, newColumn.getCellIndex() + 1);
groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
newRows.add(reusableRow);
}
groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
newRows.add(reusableRow);
}
reusableRow.setCell(
newColumn.getCellIndex(),
oldRow.getCell(valueColumn.getCellIndex()));
Cell cell = oldRow.getCell(valueColumn.getCellIndex());
if (unchangedColumns.size() == 0) {
int index = newColumn.getCellIndex();
Row row = getAvailableRow(currentRows, newRows, index);
row.setCell(index, cell);
} else {
// TODO: support repeating keys in this mode too
reusableRow.setCell(newColumn.getCellIndex(), cell);
}
if (noteColumn != null) {
Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
@ -210,15 +246,39 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
}
}
unchangedColumns.addAll(newColumns);
unchangedColumns.addAll(newNoteColumns);
List<Column> allColumns = new ArrayList<Column>(unchangedColumns);
allColumns.addAll(newColumns);
allColumns.addAll(newNoteColumns);
return new HistoryEntry(
historyEntryID,
project,
getBriefDescription(null),
this,
new MassRowColumnChange(unchangedColumns, newRows)
new MassRowColumnChange(allColumns, newRows)
);
}
private Row getAvailableRow(List<Row> currentRows, List<Row> newRows, int index) {
for (Row row : currentRows) {
if (row.getCell(index) == null) {
return row;
}
}
// If we couldn't find a row with an empty spot, we'll need a new row
Row row = new Row(index);
newRows.add(row);
currentRows.add(row);
return row;
}
private Row buildNewRow(List<Column> unchangedColumns, Row oldRow, int size) {
Row reusableRow = new Row(size);
for (int c = 0; c < unchangedColumns.size(); c++) {
Column unchangedColumn = unchangedColumns.get(c);
int cellIndex = unchangedColumn.getCellIndex();
reusableRow.setCell(cellIndex, oldRow.getCell(cellIndex));
}
return reusableRow;
}
}