FIXED - task 529: Add support for key/value transpose with only two columns as well as repeating key fields in a single record.

http://code.google.com/p/google-refine/issues/detail?id=529

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2574 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Tom Morris 2012-10-05 23:31:25 +00:00
parent ffe674729c
commit ca2e959957

View File

@ -129,17 +129,43 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
List<Row> newRows = new ArrayList<Row>(); List<Row> newRows = new ArrayList<Row>();
List<Row> oldRows = project.rows; List<Row> oldRows = project.rows;
Row reusableRow = null;
List<Row> currentRows = new ArrayList<Row>();
String recordKey = null; // key which indicates the start of a record
if (unchangedColumns.isEmpty()) {
reusableRow = new Row(1);
newRows.add(reusableRow);
currentRows.clear();
currentRows.add(reusableRow);
}
for (int r = 0; r < oldRows.size(); r++) { for (int r = 0; r < oldRows.size(); r++) {
Row oldRow = oldRows.get(r); Row oldRow = oldRows.get(r);
Object value = oldRow.getCellValue(valueColumn.getCellIndex());
Object key = oldRow.getCellValue(keyColumn.getCellIndex()); Object key = oldRow.getCellValue(keyColumn.getCellIndex());
if (!ExpressionUtils.isNonBlankData(value) || if (!ExpressionUtils.isNonBlankData(key)) {
!ExpressionUtils.isNonBlankData(key)) { if (unchangedColumns.isEmpty()) {
continue; // TODO: ignore this row entirely? // For degenerate 2 column case (plus optional note column),
// start a new row when we hit a blank line
reusableRow = new Row(newColumns.size());
newRows.add(reusableRow);
currentRows.clear();
currentRows.add(reusableRow);
} else {
// Copy rows with no key
newRows.add(buildNewRow(unchangedColumns, oldRow, unchangedColumns.size()));
}
continue;
} }
String keyString = key.toString(); String keyString = key.toString();
// Start a new row on our beginning of record key
if (keyString.equals(recordKey)) {
reusableRow = new Row(newColumns.size());
newRows.add(reusableRow);
currentRows.clear();
currentRows.add(reusableRow);
}
Column newColumn = keyValueToColumn.get(keyString); Column newColumn = keyValueToColumn.get(keyString);
if (newColumn == null) { if (newColumn == null) {
// Allocate new column // Allocate new column
@ -148,8 +174,19 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
project.columnModel.getUnduplicatedColumnName(keyString)); project.columnModel.getUnduplicatedColumnName(keyString));
keyValueToColumn.put(keyString, newColumn); keyValueToColumn.put(keyString, newColumn);
newColumns.add(newColumn); newColumns.add(newColumn);
// We assume the first key encountered is the key that marks the beginning of each record
// TODO: make customizable?
if (recordKey == null) {
recordKey = keyString;
}
} }
/*
* NOTE: If we have additional columns, we currently merge all rows that
* have identical values in those columns and then add our new columns.
*/
if (unchangedColumns.size() > 0) {
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
for (int c = 0; c < unchangedColumns.size(); c++) { for (int c = 0; c < unchangedColumns.size(); c++) {
Column unchangedColumn = unchangedColumns.get(c); Column unchangedColumn = unchangedColumns.get(c);
@ -163,25 +200,24 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
} }
String unchangedCellValues = sb.toString(); String unchangedCellValues = sb.toString();
Row reusableRow = groupByCellValuesToRow.get(unchangedCellValues); reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
if (reusableRow == null || if (reusableRow == null ||
reusableRow.getCellValue(valueColumn.getCellIndex()) != null) { reusableRow.getCellValue(valueColumn.getCellIndex()) != null) {
reusableRow = new Row(newColumn.getCellIndex() + 1); reusableRow = buildNewRow(unchangedColumns, oldRow, newColumn.getCellIndex() + 1);
for (int c = 0; c < unchangedColumns.size(); c++) {
Column unchangedColumn = unchangedColumns.get(c);
int cellIndex = unchangedColumn.getCellIndex();
reusableRow.setCell(cellIndex, oldRow.getCell(cellIndex));
}
groupByCellValuesToRow.put(unchangedCellValues, reusableRow); groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
newRows.add(reusableRow); newRows.add(reusableRow);
} }
}
reusableRow.setCell( Cell cell = oldRow.getCell(valueColumn.getCellIndex());
newColumn.getCellIndex(), if (unchangedColumns.size() == 0) {
oldRow.getCell(valueColumn.getCellIndex())); int index = newColumn.getCellIndex();
Row row = getAvailableRow(currentRows, newRows, index);
row.setCell(index, cell);
} else {
// TODO: support repeating keys in this mode too
reusableRow.setCell(newColumn.getCellIndex(), cell);
}
if (noteColumn != null) { if (noteColumn != null) {
Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex()); Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
@ -210,15 +246,39 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
} }
} }
unchangedColumns.addAll(newColumns); List<Column> allColumns = new ArrayList<Column>(unchangedColumns);
unchangedColumns.addAll(newNoteColumns); allColumns.addAll(newColumns);
allColumns.addAll(newNoteColumns);
return new HistoryEntry( return new HistoryEntry(
historyEntryID, historyEntryID,
project, project,
getBriefDescription(null), getBriefDescription(null),
this, this,
new MassRowColumnChange(unchangedColumns, newRows) new MassRowColumnChange(allColumns, newRows)
); );
} }
/**
 * Returns a row that can still accept a value at the given cell index.
 *
 * The rows in {@code currentRows} are checked in order and the first one whose
 * cell at {@code index} is null is returned. If none qualifies, a new row is
 * created, appended to both {@code newRows} and {@code currentRows}, and returned.
 *
 * @param currentRows candidate rows to search; a newly created row is appended here
 * @param newRows     output row list; a newly created row is appended here as well
 * @param index       cell index that must be empty in the returned row
 * @return an existing row with no cell at {@code index}, or a freshly created row
 */
private Row getAvailableRow(List<Row> currentRows, List<Row> newRows, int index) {
    Row available = null;
    for (Row candidate : currentRows) {
        if (candidate.getCell(index) == null) {
            available = candidate;
            break;
        }
    }
    if (available == null) {
        // Every candidate already holds a cell at this index, so a fresh row is needed.
        // NOTE(review): the constructor receives index itself, while the grouped-row path
        // in this file sizes rows with getCellIndex() + 1 -- confirm what the argument means.
        available = new Row(index);
        newRows.add(available);
        currentRows.add(available);
    }
    return available;
}
/**
 * Creates a new row holding the unchanged-column cells of an existing row.
 *
 * For each column in {@code unchangedColumns}, the cell found in {@code oldRow}
 * at that column's cell index is stored at the same cell index in the new row.
 *
 * @param unchangedColumns columns whose cells are carried over
 * @param oldRow           row the cells are read from
 * @param size             value handed to the {@code Row} constructor
 * @return the newly created row
 */
private Row buildNewRow(List<Column> unchangedColumns, Row oldRow, int size) {
    Row copy = new Row(size);
    for (Column column : unchangedColumns) {
        int position = column.getCellIndex();
        copy.setCell(position, oldRow.getCell(position));
    }
    return copy;
}
} }