FIXED - task 529: Add support for key/value transpose with only two columns as well as repeating key fields in a single record.
Issue: http://code.google.com/p/google-refine/issues/detail?id=529
git-svn-id: http://google-refine.googlecode.com/svn/trunk@2574 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
ffe674729c
commit
ca2e959957
@ -129,17 +129,43 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
|
|||||||
|
|
||||||
List<Row> newRows = new ArrayList<Row>();
|
List<Row> newRows = new ArrayList<Row>();
|
||||||
List<Row> oldRows = project.rows;
|
List<Row> oldRows = project.rows;
|
||||||
|
Row reusableRow = null;
|
||||||
|
List<Row> currentRows = new ArrayList<Row>();
|
||||||
|
String recordKey = null; // key which indicates the start of a record
|
||||||
|
if (unchangedColumns.isEmpty()) {
|
||||||
|
reusableRow = new Row(1);
|
||||||
|
newRows.add(reusableRow);
|
||||||
|
currentRows.clear();
|
||||||
|
currentRows.add(reusableRow);
|
||||||
|
}
|
||||||
|
|
||||||
for (int r = 0; r < oldRows.size(); r++) {
|
for (int r = 0; r < oldRows.size(); r++) {
|
||||||
Row oldRow = oldRows.get(r);
|
Row oldRow = oldRows.get(r);
|
||||||
|
|
||||||
Object value = oldRow.getCellValue(valueColumn.getCellIndex());
|
|
||||||
Object key = oldRow.getCellValue(keyColumn.getCellIndex());
|
Object key = oldRow.getCellValue(keyColumn.getCellIndex());
|
||||||
if (!ExpressionUtils.isNonBlankData(value) ||
|
if (!ExpressionUtils.isNonBlankData(key)) {
|
||||||
!ExpressionUtils.isNonBlankData(key)) {
|
if (unchangedColumns.isEmpty()) {
|
||||||
continue; // TODO: ignore this row entirely?
|
// For degenerate 2 column case (plus optional note column),
|
||||||
|
// start a new row when we hit a blank line
|
||||||
|
reusableRow = new Row(newColumns.size());
|
||||||
|
newRows.add(reusableRow);
|
||||||
|
currentRows.clear();
|
||||||
|
currentRows.add(reusableRow);
|
||||||
|
} else {
|
||||||
|
// Copy rows with no key
|
||||||
|
newRows.add(buildNewRow(unchangedColumns, oldRow, unchangedColumns.size()));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
String keyString = key.toString();
|
String keyString = key.toString();
|
||||||
|
// Start a new row on our beginning of record key
|
||||||
|
if (keyString.equals(recordKey)) {
|
||||||
|
reusableRow = new Row(newColumns.size());
|
||||||
|
newRows.add(reusableRow);
|
||||||
|
currentRows.clear();
|
||||||
|
currentRows.add(reusableRow);
|
||||||
|
}
|
||||||
Column newColumn = keyValueToColumn.get(keyString);
|
Column newColumn = keyValueToColumn.get(keyString);
|
||||||
if (newColumn == null) {
|
if (newColumn == null) {
|
||||||
// Allocate new column
|
// Allocate new column
|
||||||
@ -148,8 +174,19 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
|
|||||||
project.columnModel.getUnduplicatedColumnName(keyString));
|
project.columnModel.getUnduplicatedColumnName(keyString));
|
||||||
keyValueToColumn.put(keyString, newColumn);
|
keyValueToColumn.put(keyString, newColumn);
|
||||||
newColumns.add(newColumn);
|
newColumns.add(newColumn);
|
||||||
|
|
||||||
|
// We assume first key encountered is the beginning of record key
|
||||||
|
// TODO: make customizable?
|
||||||
|
if (recordKey == null) {
|
||||||
|
recordKey = keyString;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* NOTE: If we have additional columns, we currently merge all rows that
|
||||||
|
* have identical values in those columns and then add our new columns.
|
||||||
|
*/
|
||||||
|
if (unchangedColumns.size() > 0) {
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
for (int c = 0; c < unchangedColumns.size(); c++) {
|
for (int c = 0; c < unchangedColumns.size(); c++) {
|
||||||
Column unchangedColumn = unchangedColumns.get(c);
|
Column unchangedColumn = unchangedColumns.get(c);
|
||||||
@ -163,25 +200,24 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
|
|||||||
}
|
}
|
||||||
String unchangedCellValues = sb.toString();
|
String unchangedCellValues = sb.toString();
|
||||||
|
|
||||||
Row reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
|
reusableRow = groupByCellValuesToRow.get(unchangedCellValues);
|
||||||
if (reusableRow == null ||
|
if (reusableRow == null ||
|
||||||
reusableRow.getCellValue(valueColumn.getCellIndex()) != null) {
|
reusableRow.getCellValue(valueColumn.getCellIndex()) != null) {
|
||||||
reusableRow = new Row(newColumn.getCellIndex() + 1);
|
reusableRow = buildNewRow(unchangedColumns, oldRow, newColumn.getCellIndex() + 1);
|
||||||
|
|
||||||
for (int c = 0; c < unchangedColumns.size(); c++) {
|
|
||||||
Column unchangedColumn = unchangedColumns.get(c);
|
|
||||||
int cellIndex = unchangedColumn.getCellIndex();
|
|
||||||
|
|
||||||
reusableRow.setCell(cellIndex, oldRow.getCell(cellIndex));
|
|
||||||
}
|
|
||||||
|
|
||||||
groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
|
groupByCellValuesToRow.put(unchangedCellValues, reusableRow);
|
||||||
newRows.add(reusableRow);
|
newRows.add(reusableRow);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
reusableRow.setCell(
|
Cell cell = oldRow.getCell(valueColumn.getCellIndex());
|
||||||
newColumn.getCellIndex(),
|
if (unchangedColumns.size() == 0) {
|
||||||
oldRow.getCell(valueColumn.getCellIndex()));
|
int index = newColumn.getCellIndex();
|
||||||
|
Row row = getAvailableRow(currentRows, newRows, index);
|
||||||
|
row.setCell(index, cell);
|
||||||
|
} else {
|
||||||
|
// TODO: support repeating keys in this mode too
|
||||||
|
reusableRow.setCell(newColumn.getCellIndex(), cell);
|
||||||
|
}
|
||||||
|
|
||||||
if (noteColumn != null) {
|
if (noteColumn != null) {
|
||||||
Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
|
Object noteValue = oldRow.getCellValue(noteColumn.getCellIndex());
|
||||||
@ -210,15 +246,39 @@ public class KeyValueColumnizeOperation extends AbstractOperation {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unchangedColumns.addAll(newColumns);
|
List<Column> allColumns = new ArrayList<Column>(unchangedColumns);
|
||||||
unchangedColumns.addAll(newNoteColumns);
|
allColumns.addAll(newColumns);
|
||||||
|
allColumns.addAll(newNoteColumns);
|
||||||
|
|
||||||
return new HistoryEntry(
|
return new HistoryEntry(
|
||||||
historyEntryID,
|
historyEntryID,
|
||||||
project,
|
project,
|
||||||
getBriefDescription(null),
|
getBriefDescription(null),
|
||||||
this,
|
this,
|
||||||
new MassRowColumnChange(unchangedColumns, newRows)
|
new MassRowColumnChange(allColumns, newRows)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Finds a row that can still accept a value at the given cell index,
 * creating a fresh row when none of the current ones can.
 *
 * Scans {@code currentRows} in order and returns the first row whose cell
 * at {@code index} is {@code null}. If every row already has a cell there,
 * a new row is constructed, appended to both {@code newRows} and
 * {@code currentRows}, and returned.
 *
 * @param currentRows rows searched for a free slot; a newly created row is
 *                    appended to this list
 * @param newRows     list that also receives any newly created row
 * @param index       cell index that must be empty in the returned row
 * @return an existing row with no cell at {@code index}, or a newly created row
 */
private Row getAvailableRow(List<Row> currentRows, List<Row> newRows, int index) {
|
||||||
|
for (Row row : currentRows) {
|
||||||
|
if (row.getCell(index) == null) { // no cell stored at this index yet, so this row can take the value
|
||||||
|
return row;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If we couldn't find a row with an empty spot, we'll need a new row
|
||||||
|
Row row = new Row(index); // NOTE(review): buildNewRow is called with getCellIndex() + 1 as its size; confirm that passing index (not index + 1) is intended here
|
||||||
|
newRows.add(row);
|
||||||
|
currentRows.add(row); // register the row so later calls can fill its other slots
|
||||||
|
return row;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a new row and fills in the cells of the unchanged columns from
 * an existing row.
 *
 * For each column in {@code unchangedColumns}, the cell that
 * {@code oldRow} holds at that column's cell index is set at the same cell
 * index in the new row. No other cells are set here.
 *
 * @param unchangedColumns columns whose cells are carried over from {@code oldRow}
 * @param oldRow           row the cells are read from
 * @param size             value passed to the {@code Row} constructor
 *                         (presumably the expected cell count; confirm against Row)
 * @return the newly constructed row
 */
private Row buildNewRow(List<Column> unchangedColumns, Row oldRow, int size) {
|
||||||
|
Row reusableRow = new Row(size);
|
||||||
|
for (int c = 0; c < unchangedColumns.size(); c++) {
|
||||||
|
Column unchangedColumn = unchangedColumns.get(c);
|
||||||
|
int cellIndex = unchangedColumn.getCellIndex();
|
||||||
|
reusableRow.setCell(cellIndex, oldRow.getCell(cellIndex)); // same index in both rows; the value from getCell is passed straight through
|
||||||
|
}
|
||||||
|
return reusableRow;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user