2010-10-20 22:45:52 +02:00
|
|
|
/*
|
|
|
|
|
|
|
|
Copyright 2010, Google Inc.
|
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions are
|
|
|
|
met:
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above
|
|
|
|
copyright notice, this list of conditions and the following disclaimer
|
|
|
|
in the documentation and/or other materials provided with the
|
|
|
|
distribution.
|
|
|
|
* Neither the name of Google Inc. nor the names of its
|
|
|
|
contributors may be used to endorse or promote products derived from
|
|
|
|
this software without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
2010-09-22 19:04:10 +02:00
|
|
|
package com.google.refine.operations.cell;
|
2010-05-05 01:24:48 +02:00
|
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Properties;
|
|
|
|
|
|
|
|
import org.json.JSONException;
|
|
|
|
import org.json.JSONObject;
|
|
|
|
import org.json.JSONWriter;
|
|
|
|
|
2010-09-22 19:04:10 +02:00
|
|
|
import com.google.refine.expr.ExpressionUtils;
|
|
|
|
import com.google.refine.history.HistoryEntry;
|
|
|
|
import com.google.refine.model.AbstractOperation;
|
|
|
|
import com.google.refine.model.Cell;
|
|
|
|
import com.google.refine.model.Column;
|
|
|
|
import com.google.refine.model.Project;
|
|
|
|
import com.google.refine.model.Row;
|
|
|
|
import com.google.refine.model.changes.MassRowChange;
|
|
|
|
import com.google.refine.operations.OperationRegistry;
|
2010-05-05 01:24:48 +02:00
|
|
|
|
|
|
|
public class MultiValuedCellJoinOperation extends AbstractOperation {
|
|
|
|
final protected String _columnName;
|
|
|
|
final protected String _keyColumnName;
|
|
|
|
final protected String _separator;
|
|
|
|
|
|
|
|
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
|
|
|
return new MultiValuedCellJoinOperation(
|
|
|
|
obj.getString("columnName"),
|
|
|
|
obj.getString("keyColumnName"),
|
|
|
|
obj.getString("separator")
|
|
|
|
);
|
|
|
|
}
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
public MultiValuedCellJoinOperation(
|
|
|
|
String columnName,
|
|
|
|
String keyColumnName,
|
|
|
|
String separator
|
|
|
|
) {
|
|
|
|
_columnName = columnName;
|
|
|
|
_keyColumnName = keyColumnName;
|
|
|
|
_separator = separator;
|
|
|
|
}
|
|
|
|
|
2011-08-02 21:30:23 +02:00
|
|
|
@Override
|
2010-05-05 01:24:48 +02:00
|
|
|
public void write(JSONWriter writer, Properties options)
|
|
|
|
throws JSONException {
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
writer.object();
|
|
|
|
writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
|
|
|
|
writer.key("description"); writer.value(getBriefDescription(null));
|
|
|
|
writer.key("columnName"); writer.value(_columnName);
|
|
|
|
writer.key("keyColumnName"); writer.value(_keyColumnName);
|
|
|
|
writer.key("separator"); writer.value(_separator);
|
|
|
|
writer.endObject();
|
|
|
|
}
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2011-08-02 21:30:23 +02:00
|
|
|
@Override
|
2010-05-05 01:24:48 +02:00
|
|
|
protected String getBriefDescription(Project project) {
|
|
|
|
return "Join multi-valued cells in column " + _columnName;
|
|
|
|
}
|
|
|
|
|
2011-08-02 21:30:23 +02:00
|
|
|
@Override
|
2010-05-05 01:24:48 +02:00
|
|
|
protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
|
|
|
|
Column column = project.columnModel.getColumnByName(_columnName);
|
|
|
|
if (column == null) {
|
|
|
|
throw new Exception("No column named " + _columnName);
|
|
|
|
}
|
|
|
|
int cellIndex = column.getCellIndex();
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
Column keyColumn = project.columnModel.getColumnByName(_keyColumnName);
|
|
|
|
if (keyColumn == null) {
|
|
|
|
throw new Exception("No key column named " + _keyColumnName);
|
|
|
|
}
|
|
|
|
int keyCellIndex = keyColumn.getCellIndex();
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
List<Row> newRows = new ArrayList<Row>();
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
int oldRowCount = project.rows.size();
|
|
|
|
for (int r = 0; r < oldRowCount; r++) {
|
|
|
|
Row oldRow = project.rows.get(r);
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
if (oldRow.isCellBlank(keyCellIndex)) {
|
|
|
|
newRows.add(oldRow.dup());
|
|
|
|
continue;
|
|
|
|
}
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
int r2 = r + 1;
|
|
|
|
while (r2 < oldRowCount && project.rows.get(r2).isCellBlank(keyCellIndex)) {
|
|
|
|
r2++;
|
|
|
|
}
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
if (r2 == r + 1) {
|
|
|
|
newRows.add(oldRow.dup());
|
|
|
|
continue;
|
|
|
|
}
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
StringBuffer sb = new StringBuffer();
|
|
|
|
for (int r3 = r; r3 < r2; r3++) {
|
|
|
|
Object value = project.rows.get(r3).getCellValue(cellIndex);
|
|
|
|
if (ExpressionUtils.isNonBlankData(value)) {
|
|
|
|
if (sb.length() > 0) {
|
|
|
|
sb.append(_separator);
|
|
|
|
}
|
|
|
|
sb.append(value.toString());
|
|
|
|
}
|
|
|
|
}
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
for (int r3 = r; r3 < r2; r3++) {
|
|
|
|
Row newRow = project.rows.get(r3).dup();
|
|
|
|
if (r3 == r) {
|
|
|
|
newRow.setCell(cellIndex, new Cell(sb.toString(), null));
|
|
|
|
} else {
|
|
|
|
newRow.setCell(cellIndex, null);
|
|
|
|
}
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
if (!newRow.isEmpty()) {
|
|
|
|
newRows.add(newRow);
|
|
|
|
}
|
|
|
|
}
|
2010-06-16 14:35:37 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
r = r2 - 1; // r will be incremented by the for loop anyway
|
|
|
|
}
|
2010-06-16 14:35:37 +02:00
|
|
|
|
|
|
|
return new HistoryEntry(
|
2010-05-05 01:24:48 +02:00
|
|
|
historyEntryID,
|
2010-06-16 14:35:37 +02:00
|
|
|
project,
|
|
|
|
getBriefDescription(null),
|
|
|
|
this,
|
2010-05-05 01:24:48 +02:00
|
|
|
new MassRowChange(newRows)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|