Implemented column split command. It seems to be working in "by lengths" mode.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@510 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
d0df704d8a
commit
5ba67b7b26
@ -28,6 +28,7 @@ import com.metaweb.gridworks.commands.edit.RemoveColumnCommand;
|
|||||||
import com.metaweb.gridworks.commands.edit.RemoveRowsCommand;
|
import com.metaweb.gridworks.commands.edit.RemoveRowsCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.RenameColumnCommand;
|
import com.metaweb.gridworks.commands.edit.RenameColumnCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.SaveProtographCommand;
|
import com.metaweb.gridworks.commands.edit.SaveProtographCommand;
|
||||||
|
import com.metaweb.gridworks.commands.edit.SplitColumnCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.SplitMultiValueCellsCommand;
|
import com.metaweb.gridworks.commands.edit.SplitMultiValueCellsCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.TextTransformCommand;
|
import com.metaweb.gridworks.commands.edit.TextTransformCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.UndoRedoCommand;
|
import com.metaweb.gridworks.commands.edit.UndoRedoCommand;
|
||||||
@ -103,6 +104,7 @@ public class GridworksServlet extends HttpServlet {
|
|||||||
_commands.put("add-column", new AddColumnCommand());
|
_commands.put("add-column", new AddColumnCommand());
|
||||||
_commands.put("remove-column", new RemoveColumnCommand());
|
_commands.put("remove-column", new RemoveColumnCommand());
|
||||||
_commands.put("rename-column", new RenameColumnCommand());
|
_commands.put("rename-column", new RenameColumnCommand());
|
||||||
|
_commands.put("split-column", new SplitColumnCommand());
|
||||||
_commands.put("extend-data", new ExtendDataCommand());
|
_commands.put("extend-data", new ExtendDataCommand());
|
||||||
|
|
||||||
_commands.put("reconcile", new ReconcileCommand());
|
_commands.put("reconcile", new ReconcileCommand());
|
||||||
|
@ -0,0 +1,52 @@
|
|||||||
|
package com.metaweb.gridworks.commands.edit;
|
||||||
|
|
||||||
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.commands.EngineDependentCommand;
|
||||||
|
import com.metaweb.gridworks.model.AbstractOperation;
|
||||||
|
import com.metaweb.gridworks.model.Project;
|
||||||
|
import com.metaweb.gridworks.operations.ColumnSplitOperation;
|
||||||
|
import com.metaweb.gridworks.util.ParsingUtilities;
|
||||||
|
|
||||||
|
public class SplitColumnCommand extends EngineDependentCommand {
|
||||||
|
@Override
|
||||||
|
protected AbstractOperation createOperation(Project project,
|
||||||
|
HttpServletRequest request, JSONObject engineConfig) throws Exception {
|
||||||
|
|
||||||
|
String columnName = request.getParameter("columnName");
|
||||||
|
boolean guessCellType = Boolean.parseBoolean(request.getParameter("guessCellType"));
|
||||||
|
boolean removeOriginalColumn = Boolean.parseBoolean(request.getParameter("removeOriginalColumn"));
|
||||||
|
String mode = request.getParameter("mode");
|
||||||
|
if ("separator".equals(mode)) {
|
||||||
|
return new ColumnSplitOperation(
|
||||||
|
engineConfig,
|
||||||
|
columnName,
|
||||||
|
guessCellType,
|
||||||
|
removeOriginalColumn,
|
||||||
|
request.getParameter("separator"),
|
||||||
|
Boolean.parseBoolean(request.getParameter("regex")),
|
||||||
|
Integer.parseInt(request.getParameter("maxColumns"))
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
String s = request.getParameter("fieldLengths");
|
||||||
|
|
||||||
|
JSONArray a = ParsingUtilities.evaluateJsonStringToArray(s);
|
||||||
|
int[] fieldLengths = new int[a.length()];
|
||||||
|
|
||||||
|
for (int i = 0; i < fieldLengths.length; i++) {
|
||||||
|
fieldLengths[i] = a.getInt(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ColumnSplitOperation(
|
||||||
|
engineConfig,
|
||||||
|
columnName,
|
||||||
|
guessCellType,
|
||||||
|
removeOriginalColumn,
|
||||||
|
fieldLengths
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -10,7 +10,6 @@ import com.metaweb.gridworks.gel.ControlFunctionRegistry;
|
|||||||
import com.metaweb.gridworks.gel.Function;
|
import com.metaweb.gridworks.gel.Function;
|
||||||
|
|
||||||
public class SplitByLengths implements Function {
|
public class SplitByLengths implements Function {
|
||||||
|
|
||||||
public Object call(Properties bindings, Object[] args) {
|
public Object call(Properties bindings, Object[] args) {
|
||||||
if (args.length >= 2 && args[0] != null) {
|
if (args.length >= 2 && args[0] != null) {
|
||||||
Object o = args[0];
|
Object o = args[0];
|
||||||
|
@ -81,6 +81,16 @@ public class ColumnModel implements Jsonizable {
|
|||||||
return _nameToColumn.get(name);
|
return _nameToColumn.get(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getColumnIndexByName(String name) {
|
||||||
|
for (int i = 0; i < _columnNames.size(); i++) {
|
||||||
|
String s = _columnNames.get(i);
|
||||||
|
if (name.equals(s)) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
public Column getColumnByCellIndex(int cellIndex) {
|
public Column getColumnByCellIndex(int cellIndex) {
|
||||||
return _cellIndexToColumn.get(cellIndex);
|
return _cellIndexToColumn.get(cellIndex);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,334 @@
|
|||||||
|
package com.metaweb.gridworks.model.changes;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.LineNumberReader;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.io.Writer;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.json.JSONTokener;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.history.Change;
|
||||||
|
import com.metaweb.gridworks.model.Cell;
|
||||||
|
import com.metaweb.gridworks.model.Column;
|
||||||
|
import com.metaweb.gridworks.model.Project;
|
||||||
|
import com.metaweb.gridworks.model.Recon;
|
||||||
|
import com.metaweb.gridworks.model.Row;
|
||||||
|
|
||||||
|
public class ColumnSplitChange implements Change {
|
||||||
|
final protected String _columnName;
|
||||||
|
|
||||||
|
final protected List<String> _columnNames;
|
||||||
|
final protected List<Integer> _rowIndices;
|
||||||
|
final protected List<List<Serializable>> _tuples;
|
||||||
|
|
||||||
|
final protected boolean _removeOriginalColumn;
|
||||||
|
|
||||||
|
protected Column _column;
|
||||||
|
protected int _columnIndex;
|
||||||
|
|
||||||
|
protected int _firstNewCellIndex = -1;
|
||||||
|
protected List<Row> _oldRows;
|
||||||
|
protected List<Row> _newRows;
|
||||||
|
|
||||||
|
public ColumnSplitChange(
|
||||||
|
String columnName,
|
||||||
|
List<String> columnNames,
|
||||||
|
List<Integer> rowIndices,
|
||||||
|
List<List<Serializable>> tuples,
|
||||||
|
boolean removeOriginalColumn
|
||||||
|
) {
|
||||||
|
_columnName = columnName;
|
||||||
|
|
||||||
|
_columnNames = columnNames;
|
||||||
|
_rowIndices = rowIndices;
|
||||||
|
_tuples = tuples;
|
||||||
|
|
||||||
|
_removeOriginalColumn = removeOriginalColumn;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected ColumnSplitChange(
|
||||||
|
String columnName,
|
||||||
|
List<String> columnNames,
|
||||||
|
List<Integer> rowIndices,
|
||||||
|
List<List<Serializable>> tuples,
|
||||||
|
boolean removeOriginalColumn,
|
||||||
|
|
||||||
|
Column column,
|
||||||
|
int columnIndex,
|
||||||
|
|
||||||
|
int firstNewCellIndex,
|
||||||
|
List<Row> oldRows,
|
||||||
|
List<Row> newRows
|
||||||
|
) {
|
||||||
|
_columnName = columnName;
|
||||||
|
|
||||||
|
_columnNames = columnNames;
|
||||||
|
_rowIndices = rowIndices;
|
||||||
|
_tuples = tuples;
|
||||||
|
|
||||||
|
_removeOriginalColumn = removeOriginalColumn;
|
||||||
|
|
||||||
|
_column = column;
|
||||||
|
_columnIndex = columnIndex;
|
||||||
|
|
||||||
|
_firstNewCellIndex = firstNewCellIndex;
|
||||||
|
_oldRows = oldRows;
|
||||||
|
_newRows = newRows;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void apply(Project project) {
|
||||||
|
synchronized (project) {
|
||||||
|
if (_firstNewCellIndex < 0) {
|
||||||
|
_firstNewCellIndex = project.columnModel.allocateNewCellIndex();
|
||||||
|
for (int i = 1; i < _columnNames.size(); i++) {
|
||||||
|
project.columnModel.allocateNewCellIndex();
|
||||||
|
}
|
||||||
|
|
||||||
|
_column = project.columnModel.getColumnByName(_columnName);
|
||||||
|
_columnIndex = project.columnModel.getColumnIndexByName(_columnName);
|
||||||
|
|
||||||
|
_oldRows = new ArrayList<Row>(_rowIndices.size());
|
||||||
|
_newRows = new ArrayList<Row>(_rowIndices.size());
|
||||||
|
|
||||||
|
int cellIndex = _column.getCellIndex();
|
||||||
|
|
||||||
|
for (int i = 0; i < _rowIndices.size(); i++) {
|
||||||
|
int r = _rowIndices.get(i);
|
||||||
|
List<Serializable> tuple = _tuples.get(i);
|
||||||
|
|
||||||
|
Row oldRow = project.rows.get(r);
|
||||||
|
Row newRow = oldRow.dup();
|
||||||
|
|
||||||
|
_oldRows.add(oldRow);
|
||||||
|
_newRows.add(newRow);
|
||||||
|
|
||||||
|
for (int c = 0; c < tuple.size(); c++) {
|
||||||
|
Serializable value = tuple.get(c);
|
||||||
|
if (value != null) {
|
||||||
|
newRow.setCell(_firstNewCellIndex + c, new Cell(value, null));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_removeOriginalColumn) {
|
||||||
|
newRow.setCell(cellIndex, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < _rowIndices.size(); i++) {
|
||||||
|
int r = _rowIndices.get(i);
|
||||||
|
Row newRow = _newRows.get(i);
|
||||||
|
|
||||||
|
project.rows.set(r, newRow);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < _columnNames.size(); i++) {
|
||||||
|
String name = _columnNames.get(i);
|
||||||
|
int cellIndex = _firstNewCellIndex + i;
|
||||||
|
|
||||||
|
Column column = new Column(cellIndex, name);
|
||||||
|
|
||||||
|
project.columnModel.columns.add(_columnIndex + 1 + i, column);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_removeOriginalColumn) {
|
||||||
|
project.columnModel.columns.remove(_columnIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
project.columnModel.update();
|
||||||
|
project.recomputeRowContextDependencies();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void revert(Project project) {
|
||||||
|
synchronized (project) {
|
||||||
|
for (int i = 0; i < _rowIndices.size(); i++) {
|
||||||
|
int r = _rowIndices.get(i);
|
||||||
|
Row oldRow = _oldRows.get(i);
|
||||||
|
|
||||||
|
project.rows.set(r, oldRow);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_removeOriginalColumn) {
|
||||||
|
project.columnModel.columns.add(_columnIndex, _column);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < _columnNames.size(); i++) {
|
||||||
|
project.columnModel.columns.remove(_columnIndex + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
project.columnModel.update();
|
||||||
|
project.recomputeRowContextDependencies();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void save(Writer writer, Properties options) throws IOException {
|
||||||
|
writer.write("columnName="); writer.write(_columnName); writer.write('\n');
|
||||||
|
|
||||||
|
writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n');
|
||||||
|
for (String name : _columnNames) {
|
||||||
|
writer.write(name); writer.write('\n');
|
||||||
|
}
|
||||||
|
writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n');
|
||||||
|
for (Integer rowIndex : _rowIndices) {
|
||||||
|
writer.write(rowIndex.toString()); writer.write('\n');
|
||||||
|
}
|
||||||
|
writer.write("tupleCount="); writer.write(Integer.toString(_tuples.size())); writer.write('\n');
|
||||||
|
for (List<Serializable> tuple : _tuples) {
|
||||||
|
writer.write(Integer.toString(tuple.size())); writer.write('\n');
|
||||||
|
|
||||||
|
for (Serializable value : tuple) {
|
||||||
|
if (value == null) {
|
||||||
|
writer.write("null");
|
||||||
|
} else if (value instanceof String) {
|
||||||
|
writer.write(JSONObject.quote((String) value));
|
||||||
|
} else {
|
||||||
|
writer.write(value.toString());
|
||||||
|
}
|
||||||
|
writer.write('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writer.write("removeOriginalColumn="); writer.write(Boolean.toString(_removeOriginalColumn)); writer.write('\n');
|
||||||
|
|
||||||
|
writer.write("column="); _column.save(writer); writer.write('\n');
|
||||||
|
writer.write("columnIndex="); writer.write(Integer.toString(_columnIndex)); writer.write('\n');
|
||||||
|
|
||||||
|
writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n');
|
||||||
|
|
||||||
|
writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n');
|
||||||
|
for (Row row : _newRows) {
|
||||||
|
row.save(writer, options);
|
||||||
|
writer.write('\n');
|
||||||
|
}
|
||||||
|
writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n');
|
||||||
|
for (Row row : _oldRows) {
|
||||||
|
row.save(writer, options);
|
||||||
|
writer.write('\n');
|
||||||
|
}
|
||||||
|
writer.write("/ec/\n"); // end of change marker
|
||||||
|
}
|
||||||
|
|
||||||
|
static public Change load(LineNumberReader reader) throws Exception {
|
||||||
|
String columnName = null;
|
||||||
|
List<String> columnNames = null;
|
||||||
|
List<Integer> rowIndices = null;
|
||||||
|
List<List<Serializable>> tuples = null;
|
||||||
|
boolean removeOriginalColumn = false;
|
||||||
|
|
||||||
|
Column column = null;
|
||||||
|
int columnIndex = -1;
|
||||||
|
|
||||||
|
int firstNewCellIndex = -1;
|
||||||
|
List<Row> oldRows = null;
|
||||||
|
List<Row> newRows = null;
|
||||||
|
|
||||||
|
Map<Long, Recon> reconCache = new HashMap<Long, Recon>();
|
||||||
|
String line;
|
||||||
|
while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
|
||||||
|
int equal = line.indexOf('=');
|
||||||
|
CharSequence field = line.subSequence(0, equal);
|
||||||
|
String value = line.substring(equal + 1);
|
||||||
|
|
||||||
|
if ("columnName".equals(field)) {
|
||||||
|
columnName = value;
|
||||||
|
} else if ("columnNameCount".equals(field)) {
|
||||||
|
int count = Integer.parseInt(value);
|
||||||
|
|
||||||
|
columnNames = new ArrayList<String>(count);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
line = reader.readLine();
|
||||||
|
if (line != null) {
|
||||||
|
columnNames.add(line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if ("rowIndexCount".equals(field)) {
|
||||||
|
int count = Integer.parseInt(value);
|
||||||
|
|
||||||
|
rowIndices = new ArrayList<Integer>(count);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
line = reader.readLine();
|
||||||
|
if (line != null) {
|
||||||
|
rowIndices.add(Integer.parseInt(line));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if ("tupleCount".equals(field)) {
|
||||||
|
int count = Integer.parseInt(value);
|
||||||
|
|
||||||
|
tuples = new ArrayList<List<Serializable>>(count);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
line = reader.readLine();
|
||||||
|
|
||||||
|
if (line == null) continue;
|
||||||
|
|
||||||
|
int valueCount = Integer.parseInt(line);
|
||||||
|
|
||||||
|
List<Serializable> tuple = new ArrayList<Serializable>(valueCount);
|
||||||
|
for (int r = 0; r < valueCount; r++) {
|
||||||
|
line = reader.readLine();
|
||||||
|
|
||||||
|
JSONTokener t = new JSONTokener(line);
|
||||||
|
Object o = t.nextValue();
|
||||||
|
|
||||||
|
tuple.add((o != JSONObject.NULL) ? (Serializable) o : null);
|
||||||
|
}
|
||||||
|
|
||||||
|
tuples.add(tuple);
|
||||||
|
}
|
||||||
|
} else if ("removeOriginalColumn".equals(field)) {
|
||||||
|
removeOriginalColumn = Boolean.parseBoolean(value);
|
||||||
|
|
||||||
|
} else if ("column".equals(field)) {
|
||||||
|
column = Column.load(value);
|
||||||
|
} else if ("columnIndex".equals(field)) {
|
||||||
|
columnIndex = Integer.parseInt(value);
|
||||||
|
} else if ("firstNewCellIndex".equals(field)) {
|
||||||
|
firstNewCellIndex = Integer.parseInt(value);
|
||||||
|
} else if ("oldRowCount".equals(field)) {
|
||||||
|
int count = Integer.parseInt(value);
|
||||||
|
|
||||||
|
oldRows = new ArrayList<Row>(count);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
line = reader.readLine();
|
||||||
|
if (line != null) {
|
||||||
|
oldRows.add(Row.load(line, reconCache));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if ("newRowCount".equals(field)) {
|
||||||
|
int count = Integer.parseInt(value);
|
||||||
|
|
||||||
|
newRows = new ArrayList<Row>(count);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
line = reader.readLine();
|
||||||
|
if (line != null) {
|
||||||
|
newRows.add(Row.load(line, reconCache));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
ColumnSplitChange change = new ColumnSplitChange(
|
||||||
|
columnName,
|
||||||
|
columnNames,
|
||||||
|
rowIndices,
|
||||||
|
tuples,
|
||||||
|
removeOriginalColumn,
|
||||||
|
|
||||||
|
column,
|
||||||
|
columnIndex,
|
||||||
|
|
||||||
|
firstNewCellIndex,
|
||||||
|
oldRows,
|
||||||
|
newRows
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
return change;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,281 @@
|
|||||||
|
package com.metaweb.gridworks.operations;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.browsing.Engine;
|
||||||
|
import com.metaweb.gridworks.browsing.FilteredRows;
|
||||||
|
import com.metaweb.gridworks.browsing.RowVisitor;
|
||||||
|
import com.metaweb.gridworks.expr.ExpressionUtils;
|
||||||
|
import com.metaweb.gridworks.history.Change;
|
||||||
|
import com.metaweb.gridworks.history.HistoryEntry;
|
||||||
|
import com.metaweb.gridworks.importers.ImporterUtilities;
|
||||||
|
import com.metaweb.gridworks.model.AbstractOperation;
|
||||||
|
import com.metaweb.gridworks.model.Column;
|
||||||
|
import com.metaweb.gridworks.model.Project;
|
||||||
|
import com.metaweb.gridworks.model.Row;
|
||||||
|
import com.metaweb.gridworks.model.changes.ColumnSplitChange;
|
||||||
|
import com.metaweb.gridworks.util.JSONUtilities;
|
||||||
|
|
||||||
|
public class ColumnSplitOperation extends EngineDependentOperation {
|
||||||
|
final protected String _columnName;
|
||||||
|
final protected boolean _guessCellType;
|
||||||
|
final protected boolean _removeOriginalColumn;
|
||||||
|
final protected String _mode;
|
||||||
|
|
||||||
|
final protected String _separator;
|
||||||
|
final protected boolean _regex;
|
||||||
|
final protected int _maxColumns;
|
||||||
|
|
||||||
|
final protected int[] _fieldLengths;
|
||||||
|
|
||||||
|
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
||||||
|
JSONObject engineConfig = obj.getJSONObject("engineConfig");
|
||||||
|
String mode = obj.getString("mode");
|
||||||
|
|
||||||
|
if ("separator".equals(mode)) {
|
||||||
|
return new ColumnSplitOperation(
|
||||||
|
engineConfig,
|
||||||
|
obj.getString("columnName"),
|
||||||
|
obj.getBoolean("guessCellType"),
|
||||||
|
obj.getBoolean("removeOriginalColumn"),
|
||||||
|
obj.getString("separator"),
|
||||||
|
obj.getBoolean("regex"),
|
||||||
|
obj.getInt("maxColumns")
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
return new ColumnSplitOperation(
|
||||||
|
engineConfig,
|
||||||
|
obj.getString("columnName"),
|
||||||
|
obj.getBoolean("guessCellType"),
|
||||||
|
obj.getBoolean("removeOriginalColumn"),
|
||||||
|
JSONUtilities.getIntArray(obj, "fieldLengths")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public ColumnSplitOperation(
|
||||||
|
JSONObject engineConfig,
|
||||||
|
String columnName,
|
||||||
|
boolean guessCellType,
|
||||||
|
boolean removeOriginalColumn,
|
||||||
|
String separator,
|
||||||
|
boolean regex,
|
||||||
|
int maxColumns
|
||||||
|
) {
|
||||||
|
super(engineConfig);
|
||||||
|
|
||||||
|
_columnName = columnName;
|
||||||
|
_guessCellType = guessCellType;
|
||||||
|
_removeOriginalColumn = removeOriginalColumn;
|
||||||
|
|
||||||
|
_mode = "separator";
|
||||||
|
_separator = separator;
|
||||||
|
_regex = regex;
|
||||||
|
_maxColumns = maxColumns;
|
||||||
|
|
||||||
|
_fieldLengths = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ColumnSplitOperation(
|
||||||
|
JSONObject engineConfig,
|
||||||
|
String columnName,
|
||||||
|
boolean guessCellType,
|
||||||
|
boolean removeOriginalColumn,
|
||||||
|
int[] fieldLengths
|
||||||
|
) {
|
||||||
|
super(engineConfig);
|
||||||
|
|
||||||
|
_columnName = columnName;
|
||||||
|
_guessCellType = guessCellType;
|
||||||
|
_removeOriginalColumn = removeOriginalColumn;
|
||||||
|
|
||||||
|
_mode = "lengths";
|
||||||
|
_separator = null;
|
||||||
|
_regex = false;
|
||||||
|
_maxColumns = -1;
|
||||||
|
|
||||||
|
_fieldLengths = fieldLengths;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
|
||||||
|
writer.object();
|
||||||
|
writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
|
||||||
|
writer.key("description"); writer.value(getBriefDescription(null));
|
||||||
|
writer.key("engineConfig"); writer.value(getEngineConfig());
|
||||||
|
writer.key("columnName"); writer.value(_columnName);
|
||||||
|
writer.key("guessCellType"); writer.value(_guessCellType);
|
||||||
|
writer.key("removeOriginalColumn"); writer.value(_removeOriginalColumn);
|
||||||
|
writer.key("mode"); writer.value(_mode);
|
||||||
|
if ("separator".equals(_mode)) {
|
||||||
|
writer.key("separator"); writer.value(_separator);
|
||||||
|
writer.key("regex"); writer.value(_regex);
|
||||||
|
writer.key("maxColumns"); writer.value(_maxColumns);
|
||||||
|
} else {
|
||||||
|
writer.key("fieldLengths"); writer.array();
|
||||||
|
for (int l : _fieldLengths) {
|
||||||
|
writer.value(l);
|
||||||
|
}
|
||||||
|
writer.endArray();
|
||||||
|
}
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected String getBriefDescription(Project project) {
|
||||||
|
return "Split column " + _columnName +
|
||||||
|
("separator".equals(_mode) ? " by separator" : " by field lengths");
|
||||||
|
}
|
||||||
|
|
||||||
|
protected HistoryEntry createHistoryEntry(Project project) throws Exception {
|
||||||
|
Engine engine = createEngine(project);
|
||||||
|
|
||||||
|
Column column = project.columnModel.getColumnByName(_columnName);
|
||||||
|
if (column == null) {
|
||||||
|
throw new Exception("No column named " + _columnName);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> columnNames = new ArrayList<String>();
|
||||||
|
List<Integer> rowIndices = new ArrayList<Integer>(project.rows.size());
|
||||||
|
List<List<Serializable>> tuples = new ArrayList<List<Serializable>>(project.rows.size());
|
||||||
|
|
||||||
|
FilteredRows filteredRows = engine.getAllFilteredRows(false);
|
||||||
|
RowVisitor rowVisitor;
|
||||||
|
if ("lengths".equals(_mode)) {
|
||||||
|
rowVisitor = new ColumnSplitRowVisitor(project, column.getCellIndex(), columnNames, rowIndices, tuples) {
|
||||||
|
protected java.util.List<Serializable> split(String s) {
|
||||||
|
List<Serializable> results = new ArrayList<Serializable>(_fieldLengths.length + 1);
|
||||||
|
|
||||||
|
int lastIndex = 0;
|
||||||
|
for (int i = 0; i < _fieldLengths.length; i++) {
|
||||||
|
int from = lastIndex;
|
||||||
|
int length = _fieldLengths[i];
|
||||||
|
int to = Math.min(from + length, s.length());
|
||||||
|
|
||||||
|
results.add(stringToValue(s.substring(from, to)));
|
||||||
|
|
||||||
|
lastIndex = to;
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
} else if (_regex) {
|
||||||
|
Pattern pattern = Pattern.compile(_separator);
|
||||||
|
|
||||||
|
rowVisitor = new ColumnSplitRowVisitor(project, column.getCellIndex(), columnNames, rowIndices, tuples) {
|
||||||
|
Pattern _pattern;
|
||||||
|
|
||||||
|
protected java.util.List<Serializable> split(String s) {
|
||||||
|
return stringArrayToValueList(_pattern.split(s, _maxColumns));
|
||||||
|
};
|
||||||
|
|
||||||
|
public RowVisitor init(Pattern pattern) {
|
||||||
|
_pattern = pattern;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
}.init(pattern);
|
||||||
|
} else {
|
||||||
|
rowVisitor = new ColumnSplitRowVisitor(project, column.getCellIndex(), columnNames, rowIndices, tuples) {
|
||||||
|
protected java.util.List<Serializable> split(String s) {
|
||||||
|
return stringArrayToValueList(
|
||||||
|
StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator, _maxColumns));
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
filteredRows.accept(project, rowVisitor);
|
||||||
|
|
||||||
|
String description =
|
||||||
|
"Split " + rowIndices.size() +
|
||||||
|
" cell(s) in column " + _columnName +
|
||||||
|
" into several columns" +
|
||||||
|
("separator".equals(_mode) ? " by separator" : " by field lengths");
|
||||||
|
|
||||||
|
Change change = new ColumnSplitChange(
|
||||||
|
_columnName,
|
||||||
|
columnNames,
|
||||||
|
rowIndices,
|
||||||
|
tuples,
|
||||||
|
_removeOriginalColumn
|
||||||
|
);
|
||||||
|
|
||||||
|
return new HistoryEntry(
|
||||||
|
project, description, this, change);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected class ColumnSplitRowVisitor implements RowVisitor {
|
||||||
|
Project project;
|
||||||
|
int cellIndex;
|
||||||
|
List<String> columnNames;
|
||||||
|
List<Integer> rowIndices;
|
||||||
|
List<List<Serializable>> tuples;
|
||||||
|
|
||||||
|
int columnNameIndex = 1;
|
||||||
|
|
||||||
|
ColumnSplitRowVisitor(
|
||||||
|
Project project,
|
||||||
|
int cellIndex,
|
||||||
|
List<String> columnNames,
|
||||||
|
List<Integer> rowIndices,
|
||||||
|
List<List<Serializable>> tuples
|
||||||
|
) {
|
||||||
|
this.project = project;
|
||||||
|
this.cellIndex = cellIndex;
|
||||||
|
this.columnNames = columnNames;
|
||||||
|
this.rowIndices = rowIndices;
|
||||||
|
this.tuples = tuples;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean visit(Project project, int rowIndex, Row row, boolean includeContextual, boolean includeDependent) {
|
||||||
|
Object value = row.getCellValue(cellIndex);
|
||||||
|
if (ExpressionUtils.isNonBlankData(value)) {
|
||||||
|
String s = value instanceof String ? ((String) value) : value.toString();
|
||||||
|
|
||||||
|
List<Serializable> tuple = split(s);
|
||||||
|
|
||||||
|
rowIndices.add(rowIndex);
|
||||||
|
tuples.add(tuple);
|
||||||
|
|
||||||
|
for (int i = columnNames.size(); i < tuple.size(); i++) {
|
||||||
|
while (true) {
|
||||||
|
String newColumnName = _columnName + " " + columnNameIndex++;
|
||||||
|
if (project.columnModel.getColumnByName(newColumnName) == null) {
|
||||||
|
columnNames.add(newColumnName);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected List<Serializable> split(String s) {
|
||||||
|
throw new NotImplementedException();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Serializable stringToValue(String s) {
|
||||||
|
return _guessCellType ? ImporterUtilities.parseCellValue(s) : s;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected List<Serializable> stringArrayToValueList(String[] cells) {
|
||||||
|
List<Serializable> results = new ArrayList<Serializable>(cells.length);
|
||||||
|
for (String cell : cells) {
|
||||||
|
results.add(stringToValue(cell));
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -33,6 +33,7 @@ public abstract class OperationRegistry {
|
|||||||
register("column-addition", ColumnAdditionOperation.class);
|
register("column-addition", ColumnAdditionOperation.class);
|
||||||
register("column-removal", ColumnRemovalOperation.class);
|
register("column-removal", ColumnRemovalOperation.class);
|
||||||
register("column-rename", ColumnRenameOperation.class);
|
register("column-rename", ColumnRenameOperation.class);
|
||||||
|
register("column-split", ColumnSplitOperation.class);
|
||||||
register("extend-data", ExtendDataOperation.class);
|
register("extend-data", ExtendDataOperation.class);
|
||||||
|
|
||||||
register("row-removal", RowRemovalOperation.class);
|
register("row-removal", RowRemovalOperation.class);
|
||||||
|
@ -59,6 +59,21 @@ public class JSONUtilities {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public int[] getIntArray(JSONObject obj, String key) {
|
||||||
|
try {
|
||||||
|
JSONArray a = obj.getJSONArray(key);
|
||||||
|
int[] r = new int[a.length()];
|
||||||
|
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
r[i] = a.getInt(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
} catch (JSONException e) {
|
||||||
|
return new int[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static public String[] getStringArray(JSONObject obj, String key) {
|
static public String[] getStringArray(JSONObject obj, String key) {
|
||||||
try {
|
try {
|
||||||
JSONArray a = obj.getJSONArray(key);
|
JSONArray a = obj.getJSONArray(key);
|
||||||
|
@ -289,6 +289,10 @@ DataTableColumnHeaderUI.prototype._createMenuForColumnHeader = function(elmt) {
|
|||||||
{
|
{
|
||||||
label: "Edit Column",
|
label: "Edit Column",
|
||||||
submenu: [
|
submenu: [
|
||||||
|
{
|
||||||
|
label: "Split into Several Columns",
|
||||||
|
click: function() { self._doSplitColumn(); }
|
||||||
|
},
|
||||||
{
|
{
|
||||||
label: "Add Column Based on This Column ...",
|
label: "Add Column Based on This Column ...",
|
||||||
click: function() { self._doAddColumn("value"); }
|
click: function() { self._doAddColumn("value"); }
|
||||||
@ -941,3 +945,137 @@ DataTableColumnHeaderUI.prototype._doSplitMultiValueCells = function() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Shows the "Split Column ... into Several Columns" dialog and, on OK, posts a
 * "split-column" process request built from the dialog's inputs.
 *
 * Two modes are offered: splitting by separator (optionally a regular
 * expression, with an optional maximum number of columns) and splitting by a
 * comma-separated list of field lengths.
 */
DataTableColumnHeaderUI.prototype._doSplitColumn = function() {
    var self = this;
    var frame = DialogSystem.createDialog();
    frame.width("600px");
    
    var header = $('<div></div>').addClass("dialog-header").text("Split Column " + this._column.name + " into Several Columns").appendTo(frame);
    var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
    var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);
    
    body.html(
        '<div class="grid-layout layout-looser layout-full"><table><tr>' +
            '<td>' +
                '<div class="grid-layout layout-tighter"><table>' +
                    '<tr>' +
                        '<td colspan="3"><h3>How to Split Column</h3></td>' +
                    '</tr>' +
                    '<tr>' +
                        '<td width="1%"><input type="radio" checked="true" name="split-by-mode" value="separator" /></td>' +
                        '<td colspan="2">by separator</td>' +
                    '</tr>' +
                    '<tr><td></td>' +
                        '<td>Separator</td>' +
                        '<td style="white-space: pre;">' +
                            '<input size="15" value="," bind="separatorInput" /> ' +
                            '<input type="checkbox" bind="regexInput" /> regular expression' +
                        '</td>' +
                    '</tr>' +
                    '<tr><td></td>' +
                        '<td>Split into</td>' +
                        '<td style="white-space: pre;"><input size="3" bind="maxColumnsInput" /> ' +
                            'columns at most (leave blank for no limit)</td>' +
                    '</tr>' +
                    '<tr>' +
                        '<td width="1%"><input type="radio" name="split-by-mode" value="lengths" /></td>' +
                        '<td colspan="2">by field lengths</td>' +
                    '</tr>' +
                    '<tr><td></td>' +
                        '<td colspan="2">' +
                            '<textarea style="width: 100%;" bind="lengthsTextarea"></textarea>' +
                        '</td>' +
                    '</tr>' +
                    '<tr><td></td>' +
                        '<td colspan="2">' +
                            'List of integers separated by commas, e.g., 5, 7, 15' +
                        '</td>' +
                    '</tr>' +
                '</table></div>' +
            '</td>' +
            '<td>' +
                '<div class="grid-layout layout-tighter"><table>' +
                    '<tr>' +
                        '<td colspan="3"><h3>After Splitting</h3></td>' +
                    '</tr>' +
                    '<tr>' +
                        '<td width="1%"><input type="checkbox" checked="true" bind="guessCellTypeInput" /></td>' +
                        '<td colspan="2">Guess cell type</td>' +
                    '</tr>' +
                    '<tr>' +
                        '<td width="1%"><input type="checkbox" checked="true" bind="removeColumnInput" /></td>' +
                        '<td colspan="2">Remove this column</td>' +
                    '</tr>' +
                '</table></div>' +
            '</td>' +
        // Close the outer row, which the markup opened at the top.
        '</tr></table></div>'
    );
    var bodyElmts = DOM.bind(body);
    
    footer.html(
        '<button bind="okButton"> OK </button>' +
        '<button bind="cancelButton">Cancel</button>'
    );
    var footerElmts = DOM.bind(footer);
    
    var level = DialogSystem.showDialog(frame);
    var dismiss = function() {
        DialogSystem.dismissUntil(level - 1);
    };
    
    footerElmts.okButton.click(function() {
        // Search inside this dialog only, so that a same-named radio group
        // elsewhere in the document cannot be picked up.
        var mode = body.find("input[name='split-by-mode']:checked")[0].value;
        var config = {
            columnName: self._column.name,
            mode: mode,
            guessCellType: bodyElmts.guessCellTypeInput[0].checked,
            removeOriginalColumn: bodyElmts.removeColumnInput[0].checked
        };
        if (mode == "separator") {
            config.separator = bodyElmts.separatorInput[0].value;
            if (!(config.separator)) {
                alert("Please specify a separator.");
                return;
            }
            
            config.regex = bodyElmts.regexInput[0].checked;
            
            // Always send maxColumns: the split-column command parses it as an
            // integer unconditionally. 0 stands for "no limit".
            config.maxColumns = 0;
            var maxColumnsText = bodyElmts.maxColumnsInput[0].value;
            if (maxColumnsText) {
                // Explicit radix so that input such as "010" is read as decimal.
                var maxColumns = parseInt(maxColumnsText, 10);
                if (!isNaN(maxColumns)) {
                    config.maxColumns = maxColumns;
                }
            }
        } else {
            var lengthsJson = "[" + bodyElmts.lengthsTextarea[0].value + "]";
            var parsed;
            try {
                parsed = JSON.parse(lengthsJson);
            } catch (e) {
                alert("The given field lengths are not properly formatted.");
                return;
            }
            
            // Keep only the numeric entries of the list.
            var lengths = [];
            $.each(parsed, function(i,n) { if (typeof n == "number") lengths.push(n); });
            
            if (lengths.length == 0) {
                alert("No field length is specified.");
                return;
            }
            
            config.fieldLengths = JSON.stringify(lengths);
        }
        
        Gridworks.postProcess(
            "split-column",
            config,
            null,
            { modelsChanged: true }
        );
        dismiss();
    });
    
    footerElmts.cancelButton.click(dismiss);
};
|
||||||
|
Loading…
Reference in New Issue
Block a user