Implemented column split command. It seems to be working in "by lengths" mode.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@510 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
d0df704d8a
commit
5ba67b7b26
@ -28,6 +28,7 @@ import com.metaweb.gridworks.commands.edit.RemoveColumnCommand;
|
||||
import com.metaweb.gridworks.commands.edit.RemoveRowsCommand;
|
||||
import com.metaweb.gridworks.commands.edit.RenameColumnCommand;
|
||||
import com.metaweb.gridworks.commands.edit.SaveProtographCommand;
|
||||
import com.metaweb.gridworks.commands.edit.SplitColumnCommand;
|
||||
import com.metaweb.gridworks.commands.edit.SplitMultiValueCellsCommand;
|
||||
import com.metaweb.gridworks.commands.edit.TextTransformCommand;
|
||||
import com.metaweb.gridworks.commands.edit.UndoRedoCommand;
|
||||
@ -103,6 +104,7 @@ public class GridworksServlet extends HttpServlet {
|
||||
_commands.put("add-column", new AddColumnCommand());
|
||||
_commands.put("remove-column", new RemoveColumnCommand());
|
||||
_commands.put("rename-column", new RenameColumnCommand());
|
||||
_commands.put("split-column", new SplitColumnCommand());
|
||||
_commands.put("extend-data", new ExtendDataCommand());
|
||||
|
||||
_commands.put("reconcile", new ReconcileCommand());
|
||||
|
@ -0,0 +1,52 @@
|
||||
package com.metaweb.gridworks.commands.edit;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.metaweb.gridworks.commands.EngineDependentCommand;
|
||||
import com.metaweb.gridworks.model.AbstractOperation;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
import com.metaweb.gridworks.operations.ColumnSplitOperation;
|
||||
import com.metaweb.gridworks.util.ParsingUtilities;
|
||||
|
||||
public class SplitColumnCommand extends EngineDependentCommand {
|
||||
@Override
|
||||
protected AbstractOperation createOperation(Project project,
|
||||
HttpServletRequest request, JSONObject engineConfig) throws Exception {
|
||||
|
||||
String columnName = request.getParameter("columnName");
|
||||
boolean guessCellType = Boolean.parseBoolean(request.getParameter("guessCellType"));
|
||||
boolean removeOriginalColumn = Boolean.parseBoolean(request.getParameter("removeOriginalColumn"));
|
||||
String mode = request.getParameter("mode");
|
||||
if ("separator".equals(mode)) {
|
||||
return new ColumnSplitOperation(
|
||||
engineConfig,
|
||||
columnName,
|
||||
guessCellType,
|
||||
removeOriginalColumn,
|
||||
request.getParameter("separator"),
|
||||
Boolean.parseBoolean(request.getParameter("regex")),
|
||||
Integer.parseInt(request.getParameter("maxColumns"))
|
||||
);
|
||||
} else {
|
||||
String s = request.getParameter("fieldLengths");
|
||||
|
||||
JSONArray a = ParsingUtilities.evaluateJsonStringToArray(s);
|
||||
int[] fieldLengths = new int[a.length()];
|
||||
|
||||
for (int i = 0; i < fieldLengths.length; i++) {
|
||||
fieldLengths[i] = a.getInt(i);
|
||||
}
|
||||
|
||||
return new ColumnSplitOperation(
|
||||
engineConfig,
|
||||
columnName,
|
||||
guessCellType,
|
||||
removeOriginalColumn,
|
||||
fieldLengths
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
@ -10,7 +10,6 @@ import com.metaweb.gridworks.gel.ControlFunctionRegistry;
|
||||
import com.metaweb.gridworks.gel.Function;
|
||||
|
||||
public class SplitByLengths implements Function {
|
||||
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
if (args.length >= 2 && args[0] != null) {
|
||||
Object o = args[0];
|
||||
|
@ -81,6 +81,16 @@ public class ColumnModel implements Jsonizable {
|
||||
return _nameToColumn.get(name);
|
||||
}
|
||||
|
||||
public int getColumnIndexByName(String name) {
|
||||
for (int i = 0; i < _columnNames.size(); i++) {
|
||||
String s = _columnNames.get(i);
|
||||
if (name.equals(s)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public Column getColumnByCellIndex(int cellIndex) {
|
||||
return _cellIndexToColumn.get(cellIndex);
|
||||
}
|
||||
|
@ -0,0 +1,334 @@
|
||||
package com.metaweb.gridworks.model.changes;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.Serializable;
|
||||
import java.io.Writer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONTokener;
|
||||
|
||||
import com.metaweb.gridworks.history.Change;
|
||||
import com.metaweb.gridworks.model.Cell;
|
||||
import com.metaweb.gridworks.model.Column;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
import com.metaweb.gridworks.model.Recon;
|
||||
import com.metaweb.gridworks.model.Row;
|
||||
|
||||
public class ColumnSplitChange implements Change {
|
||||
final protected String _columnName;
|
||||
|
||||
final protected List<String> _columnNames;
|
||||
final protected List<Integer> _rowIndices;
|
||||
final protected List<List<Serializable>> _tuples;
|
||||
|
||||
final protected boolean _removeOriginalColumn;
|
||||
|
||||
protected Column _column;
|
||||
protected int _columnIndex;
|
||||
|
||||
protected int _firstNewCellIndex = -1;
|
||||
protected List<Row> _oldRows;
|
||||
protected List<Row> _newRows;
|
||||
|
||||
public ColumnSplitChange(
|
||||
String columnName,
|
||||
List<String> columnNames,
|
||||
List<Integer> rowIndices,
|
||||
List<List<Serializable>> tuples,
|
||||
boolean removeOriginalColumn
|
||||
) {
|
||||
_columnName = columnName;
|
||||
|
||||
_columnNames = columnNames;
|
||||
_rowIndices = rowIndices;
|
||||
_tuples = tuples;
|
||||
|
||||
_removeOriginalColumn = removeOriginalColumn;
|
||||
}
|
||||
|
||||
protected ColumnSplitChange(
|
||||
String columnName,
|
||||
List<String> columnNames,
|
||||
List<Integer> rowIndices,
|
||||
List<List<Serializable>> tuples,
|
||||
boolean removeOriginalColumn,
|
||||
|
||||
Column column,
|
||||
int columnIndex,
|
||||
|
||||
int firstNewCellIndex,
|
||||
List<Row> oldRows,
|
||||
List<Row> newRows
|
||||
) {
|
||||
_columnName = columnName;
|
||||
|
||||
_columnNames = columnNames;
|
||||
_rowIndices = rowIndices;
|
||||
_tuples = tuples;
|
||||
|
||||
_removeOriginalColumn = removeOriginalColumn;
|
||||
|
||||
_column = column;
|
||||
_columnIndex = columnIndex;
|
||||
|
||||
_firstNewCellIndex = firstNewCellIndex;
|
||||
_oldRows = oldRows;
|
||||
_newRows = newRows;
|
||||
}
|
||||
|
||||
public void apply(Project project) {
|
||||
synchronized (project) {
|
||||
if (_firstNewCellIndex < 0) {
|
||||
_firstNewCellIndex = project.columnModel.allocateNewCellIndex();
|
||||
for (int i = 1; i < _columnNames.size(); i++) {
|
||||
project.columnModel.allocateNewCellIndex();
|
||||
}
|
||||
|
||||
_column = project.columnModel.getColumnByName(_columnName);
|
||||
_columnIndex = project.columnModel.getColumnIndexByName(_columnName);
|
||||
|
||||
_oldRows = new ArrayList<Row>(_rowIndices.size());
|
||||
_newRows = new ArrayList<Row>(_rowIndices.size());
|
||||
|
||||
int cellIndex = _column.getCellIndex();
|
||||
|
||||
for (int i = 0; i < _rowIndices.size(); i++) {
|
||||
int r = _rowIndices.get(i);
|
||||
List<Serializable> tuple = _tuples.get(i);
|
||||
|
||||
Row oldRow = project.rows.get(r);
|
||||
Row newRow = oldRow.dup();
|
||||
|
||||
_oldRows.add(oldRow);
|
||||
_newRows.add(newRow);
|
||||
|
||||
for (int c = 0; c < tuple.size(); c++) {
|
||||
Serializable value = tuple.get(c);
|
||||
if (value != null) {
|
||||
newRow.setCell(_firstNewCellIndex + c, new Cell(value, null));
|
||||
}
|
||||
}
|
||||
|
||||
if (_removeOriginalColumn) {
|
||||
newRow.setCell(cellIndex, null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < _rowIndices.size(); i++) {
|
||||
int r = _rowIndices.get(i);
|
||||
Row newRow = _newRows.get(i);
|
||||
|
||||
project.rows.set(r, newRow);
|
||||
}
|
||||
|
||||
for (int i = 0; i < _columnNames.size(); i++) {
|
||||
String name = _columnNames.get(i);
|
||||
int cellIndex = _firstNewCellIndex + i;
|
||||
|
||||
Column column = new Column(cellIndex, name);
|
||||
|
||||
project.columnModel.columns.add(_columnIndex + 1 + i, column);
|
||||
}
|
||||
|
||||
if (_removeOriginalColumn) {
|
||||
project.columnModel.columns.remove(_columnIndex);
|
||||
}
|
||||
|
||||
project.columnModel.update();
|
||||
project.recomputeRowContextDependencies();
|
||||
}
|
||||
}
|
||||
|
||||
public void revert(Project project) {
|
||||
synchronized (project) {
|
||||
for (int i = 0; i < _rowIndices.size(); i++) {
|
||||
int r = _rowIndices.get(i);
|
||||
Row oldRow = _oldRows.get(i);
|
||||
|
||||
project.rows.set(r, oldRow);
|
||||
}
|
||||
|
||||
if (_removeOriginalColumn) {
|
||||
project.columnModel.columns.add(_columnIndex, _column);
|
||||
}
|
||||
|
||||
for (int i = 0; i < _columnNames.size(); i++) {
|
||||
project.columnModel.columns.remove(_columnIndex + 1);
|
||||
}
|
||||
|
||||
project.columnModel.update();
|
||||
project.recomputeRowContextDependencies();
|
||||
}
|
||||
}
|
||||
|
||||
public void save(Writer writer, Properties options) throws IOException {
|
||||
writer.write("columnName="); writer.write(_columnName); writer.write('\n');
|
||||
|
||||
writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n');
|
||||
for (String name : _columnNames) {
|
||||
writer.write(name); writer.write('\n');
|
||||
}
|
||||
writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n');
|
||||
for (Integer rowIndex : _rowIndices) {
|
||||
writer.write(rowIndex.toString()); writer.write('\n');
|
||||
}
|
||||
writer.write("tupleCount="); writer.write(Integer.toString(_tuples.size())); writer.write('\n');
|
||||
for (List<Serializable> tuple : _tuples) {
|
||||
writer.write(Integer.toString(tuple.size())); writer.write('\n');
|
||||
|
||||
for (Serializable value : tuple) {
|
||||
if (value == null) {
|
||||
writer.write("null");
|
||||
} else if (value instanceof String) {
|
||||
writer.write(JSONObject.quote((String) value));
|
||||
} else {
|
||||
writer.write(value.toString());
|
||||
}
|
||||
writer.write('\n');
|
||||
}
|
||||
}
|
||||
writer.write("removeOriginalColumn="); writer.write(Boolean.toString(_removeOriginalColumn)); writer.write('\n');
|
||||
|
||||
writer.write("column="); _column.save(writer); writer.write('\n');
|
||||
writer.write("columnIndex="); writer.write(Integer.toString(_columnIndex)); writer.write('\n');
|
||||
|
||||
writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n');
|
||||
|
||||
writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n');
|
||||
for (Row row : _newRows) {
|
||||
row.save(writer, options);
|
||||
writer.write('\n');
|
||||
}
|
||||
writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n');
|
||||
for (Row row : _oldRows) {
|
||||
row.save(writer, options);
|
||||
writer.write('\n');
|
||||
}
|
||||
writer.write("/ec/\n"); // end of change marker
|
||||
}
|
||||
|
||||
static public Change load(LineNumberReader reader) throws Exception {
|
||||
String columnName = null;
|
||||
List<String> columnNames = null;
|
||||
List<Integer> rowIndices = null;
|
||||
List<List<Serializable>> tuples = null;
|
||||
boolean removeOriginalColumn = false;
|
||||
|
||||
Column column = null;
|
||||
int columnIndex = -1;
|
||||
|
||||
int firstNewCellIndex = -1;
|
||||
List<Row> oldRows = null;
|
||||
List<Row> newRows = null;
|
||||
|
||||
Map<Long, Recon> reconCache = new HashMap<Long, Recon>();
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
|
||||
int equal = line.indexOf('=');
|
||||
CharSequence field = line.subSequence(0, equal);
|
||||
String value = line.substring(equal + 1);
|
||||
|
||||
if ("columnName".equals(field)) {
|
||||
columnName = value;
|
||||
} else if ("columnNameCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
columnNames = new ArrayList<String>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
if (line != null) {
|
||||
columnNames.add(line);
|
||||
}
|
||||
}
|
||||
} else if ("rowIndexCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
rowIndices = new ArrayList<Integer>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
if (line != null) {
|
||||
rowIndices.add(Integer.parseInt(line));
|
||||
}
|
||||
}
|
||||
} else if ("tupleCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
tuples = new ArrayList<List<Serializable>>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
|
||||
if (line == null) continue;
|
||||
|
||||
int valueCount = Integer.parseInt(line);
|
||||
|
||||
List<Serializable> tuple = new ArrayList<Serializable>(valueCount);
|
||||
for (int r = 0; r < valueCount; r++) {
|
||||
line = reader.readLine();
|
||||
|
||||
JSONTokener t = new JSONTokener(line);
|
||||
Object o = t.nextValue();
|
||||
|
||||
tuple.add((o != JSONObject.NULL) ? (Serializable) o : null);
|
||||
}
|
||||
|
||||
tuples.add(tuple);
|
||||
}
|
||||
} else if ("removeOriginalColumn".equals(field)) {
|
||||
removeOriginalColumn = Boolean.parseBoolean(value);
|
||||
|
||||
} else if ("column".equals(field)) {
|
||||
column = Column.load(value);
|
||||
} else if ("columnIndex".equals(field)) {
|
||||
columnIndex = Integer.parseInt(value);
|
||||
} else if ("firstNewCellIndex".equals(field)) {
|
||||
firstNewCellIndex = Integer.parseInt(value);
|
||||
} else if ("oldRowCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
oldRows = new ArrayList<Row>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
if (line != null) {
|
||||
oldRows.add(Row.load(line, reconCache));
|
||||
}
|
||||
}
|
||||
} else if ("newRowCount".equals(field)) {
|
||||
int count = Integer.parseInt(value);
|
||||
|
||||
newRows = new ArrayList<Row>(count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
line = reader.readLine();
|
||||
if (line != null) {
|
||||
newRows.add(Row.load(line, reconCache));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ColumnSplitChange change = new ColumnSplitChange(
|
||||
columnName,
|
||||
columnNames,
|
||||
rowIndices,
|
||||
tuples,
|
||||
removeOriginalColumn,
|
||||
|
||||
column,
|
||||
columnIndex,
|
||||
|
||||
firstNewCellIndex,
|
||||
oldRows,
|
||||
newRows
|
||||
);
|
||||
|
||||
|
||||
return change;
|
||||
}
|
||||
}
|
@ -0,0 +1,281 @@
|
||||
package com.metaweb.gridworks.operations;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
|
||||
|
||||
import com.metaweb.gridworks.browsing.Engine;
|
||||
import com.metaweb.gridworks.browsing.FilteredRows;
|
||||
import com.metaweb.gridworks.browsing.RowVisitor;
|
||||
import com.metaweb.gridworks.expr.ExpressionUtils;
|
||||
import com.metaweb.gridworks.history.Change;
|
||||
import com.metaweb.gridworks.history.HistoryEntry;
|
||||
import com.metaweb.gridworks.importers.ImporterUtilities;
|
||||
import com.metaweb.gridworks.model.AbstractOperation;
|
||||
import com.metaweb.gridworks.model.Column;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
import com.metaweb.gridworks.model.Row;
|
||||
import com.metaweb.gridworks.model.changes.ColumnSplitChange;
|
||||
import com.metaweb.gridworks.util.JSONUtilities;
|
||||
|
||||
public class ColumnSplitOperation extends EngineDependentOperation {
|
||||
final protected String _columnName;
|
||||
final protected boolean _guessCellType;
|
||||
final protected boolean _removeOriginalColumn;
|
||||
final protected String _mode;
|
||||
|
||||
final protected String _separator;
|
||||
final protected boolean _regex;
|
||||
final protected int _maxColumns;
|
||||
|
||||
final protected int[] _fieldLengths;
|
||||
|
||||
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
||||
JSONObject engineConfig = obj.getJSONObject("engineConfig");
|
||||
String mode = obj.getString("mode");
|
||||
|
||||
if ("separator".equals(mode)) {
|
||||
return new ColumnSplitOperation(
|
||||
engineConfig,
|
||||
obj.getString("columnName"),
|
||||
obj.getBoolean("guessCellType"),
|
||||
obj.getBoolean("removeOriginalColumn"),
|
||||
obj.getString("separator"),
|
||||
obj.getBoolean("regex"),
|
||||
obj.getInt("maxColumns")
|
||||
);
|
||||
} else {
|
||||
return new ColumnSplitOperation(
|
||||
engineConfig,
|
||||
obj.getString("columnName"),
|
||||
obj.getBoolean("guessCellType"),
|
||||
obj.getBoolean("removeOriginalColumn"),
|
||||
JSONUtilities.getIntArray(obj, "fieldLengths")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
public ColumnSplitOperation(
|
||||
JSONObject engineConfig,
|
||||
String columnName,
|
||||
boolean guessCellType,
|
||||
boolean removeOriginalColumn,
|
||||
String separator,
|
||||
boolean regex,
|
||||
int maxColumns
|
||||
) {
|
||||
super(engineConfig);
|
||||
|
||||
_columnName = columnName;
|
||||
_guessCellType = guessCellType;
|
||||
_removeOriginalColumn = removeOriginalColumn;
|
||||
|
||||
_mode = "separator";
|
||||
_separator = separator;
|
||||
_regex = regex;
|
||||
_maxColumns = maxColumns;
|
||||
|
||||
_fieldLengths = null;
|
||||
}
|
||||
|
||||
public ColumnSplitOperation(
|
||||
JSONObject engineConfig,
|
||||
String columnName,
|
||||
boolean guessCellType,
|
||||
boolean removeOriginalColumn,
|
||||
int[] fieldLengths
|
||||
) {
|
||||
super(engineConfig);
|
||||
|
||||
_columnName = columnName;
|
||||
_guessCellType = guessCellType;
|
||||
_removeOriginalColumn = removeOriginalColumn;
|
||||
|
||||
_mode = "lengths";
|
||||
_separator = null;
|
||||
_regex = false;
|
||||
_maxColumns = -1;
|
||||
|
||||
_fieldLengths = fieldLengths;
|
||||
}
|
||||
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
|
||||
writer.key("description"); writer.value(getBriefDescription(null));
|
||||
writer.key("engineConfig"); writer.value(getEngineConfig());
|
||||
writer.key("columnName"); writer.value(_columnName);
|
||||
writer.key("guessCellType"); writer.value(_guessCellType);
|
||||
writer.key("removeOriginalColumn"); writer.value(_removeOriginalColumn);
|
||||
writer.key("mode"); writer.value(_mode);
|
||||
if ("separator".equals(_mode)) {
|
||||
writer.key("separator"); writer.value(_separator);
|
||||
writer.key("regex"); writer.value(_regex);
|
||||
writer.key("maxColumns"); writer.value(_maxColumns);
|
||||
} else {
|
||||
writer.key("fieldLengths"); writer.array();
|
||||
for (int l : _fieldLengths) {
|
||||
writer.value(l);
|
||||
}
|
||||
writer.endArray();
|
||||
}
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
protected String getBriefDescription(Project project) {
|
||||
return "Split column " + _columnName +
|
||||
("separator".equals(_mode) ? " by separator" : " by field lengths");
|
||||
}
|
||||
|
||||
protected HistoryEntry createHistoryEntry(Project project) throws Exception {
|
||||
Engine engine = createEngine(project);
|
||||
|
||||
Column column = project.columnModel.getColumnByName(_columnName);
|
||||
if (column == null) {
|
||||
throw new Exception("No column named " + _columnName);
|
||||
}
|
||||
|
||||
List<String> columnNames = new ArrayList<String>();
|
||||
List<Integer> rowIndices = new ArrayList<Integer>(project.rows.size());
|
||||
List<List<Serializable>> tuples = new ArrayList<List<Serializable>>(project.rows.size());
|
||||
|
||||
FilteredRows filteredRows = engine.getAllFilteredRows(false);
|
||||
RowVisitor rowVisitor;
|
||||
if ("lengths".equals(_mode)) {
|
||||
rowVisitor = new ColumnSplitRowVisitor(project, column.getCellIndex(), columnNames, rowIndices, tuples) {
|
||||
protected java.util.List<Serializable> split(String s) {
|
||||
List<Serializable> results = new ArrayList<Serializable>(_fieldLengths.length + 1);
|
||||
|
||||
int lastIndex = 0;
|
||||
for (int i = 0; i < _fieldLengths.length; i++) {
|
||||
int from = lastIndex;
|
||||
int length = _fieldLengths[i];
|
||||
int to = Math.min(from + length, s.length());
|
||||
|
||||
results.add(stringToValue(s.substring(from, to)));
|
||||
|
||||
lastIndex = to;
|
||||
}
|
||||
|
||||
return results;
|
||||
};
|
||||
};
|
||||
} else if (_regex) {
|
||||
Pattern pattern = Pattern.compile(_separator);
|
||||
|
||||
rowVisitor = new ColumnSplitRowVisitor(project, column.getCellIndex(), columnNames, rowIndices, tuples) {
|
||||
Pattern _pattern;
|
||||
|
||||
protected java.util.List<Serializable> split(String s) {
|
||||
return stringArrayToValueList(_pattern.split(s, _maxColumns));
|
||||
};
|
||||
|
||||
public RowVisitor init(Pattern pattern) {
|
||||
_pattern = pattern;
|
||||
return this;
|
||||
}
|
||||
}.init(pattern);
|
||||
} else {
|
||||
rowVisitor = new ColumnSplitRowVisitor(project, column.getCellIndex(), columnNames, rowIndices, tuples) {
|
||||
protected java.util.List<Serializable> split(String s) {
|
||||
return stringArrayToValueList(
|
||||
StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator, _maxColumns));
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
filteredRows.accept(project, rowVisitor);
|
||||
|
||||
String description =
|
||||
"Split " + rowIndices.size() +
|
||||
" cell(s) in column " + _columnName +
|
||||
" into several columns" +
|
||||
("separator".equals(_mode) ? " by separator" : " by field lengths");
|
||||
|
||||
Change change = new ColumnSplitChange(
|
||||
_columnName,
|
||||
columnNames,
|
||||
rowIndices,
|
||||
tuples,
|
||||
_removeOriginalColumn
|
||||
);
|
||||
|
||||
return new HistoryEntry(
|
||||
project, description, this, change);
|
||||
}
|
||||
|
||||
protected class ColumnSplitRowVisitor implements RowVisitor {
|
||||
Project project;
|
||||
int cellIndex;
|
||||
List<String> columnNames;
|
||||
List<Integer> rowIndices;
|
||||
List<List<Serializable>> tuples;
|
||||
|
||||
int columnNameIndex = 1;
|
||||
|
||||
ColumnSplitRowVisitor(
|
||||
Project project,
|
||||
int cellIndex,
|
||||
List<String> columnNames,
|
||||
List<Integer> rowIndices,
|
||||
List<List<Serializable>> tuples
|
||||
) {
|
||||
this.project = project;
|
||||
this.cellIndex = cellIndex;
|
||||
this.columnNames = columnNames;
|
||||
this.rowIndices = rowIndices;
|
||||
this.tuples = tuples;
|
||||
}
|
||||
|
||||
public boolean visit(Project project, int rowIndex, Row row, boolean includeContextual, boolean includeDependent) {
|
||||
Object value = row.getCellValue(cellIndex);
|
||||
if (ExpressionUtils.isNonBlankData(value)) {
|
||||
String s = value instanceof String ? ((String) value) : value.toString();
|
||||
|
||||
List<Serializable> tuple = split(s);
|
||||
|
||||
rowIndices.add(rowIndex);
|
||||
tuples.add(tuple);
|
||||
|
||||
for (int i = columnNames.size(); i < tuple.size(); i++) {
|
||||
while (true) {
|
||||
String newColumnName = _columnName + " " + columnNameIndex++;
|
||||
if (project.columnModel.getColumnByName(newColumnName) == null) {
|
||||
columnNames.add(newColumnName);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected List<Serializable> split(String s) {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
protected Serializable stringToValue(String s) {
|
||||
return _guessCellType ? ImporterUtilities.parseCellValue(s) : s;
|
||||
}
|
||||
|
||||
protected List<Serializable> stringArrayToValueList(String[] cells) {
|
||||
List<Serializable> results = new ArrayList<Serializable>(cells.length);
|
||||
for (String cell : cells) {
|
||||
results.add(stringToValue(cell));
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
||||
}
|
@ -33,6 +33,7 @@ public abstract class OperationRegistry {
|
||||
register("column-addition", ColumnAdditionOperation.class);
|
||||
register("column-removal", ColumnRemovalOperation.class);
|
||||
register("column-rename", ColumnRenameOperation.class);
|
||||
register("column-split", ColumnSplitOperation.class);
|
||||
register("extend-data", ExtendDataOperation.class);
|
||||
|
||||
register("row-removal", RowRemovalOperation.class);
|
||||
|
@ -59,6 +59,21 @@ public class JSONUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
static public int[] getIntArray(JSONObject obj, String key) {
|
||||
try {
|
||||
JSONArray a = obj.getJSONArray(key);
|
||||
int[] r = new int[a.length()];
|
||||
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
r[i] = a.getInt(i);
|
||||
}
|
||||
|
||||
return r;
|
||||
} catch (JSONException e) {
|
||||
return new int[0];
|
||||
}
|
||||
}
|
||||
|
||||
static public String[] getStringArray(JSONObject obj, String key) {
|
||||
try {
|
||||
JSONArray a = obj.getJSONArray(key);
|
||||
|
@ -289,6 +289,10 @@ DataTableColumnHeaderUI.prototype._createMenuForColumnHeader = function(elmt) {
|
||||
{
|
||||
label: "Edit Column",
|
||||
submenu: [
|
||||
{
|
||||
label: "Split into Several Columns",
|
||||
click: function() { self._doSplitColumn(); }
|
||||
},
|
||||
{
|
||||
label: "Add Column Based on This Column ...",
|
||||
click: function() { self._doAddColumn("value"); }
|
||||
@ -941,3 +945,137 @@ DataTableColumnHeaderUI.prototype._doSplitMultiValueCells = function() {
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
/*
 *  Shows the "Split Column into Several Columns" dialog for this column and,
 *  on OK, posts a "split-column" request built from the dialog's settings.
 *  The request is not sent (and the dialog stays open) when the separator is
 *  empty or the field lengths cannot be parsed.
 */
DataTableColumnHeaderUI.prototype._doSplitColumn = function() {
    var self = this;
    var frame = DialogSystem.createDialog();
    frame.width("600px");
    
    var header = $('<div></div>').addClass("dialog-header").text("Split Column " + this._column.name + " into Several Columns").appendTo(frame);
    var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
    var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);
    
    body.html(
        '<div class="grid-layout layout-looser layout-full"><table><tr>' +
            '<td>' +
                '<div class="grid-layout layout-tighter"><table>' +
                    '<tr>' +
                        '<td colspan="3"><h3>How to Split Column</h3></td>' +
                    '</tr>' +
                    '<tr>' +
                        '<td width="1%"><input type="radio" checked="true" name="split-by-mode" value="separator" /></td>' +
                        '<td colspan="2">by separator</td>' +
                    '</tr>' +
                    '<tr><td></td>' +
                        '<td>Separator</td>' +
                        '<td style="white-space: pre;">' +
                            '<input size="15" value="," bind="separatorInput" /> ' +
                            '<input type="checkbox" bind="regexInput" /> regular expression' +
                        '</td>' +
                    '</tr>' +
                    '<tr><td></td>' +
                        '<td>Split into</td>' +
                        '<td style="white-space: pre;"><input size="3" bind="maxColumnsInput" /> ' +
                            'columns at most (leave blank for no limit)</td>' +
                    '</tr>' +
                    '<tr>' +
                        '<td width="1%"><input type="radio" name="split-by-mode" value="lengths" /></td>' +
                        '<td colspan="2">by field lengths</td>' +
                    '</tr>' +
                    '<tr><td></td>' +
                        '<td colspan="2">' +
                            '<textarea style="width: 100%;" bind="lengthsTextarea"></textarea>' +
                        '</td>' +
                    '</tr>' +
                    '<tr><td></td>' +
                        '<td colspan="2">' +
                            'List of integers separated by commas, e.g., 5, 7, 15' +
                        '</td>' +
                    '</tr>' +
                '</table></div>' +
            '</td>' +
            '<td>' +
                '<div class="grid-layout layout-tighter"><table>' +
                    '<tr>' +
                        '<td colspan="3"><h3>After Splitting</h3></td>' +
                    '</tr>' +
                    '<tr>' +
                        '<td width="1%"><input type="checkbox" checked="true" bind="guessCellTypeInput" /></td>' +
                        '<td colspan="2">Guess cell type</td>' +
                    '</tr>' +
                    '<tr>' +
                        '<td width="1%"><input type="checkbox" checked="true" bind="removeColumnInput" /></td>' +
                        '<td colspan="2">Remove this column</td>' +
                    '</tr>' +
                '</table></div>' +
            '</td>' +
        // Closes the outer row opened on the first line of this markup.
        '</tr></table></div>'
    );
    var bodyElmts = DOM.bind(body);
    
    footer.html(
        '<button bind="okButton">&nbsp;&nbsp;OK&nbsp;&nbsp;</button>' +
        '<button bind="cancelButton">Cancel</button>'
    );
    var footerElmts = DOM.bind(footer);
    
    var level = DialogSystem.showDialog(frame);
    var dismiss = function() {
        DialogSystem.dismissUntil(level - 1);
    };
    
    footerElmts.okButton.click(function() {
        // Look the radio button up inside this dialog only, so that inputs
        // elsewhere in the page with the same name cannot be picked up.
        var mode = body.find("input[name='split-by-mode']:checked")[0].value;
        var config = {
            columnName: self._column.name,
            mode: mode,
            guessCellType: bodyElmts.guessCellTypeInput[0].checked,
            removeOriginalColumn: bodyElmts.removeColumnInput[0].checked
        };
        if (mode == "separator") {
            config.separator = bodyElmts.separatorInput[0].value;
            if (!(config.separator)) {
                alert("Please specify a separator.");
                return;
            }

            config.regex = bodyElmts.regexInput[0].checked;
            
            // maxColumns is only sent when the user typed a usable number.
            var maxColumnsText = bodyElmts.maxColumnsInput[0].value;
            if (maxColumnsText) {
                // Explicit radix: without it some engines read a leading
                // zero ("010") as octal.
                var n = parseInt(maxColumnsText, 10);
                if (!isNaN(n)) {
                    config.maxColumns = n;
                }
            }
        } else {
            var parsed;
            try {
                parsed = JSON.parse("[" + bodyElmts.lengthsTextarea[0].value + "]");
            } catch (e) {
                alert("The given field lengths are not properly formatted.");
                return;
            }
            
            // Keep only the numeric entries of the parsed list.
            var lengths = [];
            $.each(parsed, function(i,n) { if (typeof n == "number") lengths.push(n); });
            
            if (lengths.length == 0) {
                alert("No field length is specified.");
                return;
            }
            
            config.fieldLengths = JSON.stringify(lengths);
        }
        
        Gridworks.postProcess(
            "split-column",
            config,
            null,
            { modelsChanged: true }
        );
        dismiss();
    });
    
    footerElmts.cancelButton.click(dismiss);
};
|
||||
|
Loading…
Reference in New Issue
Block a user