Update split multi-valued cells to support split by regex and split by lengths
This commit is contained in:
parent
6fb7f1f476
commit
cccf1e55c9
@ -40,10 +40,13 @@ import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.json.JSONArray;
|
||||
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.model.AbstractOperation;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.operations.cell.MultiValuedCellSplitOperation;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
import com.google.refine.process.Process;
|
||||
|
||||
public class SplitMultiValueCellsCommand extends Command {
|
||||
@ -58,11 +61,33 @@ public class SplitMultiValueCellsCommand extends Command {
|
||||
String keyColumnName = request.getParameter("keyColumnName");
|
||||
String separator = request.getParameter("separator");
|
||||
String mode = request.getParameter("mode");
|
||||
Boolean regex = Boolean.parseBoolean(request.getParameter("regex"));
|
||||
|
||||
AbstractOperation op = new MultiValuedCellSplitOperation(columnName, keyColumnName, separator, mode);
|
||||
if ("separator".equals(mode)) {
|
||||
AbstractOperation op = new MultiValuedCellSplitOperation(columnName,
|
||||
keyColumnName,
|
||||
separator,
|
||||
regex);
|
||||
Process process = op.createProcess(project, new Properties());
|
||||
|
||||
performProcessAndRespond(request, response, project, process);
|
||||
} else {
|
||||
String s = request.getParameter("fieldLengths");
|
||||
|
||||
JSONArray a = ParsingUtilities.evaluateJsonStringToArray(s);
|
||||
int[] fieldLengths = new int[a.length()];
|
||||
|
||||
for (int i = 0; i < fieldLengths.length; i++) {
|
||||
fieldLengths[i] = a.getInt(i);
|
||||
}
|
||||
|
||||
AbstractOperation op = new MultiValuedCellSplitOperation(columnName,
|
||||
keyColumnName,
|
||||
fieldLengths);
|
||||
Process process = op.createProcess(project, new Properties());
|
||||
|
||||
performProcessAndRespond(request, response, project, process);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
respondException(response, e);
|
||||
}
|
||||
|
@ -33,9 +33,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.operations.cell;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.json.JSONException;
|
||||
@ -50,32 +52,64 @@ import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.model.changes.MassRowChange;
|
||||
import com.google.refine.operations.OperationRegistry;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class MultiValuedCellSplitOperation extends AbstractOperation {
|
||||
final protected String _columnName;
|
||||
final protected String _keyColumnName;
|
||||
final protected String _separator;
|
||||
final protected String _mode;
|
||||
final protected String _separator;
|
||||
final protected boolean _regex;
|
||||
|
||||
final protected int[] _fieldLengths;
|
||||
|
||||
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
||||
String mode = obj.getString("mode");
|
||||
|
||||
if ("separator".equals(mode)) {
|
||||
return new MultiValuedCellSplitOperation(
|
||||
obj.getString("columnName"),
|
||||
obj.getString("keyColumnName"),
|
||||
obj.getString("separator"),
|
||||
obj.getString("mode")
|
||||
obj.getBoolean("regex")
|
||||
);
|
||||
} else {
|
||||
return new MultiValuedCellSplitOperation(
|
||||
obj.getString("columnName"),
|
||||
obj.getString("keyColumnName"),
|
||||
JSONUtilities.getIntArray(obj, "fieldLengths")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates an operation that splits cells on a separator.
 *
 * @param columnName    name of the column whose cells are split
 * @param keyColumnName name of the key column
 * @param separator     separator text, or the regex source when {@code regex} is true
 * @param regex         when true the separator is compiled with {@code Pattern.compile}
 *                      and used to split; when false it is matched as a whole literal
 *                      separator
 */
public MultiValuedCellSplitOperation(
    String columnName,
    String keyColumnName,
    String separator,
    boolean regex
) {
    _columnName = columnName;
    _keyColumnName = keyColumnName;

    // Each final field is assigned exactly once; the mode is fixed for this variant.
    _mode = "separator";
    _separator = separator;
    _regex = regex;

    _fieldLengths = null;
}
|
||||
|
||||
/**
 * Creates an operation that splits cells into fixed-width fields.
 *
 * @param columnName    name of the column whose cells are split
 * @param keyColumnName name of the key column
 * @param fieldLengths  width of each successive field; stored as given (not copied)
 */
public MultiValuedCellSplitOperation(
        String columnName,
        String keyColumnName,
        int[] fieldLengths) {
    this._mode = "lengths";

    this._columnName = columnName;
    this._keyColumnName = keyColumnName;
    this._fieldLengths = fieldLengths;

    // Separator settings carry no value in this mode.
    this._separator = null;
    this._regex = false;
}
|
||||
|
||||
@Override
|
||||
@ -87,8 +121,17 @@ public class MultiValuedCellSplitOperation extends AbstractOperation {
|
||||
writer.key("description"); writer.value("Split multi-valued cells in column " + _columnName);
|
||||
writer.key("columnName"); writer.value(_columnName);
|
||||
writer.key("keyColumnName"); writer.value(_keyColumnName);
|
||||
writer.key("separator"); writer.value(_separator);
|
||||
writer.key("mode"); writer.value(_mode);
|
||||
if ("separator".equals(_mode)) {
|
||||
writer.key("separator"); writer.value(_separator);
|
||||
writer.key("regex"); writer.value(_regex);
|
||||
} else {
|
||||
writer.key("fieldLengths"); writer.array();
|
||||
for (int l : _fieldLengths) {
|
||||
writer.value(l);
|
||||
}
|
||||
writer.endArray();
|
||||
}
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
@ -124,8 +167,31 @@ public class MultiValuedCellSplitOperation extends AbstractOperation {
|
||||
Object value = oldRow.getCellValue(cellIndex);
|
||||
String s = value instanceof String ? ((String) value) : value.toString();
|
||||
String[] values = null;
|
||||
if (_mode.equals("regex")) {
|
||||
values = s.split(_separator);
|
||||
if("lengths".equals(_mode)) {
|
||||
//do split by lengths
|
||||
if (_fieldLengths.length >= 0 && _fieldLengths[0] > 0) {
|
||||
Object o = _fieldLengths[0];
|
||||
|
||||
values = new String[_fieldLengths.length];
|
||||
|
||||
int lastIndex = 0;
|
||||
|
||||
for (int i = 0; i < _fieldLengths.length; i++) {
|
||||
int thisIndex = lastIndex;
|
||||
|
||||
Object o2 = _fieldLengths[i];
|
||||
if (o2 instanceof Number) {
|
||||
thisIndex = Math.min(s.length(), lastIndex + Math.max(0, ((Number) o2).intValue()));
|
||||
}
|
||||
|
||||
values[i] = s.substring(lastIndex, thisIndex);
|
||||
lastIndex = thisIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (_regex) {
|
||||
Pattern pattern = Pattern.compile(_separator);
|
||||
values = pattern.split(s);
|
||||
} else {
|
||||
values = StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user