[FEAT] Adds new options for split (#2471)

* added options ui

* added definition for both separators

* added tests

* removed definitions from backend and added them to frontend

* added reverse order and handling for accented characters

* added tests for accented characters and reverse split

* fixed build errors

* used Unicode character ranges instead

* added examples
This commit is contained in:
Lisa Chandra 2020-06-15 23:00:18 +05:30 committed by GitHub
parent 307a52ee95
commit 947356ddad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 168 additions and 28 deletions

View File

@ -66,26 +66,25 @@ public class SplitMultiValueCellsCommand extends Command {
String mode = request.getParameter("mode");
Boolean regex = Boolean.parseBoolean(request.getParameter("regex"));
if ("separator".equals(mode)) {
AbstractOperation op = new MultiValuedCellSplitOperation(columnName,
keyColumnName,
separator,
regex);
Process process = op.createProcess(project, new Properties());
performProcessAndRespond(request, response, project, process);
} else {
AbstractOperation op;
if ("lengths".equals(mode)) {
String s = request.getParameter("fieldLengths");
int[] fieldLengths = ParsingUtilities.mapper.readValue(s, new TypeReference<int[]>() {});
AbstractOperation op = new MultiValuedCellSplitOperation(columnName,
keyColumnName,
fieldLengths);
Process process = op.createProcess(project, new Properties());
op = new MultiValuedCellSplitOperation(columnName,
keyColumnName,
fieldLengths);
performProcessAndRespond(request, response, project, process);
}
} else {
op = new MultiValuedCellSplitOperation(columnName,
keyColumnName,
separator,
regex);
}
Process process = op.createProcess(project, new Properties());
performProcessAndRespond(request, response, project, process);
} catch (Exception e) {
respondException(response, e);
}

View File

@ -57,7 +57,7 @@ public class MultiValuedCellSplitOperation extends AbstractOperation {
final protected String _mode;
final protected String _separator;
final protected Boolean _regex;
final protected int[] _fieldLengths;
@JsonCreator
@ -183,7 +183,7 @@ public class MultiValuedCellSplitOperation extends AbstractOperation {
Object value = oldRow.getCellValue(cellIndex);
String s = value instanceof String ? ((String) value) : value.toString();
String[] values = null;
if("lengths".equals(_mode)) {
if ("lengths".equals(_mode)) {
if (_fieldLengths.length > 0 && _fieldLengths[0] > 0) {
values = new String[_fieldLengths.length];
@ -201,9 +201,8 @@ public class MultiValuedCellSplitOperation extends AbstractOperation {
lastIndex = thisIndex;
}
}
}
else if (_regex) {
Pattern pattern = Pattern.compile(_separator);
} else if (_regex) {
Pattern pattern = Pattern.compile(_separator, Pattern.UNICODE_CHARACTER_CLASS);
values = pattern.split(s);
} else {
values = StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator);

View File

@ -67,7 +67,7 @@ public class SplitMultiValuedCellsTests extends RefineTest {
public void createProject() {
project = createCSVProject(
"Key,Value\n"
+ "Record_1,one:two;three four\n");
+ "Record_1,one:two;three four;fiveSix SevèËight;niné91011twelve thirteen 14Àifteen\n");
}
@Test
@ -105,8 +105,8 @@ public class SplitMultiValuedCellsTests extends RefineTest {
"Key",
":",
false);
Process process = op.createProcess(project, new Properties());
process.performImmediate();
Process process = op.createProcess(project, new Properties());
process.performImmediate();
int keyCol = project.columnModel.getColumnByName("Key").getCellIndex();
int valueCol = project.columnModel.getColumnByName("Value").getCellIndex();
@ -114,7 +114,7 @@ public class SplitMultiValuedCellsTests extends RefineTest {
Assert.assertEquals(project.rows.get(0).getCellValue(keyCol), "Record_1");
Assert.assertEquals(project.rows.get(0).getCellValue(valueCol), "one");
Assert.assertEquals(project.rows.get(1).getCellValue(keyCol), null);
Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "two;three four");
Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "two;three four;fiveSix SevèËight;niné91011twelve thirteen 14Àifteen");
}
@Test
@ -124,8 +124,8 @@ public class SplitMultiValuedCellsTests extends RefineTest {
"Key",
"\\W",
true);
Process process = op.createProcess(project, new Properties());
process.performImmediate();
Process process = op.createProcess(project, new Properties());
process.performImmediate();
int keyCol = project.columnModel.getColumnByName("Key").getCellIndex();
int valueCol = project.columnModel.getColumnByName("Value").getCellIndex();
@ -163,5 +163,95 @@ public class SplitMultiValuedCellsTests extends RefineTest {
Assert.assertEquals(project.rows.get(3).getCellValue(keyCol), null);
Assert.assertEquals(project.rows.get(3).getCellValue(valueCol), "four");
}
/**
 * Splits the "Value" column wherever a lowercase letter (optionally followed by one
 * whitespace character) is immediately followed by an uppercase letter, and checks
 * the resulting rows. Accented letters are expected to take part in the transition.
 */
@Test
public void testSplitMultiValuedCellsTextCase() throws Exception {
    // Zero-width separator: look-behind for lowercase (or lowercase + whitespace), look-ahead for uppercase.
    final String lowerToUpper = "(?<=\\p{Lower}|[\\p{Lower}][\\s])(?=\\p{Upper})";
    AbstractOperation splitOp = new MultiValuedCellSplitOperation("Value", "Key", lowerToUpper, true);
    splitOp.createProcess(project, new Properties()).performImmediate();

    final int keyIdx = project.columnModel.getColumnByName("Key").getCellIndex();
    final int valueIdx = project.columnModel.getColumnByName("Value").getCellIndex();

    // One entry per expected row; trailing whitespace stays with the preceding piece.
    final String[] expectedValues = {
        "one:two;three four;five",
        "Six ",
        "Sevè",
        "Ëight;niné91011twelve thirteen 14Àifteen"
    };
    for (int r = 0; r < expectedValues.length; r++) {
        // Only the first row of the record carries the key; the rest are blank.
        String expectedKey = (r == 0) ? "Record_1" : null;
        Assert.assertEquals(project.rows.get(r).getCellValue(keyIdx), expectedKey);
        Assert.assertEquals(project.rows.get(r).getCellValue(valueIdx), expectedValues[r]);
    }
}
/**
 * Splits the "Value" column wherever an uppercase letter (optionally followed by one
 * whitespace character) is immediately followed by a lowercase letter, i.e. the
 * reverse of the lowercase-to-uppercase transition, and checks the resulting rows.
 */
@Test
public void testSplitMultiValuedCellsTextCaseReverse() throws Exception {
    // Zero-width separator: look-behind for uppercase (or uppercase + whitespace), look-ahead for lowercase.
    final String upperToLower = "(?<=\\p{Upper}|[\\p{Upper}][\\s])(?=\\p{Lower})";
    AbstractOperation splitOp = new MultiValuedCellSplitOperation("Value", "Key", upperToLower, true);
    splitOp.createProcess(project, new Properties()).performImmediate();

    final int keyIdx = project.columnModel.getColumnByName("Key").getCellIndex();
    final int valueIdx = project.columnModel.getColumnByName("Value").getCellIndex();

    // One entry per expected row; each piece ends with the uppercase letter that triggered the split.
    final String[] expectedValues = {
        "one:two;three four;fiveS",
        "ix S",
        "evèË",
        "ight;niné91011twelve thirteen 14À",
        "ifteen"
    };
    for (int r = 0; r < expectedValues.length; r++) {
        // Only the first row of the record carries the key; the rest are blank.
        String expectedKey = (r == 0) ? "Record_1" : null;
        Assert.assertEquals(project.rows.get(r).getCellValue(keyIdx), expectedKey);
        Assert.assertEquals(project.rows.get(r).getCellValue(valueIdx), expectedValues[r]);
    }
}
/**
 * Splits the "Value" column wherever a digit (optionally followed by one whitespace
 * character) is immediately followed by a letter, and checks the resulting rows.
 * Accented letters are expected to count as letters.
 */
@Test
public void testSplitMultiValuedCellsTextNumber() throws Exception {
    // Zero-width separator: look-behind for a digit (or digit + whitespace), look-ahead for any letter.
    final String digitToLetter = "(?<=\\p{Digit}|[\\p{Digit}][\\s])(?=\\p{L})";
    AbstractOperation splitOp = new MultiValuedCellSplitOperation("Value", "Key", digitToLetter, true);
    splitOp.createProcess(project, new Properties()).performImmediate();

    final int keyIdx = project.columnModel.getColumnByName("Key").getCellIndex();
    final int valueIdx = project.columnModel.getColumnByName("Value").getCellIndex();

    // One entry per expected row, in row order.
    final String[] expectedValues = {
        "one:two;three four;fiveSix SevèËight;niné91011",
        "twelve thirteen 14",
        "Àifteen"
    };
    for (int r = 0; r < expectedValues.length; r++) {
        // Only the first row of the record carries the key; the rest are blank.
        String expectedKey = (r == 0) ? "Record_1" : null;
        Assert.assertEquals(project.rows.get(r).getCellValue(keyIdx), expectedKey);
        Assert.assertEquals(project.rows.get(r).getCellValue(valueIdx), expectedValues[r]);
    }
}
/**
 * Splits the "Value" column wherever a letter (optionally followed by one whitespace
 * character) is immediately followed by a digit, i.e. the reverse of the
 * digit-to-letter transition, and checks the resulting rows.
 */
@Test
public void testSplitMultiValuedCellsTextNumberReverse() throws Exception {
    // Zero-width separator: look-behind for a letter (or letter + whitespace), look-ahead for a digit.
    final String letterToDigit = "(?<=\\p{L}|[\\p{L}][\\s])(?=\\p{Digit})";
    AbstractOperation splitOp = new MultiValuedCellSplitOperation("Value", "Key", letterToDigit, true);
    splitOp.createProcess(project, new Properties()).performImmediate();

    final int keyIdx = project.columnModel.getColumnByName("Key").getCellIndex();
    final int valueIdx = project.columnModel.getColumnByName("Value").getCellIndex();

    // One entry per expected row; trailing whitespace stays with the preceding piece.
    final String[] expectedValues = {
        "one:two;three four;fiveSix SevèËight;niné",
        "91011twelve thirteen ",
        "14Àifteen"
    };
    for (int r = 0; r < expectedValues.length; r++) {
        // Only the first row of the record carries the key; the rest are blank.
        String expectedKey = (r == 0) ? "Record_1" : null;
        Assert.assertEquals(project.rows.get(r).getCellValue(keyIdx), expectedKey);
        Assert.assertEquals(project.rows.get(r).getCellValue(valueIdx), expectedValues[r]);
    }
}
}

View File

@ -567,6 +567,13 @@
"core-views/split-into": "Split into",
"core-views/col-at-most": "columns at most (leave blank for no limit)",
"core-views/field-len": "by field lengths",
"core-views/by-case": "by transition from lowercase to uppercase",
"core-views/by-number": "by transition from numbers to letters",
"core-views/by-rev": "Reverse splitting order",
"core-views/by-case-example": "[11Abc, Def22]",
"core-views/by-case-rev-example": "[11A, bcD, ef22]",
"core-views/by-number-example": "[11, AbcDef22]",
"core-views/by-number-rev-example": "[11AbcDef, 22]",
"core-views/list-int": "List of integers separated by commas, e.g., 5, 7, 15",
"core-views/after-split": "After Splitting",
"core-views/guess-cell": "Guess cell type",

View File

@ -300,6 +300,15 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
elmts.or_views_fieldLen.text($.i18n('core-views/field-len'));
elmts.or_views_listInt.text($.i18n('core-views/list-int'));
elmts.or_views_byCase.text($.i18n('core-views/by-case'));
elmts.or_views_byNumber.text($.i18n('core-views/by-number'));
elmts.or_views_revCase.text($.i18n('core-views/by-rev'));
elmts.or_views_revNum.text($.i18n('core-views/by-rev'));
elmts.or_views_caseExample.text($.i18n('core-views/by-case-example'));
elmts.or_views_caseReverseExample.text($.i18n('core-views/by-case-rev-example'));
elmts.or_views_numberExample.text($.i18n('core-views/by-number-example'));
elmts.or_views_numberReverseExample.text($.i18n('core-views/by-number-rev-example'));
elmts.okButton.html($.i18n('core-buttons/ok'));
elmts.cancelButton.text($.i18n('core-buttons/cancel'));
@ -325,7 +334,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
config.regex = elmts.regexInput[0].checked;
} else {
} else if (mode === "lengths") {
var s = "[" + elmts.lengthsTextarea[0].value + "]";
try {
var a = JSON.parse(s);
@ -348,6 +357,20 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
alert($.i18n('core-views/warning-format'));
return;
}
} else if (mode === "cases") {
if(elmts.reversTranistionCases[0].checked) {
config.separator = "(?<=\\p{Upper}|[\\p{Upper}][\\s])(?=\\p{Lower})";
} else {
config.separator = "(?<=\\p{Lower}|[\\p{Lower}][\\s])(?=\\p{Upper})";
}
config.regex = true;
} else if (mode === "number") {
if(elmts.reversTranistionNumbers[0].checked) {
config.separator = "(?<=\\p{L}|[\\p{L}][\\s])(?=\\p{Digit})";
} else {
config.separator = "(?<=\\p{Digit}|[\\p{Digit}][\\s])(?=\\p{L})";
}
config.regex = true;
}
Refine.postCoreProcess(

View File

@ -23,11 +23,33 @@
<td colspan="2"><label for="$split-multi-valued-cells-by-lengths" bind="or_views_fieldLen"></label></td>
</tr>
<tr><td></td>
<td colspan="2"><textarea style="width: 100%;" bind="lengthsTextarea"></textarea></td>
<td colspan="3"><textarea style="width: 100%;" bind="lengthsTextarea"></textarea></td>
</tr>
<tr><td></td>
<td colspan="2" bind="or_views_listInt"></td>
</tr>
<tr>
<td width="1%"><input type="radio" name="split-by-mode" value="cases" id="$split-multi-valued-cells-by-case" /></td>
<td colspan="2"><label for="$split-multi-valued-cells-by-case" bind="or_views_byCase"></label></td>
<td><input type="checkbox" bind="reversTranistionCases" id="$split-cell-transition-order-case" />
<label for="$split-cell-transition-order-case" bind="or_views_revCase"></label></td>
</tr>
<tr>
<td></td>
<td colspan="2"><span bind="or_views_caseExample"></span></td>
<td colspan="2"><span style="padding-left: 25px;" bind="or_views_caseReverseExample"></span></td>
</tr>
<tr>
<td width="1%"><input type="radio" name="split-by-mode" value="number" id="$split-multi-valued-cells-by-number" /></td>
<td colspan="2"><label for="$split-multi-valued-cells-by-number" bind="or_views_byNumber"></label></td>
<td><input type="checkbox" bind="reversTranistionNumbers" id="$split-cell-transition-order-number" />
<label for="$split-cell-transition-order-number" bind="or_views_revNum"></label></td>
</tr>
<tr>
<td></td>
<td colspan="2"><span bind="or_views_numberExample"></span></td>
<td colspan="2"><span style="padding-left: 25px;" bind="or_views_numberReverseExample"></span></td>
</tr>
</table></div>
</td>
</table></div>