diff --git a/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java b/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java index 0918b04ba..cc0f95d7e 100644 --- a/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java +++ b/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java @@ -66,26 +66,25 @@ public class SplitMultiValueCellsCommand extends Command { String mode = request.getParameter("mode"); Boolean regex = Boolean.parseBoolean(request.getParameter("regex")); - if ("separator".equals(mode)) { - AbstractOperation op = new MultiValuedCellSplitOperation(columnName, - keyColumnName, - separator, - regex); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } else { + AbstractOperation op; + + if ("lengths".equals(mode)) { String s = request.getParameter("fieldLengths"); int[] fieldLengths = ParsingUtilities.mapper.readValue(s, new TypeReference() {}); - AbstractOperation op = new MultiValuedCellSplitOperation(columnName, - keyColumnName, - fieldLengths); - Process process = op.createProcess(project, new Properties()); + op = new MultiValuedCellSplitOperation(columnName, + keyColumnName, + fieldLengths); - performProcessAndRespond(request, response, project, process); - } + } else { + op = new MultiValuedCellSplitOperation(columnName, + keyColumnName, + separator, + regex); + } + Process process = op.createProcess(project, new Properties()); + performProcessAndRespond(request, response, project, process); } catch (Exception e) { respondException(response, e); } diff --git a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java index 57f78f631..66ef20d8c 100644 --- a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java +++ b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java @@ -57,7 +57,7 @@ public class MultiValuedCellSplitOperation extends AbstractOperation { final protected String _mode; final protected String _separator; final protected Boolean _regex; - + final protected int[] _fieldLengths; @JsonCreator @@ -183,7 +183,7 @@ public class MultiValuedCellSplitOperation extends AbstractOperation { Object value = oldRow.getCellValue(cellIndex); String s = value instanceof String ? ((String) value) : value.toString(); String[] values = null; - if("lengths".equals(_mode)) { + if ("lengths".equals(_mode)) { if (_fieldLengths.length > 0 && _fieldLengths[0] > 0) { values = new String[_fieldLengths.length]; @@ -201,9 +201,8 @@ public class MultiValuedCellSplitOperation extends AbstractOperation { lastIndex = thisIndex; } } - } - else if (_regex) { - Pattern pattern = Pattern.compile(_separator); + } else if (_regex) { + Pattern pattern = Pattern.compile(_separator, Pattern.UNICODE_CHARACTER_CLASS); values = pattern.split(s); } else { values = StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator); diff --git a/main/tests/server/src/com/google/refine/operations/cell/SplitMultiValuedCellsTests.java b/main/tests/server/src/com/google/refine/operations/cell/SplitMultiValuedCellsTests.java index 8d2f23d15..b269324c8 100644 --- a/main/tests/server/src/com/google/refine/operations/cell/SplitMultiValuedCellsTests.java +++ b/main/tests/server/src/com/google/refine/operations/cell/SplitMultiValuedCellsTests.java @@ -67,7 +67,7 @@ public class SplitMultiValuedCellsTests extends RefineTest { public void createProject() { project = createCSVProject( "Key,Value\n" - + "Record_1,one:two;three four\n"); + + "Record_1,one:two;three four;fiveSix SevèËight;niné91011twelve thirteen 14Àifteen\n"); } @Test @@ -105,8 +105,8 @@ public class SplitMultiValuedCellsTests extends RefineTest { "Key", ":", false); - Process process = op.createProcess(project, new Properties()); - process.performImmediate(); + Process process = op.createProcess(project, new Properties()); + process.performImmediate(); int keyCol = project.columnModel.getColumnByName("Key").getCellIndex(); int valueCol = project.columnModel.getColumnByName("Value").getCellIndex(); @@ -114,7 +114,7 @@ public class SplitMultiValuedCellsTests extends RefineTest { Assert.assertEquals(project.rows.get(0).getCellValue(keyCol), "Record_1"); Assert.assertEquals(project.rows.get(0).getCellValue(valueCol), "one"); Assert.assertEquals(project.rows.get(1).getCellValue(keyCol), null); - Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "two;three four"); + Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "two;three four;fiveSix SevèËight;niné91011twelve thirteen 14Àifteen"); } @Test @@ -124,8 +124,8 @@ public class SplitMultiValuedCellsTests extends RefineTest { "Key", "\\W", true); - Process process = op.createProcess(project, new Properties()); - process.performImmediate(); + Process process = op.createProcess(project, new Properties()); + process.performImmediate(); int keyCol = project.columnModel.getColumnByName("Key").getCellIndex(); int valueCol = project.columnModel.getColumnByName("Value").getCellIndex(); @@ -163,5 +163,95 @@ public class SplitMultiValuedCellsTests extends RefineTest { Assert.assertEquals(project.rows.get(3).getCellValue(keyCol), null); Assert.assertEquals(project.rows.get(3).getCellValue(valueCol), "four"); } + + @Test + public void testSplitMultiValuedCellsTextCase() throws Exception { + AbstractOperation op = new MultiValuedCellSplitOperation( + "Value", + "Key", + "(?<=\\p{Lower}|[\\p{Lower}][\\s])(?=\\p{Upper})", + true); + Process process = op.createProcess(project, new Properties()); + process.performImmediate(); + + int keyCol = project.columnModel.getColumnByName("Key").getCellIndex(); + int valueCol = project.columnModel.getColumnByName("Value").getCellIndex(); + + Assert.assertEquals(project.rows.get(0).getCellValue(keyCol), "Record_1"); + Assert.assertEquals(project.rows.get(0).getCellValue(valueCol), "one:two;three four;five"); + Assert.assertEquals(project.rows.get(1).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "Six "); + Assert.assertEquals(project.rows.get(2).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(2).getCellValue(valueCol), "Sevè"); + Assert.assertEquals(project.rows.get(3).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(3).getCellValue(valueCol), "Ëight;niné91011twelve thirteen 14Àifteen"); + } + + @Test + public void testSplitMultiValuedCellsTextCaseReverse() throws Exception { + AbstractOperation op = new MultiValuedCellSplitOperation( + "Value", + "Key", + "(?<=\\p{Upper}|[\\p{Upper}][\\s])(?=\\p{Lower})", + true); + Process process = op.createProcess(project, new Properties()); + process.performImmediate(); + + int keyCol = project.columnModel.getColumnByName("Key").getCellIndex(); + int valueCol = project.columnModel.getColumnByName("Value").getCellIndex(); + + Assert.assertEquals(project.rows.get(0).getCellValue(keyCol), "Record_1"); + Assert.assertEquals(project.rows.get(0).getCellValue(valueCol), "one:two;three four;fiveS"); + Assert.assertEquals(project.rows.get(1).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "ix S"); + Assert.assertEquals(project.rows.get(2).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(2).getCellValue(valueCol), "evèË"); + Assert.assertEquals(project.rows.get(3).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(3).getCellValue(valueCol), "ight;niné91011twelve thirteen 14À"); + Assert.assertEquals(project.rows.get(4).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(4).getCellValue(valueCol), "ifteen"); + } + + @Test + public void testSplitMultiValuedCellsTextNumber() throws Exception { + AbstractOperation op = new MultiValuedCellSplitOperation( + "Value", + "Key", + "(?<=\\p{Digit}|[\\p{Digit}][\\s])(?=\\p{L})", + true); + Process process = op.createProcess(project, new Properties()); + process.performImmediate(); + + int keyCol = project.columnModel.getColumnByName("Key").getCellIndex(); + int valueCol = project.columnModel.getColumnByName("Value").getCellIndex(); + + Assert.assertEquals(project.rows.get(0).getCellValue(keyCol), "Record_1"); + Assert.assertEquals(project.rows.get(0).getCellValue(valueCol), "one:two;three four;fiveSix SevèËight;niné91011"); + Assert.assertEquals(project.rows.get(1).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "twelve thirteen 14"); + Assert.assertEquals(project.rows.get(2).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(2).getCellValue(valueCol), "Àifteen"); + } + + @Test + public void testSplitMultiValuedCellsTextNumberReverse() throws Exception { + AbstractOperation op = new MultiValuedCellSplitOperation( + "Value", + "Key", + "(?<=\\p{L}|[\\p{L}][\\s])(?=\\p{Digit})", + true); + Process process = op.createProcess(project, new Properties()); + process.performImmediate(); + + int keyCol = project.columnModel.getColumnByName("Key").getCellIndex(); + int valueCol = project.columnModel.getColumnByName("Value").getCellIndex(); + + Assert.assertEquals(project.rows.get(0).getCellValue(keyCol), "Record_1"); + Assert.assertEquals(project.rows.get(0).getCellValue(valueCol), "one:two;three four;fiveSix SevèËight;niné"); + Assert.assertEquals(project.rows.get(1).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "91011twelve thirteen "); + Assert.assertEquals(project.rows.get(2).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(2).getCellValue(valueCol), "14Àifteen"); + } } diff --git a/main/webapp/modules/core/langs/translation-en.json b/main/webapp/modules/core/langs/translation-en.json index fc35fb1f4..ce37cdcaa 100644 --- a/main/webapp/modules/core/langs/translation-en.json +++ b/main/webapp/modules/core/langs/translation-en.json @@ -567,6 +567,13 @@ "core-views/split-into": "Split into", "core-views/col-at-most": "columns at most (leave blank for no limit)", "core-views/field-len": "by field lengths", + "core-views/by-case": "by transition from lowercase to uppercase", + "core-views/by-number": "by transition from numbers to letters", + "core-views/by-rev": "Reverse splitting order", + "core-views/by-case-example": "[11Abc, Def22]", + "core-views/by-case-rev-example": "[11A, bcD, ef22]", + "core-views/by-number-example": "[11, AbcDef22]", + "core-views/by-number-rev-example": "[11AbcDef, 22]", "core-views/list-int": "List of integers separated by commas, e.g., 5, 7, 15", "core-views/after-split": "After Splitting", "core-views/guess-cell": "Guess cell type", diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js index 406f4739e..eaa07e84b 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js @@ -300,6 +300,15 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { elmts.or_views_fieldLen.text($.i18n('core-views/field-len')); elmts.or_views_listInt.text($.i18n('core-views/list-int')); + elmts.or_views_byCase.text($.i18n('core-views/by-case')); + elmts.or_views_byNumber.text($.i18n('core-views/by-number')); + elmts.or_views_revCase.text($.i18n('core-views/by-rev')); + elmts.or_views_revNum.text($.i18n('core-views/by-rev')); + elmts.or_views_caseExample.text($.i18n('core-views/by-case-example')); + elmts.or_views_caseReverseExample.text($.i18n('core-views/by-case-rev-example')); + elmts.or_views_numberExample.text($.i18n('core-views/by-number-example')); + elmts.or_views_numberReverseExample.text($.i18n('core-views/by-number-rev-example')); + elmts.okButton.html($.i18n('core-buttons/ok')); elmts.cancelButton.text($.i18n('core-buttons/cancel')); @@ -325,7 +334,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { config.regex = elmts.regexInput[0].checked; - } else { + } else if (mode === "lengths") { var s = "[" + elmts.lengthsTextarea[0].value + "]"; try { var a = JSON.parse(s); @@ -348,6 +357,20 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { alert($.i18n('core-views/warning-format')); return; } + } else if (mode === "cases") { + if(elmts.reversTranistionCases[0].checked) { + config.separator = "(?<=\\p{Upper}|[\\p{Upper}][\\s])(?=\\p{Lower})"; + } else { + config.separator = "(?<=\\p{Lower}|[\\p{Lower}][\\s])(?=\\p{Upper})"; + } + config.regex = true; + } else if (mode === "number") { + if(elmts.reversTranistionNumbers[0].checked) { + config.separator = "(?<=\\p{L}|[\\p{L}][\\s])(?=\\p{Digit})"; + } else { + config.separator = "(?<=\\p{Digit}|[\\p{Digit}][\\s])(?=\\p{L})"; + } + config.regex = true; } Refine.postCoreProcess( diff --git a/main/webapp/modules/core/scripts/views/data-table/split-multi-valued-cells-dialog.html b/main/webapp/modules/core/scripts/views/data-table/split-multi-valued-cells-dialog.html index 3b40cebea..bb2eeca0d 100644 --- a/main/webapp/modules/core/scripts/views/data-table/split-multi-valued-cells-dialog.html +++ b/main/webapp/modules/core/scripts/views/data-table/split-multi-valued-cells-dialog.html @@ -23,11 +23,33 @@ - + + + + + + + + + + + + + + + + + + + + + + +