From 6fb7f1f476c87c3dc3cb95450ec27b9e9e6cee93 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Sun, 22 Oct 2017 23:53:18 +0100 Subject: [PATCH 01/11] Add UI for split multi-valued cells by sep/regex/length --- .../modules/core/langs/translation-en.json | 1 + .../views/data-table/menu-edit-cells.js | 71 +++++++++++++++++-- .../split-multi-valued-cells-dialog.html | 40 +++++++++++ 3 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 main/webapp/modules/core/scripts/views/data-table/split-multi-valued-cells-dialog.html diff --git a/main/webapp/modules/core/langs/translation-en.json b/main/webapp/modules/core/langs/translation-en.json index dc0c725dc..f2885b87a 100644 --- a/main/webapp/modules/core/langs/translation-en.json +++ b/main/webapp/modules/core/langs/translation-en.json @@ -525,6 +525,7 @@ "split-col": "Split column", "several-col": "into several columns", "how-split": "How to Split Column", + "how-split-cells": "How to split multi-valued cells", "by-sep": "by separator", "separator": "Separator", "reg-exp": "regular expression", diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js index 9ea92d8ee..f829af812 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js @@ -134,20 +134,83 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { }; var doSplitMultiValueCells = function() { - var separator = window.prompt($.i18n._('core-views')["what-separator"], ","); - if (separator !== null) { + + var frame = $(DOM.loadHTML("core", "scripts/views/data-table/split-multi-valued-cells-dialog.html")); + var elmts = DOM.bind(frame); + elmts.dialogHeader.text($.i18n._('core-views')["split-cells"]); + + elmts.or_views_howSplit.text($.i18n._('core-views')["how-split-cells"]); + elmts.or_views_bySep.text($.i18n._('core-views')["by-sep"]); + elmts.or_views_separator.text($.i18n._('core-views')["separator"]); + elmts.or_views_regExp.text($.i18n._('core-views')["reg-exp"]); + + elmts.or_views_fieldLen.text($.i18n._('core-views')["field-len"]); + elmts.or_views_listInt.text($.i18n._('core-views')["list-int"]); + + elmts.okButton.html($.i18n._('core-buttons')["ok"]); + elmts.cancelButton.text($.i18n._('core-buttons')["cancel"]); + + var level = DialogSystem.showDialog(frame); + var dismiss = function() { DialogSystem.dismissUntil(level - 1); }; + + elmts.cancelButton.click(dismiss); + elmts.okButton.click(function() { + var mode = $("input[name='split-by-mode']:checked")[0].value; + var config = { + columnName: column.name, + keyColumnName: theProject.columnModel.keyColumnName, + mode: mode + }; + if (mode == "separator") { + config.separator = elmts.separatorInput[0].value; + if (!(config.separator)) { + alert($.i18n._('core-views')["specify-sep"]); + return; + } + + config.regex = elmts.regexInput[0].checked; + + } else { + var s = "[" + elmts.lengthsTextarea[0].value + "]"; + try { + var a = JSON.parse(s); + + var lengths = []; + $.each(a, function(i,n) { + if (typeof n == "number") { + lengths.push(n); + } + }); + + if (lengths.length === 0) { + alert($.i18n._('core-views')["warning-no-length"]); + return; + } + + config.fieldLengths = JSON.stringify(lengths); + + } catch (e) { + alert($.i18n._('core-views')["warning-format"]); + return; + } + } + Refine.postCoreProcess( "split-multi-value-cells", + config, +/* Old config { columnName: column.name, keyColumnName: theProject.columnModel.keyColumnName, separator: separator, mode: "plain" - }, + },*/ null, { rowsChanged: true } ); - } + + dismiss(); + }); }; MenuSystem.appendTo(menu, [ "core/edit-cells" ], [ diff --git a/main/webapp/modules/core/scripts/views/data-table/split-multi-valued-cells-dialog.html b/main/webapp/modules/core/scripts/views/data-table/split-multi-valued-cells-dialog.html new file mode 100644 index 000000000..c68744865 --- /dev/null +++ b/main/webapp/modules/core/scripts/views/data-table/split-multi-valued-cells-dialog.html @@ -0,0 +1,40 @@ +
+
+
+
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + +

+ +
+
+
+ +
+
\ No newline at end of file From cccf1e55c98937f37de0a21798eefbd628ff0df8 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Sun, 22 Oct 2017 23:54:18 +0100 Subject: [PATCH 02/11] Update split multi-valued cells to support split by regex and split by lengths --- .../cell/SplitMultiValueCellsCommand.java | 37 +++++-- .../cell/MultiValuedCellSplitOperation.java | 96 ++++++++++++++++--- 2 files changed, 112 insertions(+), 21 deletions(-) diff --git a/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java b/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java index 6cca50637..355fc1406 100644 --- a/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java +++ b/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java @@ -33,17 +33,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.refine.commands.cell; - import java.io.IOException; +import java.io.IOException; import java.util.Properties; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import org.json.JSONArray; + import com.google.refine.commands.Command; import com.google.refine.model.AbstractOperation; import com.google.refine.model.Project; import com.google.refine.operations.cell.MultiValuedCellSplitOperation; +import com.google.refine.util.ParsingUtilities; import com.google.refine.process.Process; public class SplitMultiValueCellsCommand extends Command { @@ -58,11 +61,33 @@ public class SplitMultiValueCellsCommand extends Command { String keyColumnName = request.getParameter("keyColumnName"); String separator = request.getParameter("separator"); String mode = request.getParameter("mode"); - - AbstractOperation op = new MultiValuedCellSplitOperation(columnName, keyColumnName, separator, mode); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); + Boolean regex = Boolean.parseBoolean(request.getParameter("regex")); + + if ("separator".equals(mode)) { + AbstractOperation op = new MultiValuedCellSplitOperation(columnName, + keyColumnName, + separator, + regex); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } else { + String s = request.getParameter("fieldLengths"); + + JSONArray a = ParsingUtilities.evaluateJsonStringToArray(s); + int[] fieldLengths = new int[a.length()]; + + for (int i = 0; i < fieldLengths.length; i++) { + fieldLengths[i] = a.getInt(i); + } + + AbstractOperation op = new MultiValuedCellSplitOperation(columnName, + keyColumnName, + fieldLengths); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } } catch (Exception e) { respondException(response, e); } diff --git a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java index 3a100e562..9ef71f09c 100644 --- a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java +++ b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java @@ -33,9 +33,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.refine.operations.cell; - import java.util.ArrayList; +import java.io.Serializable; +import java.util.ArrayList; import java.util.List; import java.util.Properties; +import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; import org.json.JSONException; @@ -50,32 +52,64 @@ import com.google.refine.model.Project; import com.google.refine.model.Row; import com.google.refine.model.changes.MassRowChange; import com.google.refine.operations.OperationRegistry; +import com.google.refine.util.JSONUtilities; public class MultiValuedCellSplitOperation extends AbstractOperation { final protected String _columnName; final protected String _keyColumnName; - final protected String _separator; final protected String _mode; + final protected String _separator; + final protected boolean _regex; + + final protected int[] _fieldLengths; static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new MultiValuedCellSplitOperation( - obj.getString("columnName"), - obj.getString("keyColumnName"), - obj.getString("separator"), - obj.getString("mode") - ); + String mode = obj.getString("mode"); + + if ("separator".equals(mode)) { + return new MultiValuedCellSplitOperation( + obj.getString("columnName"), + obj.getString("keyColumnName"), + obj.getString("separator"), + obj.getBoolean("regex") + ); + } else { + return new MultiValuedCellSplitOperation( + obj.getString("columnName"), + obj.getString("keyColumnName"), + JSONUtilities.getIntArray(obj, "fieldLengths") + ); + } } public MultiValuedCellSplitOperation( String columnName, String keyColumnName, - String separator, - String mode + String separator, + boolean regex ) { _columnName = columnName; _keyColumnName = keyColumnName; _separator = separator; - _mode = mode; + _mode = "separator"; + _regex = regex; + + _fieldLengths = null; + } + + public MultiValuedCellSplitOperation( + String columnName, + String keyColumnName, + int[] fieldLengths + ) { + _columnName = columnName; + _keyColumnName = keyColumnName; + + _mode = "lengths"; + _separator = null; + _regex = false; + + _fieldLengths = fieldLengths; } @Override @@ -87,8 +121,17 @@ public class MultiValuedCellSplitOperation extends AbstractOperation { writer.key("description"); writer.value("Split multi-valued cells in column " + _columnName); writer.key("columnName"); writer.value(_columnName); writer.key("keyColumnName"); writer.value(_keyColumnName); - writer.key("separator"); writer.value(_separator); writer.key("mode"); writer.value(_mode); + if ("separator".equals(_mode)) { + writer.key("separator"); writer.value(_separator); + writer.key("regex"); writer.value(_regex); + } else { + writer.key("fieldLengths"); writer.array(); + for (int l : _fieldLengths) { + writer.value(l); + } + writer.endArray(); + } writer.endObject(); } @@ -110,7 +153,7 @@ public class MultiValuedCellSplitOperation extends AbstractOperation { throw new Exception("No key column named " + _keyColumnName); } int keyCellIndex = keyColumn.getCellIndex(); - + List newRows = new ArrayList(); int oldRowCount = project.rows.size(); @@ -124,8 +167,31 @@ public class MultiValuedCellSplitOperation extends AbstractOperation { Object value = oldRow.getCellValue(cellIndex); String s = value instanceof String ? ((String) value) : value.toString(); String[] values = null; - if (_mode.equals("regex")) { - values = s.split(_separator); + if("lengths".equals(_mode)) { + //do split by lengths + if (_fieldLengths.length >= 0 && _fieldLengths[0] > 0) { + Object o = _fieldLengths[0]; + + values = new String[_fieldLengths.length]; + + int lastIndex = 0; + + for (int i = 0; i < _fieldLengths.length; i++) { + int thisIndex = lastIndex; + + Object o2 = _fieldLengths[i]; + if (o2 instanceof Number) { + thisIndex = Math.min(s.length(), lastIndex + Math.max(0, ((Number) o2).intValue())); + } + + values[i] = s.substring(lastIndex, thisIndex); + lastIndex = thisIndex; + } + } + } + else if (_regex) { + Pattern pattern = Pattern.compile(_separator); + values = pattern.split(s); } else { values = StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator); } From 23b643426a0bac51342f49a0ec0f000ecc7544e0 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 23 Oct 2017 08:41:14 +0200 Subject: [PATCH 03/11] Fix Codacy warnings in MultiValuedCellSplitOperation --- .../refine/operations/cell/MultiValuedCellSplitOperation.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java index 9ef71f09c..6b009fb5b 100644 --- a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java +++ b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java @@ -33,7 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.refine.operations.cell; -import java.io.Serializable; import java.util.ArrayList; import java.util.List; import java.util.Properties; @@ -170,8 +169,6 @@ public class MultiValuedCellSplitOperation extends AbstractOperation { if("lengths".equals(_mode)) { //do split by lengths if (_fieldLengths.length >= 0 && _fieldLengths[0] > 0) { - Object o = _fieldLengths[0]; - values = new String[_fieldLengths.length]; int lastIndex = 0; From 46c3ec100e73695aa2beda082c5a2c23147ef855 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Mon, 23 Oct 2017 07:45:58 +0100 Subject: [PATCH 04/11] Remove unused local variables and imports --- .../operations/cell/MultiValuedCellSplitOperation.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java index 6b009fb5b..025d40ee6 100644 --- a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java +++ b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java @@ -167,7 +167,6 @@ public class MultiValuedCellSplitOperation extends AbstractOperation { String s = value instanceof String ? ((String) value) : value.toString(); String[] values = null; if("lengths".equals(_mode)) { - //do split by lengths if (_fieldLengths.length >= 0 && _fieldLengths[0] > 0) { values = new String[_fieldLengths.length]; @@ -176,9 +175,9 @@ public class MultiValuedCellSplitOperation extends AbstractOperation { for (int i = 0; i < _fieldLengths.length; i++) { int thisIndex = lastIndex; - Object o2 = _fieldLengths[i]; - if (o2 instanceof Number) { - thisIndex = Math.min(s.length(), lastIndex + Math.max(0, ((Number) o2).intValue())); + Object o = _fieldLengths[i]; + if (o instanceof Number) { + thisIndex = Math.min(s.length(), lastIndex + Math.max(0, ((Number) o).intValue())); } values[i] = s.substring(lastIndex, thisIndex); From 47520d4a71b50df157be2c1943a82e0ae5b6b847 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Mon, 23 Oct 2017 07:46:33 +0100 Subject: [PATCH 05/11] Minor fixes to syntax --- .../core/scripts/views/data-table/menu-edit-cells.js | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js index f829af812..d3224ab88 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js @@ -125,7 +125,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { { columnName: column.name, keyColumnName: theProject.columnModel.keyColumnName, - separator: separator + separator }, null, { rowsChanged: true } @@ -161,7 +161,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { keyColumnName: theProject.columnModel.keyColumnName, mode: mode }; - if (mode == "separator") { + if (mode === "separator") { config.separator = elmts.separatorInput[0].value; if (!(config.separator)) { alert($.i18n._('core-views')["specify-sep"]); @@ -198,13 +198,6 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { Refine.postCoreProcess( "split-multi-value-cells", config, -/* Old config - { - columnName: column.name, - keyColumnName: theProject.columnModel.keyColumnName, - separator: separator, - mode: "plain" - },*/ null, { rowsChanged: true } ); From b378dea417d6f884e68a65db6f6f4a68831e1417 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Mon, 23 Oct 2017 08:54:26 +0100 Subject: [PATCH 06/11] Minor syntax fix --- .../modules/core/scripts/views/data-table/menu-edit-cells.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js b/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js index d3224ab88..4c77f889f 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-edit-cells.js @@ -159,7 +159,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { var config = { columnName: column.name, keyColumnName: theProject.columnModel.keyColumnName, - mode: mode + mode }; if (mode === "separator") { config.separator = elmts.separatorInput[0].value; From bb6b3d6c296af8aeb81ff1ddf33bd9392390b9ee Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Mon, 23 Oct 2017 18:54:59 +0100 Subject: [PATCH 07/11] Re-locate operations tests in correct folder --- .../tests/{model => operations/cell}/KeyValueColumnizeTests.java | 0 .../google/refine/{ => tests}/operations/cell/TransposeTests.java | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename main/tests/server/src/com/google/refine/tests/{model => operations/cell}/KeyValueColumnizeTests.java (100%) rename main/tests/server/src/com/google/refine/{ => tests}/operations/cell/TransposeTests.java (100%) diff --git a/main/tests/server/src/com/google/refine/tests/model/KeyValueColumnizeTests.java b/main/tests/server/src/com/google/refine/tests/operations/cell/KeyValueColumnizeTests.java similarity index 100% rename from main/tests/server/src/com/google/refine/tests/model/KeyValueColumnizeTests.java rename to main/tests/server/src/com/google/refine/tests/operations/cell/KeyValueColumnizeTests.java diff --git a/main/tests/server/src/com/google/refine/operations/cell/TransposeTests.java b/main/tests/server/src/com/google/refine/tests/operations/cell/TransposeTests.java similarity index 100% rename from main/tests/server/src/com/google/refine/operations/cell/TransposeTests.java rename to main/tests/server/src/com/google/refine/tests/operations/cell/TransposeTests.java From 224210625d600475ffda2b5d4d79cb391ad56c98 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 24 Oct 2017 08:28:37 +0100 Subject: [PATCH 08/11] Remove automatic trim of split values --- .../refine/operations/cell/MultiValuedCellSplitOperation.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java index 025d40ee6..9b614f60e 100644 --- a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java +++ b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java @@ -200,14 +200,14 @@ public class MultiValuedCellSplitOperation extends AbstractOperation { // First value goes into the same row { Row firstNewRow = oldRow.dup(); - firstNewRow.setCell(cellIndex, new Cell(values[0].trim(), null)); + firstNewRow.setCell(cellIndex, new Cell(values[0], null)); newRows.add(firstNewRow); } int r2 = r + 1; for (int v = 1; v < values.length; v++) { - Cell newCell = new Cell(values[v].trim(), null); + Cell newCell = new Cell(values[v], null); if (r2 < project.rows.size()) { Row oldRow2 = project.rows.get(r2); From 209500bf3a30068b972fd5f273dcc6ce554e6bc7 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 24 Oct 2017 08:29:00 +0100 Subject: [PATCH 09/11] Add tests for SplitMulitValuedCells --- .../cell/SplitMultiValuedCellsTests.java | 224 ++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java diff --git a/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java b/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java new file mode 100644 index 000000000..3c14e337f --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java @@ -0,0 +1,224 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.tests.model; + +import static org.mockito.Mockito.mock; + +import java.io.File; +import java.io.IOException; +import java.io.StringReader; +import java.util.Properties; +import java.util.List; +import java.util.ArrayList; + +import org.json.JSONException; +import org.json.JSONObject; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.RefineServlet; +import com.google.refine.importers.SeparatorBasedImporter; +import com.google.refine.importing.ImportingJob; +import com.google.refine.importing.ImportingManager; +import com.google.refine.io.FileProjectManager; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.ModelException; +import com.google.refine.model.Project; +import com.google.refine.process.Process; +import com.google.refine.operations.cell.MultiValuedCellSplitOperation; +import com.google.refine.util.JSONUtilities; +import com.google.refine.tests.RefineServletStub; +import com.google.refine.tests.RefineTest; +import com.google.refine.tests.util.TestUtils; + + +public class SplitMultiValuedCellsTests extends RefineTest { + // dependencies + private RefineServlet servlet; + private Project project; + private ProjectMetadata pm; + private JSONObject options; + private ImportingJob job; + private SeparatorBasedImporter importer; + + + @Override + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + @BeforeMethod + public void SetUp() throws JSONException, IOException, ModelException { + servlet = new RefineServletStub(); + File dir = TestUtils.createTempDirectory("openrefine-test-workspace-dir"); + FileProjectManager.initialize(dir); + project = new Project(); + pm = new ProjectMetadata(); + pm.setName("SplitMultiValuedCells test"); + ProjectManager.singleton.registerProject(project, pm); + options = mock(JSONObject.class); + + ImportingManager.initialize(servlet); + job = ImportingManager.createJob(); + importer = new SeparatorBasedImporter(); + } + + @AfterMethod + public void TearDown() { + ImportingManager.disposeJob(job.id); + ProjectManager.singleton.deleteProject(project.id); + job = null; + project = null; + pm = null; + options = null; + } + + /** + * Test to demonstrate the intended behaviour of the function, for issue #1214 + * https://github.com/OpenRefine/OpenRefine/issues/1268 + */ + + @Test + public void testSplitMultiValuedCellsTextSeparator() throws Exception { + String csv = "Key,Value\n" + + "Record_1,one:two;three four\n"; + prepareOptions(",", 10, 0, 0, 1, false, false); + List exceptions = new ArrayList(); + importer.parseOneFile(project, pm, job, "filesource", new StringReader(csv), -1, options, exceptions); + project.update(); + ProjectManager.singleton.registerProject(project, pm); + + AbstractOperation op = new MultiValuedCellSplitOperation( + "Value", + "Key", + ":", + false); + Process process = op.createProcess(project, new Properties()); + process.performImmediate(); + + int keyCol = project.columnModel.getColumnByName("Key").getCellIndex(); + int valueCol = project.columnModel.getColumnByName("Value").getCellIndex(); + + Assert.assertEquals(project.rows.get(0).getCellValue(keyCol), "Record_1"); + Assert.assertEquals(project.rows.get(0).getCellValue(valueCol), "one"); + Assert.assertEquals(project.rows.get(1).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "two;three four"); + } + + @Test + public void testSplitMultiValuedCellsRegExSeparator() throws Exception { + String csv = "Key,Value\n" + + "Record_1,one:two;three four\n"; + prepareOptions(",", 10, 0, 0, 1, false, false); + List exceptions = new ArrayList(); + importer.parseOneFile(project, pm, job, "filesource", new StringReader(csv), -1, options, exceptions); + project.update(); + ProjectManager.singleton.registerProject(project, pm); + + AbstractOperation op = new MultiValuedCellSplitOperation( + "Value", + "Key", + "\\W", + true); + Process process = op.createProcess(project, new Properties()); + process.performImmediate(); + + int keyCol = project.columnModel.getColumnByName("Key").getCellIndex(); + int valueCol = project.columnModel.getColumnByName("Value").getCellIndex(); + + Assert.assertEquals(project.rows.get(0).getCellValue(keyCol), "Record_1"); + Assert.assertEquals(project.rows.get(0).getCellValue(valueCol), "one"); + Assert.assertEquals(project.rows.get(1).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "two"); + Assert.assertEquals(project.rows.get(2).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(2).getCellValue(valueCol), "three"); + Assert.assertEquals(project.rows.get(3).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(3).getCellValue(valueCol), "four"); + } + + @Test + public void testSplitMultiValuedCellsLengths() throws Exception { + String csv = "Key,Value\n" + + "Record_1,one:two;three four\n"; + prepareOptions(",", 10, 0, 0, 1, false, false); + List exceptions = new ArrayList(); + importer.parseOneFile(project, pm, job, "filesource", new StringReader(csv), -1, options, exceptions); + project.update(); + ProjectManager.singleton.registerProject(project, pm); + int[] lengths = {4,4,6,4}; + + AbstractOperation op = new MultiValuedCellSplitOperation( + "Value", + "Key", + lengths); + Process process = op.createProcess(project, new Properties()); + process.performImmediate(); + + int keyCol = project.columnModel.getColumnByName("Key").getCellIndex(); + int valueCol = project.columnModel.getColumnByName("Value").getCellIndex(); + + Assert.assertEquals(project.rows.get(0).getCellValue(keyCol), "Record_1"); + Assert.assertEquals(project.rows.get(0).getCellValue(valueCol), "one:"); + Assert.assertEquals(project.rows.get(1).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(1).getCellValue(valueCol), "two;"); + Assert.assertEquals(project.rows.get(2).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(2).getCellValue(valueCol), "three "); + Assert.assertEquals(project.rows.get(3).getCellValue(keyCol), null); + Assert.assertEquals(project.rows.get(3).getCellValue(valueCol), "four"); + } + + private void prepareOptions( + String sep, int limit, int skip, int ignoreLines, + int headerLines, boolean guessValueType, boolean ignoreQuotes) { + + whenGetStringOption("separator", options, sep); + whenGetIntegerOption("limit", options, limit); + whenGetIntegerOption("skipDataLines", options, skip); + whenGetIntegerOption("ignoreLines", options, ignoreLines); + whenGetIntegerOption("headerLines", options, headerLines); + whenGetBooleanOption("guessCellValueTypes", options, guessValueType); + whenGetBooleanOption("processQuotes", options, !ignoreQuotes); + whenGetBooleanOption("storeBlankCellsAsNulls", options, true); + } + + +} + From 219e747147914fd8b42d6993f58642f92a75b382 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 24 Oct 2017 08:54:58 +0100 Subject: [PATCH 10/11] Removed unused dependency --- .../refine/tests/operations/cell/SplitMultiValuedCellsTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java b/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java index 3c14e337f..0e23a5f16 100644 --- a/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java +++ b/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java @@ -63,7 +63,6 @@ import com.google.refine.model.ModelException; import com.google.refine.model.Project; import com.google.refine.process.Process; import com.google.refine.operations.cell.MultiValuedCellSplitOperation; -import com.google.refine.util.JSONUtilities; import com.google.refine.tests.RefineServletStub; import com.google.refine.tests.RefineTest; import com.google.refine.tests.util.TestUtils; From 300edcca74d22ffe204bdd4fa392e3778afc43da Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 24 Oct 2017 08:58:32 +0100 Subject: [PATCH 11/11] Minor fix to comment in code --- .../tests/operations/cell/SplitMultiValuedCellsTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java b/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java index 0e23a5f16..312899ae6 100644 --- a/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java +++ b/main/tests/server/src/com/google/refine/tests/operations/cell/SplitMultiValuedCellsTests.java @@ -111,7 +111,7 @@ public class SplitMultiValuedCellsTests extends RefineTest { } /** - * Test to demonstrate the intended behaviour of the function, for issue #1214 + * Test to demonstrate the intended behaviour of the function, for issue #1268 * https://github.com/OpenRefine/OpenRefine/issues/1268 */