From 7f5e58ef519d57f5965cf7e9c61540879639efb3 Mon Sep 17 00:00:00 2001 From: Steffen Stundzig Date: Fri, 30 Oct 2015 14:32:46 +0100 Subject: [PATCH 1/6] #1086 add support for quote character --- .../importers/SeparatorBasedImporter.java | 10 +++++- .../tests/importers/TsvCsvImporterTests.java | 36 +++++++++++++++++-- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/main/src/com/google/refine/importers/SeparatorBasedImporter.java b/main/src/com/google/refine/importers/SeparatorBasedImporter.java index 4980b0a87..116a0145a 100644 --- a/main/src/com/google/refine/importers/SeparatorBasedImporter.java +++ b/main/src/com/google/refine/importers/SeparatorBasedImporter.java @@ -50,6 +50,7 @@ import java.util.List; import java.util.Map; import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang.StringUtils; import org.json.JSONObject; import au.com.bytecode.opencsv.CSVParser; @@ -75,6 +76,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { JSONUtilities.safePut(options, "guessCellValueTypes", false); JSONUtilities.safePut(options, "processQuotes", true); + JSONUtilities.safePut(options, "quoteCharacter", CSVParser.DEFAULT_QUOTE_CHARACTER); return options; } @@ -98,9 +100,15 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true); boolean strictQuotes = JSONUtilities.getBoolean(options, "strictQuotes", false); + Character quote = CSVParser.DEFAULT_QUOTE_CHARACTER; + String quoteCharacter = JSONUtilities.getString(options, "quoteCharacter", null); + if (!StringUtils.isBlank(quoteCharacter)) { + quote = quoteCharacter.charAt(0); + } + final CSVParser parser = new CSVParser( sep, - CSVParser.DEFAULT_QUOTE_CHARACTER, + quote, (char) 0, // we don't want escape processing strictQuotes, CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, diff --git a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java index 8a2ac99e2..3eba3f68a 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java @@ -487,6 +487,33 @@ public class TsvCsvImporterTests extends ImporterTest { Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); } + + @Test(dataProvider = "CSV-TSV-AutoDetermine") + public void readSimpleData_CSV_1Header_1Row_singleQuote(String sep){ + //create input to test with + String inputSeparator = sep == null ? "\t" : sep; + String input = "'col1'" + inputSeparator + "'col2'" + inputSeparator + "'col3'\n" + + "'data1'" + inputSeparator + "'data2'" + inputSeparator + "'data3'"; + + + try { + prepareOptions(sep, -1, 0, 0, 1, false, false, "'"); + parseOneFile(SUT, new StringReader(input)); + } catch (Exception e) { + Assert.fail("Exception during file parse",e); + } + + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + //---------------------read tests------------------------ @Test public void readCsvWithProperties() { @@ -542,12 +569,17 @@ public class TsvCsvImporterTests extends ImporterTest { {","},{"\t"},{null} }; } - + private void prepareOptions( + String sep, int limit, int skip, int ignoreLines, + int headerLines, boolean guessValueType, boolean ignoreQuotes) { + prepareOptions(sep, limit, skip, ignoreLines, headerLines, guessValueType, ignoreQuotes, "\""); + } private void prepareOptions( String sep, int limit, int skip, int ignoreLines, - int headerLines, boolean guessValueType, boolean ignoreQuotes) { + int headerLines, boolean guessValueType, boolean ignoreQuotes, String quoteCharacter) { whenGetStringOption("separator", options, sep); + whenGetStringOption("quoteCharacter", options, quoteCharacter); whenGetIntegerOption("limit", options, limit); whenGetIntegerOption("skipDataLines", options, skip); whenGetIntegerOption("ignoreLines", options, ignoreLines); From dc4815ba7b38a1bd03f3ac849dea109e2047e027 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sat, 3 Feb 2018 09:00:53 +0000 Subject: [PATCH 2/6] Fix initial options for the quote character --- .../src/com/google/refine/importers/SeparatorBasedImporter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/com/google/refine/importers/SeparatorBasedImporter.java b/main/src/com/google/refine/importers/SeparatorBasedImporter.java index ad7f7afc1..7c2196bc8 100644 --- a/main/src/com/google/refine/importers/SeparatorBasedImporter.java +++ b/main/src/com/google/refine/importers/SeparatorBasedImporter.java @@ -76,7 +76,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { JSONUtilities.safePut(options, "guessCellValueTypes", false); JSONUtilities.safePut(options, "processQuotes", true); - JSONUtilities.safePut(options, "quoteCharacter", CSVParser.DEFAULT_QUOTE_CHARACTER); + JSONUtilities.safePut(options, "quoteCharacter", String.valueOf(CSVParser.DEFAULT_QUOTE_CHARACTER)); return options; } From 86be1c12e831b13e4c7b5916fe6b18c28a344aab Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sat, 3 Feb 2018 09:01:07 +0000 Subject: [PATCH 3/6] Expose quote character setting in UI --- main/webapp/modules/core/langs/translation-en.json | 3 ++- .../parser-interfaces/separator-based-parser-ui.html | 12 ++++++++---- .../parser-interfaces/separator-based-parser-ui.js | 7 ++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/main/webapp/modules/core/langs/translation-en.json b/main/webapp/modules/core/langs/translation-en.json index 07ce3d8b7..4569e724a 100644 --- a/main/webapp/modules/core/langs/translation-en.json +++ b/main/webapp/modules/core/langs/translation-en.json @@ -151,7 +151,8 @@ "tabs": "tabs (TSV)", "custom": "custom", "escape": "Escape special characters with \\", - "quotation-mark": "Quotation marks are used
to enclose cells containing
column separators", + "use-quote": "Use character", + "quote-delimits-cells": "to enclose cells containing column separators", "click-xml": "Click on the first XML element corresponding to the first record to load." }, "core-dialogs": { diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html index d615c83b2..1add776a3 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html @@ -46,6 +46,12 @@ + + + + + + @@ -54,9 +60,7 @@
- - -
+
@@ -69,4 +73,4 @@
- \ No newline at end of file + diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js index a8c1a6ea8..0cc8637ee 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js @@ -111,6 +111,9 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() { options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked; options.processQuotes = this._optionContainerElmts.processQuoteMarksCheckbox[0].checked; + if (options.processQuotes) { + options.quoteCharacter = this._optionContainerElmts.quoteCharacterInput[0].value; + } options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked; options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked; @@ -143,7 +146,8 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() { $('#or-import-load').text($.i18n._('core-index-parser')["load-at-most"]); $('#or-import-rows2').text($.i18n._('core-index-parser')["rows-data"]); $('#or-import-parseCell').html($.i18n._('core-index-parser')["parse-cell"]); - $('#or-import-quote').html($.i18n._('core-index-parser')["quotation-mark"]); + $('#or-import-quote').html($.i18n._('core-index-parser')["use-quote"]); + $('#or-import-quote-character').html($.i18n._('core-index-parser')["quote-delimits-cells"]); $('#or-import-blank').text($.i18n._('core-index-parser')["store-blank"]); $('#or-import-null').text($.i18n._('core-index-parser')["store-nulls"]); $('#or-import-source').html($.i18n._('core-index-parser')["store-source"]); @@ -187,6 +191,7 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() { } if (this._config.processQuotes) { this._optionContainerElmts.processQuoteMarksCheckbox.prop("checked", true); + this._optionContainerElmts.quoteCharacterInput[0].value = this._config.quoteCharacter; } if (this._config.storeBlankCellsAsNulls) { From a191d13b14f78b440fb5852352f0c8c7324105f9 Mon Sep 17 00:00:00 2001 From: Thad Guidry Date: Sat, 3 Feb 2018 07:26:47 -0600 Subject: [PATCH 4/6] adding missing StringUtils import --- main/src/com/google/refine/importers/SeparatorBasedImporter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/main/src/com/google/refine/importers/SeparatorBasedImporter.java b/main/src/com/google/refine/importers/SeparatorBasedImporter.java index 40c6614e9..75bf64b0d 100644 --- a/main/src/com/google/refine/importers/SeparatorBasedImporter.java +++ b/main/src/com/google/refine/importers/SeparatorBasedImporter.java @@ -50,6 +50,7 @@ import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.lang3.StringUtils; import org.json.JSONObject; From a89bbcbbe2cc268c7884279c14ae536fa34734ce Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sat, 3 Feb 2018 14:29:24 +0000 Subject: [PATCH 5/6] Add missing method removed during merge --- .../google/refine/tests/importers/TsvCsvImporterTests.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java index ce2871efd..1880488b8 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java @@ -569,6 +569,12 @@ public class TsvCsvImporterTests extends ImporterTest { {","},{"\t"},{null} }; } + + protected void prepareOptions( + String sep, int limit, int skip, int ignoreLines, + int headerLines, boolean guessValueType, boolean ignoreQuotes) { + prepareOptions(sep, limit, skip, ignoreLines, headerLines, guessValueType, ignoreQuotes, "\""); + } protected void prepareOptions( String sep, int limit, int skip, int ignoreLines, From f5ff5565ffdc74002f2570b505e37e3090de9982 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sat, 3 Feb 2018 16:37:15 +0000 Subject: [PATCH 6/6] Cleanup for Codacy and better parsing of quote character --- .../com/google/refine/importers/SeparatorBasedImporter.java | 4 ++-- .../google/refine/tests/importers/TsvCsvImporterTests.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/main/src/com/google/refine/importers/SeparatorBasedImporter.java b/main/src/com/google/refine/importers/SeparatorBasedImporter.java index 75bf64b0d..18cba2080 100644 --- a/main/src/com/google/refine/importers/SeparatorBasedImporter.java +++ b/main/src/com/google/refine/importers/SeparatorBasedImporter.java @@ -103,8 +103,8 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { Character quote = CSVParser.DEFAULT_QUOTE_CHARACTER; String quoteCharacter = JSONUtilities.getString(options, "quoteCharacter", null); - if (!StringUtils.isBlank(quoteCharacter)) { - quote = quoteCharacter.charAt(0); + if (quoteCharacter != null && quoteCharacter.trim().length() == 1) { + quote = quoteCharacter.trim().charAt(0); } final CSVParser parser = new CSVParser( diff --git a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java index 1880488b8..5d714ad98 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java @@ -489,7 +489,7 @@ public class TsvCsvImporterTests extends ImporterTest { @Test(dataProvider = "CSV-TSV-AutoDetermine") - public void readSimpleData_CSV_1Header_1Row_singleQuote(String sep){ + public void customQuoteCharacter(String sep){ //create input to test with String inputSeparator = sep == null ? "\t" : sep; String input = "'col1'" + inputSeparator + "'col2'" + inputSeparator + "'col3'\n" +