diff --git a/main/src/com/google/refine/importers/SeparatorBasedImporter.java b/main/src/com/google/refine/importers/SeparatorBasedImporter.java index 58900b9af..18cba2080 100644 --- a/main/src/com/google/refine/importers/SeparatorBasedImporter.java +++ b/main/src/com/google/refine/importers/SeparatorBasedImporter.java @@ -50,6 +50,8 @@ import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.lang3.StringUtils; + import org.json.JSONObject; import au.com.bytecode.opencsv.CSVParser; @@ -75,6 +77,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { JSONUtilities.safePut(options, "guessCellValueTypes", false); JSONUtilities.safePut(options, "processQuotes", true); + JSONUtilities.safePut(options, "quoteCharacter", String.valueOf(CSVParser.DEFAULT_QUOTE_CHARACTER)); return options; } @@ -98,9 +101,15 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true); boolean strictQuotes = JSONUtilities.getBoolean(options, "strictQuotes", false); + char quote = CSVParser.DEFAULT_QUOTE_CHARACTER; /* used unless the option is exactly one character after trimming */ + String quoteCharacter = StringUtils.trimToEmpty(JSONUtilities.getString(options, "quoteCharacter", null)); /* null-safe: a missing option becomes "" */ + if (quoteCharacter.length() == 1) { + quote = quoteCharacter.charAt(0); + } + final CSVParser parser = new CSVParser( sep, - CSVParser.DEFAULT_QUOTE_CHARACTER, + quote, (char) 0, // we don't want escape processing strictQuotes, CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, diff --git a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java index ba9145541..5d714ad98 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java @@ -487,6 +487,33 @@ 
public class TsvCsvImporterTests extends ImporterTest { Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); } + + @Test(dataProvider = "CSV-TSV-AutoDetermine") + public void customQuoteCharacter(String sep){ + //create input to test with + String inputSeparator = sep == null ? "\t" : sep; + String input = "'col1'" + inputSeparator + "'col2'" + inputSeparator + "'col3'\n" + + "'data1'" + inputSeparator + "'data2'" + inputSeparator + "'data3'"; + + + try { + prepareOptions(sep, -1, 0, 0, 1, false, false, "'"); + parseOneFile(SUT, new StringReader(input)); + } catch (Exception e) { + Assert.fail("Exception during file parse",e); + } + + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + //---------------------read tests------------------------ @Test public void readCsvWithProperties() { @@ -543,11 +570,18 @@ public class TsvCsvImporterTests extends ImporterTest { }; } + protected void prepareOptions( + String sep, int limit, int skip, int ignoreLines, + int headerLines, boolean guessValueType, boolean ignoreQuotes) { + prepareOptions(sep, limit, skip, ignoreLines, headerLines, guessValueType, ignoreQuotes, "\""); + } + protected void prepareOptions( String sep, int limit, int skip, int ignoreLines, - int headerLines, boolean guessValueType, boolean ignoreQuotes) { + int headerLines, boolean guessValueType, boolean ignoreQuotes, String quoteCharacter) { whenGetStringOption("separator", 
options, sep); + whenGetStringOption("quoteCharacter", options, quoteCharacter); whenGetIntegerOption("limit", options, limit); whenGetIntegerOption("skipDataLines", options, skip); whenGetIntegerOption("ignoreLines", options, ignoreLines); diff --git a/main/webapp/modules/core/langs/translation-en.json b/main/webapp/modules/core/langs/translation-en.json index b04e73197..98047ee75 100644 --- a/main/webapp/modules/core/langs/translation-en.json +++ b/main/webapp/modules/core/langs/translation-en.json @@ -153,7 +153,8 @@ "tabs": "tabs (TSV)", "custom": "custom", "escape": "Escape special characters with \\", - "quotation-mark": "Quotation marks are used
to enclose cells containing
column separators", + "use-quote": "Use character", + "quote-delimits-cells": "to enclose cells containing column separators", "click-xml": "Click on the first XML element corresponding to the first record to load." }, "core-dialogs": { diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html index d615c83b2..1add776a3 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html @@ -46,6 +46,12 @@ + + + + + + @@ -54,9 +60,7 @@
- - -
+
@@ -69,4 +73,4 @@
- \ No newline at end of file + diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js index a8c1a6ea8..0cc8637ee 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js @@ -111,6 +111,9 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() { options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked; options.processQuotes = this._optionContainerElmts.processQuoteMarksCheckbox[0].checked; + if (options.processQuotes) { + options.quoteCharacter = this._optionContainerElmts.quoteCharacterInput[0].value; + } options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked; options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked; @@ -143,7 +146,8 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() { $('#or-import-load').text($.i18n._('core-index-parser')["load-at-most"]); $('#or-import-rows2').text($.i18n._('core-index-parser')["rows-data"]); $('#or-import-parseCell').html($.i18n._('core-index-parser')["parse-cell"]); - $('#or-import-quote').html($.i18n._('core-index-parser')["quotation-mark"]); + $('#or-import-quote').html($.i18n._('core-index-parser')["use-quote"]); + $('#or-import-quote-character').html($.i18n._('core-index-parser')["quote-delimits-cells"]); $('#or-import-blank').text($.i18n._('core-index-parser')["store-blank"]); $('#or-import-null').text($.i18n._('core-index-parser')["store-nulls"]); $('#or-import-source').html($.i18n._('core-index-parser')["store-source"]); @@ -187,6 +191,7 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() { } if (this._config.processQuotes) { this._optionContainerElmts.processQuoteMarksCheckbox.prop("checked", 
true); + this._optionContainerElmts.quoteCharacterInput[0].value = this._config.quoteCharacter || '"'; /* fall back to the default quote so a config without quoteCharacter does not render as "undefined" */ } if (this._config.storeBlankCellsAsNulls) {