From a7c81880a880e77820e3b927b155e2a0494e2495 Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Fri, 4 Nov 2011 19:04:16 +0000 Subject: [PATCH] Issue 475 - Support escaped custom separators git-svn-id: http://google-refine.googlecode.com/svn/trunk@2355 7d457c2a-affb-35e4-300a-418c747d4874 --- .../refine/importers/FixedWidthImporter.java | 5 +++-- .../refine/importers/LineBasedImporter.java | 2 +- .../importers/SeparatorBasedImporter.java | 15 +++++++++------ .../dialogs/custom-tabular-exporter-dialog.js | 14 +++++++------- .../fixed-width-parser-ui.js | 9 ++++----- .../parser-interfaces/line-based-parser-ui.js | 9 ++++----- .../separator-based-parser-ui.js | 19 ++++++++----------- 7 files changed, 36 insertions(+), 37 deletions(-) diff --git a/main/src/com/google/refine/importers/FixedWidthImporter.java b/main/src/com/google/refine/importers/FixedWidthImporter.java index eb1ddaca8..7a476db17 100644 --- a/main/src/com/google/refine/importers/FixedWidthImporter.java +++ b/main/src/com/google/refine/importers/FixedWidthImporter.java @@ -44,7 +44,7 @@ public class FixedWidthImporter extends TabularImportingParserBase { } } - JSONUtilities.safePut(options, "lineSeparator", "\n"); + JSONUtilities.safePut(options, "lineSeparator", "\\n"); JSONUtilities.safePut(options, "headerLines", 0); JSONUtilities.safePut(options, "columnWidths", columnWidths); JSONUtilities.safePut(options, "guessCellValueTypes", true); @@ -63,7 +63,8 @@ public class FixedWidthImporter extends TabularImportingParserBase { JSONObject options, List exceptions ) { - // String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n"); +// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\\n"); +// lineSeparator = StringEscapeUtils.unescapeJava(lineSeparator); final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths"); List retrievedColumnNames = null; diff --git a/main/src/com/google/refine/importers/LineBasedImporter.java b/main/src/com/google/refine/importers/LineBasedImporter.java index 066c15e8d..5d623bfd0 100644 --- a/main/src/com/google/refine/importers/LineBasedImporter.java +++ b/main/src/com/google/refine/importers/LineBasedImporter.java @@ -27,7 +27,7 @@ public class LineBasedImporter extends TabularImportingParserBase { ImportingJob job, List fileRecords, String format) { JSONObject options = super.createParserUIInitializationData(job, fileRecords, format); - JSONUtilities.safePut(options, "lineSeparator", "\n"); + JSONUtilities.safePut(options, "lineSeparator", "\\n"); JSONUtilities.safePut(options, "linesPerRow", 1); JSONUtilities.safePut(options, "headerLines", 0); JSONUtilities.safePut(options, "guessCellValueTypes", true); diff --git a/main/src/com/google/refine/importers/SeparatorBasedImporter.java b/main/src/com/google/refine/importers/SeparatorBasedImporter.java index f1cfe4710..943794a78 100644 --- a/main/src/com/google/refine/importers/SeparatorBasedImporter.java +++ b/main/src/com/google/refine/importers/SeparatorBasedImporter.java @@ -48,6 +48,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.commons.lang.StringEscapeUtils; import org.json.JSONObject; import au.com.bytecode.opencsv.CSVParser; @@ -68,10 +69,10 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { List fileRecords, String format) { JSONObject options = super.createParserUIInitializationData(job, fileRecords, format); - JSONUtilities.safePut(options, "lineSeparator", "\n"); + JSONUtilities.safePut(options, "lineSeparator", "\\n"); String separator = guessSeparator(job, fileRecords); - JSONUtilities.safePut(options, "separator", separator != null ? separator : "\t"); + JSONUtilities.safePut(options, "separator", separator != null ? separator : "\\t"); JSONUtilities.safePut(options, "guessCellValueTypes", true); JSONUtilities.safePut(options, "processQuotes", true); @@ -90,11 +91,13 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { JSONObject options, List exceptions ) { - // String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n"); - String sep = JSONUtilities.getString(options, "separator", "\t"); +// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\\n"); +// lineSeparator = StringEscapeUtils.unescapeJava(lineSeparator); + String sep = JSONUtilities.getString(options, "separator", "\\t"); if (sep == null || "".equals(sep)) { - sep = "\t"; + sep = "\\t"; } + sep = StringEscapeUtils.unescapeJava(sep); boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true); final CSVParser parser = new CSVParser( @@ -149,7 +152,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { File file = new File(job.getRawDataDir(), location); Separator separator = guessSeparator(file, encoding); if (separator != null) { - return Character.toString(separator.separator); + return StringEscapeUtils.escapeJava(Character.toString(separator.separator)); } } } diff --git a/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.js b/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.js index 47c448b25..11484b9ed 100644 --- a/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.js +++ b/main/webapp/modules/core/scripts/dialogs/custom-tabular-exporter-dialog.js @@ -34,8 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. function CustomTabularExporterDialog(options) { options = options || { format: 'tsv', - lineSeparator: '\n', - separator: '\t', + lineSeparator: '\\n', + separator: '\\t', encoding: 'UTF-8', outputColumnHeaders: true, outputBlankRows: false, @@ -202,8 +202,8 @@ CustomTabularExporterDialog.prototype._createDialog = function(options) { CustomTabularExporterDialog.prototype._configureUIFromOptionCode = function(options) { this._dialog.find('input[name="custom-tabular-exporter-download-format"][value="' + options.format + '"]').attr('checked', 'checked'); - this._elmts.separatorInput[0].value = String.encodeSeparator(options.separator || ','); - this._elmts.lineSeparatorInput[0].value = String.encodeSeparator(options.lineSeparator || '\n'); + this._elmts.separatorInput[0].value = options.separator || ','; + this._elmts.lineSeparatorInput[0].value = options.lineSeparator || '\\n'; this._elmts.encodingInput[0].value = options.encoding; this._elmts.outputColumnHeadersCheckbox.attr('checked', (options.outputColumnHeaders) ? 'checked' : ''); this._elmts.outputBlankRowsCheckbox.attr('checked', (options.outputBlankRows) ? 'checked' : ''); @@ -336,13 +336,13 @@ CustomTabularExporterDialog.prototype._getOptionCode = function() { if (options.format == 'tsv' || options.format == 'csv' || options.format == '*sv') { if (options.format == 'tsv') { - options.separator = '\t'; + options.separator = '\\t'; } else if (options.format == 'csv') { options.separator = ','; } else { - options.separator = String.decodeSeparator(this._elmts.separatorInput.val()); + options.separator = this._elmts.separatorInput.val(); } - options.lineSeparator = String.decodeSeparator(this._elmts.lineSeparatorInput.val()); + options.lineSeparator = this._elmts.lineSeparatorInput.val(); options.encoding = this._elmts.encodingInput.val(); } options.outputColumnHeaders = this._elmts.outputColumnHeadersCheckbox[0].checked; diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/fixed-width-parser-ui.js b/main/webapp/modules/core/scripts/index/parser-interfaces/fixed-width-parser-ui.js index 51a61352d..035efcbab 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/fixed-width-parser-ui.js +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/fixed-width-parser-ui.js @@ -73,9 +73,9 @@ Refine.FixedWidthParserUI.prototype.getOptions = function() { } if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") { - options.lineSeparator = "\n"; + options.lineSeparator = "\\n"; } else { - options.lineSeparator = String.decodeSeparator(this._optionContainerElmts.rowSeparatorInput[0].value); + options.lineSeparator = this._optionContainerElmts.rowSeparatorInput[0].value; } var parseIntDefault = function(s, def) { @@ -140,11 +140,10 @@ Refine.FixedWidthParserUI.prototype._initialize = function() { this._optionContainerElmts.columnNamesInput[0].value = this._config.columnNames.join(','); } - var rowSeparatorValue = (this._config.lineSeparator == "\n") ? 'new-line' : 'custom'; + var rowSeparatorValue = (this._config.lineSeparator == "\\n") ? 'new-line' : 'custom'; this._optionContainer.find( "input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked"); - this._optionContainerElmts.rowSeparatorInput[0].value = - String.encodeSeparator(this._config.lineSeparator); + this._optionContainerElmts.rowSeparatorInput[0].value = this._config.lineSeparator; if (this._config.ignoreLines > 0) { this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked"); diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/line-based-parser-ui.js b/main/webapp/modules/core/scripts/index/parser-interfaces/line-based-parser-ui.js index 861b804ee..8a38ac693 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/line-based-parser-ui.js +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/line-based-parser-ui.js @@ -81,9 +81,9 @@ Refine.LineBasedParserUI.prototype.getOptions = function() { options.linesPerRow = parseIntDefault(this._optionContainerElmts.linesPerRowInput[0].value, 1); if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") { - options.lineSeparator = "\n"; + options.lineSeparator = "\\n"; } else { - options.lineSeparator = String.decodeSeparator(this._optionContainerElmts.rowSeparatorInput[0].value); + options.lineSeparator = this._optionContainerElmts.rowSeparatorInput[0].value; } if (this._optionContainerElmts.ignoreCheckbox[0].checked) { @@ -127,11 +127,10 @@ Refine.LineBasedParserUI.prototype._initialize = function() { this._optionContainerElmts.linesPerRowInput[0].value = this._config.linesPerRow.toString(); - var rowSeparatorValue = (this._config.lineSeparator == "\n") ? 'new-line' : 'custom'; + var rowSeparatorValue = (this._config.lineSeparator == "\\n") ? 'new-line' : 'custom'; this._optionContainer.find( "input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked"); - this._optionContainerElmts.rowSeparatorInput[0].value = - String.encodeSeparator(this._config.lineSeparator); + this._optionContainerElmts.rowSeparatorInput[0].value = this._config.lineSeparator; if (this._config.ignoreLines > 0) { this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked"); diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js index f249dea0a..eb5be8889 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js @@ -66,9 +66,9 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() { }; if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") { - options.lineSeparator = "\n"; + options.lineSeparator = "\\n"; } else { - options.lineSeparator = String.decodeSeparator(this._optionContainerElmts.rowSeparatorInput[0].value); + options.lineSeparator = this._optionContainerElmts.rowSeparatorInput[0].value; } switch (this._optionContainer.find("input[name='column-separator']:checked")[0].value) { @@ -76,11 +76,10 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() { options.separator = ","; break; case 'tab': - options.separator = "\t"; + options.separator = "\\t"; break; default: - options.separator = String.decodeSeparator( - this._optionContainerElmts.columnSeparatorInput[0].value); + options.separator = this._optionContainerElmts.columnSeparatorInput[0].value; } var parseIntDefault = function(s, def) { @@ -141,18 +140,16 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() { }); }); - var rowSeparatorValue = (this._config.lineSeparator == "\n") ? 'new-line' : 'custom'; + var rowSeparatorValue = (this._config.lineSeparator == "\\n") ? 'new-line' : 'custom'; this._optionContainer.find( "input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked"); - this._optionContainerElmts.rowSeparatorInput[0].value = - String.encodeSeparator(this._config.lineSeparator); + this._optionContainerElmts.rowSeparatorInput[0].value = this._config.lineSeparator; var columnSeparatorValue = (this._config.separator == ",") ? 'comma' : - ((this._config.separator == "\t") ? 'tab' : 'custom'); + ((this._config.separator == "\\t") ? 'tab' : 'custom'); this._optionContainer.find( "input[name='column-separator'][value='" + columnSeparatorValue + "']").attr("checked", "checked"); - this._optionContainerElmts.columnSeparatorInput[0].value = - String.encodeSeparator(this._config.separator); + this._optionContainerElmts.columnSeparatorInput[0].value = this._config.separator; if (this._config.ignoreLines > 0) { this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");