Issue 475 - Support escaped custom separators

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2355 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Tom Morris 2011-11-04 19:04:16 +00:00
parent cacbedd352
commit a7c81880a8
7 changed files with 36 additions and 37 deletions

View File

@ -44,7 +44,7 @@ public class FixedWidthImporter extends TabularImportingParserBase {
} }
} }
JSONUtilities.safePut(options, "lineSeparator", "\n"); JSONUtilities.safePut(options, "lineSeparator", "\\n");
JSONUtilities.safePut(options, "headerLines", 0); JSONUtilities.safePut(options, "headerLines", 0);
JSONUtilities.safePut(options, "columnWidths", columnWidths); JSONUtilities.safePut(options, "columnWidths", columnWidths);
JSONUtilities.safePut(options, "guessCellValueTypes", true); JSONUtilities.safePut(options, "guessCellValueTypes", true);
@ -63,7 +63,8 @@ public class FixedWidthImporter extends TabularImportingParserBase {
JSONObject options, JSONObject options,
List<Exception> exceptions List<Exception> exceptions
) { ) {
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n"); // String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\\n");
// lineSeparator = StringEscapeUtils.unescapeJava(lineSeparator);
final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths"); final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths");
List<Object> retrievedColumnNames = null; List<Object> retrievedColumnNames = null;

View File

@ -27,7 +27,7 @@ public class LineBasedImporter extends TabularImportingParserBase {
ImportingJob job, List<JSONObject> fileRecords, String format) { ImportingJob job, List<JSONObject> fileRecords, String format) {
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format); JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
JSONUtilities.safePut(options, "lineSeparator", "\n"); JSONUtilities.safePut(options, "lineSeparator", "\\n");
JSONUtilities.safePut(options, "linesPerRow", 1); JSONUtilities.safePut(options, "linesPerRow", 1);
JSONUtilities.safePut(options, "headerLines", 0); JSONUtilities.safePut(options, "headerLines", 0);
JSONUtilities.safePut(options, "guessCellValueTypes", true); JSONUtilities.safePut(options, "guessCellValueTypes", true);

View File

@ -48,6 +48,7 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.commons.lang.StringEscapeUtils;
import org.json.JSONObject; import org.json.JSONObject;
import au.com.bytecode.opencsv.CSVParser; import au.com.bytecode.opencsv.CSVParser;
@ -68,10 +69,10 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
List<JSONObject> fileRecords, String format) { List<JSONObject> fileRecords, String format) {
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format); JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
JSONUtilities.safePut(options, "lineSeparator", "\n"); JSONUtilities.safePut(options, "lineSeparator", "\\n");
String separator = guessSeparator(job, fileRecords); String separator = guessSeparator(job, fileRecords);
JSONUtilities.safePut(options, "separator", separator != null ? separator : "\t"); JSONUtilities.safePut(options, "separator", separator != null ? separator : "\\t");
JSONUtilities.safePut(options, "guessCellValueTypes", true); JSONUtilities.safePut(options, "guessCellValueTypes", true);
JSONUtilities.safePut(options, "processQuotes", true); JSONUtilities.safePut(options, "processQuotes", true);
@ -90,11 +91,13 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
JSONObject options, JSONObject options,
List<Exception> exceptions List<Exception> exceptions
) { ) {
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n"); // String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\\n");
String sep = JSONUtilities.getString(options, "separator", "\t"); // lineSeparator = StringEscapeUtils.unescapeJava(lineSeparator);
String sep = JSONUtilities.getString(options, "separator", "\\t");
if (sep == null || "".equals(sep)) { if (sep == null || "".equals(sep)) {
sep = "\t"; sep = "\\t";
} }
sep = StringEscapeUtils.unescapeJava(sep);
boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true); boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);
final CSVParser parser = new CSVParser( final CSVParser parser = new CSVParser(
@ -149,7 +152,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
File file = new File(job.getRawDataDir(), location); File file = new File(job.getRawDataDir(), location);
Separator separator = guessSeparator(file, encoding); Separator separator = guessSeparator(file, encoding);
if (separator != null) { if (separator != null) {
return Character.toString(separator.separator); return StringEscapeUtils.escapeJava(Character.toString(separator.separator));
} }
} }
} }

View File

@ -34,8 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
function CustomTabularExporterDialog(options) { function CustomTabularExporterDialog(options) {
options = options || { options = options || {
format: 'tsv', format: 'tsv',
lineSeparator: '\n', lineSeparator: '\\n',
separator: '\t', separator: '\\t',
encoding: 'UTF-8', encoding: 'UTF-8',
outputColumnHeaders: true, outputColumnHeaders: true,
outputBlankRows: false, outputBlankRows: false,
@ -202,8 +202,8 @@ CustomTabularExporterDialog.prototype._createDialog = function(options) {
CustomTabularExporterDialog.prototype._configureUIFromOptionCode = function(options) { CustomTabularExporterDialog.prototype._configureUIFromOptionCode = function(options) {
this._dialog.find('input[name="custom-tabular-exporter-download-format"][value="' + options.format + '"]').attr('checked', 'checked'); this._dialog.find('input[name="custom-tabular-exporter-download-format"][value="' + options.format + '"]').attr('checked', 'checked');
this._elmts.separatorInput[0].value = String.encodeSeparator(options.separator || ','); this._elmts.separatorInput[0].value = options.separator || ',';
this._elmts.lineSeparatorInput[0].value = String.encodeSeparator(options.lineSeparator || '\n'); this._elmts.lineSeparatorInput[0].value = options.lineSeparator || '\\n';
this._elmts.encodingInput[0].value = options.encoding; this._elmts.encodingInput[0].value = options.encoding;
this._elmts.outputColumnHeadersCheckbox.attr('checked', (options.outputColumnHeaders) ? 'checked' : ''); this._elmts.outputColumnHeadersCheckbox.attr('checked', (options.outputColumnHeaders) ? 'checked' : '');
this._elmts.outputBlankRowsCheckbox.attr('checked', (options.outputBlankRows) ? 'checked' : ''); this._elmts.outputBlankRowsCheckbox.attr('checked', (options.outputBlankRows) ? 'checked' : '');
@ -336,13 +336,13 @@ CustomTabularExporterDialog.prototype._getOptionCode = function() {
if (options.format == 'tsv' || options.format == 'csv' || options.format == '*sv') { if (options.format == 'tsv' || options.format == 'csv' || options.format == '*sv') {
if (options.format == 'tsv') { if (options.format == 'tsv') {
options.separator = '\t'; options.separator = '\\t';
} else if (options.format == 'csv') { } else if (options.format == 'csv') {
options.separator = ','; options.separator = ',';
} else { } else {
options.separator = String.decodeSeparator(this._elmts.separatorInput.val()); options.separator = this._elmts.separatorInput.val();
} }
options.lineSeparator = String.decodeSeparator(this._elmts.lineSeparatorInput.val()); options.lineSeparator = this._elmts.lineSeparatorInput.val();
options.encoding = this._elmts.encodingInput.val(); options.encoding = this._elmts.encodingInput.val();
} }
options.outputColumnHeaders = this._elmts.outputColumnHeadersCheckbox[0].checked; options.outputColumnHeaders = this._elmts.outputColumnHeadersCheckbox[0].checked;

View File

@ -73,9 +73,9 @@ Refine.FixedWidthParserUI.prototype.getOptions = function() {
} }
if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") { if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") {
options.lineSeparator = "\n"; options.lineSeparator = "\\n";
} else { } else {
options.lineSeparator = String.decodeSeparator(this._optionContainerElmts.rowSeparatorInput[0].value); options.lineSeparator = this._optionContainerElmts.rowSeparatorInput[0].value;
} }
var parseIntDefault = function(s, def) { var parseIntDefault = function(s, def) {
@ -140,11 +140,10 @@ Refine.FixedWidthParserUI.prototype._initialize = function() {
this._optionContainerElmts.columnNamesInput[0].value = this._config.columnNames.join(','); this._optionContainerElmts.columnNamesInput[0].value = this._config.columnNames.join(',');
} }
var rowSeparatorValue = (this._config.lineSeparator == "\n") ? 'new-line' : 'custom'; var rowSeparatorValue = (this._config.lineSeparator == "\\n") ? 'new-line' : 'custom';
this._optionContainer.find( this._optionContainer.find(
"input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked"); "input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
this._optionContainerElmts.rowSeparatorInput[0].value = this._optionContainerElmts.rowSeparatorInput[0].value = this._config.lineSeparator;
String.encodeSeparator(this._config.lineSeparator);
if (this._config.ignoreLines > 0) { if (this._config.ignoreLines > 0) {
this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked"); this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");

View File

@ -81,9 +81,9 @@ Refine.LineBasedParserUI.prototype.getOptions = function() {
options.linesPerRow = parseIntDefault(this._optionContainerElmts.linesPerRowInput[0].value, 1); options.linesPerRow = parseIntDefault(this._optionContainerElmts.linesPerRowInput[0].value, 1);
if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") { if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") {
options.lineSeparator = "\n"; options.lineSeparator = "\\n";
} else { } else {
options.lineSeparator = String.decodeSeparator(this._optionContainerElmts.rowSeparatorInput[0].value); options.lineSeparator = this._optionContainerElmts.rowSeparatorInput[0].value;
} }
if (this._optionContainerElmts.ignoreCheckbox[0].checked) { if (this._optionContainerElmts.ignoreCheckbox[0].checked) {
@ -127,11 +127,10 @@ Refine.LineBasedParserUI.prototype._initialize = function() {
this._optionContainerElmts.linesPerRowInput[0].value = this._optionContainerElmts.linesPerRowInput[0].value =
this._config.linesPerRow.toString(); this._config.linesPerRow.toString();
var rowSeparatorValue = (this._config.lineSeparator == "\n") ? 'new-line' : 'custom'; var rowSeparatorValue = (this._config.lineSeparator == "\\n") ? 'new-line' : 'custom';
this._optionContainer.find( this._optionContainer.find(
"input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked"); "input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
this._optionContainerElmts.rowSeparatorInput[0].value = this._optionContainerElmts.rowSeparatorInput[0].value = this._config.lineSeparator;
String.encodeSeparator(this._config.lineSeparator);
if (this._config.ignoreLines > 0) { if (this._config.ignoreLines > 0) {
this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked"); this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");

View File

@ -66,9 +66,9 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() {
}; };
if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") { if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") {
options.lineSeparator = "\n"; options.lineSeparator = "\\n";
} else { } else {
options.lineSeparator = String.decodeSeparator(this._optionContainerElmts.rowSeparatorInput[0].value); options.lineSeparator = this._optionContainerElmts.rowSeparatorInput[0].value;
} }
switch (this._optionContainer.find("input[name='column-separator']:checked")[0].value) { switch (this._optionContainer.find("input[name='column-separator']:checked")[0].value) {
@ -76,11 +76,10 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() {
options.separator = ","; options.separator = ",";
break; break;
case 'tab': case 'tab':
options.separator = "\t"; options.separator = "\\t";
break; break;
default: default:
options.separator = String.decodeSeparator( options.separator = this._optionContainerElmts.columnSeparatorInput[0].value;
this._optionContainerElmts.columnSeparatorInput[0].value);
} }
var parseIntDefault = function(s, def) { var parseIntDefault = function(s, def) {
@ -141,18 +140,16 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
}); });
}); });
var rowSeparatorValue = (this._config.lineSeparator == "\n") ? 'new-line' : 'custom'; var rowSeparatorValue = (this._config.lineSeparator == "\\n") ? 'new-line' : 'custom';
this._optionContainer.find( this._optionContainer.find(
"input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked"); "input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
this._optionContainerElmts.rowSeparatorInput[0].value = this._optionContainerElmts.rowSeparatorInput[0].value = this._config.lineSeparator;
String.encodeSeparator(this._config.lineSeparator);
var columnSeparatorValue = (this._config.separator == ",") ? 'comma' : var columnSeparatorValue = (this._config.separator == ",") ? 'comma' :
((this._config.separator == "\t") ? 'tab' : 'custom'); ((this._config.separator == "\\t") ? 'tab' : 'custom');
this._optionContainer.find( this._optionContainer.find(
"input[name='column-separator'][value='" + columnSeparatorValue + "']").attr("checked", "checked"); "input[name='column-separator'][value='" + columnSeparatorValue + "']").attr("checked", "checked");
this._optionContainerElmts.columnSeparatorInput[0].value = this._optionContainerElmts.columnSeparatorInput[0].value = this._config.separator;
String.encodeSeparator(this._config.separator);
if (this._config.ignoreLines > 0) { if (this._config.ignoreLines > 0) {
this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked"); this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");