Issue 475 - Support escaped custom separators
git-svn-id: http://google-refine.googlecode.com/svn/trunk@2355 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
cacbedd352
commit
a7c81880a8
@ -44,7 +44,7 @@ public class FixedWidthImporter extends TabularImportingParserBase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
JSONUtilities.safePut(options, "lineSeparator", "\n");
|
JSONUtilities.safePut(options, "lineSeparator", "\\n");
|
||||||
JSONUtilities.safePut(options, "headerLines", 0);
|
JSONUtilities.safePut(options, "headerLines", 0);
|
||||||
JSONUtilities.safePut(options, "columnWidths", columnWidths);
|
JSONUtilities.safePut(options, "columnWidths", columnWidths);
|
||||||
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
||||||
@ -63,7 +63,8 @@ public class FixedWidthImporter extends TabularImportingParserBase {
|
|||||||
JSONObject options,
|
JSONObject options,
|
||||||
List<Exception> exceptions
|
List<Exception> exceptions
|
||||||
) {
|
) {
|
||||||
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n");
|
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\\n");
|
||||||
|
// lineSeparator = StringEscapeUtils.unescapeJava(lineSeparator);
|
||||||
final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths");
|
final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths");
|
||||||
|
|
||||||
List<Object> retrievedColumnNames = null;
|
List<Object> retrievedColumnNames = null;
|
||||||
|
@ -27,7 +27,7 @@ public class LineBasedImporter extends TabularImportingParserBase {
|
|||||||
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||||
|
|
||||||
JSONUtilities.safePut(options, "lineSeparator", "\n");
|
JSONUtilities.safePut(options, "lineSeparator", "\\n");
|
||||||
JSONUtilities.safePut(options, "linesPerRow", 1);
|
JSONUtilities.safePut(options, "linesPerRow", 1);
|
||||||
JSONUtilities.safePut(options, "headerLines", 0);
|
JSONUtilities.safePut(options, "headerLines", 0);
|
||||||
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
||||||
|
@ -48,6 +48,7 @@ import java.util.HashMap;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringEscapeUtils;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
|
|
||||||
import au.com.bytecode.opencsv.CSVParser;
|
import au.com.bytecode.opencsv.CSVParser;
|
||||||
@ -68,10 +69,10 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
|||||||
List<JSONObject> fileRecords, String format) {
|
List<JSONObject> fileRecords, String format) {
|
||||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||||
|
|
||||||
JSONUtilities.safePut(options, "lineSeparator", "\n");
|
JSONUtilities.safePut(options, "lineSeparator", "\\n");
|
||||||
|
|
||||||
String separator = guessSeparator(job, fileRecords);
|
String separator = guessSeparator(job, fileRecords);
|
||||||
JSONUtilities.safePut(options, "separator", separator != null ? separator : "\t");
|
JSONUtilities.safePut(options, "separator", separator != null ? separator : "\\t");
|
||||||
|
|
||||||
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
||||||
JSONUtilities.safePut(options, "processQuotes", true);
|
JSONUtilities.safePut(options, "processQuotes", true);
|
||||||
@ -90,11 +91,13 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
|||||||
JSONObject options,
|
JSONObject options,
|
||||||
List<Exception> exceptions
|
List<Exception> exceptions
|
||||||
) {
|
) {
|
||||||
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n");
|
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\\n");
|
||||||
String sep = JSONUtilities.getString(options, "separator", "\t");
|
// lineSeparator = StringEscapeUtils.unescapeJava(lineSeparator);
|
||||||
|
String sep = JSONUtilities.getString(options, "separator", "\\t");
|
||||||
if (sep == null || "".equals(sep)) {
|
if (sep == null || "".equals(sep)) {
|
||||||
sep = "\t";
|
sep = "\\t";
|
||||||
}
|
}
|
||||||
|
sep = StringEscapeUtils.unescapeJava(sep);
|
||||||
boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);
|
boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);
|
||||||
|
|
||||||
final CSVParser parser = new CSVParser(
|
final CSVParser parser = new CSVParser(
|
||||||
@ -149,7 +152,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
|||||||
File file = new File(job.getRawDataDir(), location);
|
File file = new File(job.getRawDataDir(), location);
|
||||||
Separator separator = guessSeparator(file, encoding);
|
Separator separator = guessSeparator(file, encoding);
|
||||||
if (separator != null) {
|
if (separator != null) {
|
||||||
return Character.toString(separator.separator);
|
return StringEscapeUtils.escapeJava(Character.toString(separator.separator));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -34,8 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
function CustomTabularExporterDialog(options) {
|
function CustomTabularExporterDialog(options) {
|
||||||
options = options || {
|
options = options || {
|
||||||
format: 'tsv',
|
format: 'tsv',
|
||||||
lineSeparator: '\n',
|
lineSeparator: '\\n',
|
||||||
separator: '\t',
|
separator: '\\t',
|
||||||
encoding: 'UTF-8',
|
encoding: 'UTF-8',
|
||||||
outputColumnHeaders: true,
|
outputColumnHeaders: true,
|
||||||
outputBlankRows: false,
|
outputBlankRows: false,
|
||||||
@ -202,8 +202,8 @@ CustomTabularExporterDialog.prototype._createDialog = function(options) {
|
|||||||
|
|
||||||
CustomTabularExporterDialog.prototype._configureUIFromOptionCode = function(options) {
|
CustomTabularExporterDialog.prototype._configureUIFromOptionCode = function(options) {
|
||||||
this._dialog.find('input[name="custom-tabular-exporter-download-format"][value="' + options.format + '"]').attr('checked', 'checked');
|
this._dialog.find('input[name="custom-tabular-exporter-download-format"][value="' + options.format + '"]').attr('checked', 'checked');
|
||||||
this._elmts.separatorInput[0].value = String.encodeSeparator(options.separator || ',');
|
this._elmts.separatorInput[0].value = options.separator || ',';
|
||||||
this._elmts.lineSeparatorInput[0].value = String.encodeSeparator(options.lineSeparator || '\n');
|
this._elmts.lineSeparatorInput[0].value = options.lineSeparator || '\\n';
|
||||||
this._elmts.encodingInput[0].value = options.encoding;
|
this._elmts.encodingInput[0].value = options.encoding;
|
||||||
this._elmts.outputColumnHeadersCheckbox.attr('checked', (options.outputColumnHeaders) ? 'checked' : '');
|
this._elmts.outputColumnHeadersCheckbox.attr('checked', (options.outputColumnHeaders) ? 'checked' : '');
|
||||||
this._elmts.outputBlankRowsCheckbox.attr('checked', (options.outputBlankRows) ? 'checked' : '');
|
this._elmts.outputBlankRowsCheckbox.attr('checked', (options.outputBlankRows) ? 'checked' : '');
|
||||||
@ -336,13 +336,13 @@ CustomTabularExporterDialog.prototype._getOptionCode = function() {
|
|||||||
|
|
||||||
if (options.format == 'tsv' || options.format == 'csv' || options.format == '*sv') {
|
if (options.format == 'tsv' || options.format == 'csv' || options.format == '*sv') {
|
||||||
if (options.format == 'tsv') {
|
if (options.format == 'tsv') {
|
||||||
options.separator = '\t';
|
options.separator = '\\t';
|
||||||
} else if (options.format == 'csv') {
|
} else if (options.format == 'csv') {
|
||||||
options.separator = ',';
|
options.separator = ',';
|
||||||
} else {
|
} else {
|
||||||
options.separator = String.decodeSeparator(this._elmts.separatorInput.val());
|
options.separator = this._elmts.separatorInput.val();
|
||||||
}
|
}
|
||||||
options.lineSeparator = String.decodeSeparator(this._elmts.lineSeparatorInput.val());
|
options.lineSeparator = this._elmts.lineSeparatorInput.val();
|
||||||
options.encoding = this._elmts.encodingInput.val();
|
options.encoding = this._elmts.encodingInput.val();
|
||||||
}
|
}
|
||||||
options.outputColumnHeaders = this._elmts.outputColumnHeadersCheckbox[0].checked;
|
options.outputColumnHeaders = this._elmts.outputColumnHeadersCheckbox[0].checked;
|
||||||
|
@ -73,9 +73,9 @@ Refine.FixedWidthParserUI.prototype.getOptions = function() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") {
|
if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") {
|
||||||
options.lineSeparator = "\n";
|
options.lineSeparator = "\\n";
|
||||||
} else {
|
} else {
|
||||||
options.lineSeparator = String.decodeSeparator(this._optionContainerElmts.rowSeparatorInput[0].value);
|
options.lineSeparator = this._optionContainerElmts.rowSeparatorInput[0].value;
|
||||||
}
|
}
|
||||||
|
|
||||||
var parseIntDefault = function(s, def) {
|
var parseIntDefault = function(s, def) {
|
||||||
@ -140,11 +140,10 @@ Refine.FixedWidthParserUI.prototype._initialize = function() {
|
|||||||
this._optionContainerElmts.columnNamesInput[0].value = this._config.columnNames.join(',');
|
this._optionContainerElmts.columnNamesInput[0].value = this._config.columnNames.join(',');
|
||||||
}
|
}
|
||||||
|
|
||||||
var rowSeparatorValue = (this._config.lineSeparator == "\n") ? 'new-line' : 'custom';
|
var rowSeparatorValue = (this._config.lineSeparator == "\\n") ? 'new-line' : 'custom';
|
||||||
this._optionContainer.find(
|
this._optionContainer.find(
|
||||||
"input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
|
"input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
|
||||||
this._optionContainerElmts.rowSeparatorInput[0].value =
|
this._optionContainerElmts.rowSeparatorInput[0].value = this._config.lineSeparator;
|
||||||
String.encodeSeparator(this._config.lineSeparator);
|
|
||||||
|
|
||||||
if (this._config.ignoreLines > 0) {
|
if (this._config.ignoreLines > 0) {
|
||||||
this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");
|
this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");
|
||||||
|
@ -81,9 +81,9 @@ Refine.LineBasedParserUI.prototype.getOptions = function() {
|
|||||||
options.linesPerRow = parseIntDefault(this._optionContainerElmts.linesPerRowInput[0].value, 1);
|
options.linesPerRow = parseIntDefault(this._optionContainerElmts.linesPerRowInput[0].value, 1);
|
||||||
|
|
||||||
if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") {
|
if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") {
|
||||||
options.lineSeparator = "\n";
|
options.lineSeparator = "\\n";
|
||||||
} else {
|
} else {
|
||||||
options.lineSeparator = String.decodeSeparator(this._optionContainerElmts.rowSeparatorInput[0].value);
|
options.lineSeparator = this._optionContainerElmts.rowSeparatorInput[0].value;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this._optionContainerElmts.ignoreCheckbox[0].checked) {
|
if (this._optionContainerElmts.ignoreCheckbox[0].checked) {
|
||||||
@ -127,11 +127,10 @@ Refine.LineBasedParserUI.prototype._initialize = function() {
|
|||||||
this._optionContainerElmts.linesPerRowInput[0].value =
|
this._optionContainerElmts.linesPerRowInput[0].value =
|
||||||
this._config.linesPerRow.toString();
|
this._config.linesPerRow.toString();
|
||||||
|
|
||||||
var rowSeparatorValue = (this._config.lineSeparator == "\n") ? 'new-line' : 'custom';
|
var rowSeparatorValue = (this._config.lineSeparator == "\\n") ? 'new-line' : 'custom';
|
||||||
this._optionContainer.find(
|
this._optionContainer.find(
|
||||||
"input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
|
"input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
|
||||||
this._optionContainerElmts.rowSeparatorInput[0].value =
|
this._optionContainerElmts.rowSeparatorInput[0].value = this._config.lineSeparator;
|
||||||
String.encodeSeparator(this._config.lineSeparator);
|
|
||||||
|
|
||||||
if (this._config.ignoreLines > 0) {
|
if (this._config.ignoreLines > 0) {
|
||||||
this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");
|
this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");
|
||||||
|
@ -66,9 +66,9 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") {
|
if (this._optionContainer.find("input[name='row-separator']:checked")[0].value === "new-line") {
|
||||||
options.lineSeparator = "\n";
|
options.lineSeparator = "\\n";
|
||||||
} else {
|
} else {
|
||||||
options.lineSeparator = String.decodeSeparator(this._optionContainerElmts.rowSeparatorInput[0].value);
|
options.lineSeparator = this._optionContainerElmts.rowSeparatorInput[0].value;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (this._optionContainer.find("input[name='column-separator']:checked")[0].value) {
|
switch (this._optionContainer.find("input[name='column-separator']:checked")[0].value) {
|
||||||
@ -76,11 +76,10 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() {
|
|||||||
options.separator = ",";
|
options.separator = ",";
|
||||||
break;
|
break;
|
||||||
case 'tab':
|
case 'tab':
|
||||||
options.separator = "\t";
|
options.separator = "\\t";
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
options.separator = String.decodeSeparator(
|
options.separator = this._optionContainerElmts.columnSeparatorInput[0].value;
|
||||||
this._optionContainerElmts.columnSeparatorInput[0].value);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var parseIntDefault = function(s, def) {
|
var parseIntDefault = function(s, def) {
|
||||||
@ -141,18 +140,16 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
var rowSeparatorValue = (this._config.lineSeparator == "\n") ? 'new-line' : 'custom';
|
var rowSeparatorValue = (this._config.lineSeparator == "\\n") ? 'new-line' : 'custom';
|
||||||
this._optionContainer.find(
|
this._optionContainer.find(
|
||||||
"input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
|
"input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
|
||||||
this._optionContainerElmts.rowSeparatorInput[0].value =
|
this._optionContainerElmts.rowSeparatorInput[0].value = this._config.lineSeparator;
|
||||||
String.encodeSeparator(this._config.lineSeparator);
|
|
||||||
|
|
||||||
var columnSeparatorValue = (this._config.separator == ",") ? 'comma' :
|
var columnSeparatorValue = (this._config.separator == ",") ? 'comma' :
|
||||||
((this._config.separator == "\t") ? 'tab' : 'custom');
|
((this._config.separator == "\\t") ? 'tab' : 'custom');
|
||||||
this._optionContainer.find(
|
this._optionContainer.find(
|
||||||
"input[name='column-separator'][value='" + columnSeparatorValue + "']").attr("checked", "checked");
|
"input[name='column-separator'][value='" + columnSeparatorValue + "']").attr("checked", "checked");
|
||||||
this._optionContainerElmts.columnSeparatorInput[0].value =
|
this._optionContainerElmts.columnSeparatorInput[0].value = this._config.separator;
|
||||||
String.encodeSeparator(this._config.separator);
|
|
||||||
|
|
||||||
if (this._config.ignoreLines > 0) {
|
if (this._config.ignoreLines > 0) {
|
||||||
this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");
|
this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");
|
||||||
|
Loading…
Reference in New Issue
Block a user