Merge pull request #1452 from OpenRefine/stundzig-develop/1086-quotecharacter
Add support for quote character
This commit is contained in:
commit
a9ac38e53f
@ -50,6 +50,8 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang3.StringEscapeUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import au.com.bytecode.opencsv.CSVParser;
|
||||
@ -75,6 +77,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
||||
|
||||
JSONUtilities.safePut(options, "guessCellValueTypes", false);
|
||||
JSONUtilities.safePut(options, "processQuotes", true);
|
||||
JSONUtilities.safePut(options, "quoteCharacter", String.valueOf(CSVParser.DEFAULT_QUOTE_CHARACTER));
|
||||
|
||||
return options;
|
||||
}
|
||||
@ -98,9 +101,15 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
|
||||
boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);
|
||||
boolean strictQuotes = JSONUtilities.getBoolean(options, "strictQuotes", false);
|
||||
|
||||
Character quote = CSVParser.DEFAULT_QUOTE_CHARACTER;
|
||||
String quoteCharacter = JSONUtilities.getString(options, "quoteCharacter", null);
|
||||
if (quoteCharacter != null && quoteCharacter.trim().length() == 1) {
|
||||
quote = quoteCharacter.trim().charAt(0);
|
||||
}
|
||||
|
||||
final CSVParser parser = new CSVParser(
|
||||
sep,
|
||||
CSVParser.DEFAULT_QUOTE_CHARACTER,
|
||||
quote,
|
||||
(char) 0, // we don't want escape processing
|
||||
strictQuotes,
|
||||
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
|
||||
|
@ -487,6 +487,33 @@ public class TsvCsvImporterTests extends ImporterTest {
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2");
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "CSV-TSV-AutoDetermine")
|
||||
public void customQuoteCharacter(String sep){
|
||||
//create input to test with
|
||||
String inputSeparator = sep == null ? "\t" : sep;
|
||||
String input = "'col1'" + inputSeparator + "'col2'" + inputSeparator + "'col3'\n" +
|
||||
"'data1'" + inputSeparator + "'data2'" + inputSeparator + "'data3'";
|
||||
|
||||
|
||||
try {
|
||||
prepareOptions(sep, -1, 0, 0, 1, false, false, "'");
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Exception during file parse",e);
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3");
|
||||
Assert.assertEquals(project.rows.size(), 1);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3");
|
||||
}
|
||||
|
||||
//---------------------read tests------------------------
|
||||
@Test
|
||||
public void readCsvWithProperties() {
|
||||
@ -543,11 +570,18 @@ public class TsvCsvImporterTests extends ImporterTest {
|
||||
};
|
||||
}
|
||||
|
||||
protected void prepareOptions(
|
||||
String sep, int limit, int skip, int ignoreLines,
|
||||
int headerLines, boolean guessValueType, boolean ignoreQuotes) {
|
||||
prepareOptions(sep, limit, skip, ignoreLines, headerLines, guessValueType, ignoreQuotes, "\"");
|
||||
}
|
||||
|
||||
protected void prepareOptions(
|
||||
String sep, int limit, int skip, int ignoreLines,
|
||||
int headerLines, boolean guessValueType, boolean ignoreQuotes) {
|
||||
int headerLines, boolean guessValueType, boolean ignoreQuotes, String quoteCharacter) {
|
||||
|
||||
whenGetStringOption("separator", options, sep);
|
||||
whenGetStringOption("quoteCharacter", options, quoteCharacter);
|
||||
whenGetIntegerOption("limit", options, limit);
|
||||
whenGetIntegerOption("skipDataLines", options, skip);
|
||||
whenGetIntegerOption("ignoreLines", options, ignoreLines);
|
||||
|
@ -153,7 +153,8 @@
|
||||
"tabs": "tabs (TSV)",
|
||||
"custom": "custom",
|
||||
"escape": "Escape special characters with \\",
|
||||
"quotation-mark": "Quotation marks are used<br/>to enclose cells containing<br/>column separators",
|
||||
"use-quote": "Use character",
|
||||
"quote-delimits-cells": "to enclose cells containing column separators",
|
||||
"click-xml": "Click on the first XML element corresponding to the first record to load."
|
||||
},
|
||||
"core-dialogs": {
|
||||
|
@ -46,6 +46,12 @@
|
||||
<td><label for="$limit" id="or-import-load"></label></td>
|
||||
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
|
||||
<label for="$limit" id="or-import-rows2"></label></td></tr>
|
||||
|
||||
<tr><td width="1%"><input type="checkbox" bind="processQuoteMarksCheckbox" id="$quotes" /></td>
|
||||
<td><label for="$quotes" id="or-import-quote"></label></td>
|
||||
<td><input bind="quoteCharacterInput" type="text" class="lightweight" maxlength="1" size="2" value="&quot;" id="$quoteCharacter" />
|
||||
<label for="$quoteCharacter" id="or-import-quote-character"></label></td></tr>
|
||||
|
||||
</table></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -54,9 +60,7 @@
|
||||
<td><div class="grid-layout layout-tightest"><table>
|
||||
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
|
||||
<td><label for="$guess" id="or-import-parseCell"></label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="processQuoteMarksCheckbox" id="$quotes" /></td>
|
||||
<td><label for="$quotes" id="or-import-quote"></label></td></tr>
|
||||
</table></div></td>
|
||||
</table></div></td>
|
||||
|
||||
<td><div class="grid-layout layout-tightest"><table>
|
||||
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" id="$store-blank-rows" /></td>
|
||||
@ -69,4 +73,4 @@
|
||||
<td><label for="$include-file-sources" id="or-import-source"></label></td></tr>
|
||||
</table></div></td>
|
||||
</tr>
|
||||
</table></div>
|
||||
</table></div>
|
||||
|
@ -111,6 +111,9 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() {
|
||||
|
||||
options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked;
|
||||
options.processQuotes = this._optionContainerElmts.processQuoteMarksCheckbox[0].checked;
|
||||
if (options.processQuotes) {
|
||||
options.quoteCharacter = this._optionContainerElmts.quoteCharacterInput[0].value;
|
||||
}
|
||||
|
||||
options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked;
|
||||
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
|
||||
@ -143,7 +146,8 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
|
||||
$('#or-import-load').text($.i18n._('core-index-parser')["load-at-most"]);
|
||||
$('#or-import-rows2').text($.i18n._('core-index-parser')["rows-data"]);
|
||||
$('#or-import-parseCell').html($.i18n._('core-index-parser')["parse-cell"]);
|
||||
$('#or-import-quote').html($.i18n._('core-index-parser')["quotation-mark"]);
|
||||
$('#or-import-quote').html($.i18n._('core-index-parser')["use-quote"]);
|
||||
$('#or-import-quote-character').html($.i18n._('core-index-parser')["quote-delimits-cells"]);
|
||||
$('#or-import-blank').text($.i18n._('core-index-parser')["store-blank"]);
|
||||
$('#or-import-null').text($.i18n._('core-index-parser')["store-nulls"]);
|
||||
$('#or-import-source').html($.i18n._('core-index-parser')["store-source"]);
|
||||
@ -187,6 +191,7 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
|
||||
}
|
||||
if (this._config.processQuotes) {
|
||||
this._optionContainerElmts.processQuoteMarksCheckbox.prop("checked", true);
|
||||
this._optionContainerElmts.quoteCharacterInput[0].value = this._config.quoteCharacter;
|
||||
}
|
||||
|
||||
if (this._config.storeBlankCellsAsNulls) {
|
||||
|
Loading…
Reference in New Issue
Block a user