Merge pull request #1452 from OpenRefine/stundzig-develop/1086-quotecharacter

Add support for quote character
This commit is contained in:
Jacky 2018-02-03 16:17:28 -05:00 committed by GitHub
commit a9ac38e53f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 61 additions and 8 deletions

View File

@ -50,6 +50,8 @@ import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.json.JSONObject;
import au.com.bytecode.opencsv.CSVParser;
@ -75,6 +77,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
JSONUtilities.safePut(options, "guessCellValueTypes", false);
JSONUtilities.safePut(options, "processQuotes", true);
JSONUtilities.safePut(options, "quoteCharacter", String.valueOf(CSVParser.DEFAULT_QUOTE_CHARACTER));
return options;
}
@ -98,9 +101,15 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);
boolean strictQuotes = JSONUtilities.getBoolean(options, "strictQuotes", false);
Character quote = CSVParser.DEFAULT_QUOTE_CHARACTER;
String quoteCharacter = JSONUtilities.getString(options, "quoteCharacter", null);
if (quoteCharacter != null && quoteCharacter.trim().length() == 1) {
quote = quoteCharacter.trim().charAt(0);
}
final CSVParser parser = new CSVParser(
sep,
CSVParser.DEFAULT_QUOTE_CHARACTER,
quote,
(char) 0, // we don't want escape processing
strictQuotes,
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,

View File

@ -487,6 +487,33 @@ public class TsvCsvImporterTests extends ImporterTest {
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2");
}
/**
 * Checks that cells wrapped in a custom quote character (a single quote) are
 * imported with the quotes stripped, for each separator supplied by the
 * "CSV-TSV-AutoDetermine" data provider.
 *
 * @param sep separator handed to the importer options; when {@code null} the
 *            test input itself is built with tab characters
 */
@Test(dataProvider = "CSV-TSV-AutoDetermine")
public void customQuoteCharacter(String sep){
    // Build one header line and one data line, every cell wrapped in single quotes.
    final String delimiter = (sep != null) ? sep : "\t";
    final String headerLine = "'col1'" + delimiter + "'col2'" + delimiter + "'col3'";
    final String dataLine = "'data1'" + delimiter + "'data2'" + delimiter + "'data3'";
    final String input = headerLine + "\n" + dataLine;

    try {
        // No row limit, nothing skipped or ignored, one header line, and "'" as quote character.
        prepareOptions(sep, -1, 0, 0, 1, false, false, "'");
        parseOneFile(SUT, new StringReader(input));
    } catch (Exception e) {
        Assert.fail("Exception during file parse",e);
    }

    // Column names must come through without the surrounding quotes.
    final String[] expectedHeaders = {"col1", "col2", "col3"};
    Assert.assertEquals(project.columnModel.columns.size(), 3);
    for (int col = 0; col < expectedHeaders.length; col++) {
        Assert.assertEquals(project.columnModel.columns.get(col).getName(), expectedHeaders[col]);
    }

    // Exactly one data row, whose cells are likewise unquoted.
    final String[] expectedCells = {"data1", "data2", "data3"};
    Assert.assertEquals(project.rows.size(), 1);
    Assert.assertEquals(project.rows.get(0).cells.size(), 3);
    for (int col = 0; col < expectedCells.length; col++) {
        Assert.assertEquals(project.rows.get(0).cells.get(col).value, expectedCells[col]);
    }
}
//---------------------read tests------------------------
@Test
public void readCsvWithProperties() {
@ -543,11 +570,18 @@ public class TsvCsvImporterTests extends ImporterTest {
};
}
/**
 * Convenience overload that stubs the importer options with the double quote
 * ({@code "}) as the quote character and delegates to the eight-argument
 * {@code prepareOptions}.
 *
 * @param sep            value stubbed for the "separator" option
 * @param limit          value stubbed for the "limit" option
 * @param skip           value stubbed for the "skipDataLines" option
 * @param ignoreLines    value stubbed for the "ignoreLines" option
 * @param headerLines    passed through unchanged to the full overload
 * @param guessValueType passed through unchanged to the full overload
 * @param ignoreQuotes   passed through unchanged to the full overload
 */
protected void prepareOptions(
        String sep, int limit, int skip, int ignoreLines,
        int headerLines, boolean guessValueType, boolean ignoreQuotes) {
    final String defaultQuoteCharacter = "\"";
    prepareOptions(
            sep, limit, skip, ignoreLines,
            headerLines, guessValueType, ignoreQuotes, defaultQuoteCharacter);
}
protected void prepareOptions(
String sep, int limit, int skip, int ignoreLines,
int headerLines, boolean guessValueType, boolean ignoreQuotes) {
int headerLines, boolean guessValueType, boolean ignoreQuotes, String quoteCharacter) {
whenGetStringOption("separator", options, sep);
whenGetStringOption("quoteCharacter", options, quoteCharacter);
whenGetIntegerOption("limit", options, limit);
whenGetIntegerOption("skipDataLines", options, skip);
whenGetIntegerOption("ignoreLines", options, ignoreLines);

View File

@ -153,7 +153,8 @@
"tabs": "tabs (TSV)",
"custom": "custom",
"escape": "Escape special characters with \\",
"quotation-mark": "Quotation marks are used<br/>to enclose cells containing<br/>column separators",
"use-quote": "Use character",
"quote-delimits-cells": "to enclose cells containing column separators",
"click-xml": "Click on the first XML element corresponding to the first record to load."
},
"core-dialogs": {

View File

@ -46,6 +46,12 @@
<td><label for="$limit" id="or-import-load"></label></td>
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
<label for="$limit" id="or-import-rows2"></label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="processQuoteMarksCheckbox" id="$quotes" /></td>
<td><label for="$quotes" id="or-import-quote"></label></td>
<td><input bind="quoteCharacterInput" type="text" class="lightweight" maxlength="1" size="2" value="&quot;" id="$quoteCharacter" />
<label for="$quoteCharacter" id="or-import-quote-character"></label></td></tr>
</table></div></td>
</tr>
<tr>
@ -54,9 +60,7 @@
<td><div class="grid-layout layout-tightest"><table>
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" id="$guess" /></td>
<td><label for="$guess" id="or-import-parseCell"></label></td></tr>
<tr><td width="1%"><input type="checkbox" bind="processQuoteMarksCheckbox" id="$quotes" /></td>
<td><label for="$quotes" id="or-import-quote"></label></td></tr>
</table></div></td>
</table></div></td>
<td><div class="grid-layout layout-tightest"><table>
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" id="$store-blank-rows" /></td>
@ -69,4 +73,4 @@
<td><label for="$include-file-sources" id="or-import-source"></label></td></tr>
</table></div></td>
</tr>
</table></div>
</table></div>

View File

@ -111,6 +111,9 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() {
options.guessCellValueTypes = this._optionContainerElmts.guessCellValueTypesCheckbox[0].checked;
options.processQuotes = this._optionContainerElmts.processQuoteMarksCheckbox[0].checked;
if (options.processQuotes) {
options.quoteCharacter = this._optionContainerElmts.quoteCharacterInput[0].value;
}
options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked;
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
@ -143,7 +146,8 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
$('#or-import-load').text($.i18n._('core-index-parser')["load-at-most"]);
$('#or-import-rows2').text($.i18n._('core-index-parser')["rows-data"]);
$('#or-import-parseCell').html($.i18n._('core-index-parser')["parse-cell"]);
$('#or-import-quote').html($.i18n._('core-index-parser')["quotation-mark"]);
$('#or-import-quote').html($.i18n._('core-index-parser')["use-quote"]);
$('#or-import-quote-character').html($.i18n._('core-index-parser')["quote-delimits-cells"]);
$('#or-import-blank').text($.i18n._('core-index-parser')["store-blank"]);
$('#or-import-null').text($.i18n._('core-index-parser')["store-nulls"]);
$('#or-import-source').html($.i18n._('core-index-parser')["store-source"]);
@ -187,6 +191,7 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
}
if (this._config.processQuotes) {
this._optionContainerElmts.processQuoteMarksCheckbox.prop("checked", true);
this._optionContainerElmts.quoteCharacterInput[0].value = this._config.quoteCharacter;
}
if (this._config.storeBlankCellsAsNulls) {