Adds trim whitespace option to separator based files (#2408)

* added trim ui to csv importer

* added trim functionality

* trimStrings handler only for strings

* added test for trimStrings option in csv/tsv files

* made trim option enabled by default
This commit is contained in:
Lisa Chandra 2020-03-21 16:08:43 +05:30 committed by GitHub
parent bf8af23493
commit ef8ad85c3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 65 additions and 0 deletions

View File

@ -76,6 +76,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
JSONUtilities.safePut(options, "guessCellValueTypes", false); JSONUtilities.safePut(options, "guessCellValueTypes", false);
JSONUtilities.safePut(options, "processQuotes", true); JSONUtilities.safePut(options, "processQuotes", true);
JSONUtilities.safePut(options, "quoteCharacter", String.valueOf(CSVParser.DEFAULT_QUOTE_CHARACTER)); JSONUtilities.safePut(options, "quoteCharacter", String.valueOf(CSVParser.DEFAULT_QUOTE_CHARACTER));
JSONUtilities.safePut(options, "trimStrings", true);
return options; return options;
} }

View File

@ -104,6 +104,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true); boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true);
boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true); boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false); boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", false);
int filenameColumnIndex = -1; int filenameColumnIndex = -1;
if (includeFileSources) { if (includeFileSources) {
@ -168,6 +169,9 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
if (value instanceof String) { if (value instanceof String) {
storedValue = guessCellValueTypes ? storedValue = guessCellValueTypes ?
ImporterUtilities.parseCellValue((String) value) : (String) value; ImporterUtilities.parseCellValue((String) value) : (String) value;
if (trimStrings && storedValue instanceof String) {
    // Trim only values that are still text. The value stored just above may come
    // from ImporterUtilities.parseCellValue when guessCellValueTypes is on;
    // NOTE(review): that presumably yields non-String values (numbers) for some
    // inputs - confirm - in which case an unconditional String cast would throw
    // ClassCastException. Non-text values are left untouched.
    storedValue = ((String) storedValue).trim();
}
} else { } else {
storedValue = ExpressionUtils.wrapStorable(value); storedValue = ExpressionUtils.wrapStorable(value);
} }

View File

@ -206,6 +206,46 @@ public class TsvCsvImporterTests extends ImporterTest {
Assert.assertEquals(project.rows.get(0).cells.get(2).value, " data3"); Assert.assertEquals(project.rows.get(0).cells.get(2).value, " data3");
} }
/**
 * Checks that, with the trimStrings option switched on, every cell of a
 * single-row file is stored without its surrounding whitespace.
 *
 * @param sep separator supplied by the data provider; {@code null} makes the
 *            input use a tab between fields
 */
@Test(groups = { }, dataProvider = "CSV-TSV-AutoDetermine")
public void readTrimsLeadingTrailingWhitespaceOnTrimStrings(String sep){
    // Build one row of three padded fields, joined by the effective separator.
    final String delimiter = (sep != null) ? sep : "\t";
    final String input = String.join(delimiter, " data1 ", " 3.4 ", " data3 ");
    final String[] expectedValues = { "data1", "3.4", "data3" };

    try {
        // Type guessing off, quote processing on, trimStrings on.
        prepareOptions(sep, -1, 0, 0, 0, false, false, true);
        parseOneFile(SUT, new StringReader(input));
    } catch (Exception e) {
        Assert.fail("Exception during file parse",e);
    }

    Assert.assertEquals(project.columnModel.columns.size(), 3);
    Assert.assertEquals(project.rows.size(), 1);
    Assert.assertEquals(project.rows.get(0).cells.size(), expectedValues.length);
    for (int column = 0; column < expectedValues.length; column++) {
        Assert.assertEquals(project.rows.get(0).cells.get(column).value, expectedValues[column]);
    }
}
/**
 * Checks that, with the trimStrings option switched off, every cell of a
 * single-row file keeps its surrounding whitespace exactly as written.
 *
 * @param sep separator supplied by the data provider; {@code null} makes the
 *            input use a tab between fields
 */
@Test(groups = { }, dataProvider = "CSV-TSV-AutoDetermine")
public void readDoesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings(String sep){
    // The padded fields double as the expected cell values: nothing may be stripped.
    final String[] paddedFields = { " data1 ", " 3.4 ", " data3 " };
    final String delimiter = (sep != null) ? sep : "\t";
    final String input = String.join(delimiter, paddedFields);

    try {
        // Type guessing off, quote processing on, trimStrings off.
        prepareOptions(sep, -1, 0, 0, 0, false, false, false);
        parseOneFile(SUT, new StringReader(input));
    } catch (Exception e) {
        Assert.fail("Exception during file parse",e);
    }

    Assert.assertEquals(project.columnModel.columns.size(), 3);
    Assert.assertEquals(project.rows.size(), 1);
    Assert.assertEquals(project.rows.get(0).cells.size(), paddedFields.length);
    for (int column = 0; column < paddedFields.length; column++) {
        Assert.assertEquals(project.rows.get(0).cells.get(column).value, paddedFields[column]);
    }
}
@Test(dataProvider = "CSV-TSV-AutoDetermine") @Test(dataProvider = "CSV-TSV-AutoDetermine")
public void readCanAddNull(String sep){ public void readCanAddNull(String sep){
//create input to test with //create input to test with
@ -597,6 +637,19 @@ public class TsvCsvImporterTests extends ImporterTest {
prepareOptions(sep, limit, skip, ignoreLines, headerLines, guessValueType, ignoreQuotes, quoteCharacter,"[]"); prepareOptions(sep, limit, skip, ignoreLines, headerLines, guessValueType, ignoreQuotes, quoteCharacter,"[]");
} }
/**
 * Registers the importer option values used by a test, including the
 * trimStrings flag.
 *
 * @param sep            value registered for the "separator" option
 * @param limit          value registered for the "limit" option
 * @param skip           value registered for the "skipDataLines" option
 * @param ignoreLines    value registered for the "ignoreLines" option
 * @param headerLines    value registered for the "headerLines" option
 * @param guessValueType value registered for the "guessCellValueTypes" option
 * @param ignoreQuotes   inverted and registered as the "processQuotes" option
 * @param trimStrings    value registered for the "trimStrings" option
 */
protected void prepareOptions(
        String sep, int limit, int skip, int ignoreLines,
        int headerLines, boolean guessValueType, boolean ignoreQuotes, boolean trimStrings) {
    // The option is phrased positively ("processQuotes"), the parameter negatively.
    final boolean processQuotes = !ignoreQuotes;

    // Layout of the file.
    whenGetStringOption("separator", options, sep);
    whenGetIntegerOption("limit", options, limit);
    whenGetIntegerOption("skipDataLines", options, skip);
    whenGetIntegerOption("ignoreLines", options, ignoreLines);
    whenGetIntegerOption("headerLines", options, headerLines);

    // Handling of individual cell values.
    whenGetBooleanOption("guessCellValueTypes", options, guessValueType);
    whenGetBooleanOption("processQuotes", options, processQuotes);
    whenGetBooleanOption("trimStrings", options, trimStrings);
}
protected void prepareOptions( protected void prepareOptions(
String sep, int limit, int skip, int ignoreLines, String sep, int limit, int skip, int ignoreLines,
int headerLines, boolean guessValueType, boolean ignoreQuotes, String quoteCharacter, String columnNames) { int headerLines, boolean guessValueType, boolean ignoreQuotes, String quoteCharacter, String columnNames) {

View File

@ -23,6 +23,8 @@
<tr><td width="1%"><input type="radio" name="column-separator" value="custom" id="$column-separator-custom" /></td> <tr><td width="1%"><input type="radio" name="column-separator" value="custom" id="$column-separator-custom" /></td>
<td><label for="$column-separator-custom" id="or-import-custom"></label> <td><label for="$column-separator-custom" id="or-import-custom"></label>
<input bind="columnSeparatorInput" type="text" class="lightweight" size="5" /></td></tr> <input bind="columnSeparatorInput" type="text" class="lightweight" size="5" /></td></tr>
<tr><td width="1%"><input type="checkbox" bind="trimStringsCheckbox" id="$trim" /></td>
<td><label for="$trim" id="or-import-trim"></label></td></tr>
<tr><td colspan="2" id="or-import-escape"></td></tr> <tr><td colspan="2" id="or-import-escape"></td></tr>
</table></div></td> </table></div></td>

View File

@ -117,6 +117,7 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() {
options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked; options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked;
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked; options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked;
if (this._optionContainerElmts.columnNamesCheckbox[0].checked) { if (this._optionContainerElmts.columnNamesCheckbox[0].checked) {
var columnNames = this._optionContainerElmts.columnNamesInput.val(); var columnNames = this._optionContainerElmts.columnNamesInput.val();
@ -145,6 +146,7 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
$('#or-import-escape').html($.i18n('core-index-parser/escape')); $('#or-import-escape').html($.i18n('core-index-parser/escape'));
$('#or-import-columnNames').html($.i18n('core-index-parser/column-names-label') + ':'); $('#or-import-columnNames').html($.i18n('core-index-parser/column-names-label') + ':');
$('#or-import-optional').html($.i18n('core-index-parser/column-names-optional')); $('#or-import-optional').html($.i18n('core-index-parser/column-names-optional'));
$('#or-import-trim').html($.i18n('core-index-parser/trim'));
self._optionContainerElmts.columnNamesInput.prop('disabled', true); self._optionContainerElmts.columnNamesInput.prop('disabled', true);
@ -236,6 +238,9 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
if (this._config.includeFileSources) { if (this._config.includeFileSources) {
this._optionContainerElmts.includeFileSourcesCheckbox.prop("checked", true); this._optionContainerElmts.includeFileSourcesCheckbox.prop("checked", true);
} }
if (this._config.trimStrings) {
    // Tick the box through the "checked" property with a real boolean, the same
    // way the includeFileSources checkbox is initialised just above. The previous
    // attr("checked", "unchecked") call set the attribute to a misleading string value.
    this._optionContainerElmts.trimStringsCheckbox.prop("checked", true);
}
var onChange = function() { var onChange = function() {
self._scheduleUpdatePreview(); self._scheduleUpdatePreview();