Adds trim-whitespace option to separator-based files (#2408)

* added trim ui to csv importer

* added trim functionality

* trimStrings handler only for strings

* added test for trimStrings option in csv/tsv files

* made trim option enabled by default
This commit is contained in:
Lisa Chandra 2020-03-21 16:08:43 +05:30 committed by GitHub
parent bf8af23493
commit ef8ad85c3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 65 additions and 0 deletions

View File

@ -76,6 +76,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
JSONUtilities.safePut(options, "guessCellValueTypes", false);
JSONUtilities.safePut(options, "processQuotes", true);
JSONUtilities.safePut(options, "quoteCharacter", String.valueOf(CSVParser.DEFAULT_QUOTE_CHARACTER));
JSONUtilities.safePut(options, "trimStrings", true);
return options;
}

View File

@ -104,6 +104,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true);
boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", false);
int filenameColumnIndex = -1;
if (includeFileSources) {
@ -168,6 +169,9 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
if (value instanceof String) {
    // Either let the importer guess a typed value from the raw text, or keep
    // the text exactly as read.
    storedValue = guessCellValueTypes ?
            ImporterUtilities.parseCellValue((String) value) : (String) value;
    // Trim only while the stored value is still a String. The result of
    // parseCellValue is not guaranteed to be a String at this point
    // (presumably it yields numbers for numeric text -- confirm against
    // ImporterUtilities), so an unconditional (String) cast could throw a
    // ClassCastException when guessCellValueTypes and trimStrings are both on.
    if (trimStrings && storedValue instanceof String) {
        storedValue = ((String) storedValue).trim();
    }
} else {
    // Non-String values go through ExpressionUtils.wrapStorable before storage.
    storedValue = ExpressionUtils.wrapStorable(value);
}

View File

@ -206,6 +206,46 @@ public class TsvCsvImporterTests extends ImporterTest {
Assert.assertEquals(project.rows.get(0).cells.get(2).value, " data3");
}
/**
 * With the trimStrings option enabled, every cell of a single-row input must
 * come back without its leading and trailing spaces.
 *
 * @param sep separator supplied by the data provider; may be null, in which
 *            case a tab is used to build the input text
 */
@Test(groups = { }, dataProvider = "CSV-TSV-AutoDetermine")
public void readTrimsLeadingTrailingWhitespaceOnTrimStrings(String sep){
    // Build one line of three space-padded fields joined by the separator.
    String delimiter = (sep != null) ? sep : "\t";
    String line = String.join(delimiter, " data1 ", " 3.4 ", " data3 ");

    try {
        // Last argument switches trimStrings on; type guessing stays off.
        prepareOptions(sep, -1, 0, 0, 0, false, false, true);
        parseOneFile(SUT, new StringReader(line));
    } catch (Exception e) {
        Assert.fail("Exception during file parse",e);
    }

    // Expect one row of three columns, each value stripped of its padding.
    String[] expectedValues = { "data1", "3.4", "data3" };
    Assert.assertEquals(project.columnModel.columns.size(), 3);
    Assert.assertEquals(project.rows.size(), 1);
    Assert.assertEquals(project.rows.get(0).cells.size(), 3);
    for (int col = 0; col < expectedValues.length; col++) {
        Assert.assertEquals(project.rows.get(0).cells.get(col).value, expectedValues[col]);
    }
}
/**
 * With the trimStrings option disabled, every cell of a single-row input must
 * keep its leading and trailing spaces exactly as written.
 *
 * @param sep separator supplied by the data provider; may be null, in which
 *            case a tab is used to build the input text
 */
@Test(groups = { }, dataProvider = "CSV-TSV-AutoDetermine")
public void readDoesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings(String sep){
    // Build one line of three space-padded fields joined by the separator.
    String delimiter = (sep != null) ? sep : "\t";
    String line = String.join(delimiter, " data1 ", " 3.4 ", " data3 ");

    try {
        // Last argument switches trimStrings off; type guessing stays off.
        prepareOptions(sep, -1, 0, 0, 0, false, false, false);
        parseOneFile(SUT, new StringReader(line));
    } catch (Exception e) {
        Assert.fail("Exception during file parse",e);
    }

    // Expect one row of three columns, each value still carrying its padding.
    String[] expectedValues = { " data1 ", " 3.4 ", " data3 " };
    Assert.assertEquals(project.columnModel.columns.size(), 3);
    Assert.assertEquals(project.rows.size(), 1);
    Assert.assertEquals(project.rows.get(0).cells.size(), 3);
    for (int col = 0; col < expectedValues.length; col++) {
        Assert.assertEquals(project.rows.get(0).cells.get(col).value, expectedValues[col]);
    }
}
@Test(dataProvider = "CSV-TSV-AutoDetermine")
public void readCanAddNull(String sep){
//create input to test with
@ -596,6 +636,19 @@ public class TsvCsvImporterTests extends ImporterTest {
prepareOptions(sep, limit, skip, ignoreLines, headerLines, guessValueType, ignoreQuotes, quoteCharacter,"[]");
}
/**
 * Stubs the importer options used by the trimStrings tests.
 *
 * @param sep            value registered for the "separator" option
 * @param limit          value registered for the "limit" option
 * @param skip           value registered for the "skipDataLines" option
 * @param ignoreLines    value registered for the "ignoreLines" option
 * @param headerLines    value registered for the "headerLines" option
 * @param guessValueType value registered for the "guessCellValueTypes" option
 * @param ignoreQuotes   inverted and registered as the "processQuotes" option
 * @param trimStrings    value registered for the "trimStrings" option
 */
protected void prepareOptions(
        String sep, int limit, int skip, int ignoreLines,
        int headerLines, boolean guessValueType, boolean ignoreQuotes, boolean trimStrings) {
    // The option is stored in its positive form, so flip the caller's flag once.
    final boolean processQuotes = !ignoreQuotes;

    // String-valued option
    whenGetStringOption("separator", options, sep);

    // Integer-valued options
    whenGetIntegerOption("limit", options, limit);
    whenGetIntegerOption("skipDataLines", options, skip);
    whenGetIntegerOption("ignoreLines", options, ignoreLines);
    whenGetIntegerOption("headerLines", options, headerLines);

    // Boolean-valued options
    whenGetBooleanOption("guessCellValueTypes", options, guessValueType);
    whenGetBooleanOption("processQuotes", options, processQuotes);
    whenGetBooleanOption("trimStrings", options, trimStrings);
}
protected void prepareOptions(
String sep, int limit, int skip, int ignoreLines,

View File

@ -23,6 +23,8 @@
<tr><td width="1%"><input type="radio" name="column-separator" value="custom" id="$column-separator-custom" /></td>
<td><label for="$column-separator-custom" id="or-import-custom"></label>
<input bind="columnSeparatorInput" type="text" class="lightweight" size="5" /></td></tr>
<tr><td width="1%"><input type="checkbox" bind="trimStringsCheckbox" id="$trim" /></td>
<td><label for="$trim" id="or-import-trim"></label></td></tr>
<tr><td colspan="2" id="or-import-escape"></td></tr>
</table></div></td>

View File

@ -117,6 +117,7 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() {
options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked;
options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked;
options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked;
if (this._optionContainerElmts.columnNamesCheckbox[0].checked) {
var columnNames = this._optionContainerElmts.columnNamesInput.val();
@ -145,6 +146,7 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
$('#or-import-escape').html($.i18n('core-index-parser/escape'));
$('#or-import-columnNames').html($.i18n('core-index-parser/column-names-label') + ':');
$('#or-import-optional').html($.i18n('core-index-parser/column-names-optional'));
$('#or-import-trim').html($.i18n('core-index-parser/trim'));
self._optionContainerElmts.columnNamesInput.prop('disabled', true);
@ -236,6 +238,9 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() {
if (this._config.includeFileSources) {
this._optionContainerElmts.includeFileSourcesCheckbox.prop("checked", true);
}
// Reflect the configured trimStrings setting in the checkbox. Set the
// "checked" property (as done for includeFileSourcesCheckbox) rather than
// writing the misleading string "unchecked" into the "checked" attribute.
if (this._config.trimStrings) {
  this._optionContainerElmts.trimStringsCheckbox.prop("checked", true);
}
var onChange = function() {
self._scheduleUpdatePreview();