diff --git a/main/src/com/google/refine/importers/SeparatorBasedImporter.java b/main/src/com/google/refine/importers/SeparatorBasedImporter.java index 0ba84153d..51234c4f3 100644 --- a/main/src/com/google/refine/importers/SeparatorBasedImporter.java +++ b/main/src/com/google/refine/importers/SeparatorBasedImporter.java @@ -76,6 +76,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { JSONUtilities.safePut(options, "guessCellValueTypes", false); JSONUtilities.safePut(options, "processQuotes", true); JSONUtilities.safePut(options, "quoteCharacter", String.valueOf(CSVParser.DEFAULT_QUOTE_CHARACTER)); + JSONUtilities.safePut(options, "trimStrings", true); return options; } diff --git a/main/src/com/google/refine/importers/TabularImportingParserBase.java b/main/src/com/google/refine/importers/TabularImportingParserBase.java index b36827030..3db0b7514 100644 --- a/main/src/com/google/refine/importers/TabularImportingParserBase.java +++ b/main/src/com/google/refine/importers/TabularImportingParserBase.java @@ -104,6 +104,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase { boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true); boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true); boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false); + boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", false); int filenameColumnIndex = -1; if (includeFileSources) { @@ -168,6 +169,9 @@ abstract public class TabularImportingParserBase extends ImportingParserBase { if (value instanceof String) { storedValue = guessCellValueTypes ? 
ImporterUtilities.parseCellValue((String) value) : (String) value; + if (trimStrings && storedValue instanceof String) { /* type guessing may have produced a non-String value, which must not be cast */ + storedValue = ((String) storedValue).trim(); + } } else { storedValue = ExpressionUtils.wrapStorable(value); } diff --git a/main/tests/server/src/com/google/refine/importers/TsvCsvImporterTests.java b/main/tests/server/src/com/google/refine/importers/TsvCsvImporterTests.java index 4f4d7e9e4..c573c098c 100644 --- a/main/tests/server/src/com/google/refine/importers/TsvCsvImporterTests.java +++ b/main/tests/server/src/com/google/refine/importers/TsvCsvImporterTests.java @@ -206,6 +206,46 @@ public class TsvCsvImporterTests extends ImporterTest { Assert.assertEquals(project.rows.get(0).cells.get(2).value, " data3"); } + @Test(groups = { }, dataProvider = "CSV-TSV-AutoDetermine") + public void readTrimsLeadingTrailingWhitespaceOnTrimStrings(String sep){ + //create input to test with + String inputSeparator = sep == null ? "\t" : sep; + String input = " data1 " + inputSeparator + " 3.4 " + inputSeparator + " data3 "; + + try { + prepareOptions(sep, -1, 0, 0, 0, false, false, true); + parseOneFile(SUT, new StringReader(input)); + } catch (Exception e) { + Assert.fail("Exception during file parse",e); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "3.4"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(groups = { }, dataProvider = "CSV-TSV-AutoDetermine") + public void readDoesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings(String sep){ + //create input to test with + String inputSeparator = sep == null ? 
"\t" : sep; + String input = " data1 " + inputSeparator + " 3.4 " + inputSeparator + " data3 "; + + try { + prepareOptions(sep, -1, 0, 0, 0, false, false, false); + parseOneFile(SUT, new StringReader(input)); + } catch (Exception e) { + Assert.fail("Exception during file parse",e); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, " data1 "); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, " 3.4 "); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, " data3 "); + } + @Test(dataProvider = "CSV-TSV-AutoDetermine") public void readCanAddNull(String sep){ //create input to test with @@ -596,6 +636,19 @@ public class TsvCsvImporterTests extends ImporterTest { prepareOptions(sep, limit, skip, ignoreLines, headerLines, guessValueType, ignoreQuotes, quoteCharacter,"[]"); } + /** Registers, via the whenGet*Option helpers, the separator, limit, skipDataLines, ignoreLines, headerLines, guessCellValueTypes, processQuotes (the negation of ignoreQuotes) and trimStrings options. */ + protected void prepareOptions( + String sep, int limit, int skip, int ignoreLines, + int headerLines, boolean guessValueType, boolean ignoreQuotes, boolean trimStrings) { + whenGetStringOption("separator", options, sep); + whenGetIntegerOption("limit", options, limit); + whenGetIntegerOption("skipDataLines", options, skip); + whenGetIntegerOption("ignoreLines", options, ignoreLines); + whenGetIntegerOption("headerLines", options, headerLines); + whenGetBooleanOption("guessCellValueTypes", options, guessValueType); + whenGetBooleanOption("processQuotes", options, !ignoreQuotes); + whenGetBooleanOption("trimStrings", options, trimStrings); + } protected void prepareOptions( String sep, int limit, int skip, int ignoreLines, diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html index addb4cdd9..953d4fc56 100644 --- 
a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html @@ -23,6 +23,8 @@ + + diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js index 3ed6ffc3d..c3cfce199 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.js @@ -117,6 +117,7 @@ Refine.SeparatorBasedParserUI.prototype.getOptions = function() { options.storeBlankCellsAsNulls = this._optionContainerElmts.storeBlankCellsAsNullsCheckbox[0].checked; options.includeFileSources = this._optionContainerElmts.includeFileSourcesCheckbox[0].checked; + options.trimStrings = this._optionContainerElmts.trimStringsCheckbox[0].checked; if (this._optionContainerElmts.columnNamesCheckbox[0].checked) { var columnNames = this._optionContainerElmts.columnNamesInput.val(); @@ -145,6 +146,7 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() { $('#or-import-escape').html($.i18n('core-index-parser/escape')); $('#or-import-columnNames').html($.i18n('core-index-parser/column-names-label') + ':'); $('#or-import-optional').html($.i18n('core-index-parser/column-names-optional')); + $('#or-import-trim').html($.i18n('core-index-parser/trim')); self._optionContainerElmts.columnNamesInput.prop('disabled', true); @@ -236,6 +238,9 @@ Refine.SeparatorBasedParserUI.prototype._initialize = function() { if (this._config.includeFileSources) { this._optionContainerElmts.includeFileSourcesCheckbox.prop("checked", true); } + if (this._config.trimStrings) { + this._optionContainerElmts.trimStringsCheckbox.prop("checked", true); + } var onChange = function() { self._scheduleUpdatePreview();