diff --git a/main/src/com/google/refine/importers/SeparatorBasedImporter.java b/main/src/com/google/refine/importers/SeparatorBasedImporter.java index 0ba84153d..51234c4f3 100644 --- a/main/src/com/google/refine/importers/SeparatorBasedImporter.java +++ b/main/src/com/google/refine/importers/SeparatorBasedImporter.java @@ -76,6 +76,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase { JSONUtilities.safePut(options, "guessCellValueTypes", false); JSONUtilities.safePut(options, "processQuotes", true); JSONUtilities.safePut(options, "quoteCharacter", String.valueOf(CSVParser.DEFAULT_QUOTE_CHARACTER)); + JSONUtilities.safePut(options, "trimStrings", true); return options; } diff --git a/main/src/com/google/refine/importers/TabularImportingParserBase.java b/main/src/com/google/refine/importers/TabularImportingParserBase.java index b36827030..3db0b7514 100644 --- a/main/src/com/google/refine/importers/TabularImportingParserBase.java +++ b/main/src/com/google/refine/importers/TabularImportingParserBase.java @@ -104,6 +104,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase { boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true); boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true); boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false); + boolean trimStrings = JSONUtilities.getBoolean(options, "trimStrings", false); int filenameColumnIndex = -1; if (includeFileSources) { @@ -168,6 +169,10 @@ abstract public class TabularImportingParserBase extends ImportingParserBase { if (value instanceof String) { storedValue = guessCellValueTypes ? 
ImporterUtilities.parseCellValue((String) value) : (String) value; + // parseCellValue may have produced a non-String value when guessing types, so trim only real Strings + if (trimStrings && storedValue instanceof String) { + storedValue = ((String) storedValue).trim(); + } } else { storedValue = ExpressionUtils.wrapStorable(value); } diff --git a/main/tests/server/src/com/google/refine/importers/TsvCsvImporterTests.java b/main/tests/server/src/com/google/refine/importers/TsvCsvImporterTests.java index 4f4d7e9e4..c573c098c 100644 --- a/main/tests/server/src/com/google/refine/importers/TsvCsvImporterTests.java +++ b/main/tests/server/src/com/google/refine/importers/TsvCsvImporterTests.java @@ -206,6 +206,46 @@ public class TsvCsvImporterTests extends ImporterTest { Assert.assertEquals(project.rows.get(0).cells.get(2).value, " data3"); } + @Test(groups = { }, dataProvider = "CSV-TSV-AutoDetermine") + public void readTrimsLeadingTrailingWhitespaceOnTrimStrings(String sep){ + //create input to test with + String inputSeparator = sep == null ? "\t" : sep; + String input = " data1 " + inputSeparator + " 3.4 " + inputSeparator + " data3 "; + + try { + prepareOptions(sep, -1, 0, 0, 0, false, false, true); + parseOneFile(SUT, new StringReader(input)); + } catch (Exception e) { + Assert.fail("Exception during file parse",e); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "3.4"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(groups = { }, dataProvider = "CSV-TSV-AutoDetermine") + public void readDoesNotTrimLeadingTrailingWhitespaceOnNoTrimStrings(String sep){ + //create input to test with + String inputSeparator = sep == null ? 
"\t" : sep; + String input = " data1 " + inputSeparator + " 3.4 " + inputSeparator + " data3 "; + + try { + prepareOptions(sep, -1, 0, 0, 0, false, false, false); + parseOneFile(SUT, new StringReader(input)); + } catch (Exception e) { + Assert.fail("Exception during file parse",e); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, " data1 "); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, " 3.4 "); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, " data3 "); + } + @Test(dataProvider = "CSV-TSV-AutoDetermine") public void readCanAddNull(String sep){ //create input to test with @@ -596,6 +636,19 @@ public class TsvCsvImporterTests extends ImporterTest { prepareOptions(sep, limit, skip, ignoreLines, headerLines, guessValueType, ignoreQuotes, quoteCharacter,"[]"); } + + protected void prepareOptions( + String sep, int limit, int skip, int ignoreLines, + int headerLines, boolean guessValueType, boolean ignoreQuotes, boolean trimStrings) { + whenGetStringOption("separator", options, sep); + whenGetIntegerOption("limit", options, limit); + whenGetIntegerOption("skipDataLines", options, skip); + whenGetIntegerOption("ignoreLines", options, ignoreLines); + whenGetIntegerOption("headerLines", options, headerLines); + whenGetBooleanOption("guessCellValueTypes", options, guessValueType); + whenGetBooleanOption("processQuotes", options, !ignoreQuotes); + whenGetBooleanOption("trimStrings", options, trimStrings); + } protected void prepareOptions( String sep, int limit, int skip, int ignoreLines, diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html index addb4cdd9..953d4fc56 100644 --- 
a/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/separator-based-parser-ui.html @@ -23,6 +23,8 @@