From 73042712ed6a520e8016b7e62c7a81756074f3f1 Mon Sep 17 00:00:00 2001 From: David Huynh Date: Fri, 15 Oct 2010 05:30:15 +0000 Subject: [PATCH] Made csv/tsv importer not trim whitespace even if "guess cells' types" is checked (for cells that are strings). Updated csv tests to expect un-trimmed cells. git-svn-id: http://google-refine.googlecode.com/svn/trunk@1557 7d457c2a-affb-35e4-300a-418c747d4874 --- .../refine/importers/ImporterUtilities.java | 24 +++++++++-------- .../refine/importers/TsvCsvImporter.java | 2 +- .../tests/importers/TsvCsvImporterTests.java | 26 +++++++++---------- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/main/src/com/google/refine/importers/ImporterUtilities.java b/main/src/com/google/refine/importers/ImporterUtilities.java index 6c5b9bac0..74d14ed8d 100644 --- a/main/src/com/google/refine/importers/ImporterUtilities.java +++ b/main/src/com/google/refine/importers/ImporterUtilities.java @@ -18,19 +18,21 @@ public class ImporterUtilities { return text.substring(1, text.length() - 1); } - try { - return Long.parseLong(text); - } catch (NumberFormatException e) { - } - - try { - double d = Double.parseDouble(text); - if (!Double.isInfinite(d) && !Double.isNaN(d)) { - return d; + String text2 = text.trim(); + if (text2.length() > 0) { + try { + return Long.parseLong(text2); + } catch (NumberFormatException e) { + } + + try { + double d = Double.parseDouble(text2); + if (!Double.isInfinite(d) && !Double.isNaN(d)) { + return d; + } + } catch (NumberFormatException e) { } - } catch (NumberFormatException e) { } - text = text.trim(); } return text; } diff --git a/main/src/com/google/refine/importers/TsvCsvImporter.java b/main/src/com/google/refine/importers/TsvCsvImporter.java index 7d959df02..e6097cb03 100644 --- a/main/src/com/google/refine/importers/TsvCsvImporter.java +++ b/main/src/com/google/refine/importers/TsvCsvImporter.java @@ -136,7 +136,7 @@ public class TsvCsvImporter implements ReaderImporter,StreamImporter { //add parsed data to row for(String s : cells){ if (ExpressionUtils.isNonBlankData(s)) { - Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s.trim()) : s; + Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s) : s; row.cells.add(new Cell(value, null)); }else{ row.cells.add(null); diff --git a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java index 7ff2eed04..8ee9503a4 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java @@ -173,8 +173,8 @@ public class TsvCsvImporterTests extends RefineTest { } @Test(groups = { }, dataProvider = "CSV-or-null") - public void readDoesTrimsLeadingTrailingWhitespace(String sep){ - String input = " data1 , data2 , data3 "; + public void readDoesNotTrimLeadingTrailingWhitespace(String sep){ + String input = " data1 , 3.4 , data3 "; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false); @@ -184,14 +184,14 @@ public class TsvCsvImporterTests extends RefineTest { Assert.assertEquals(project.columnModel.columns.size(), 3); Assert.assertEquals(project.rows.size(), 1); Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, " data1 "); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, " 3.4 "); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, " data3 "); } @Test(dataProvider = "CSV-or-null") - public void readTrimsLeadingTrailingWhitespace(String sep){ - String input = " data1, data2, data3"; + public void readDoesNotTrimLeadingWhitespace(String sep){ + String input = " data1, 12, data3"; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false); @@ -201,14 +201,14 @@ public class TsvCsvImporterTests extends RefineTest { Assert.assertEquals(project.columnModel.columns.size(), 3); Assert.assertEquals(project.rows.size(), 1); Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, " data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, 12L); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, " data3"); } @Test(dataProvider = "CSV-or-null") public void readCanAddNull(String sep){ - String input = " data1, , data3"; + String input = " data1,, data3"; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false); @@ -218,9 +218,9 @@ public class TsvCsvImporterTests extends RefineTest { Assert.assertEquals(project.columnModel.columns.size(), 3); Assert.assertEquals(project.rows.size(), 1); Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, " data1"); Assert.assertNull(project.rows.get(0).cells.get(1)); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, " data3"); } @Test(dataProvider = "CSV-or-null")