diff --git a/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java b/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java index 89f578454..4831662cc 100644 --- a/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java +++ b/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java @@ -30,16 +30,17 @@ public class TsvCsvImporter implements Importer { boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true); LineNumberReader lnReader = new LineNumberReader(reader); - RowParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ? - new SeparatorRowParser(sep) : null; - - read(parser, lnReader, project, sep, - limit, skip, ignoreLines, headerLines, + + + read(lnReader, project, sep, + limit, skip, ignoreLines, headerLines, guessValueType, splitIntoColumns ); } - public void read(RowParser parser, LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns ) throws IOException{ + public void read(LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns ) throws IOException{ + RowParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ? + new SeparatorRowParser(sep) : null; List columnNames = new ArrayList(); String line = null; int rowsWithData = 0; diff --git a/tests/java/src/com/metaweb/gridworks/tests/importers/TsvCsvImporterTests.java b/tests/java/src/com/metaweb/gridworks/tests/importers/TsvCsvImporterTests.java index 895fd91d9..44f0bd53e 100644 --- a/tests/java/src/com/metaweb/gridworks/tests/importers/TsvCsvImporterTests.java +++ b/tests/java/src/com/metaweb/gridworks/tests/importers/TsvCsvImporterTests.java @@ -35,6 +35,7 @@ public class TsvCsvImporterTests { Project project = null; Properties properties = null; + @BeforeMethod public void SetUp(){ SUT = new TsvCsvImporter(); @@ -52,11 +53,10 @@ public class TsvCsvImporterTests { @Test public void readJustColumns(){ String input = "col1,col2,col3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); + SUT.read(lnReader, project, ",", -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -70,10 +70,9 @@ public class TsvCsvImporterTests { public void readSimpleData_CSV_1Header_1Row(){ String input = "col1,col2,col3\n" + "data1,data2,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); + SUT.read(lnReader, project, ",", -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -92,10 +91,9 @@ public class TsvCsvImporterTests { public void readSimpleData_TSV_1Header_1Row(){ String input = "col1\tcol2\tcol3\n" + "data1\tdata2\tdata3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); + SUT.read(lnReader, project, "\t", -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -113,10 +111,9 @@ public class TsvCsvImporterTests { @Test public void readSimpleData_0Header_1Row(){ String input = "data1,data2,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true); + SUT.read(lnReader, project, ",", -1, 0, 0, 0, false, true); } catch (IOException e) { Assert.fail(); } @@ -134,10 +131,9 @@ public class TsvCsvImporterTests { @Test public void readDoesNotTrimLeadingTrailingWhitespaceWhenNotGuessingValue(){ String input = " data1, data2, data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true); + SUT.read(lnReader, project, ",", -1, 0, 0, 0, false, true); } catch (IOException e) { Assert.fail(); } @@ -154,7 +150,7 @@ public class TsvCsvImporterTests { String input = " data1, data2, data3"; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true); + SUT.read(lnReader, project, ",", -1, 0, 0, 0, true, true); } catch (IOException e) { Assert.fail(); } @@ -171,7 +167,7 @@ public class TsvCsvImporterTests { String input = " data1, , data3"; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true); + SUT.read(lnReader, project, ",", -1, 0, 0, 0, true, true); } catch (IOException e) { Assert.fail(); } @@ -188,10 +184,9 @@ public class TsvCsvImporterTests { String input = "col1,col2,col3\n" + "sub1,sub2,sub3\n" + "data1,data2,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 2, false, true); + SUT.read(lnReader, project, ",", -1, 0, 0, 2, false, true); } catch (IOException e) { Assert.fail(); } @@ -210,10 +205,9 @@ public class TsvCsvImporterTests { public void readSimpleData_RowLongerThanHeader(){ String input = "col1,col2,col3\n" + "data1,data2,data3,data4,data5,data6"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); + SUT.read(lnReader, project, ",", -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -238,10 +232,9 @@ public class TsvCsvImporterTests { public void readQuotedData(){ String input = "col1,col2,col3\n" + "\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\",data2"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); + SUT.read(lnReader, project, null, -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -262,7 +255,7 @@ public class TsvCsvImporterTests { "data1,data2,data3"; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 1, 1, false, true); + SUT.read(lnReader, project, ",", -1, 0, 1, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -282,10 +275,9 @@ public class TsvCsvImporterTests { String input = "col1,col2,col3\n" + "skip1\n" + "data1,data2,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 1, 0, 1, false, true); + SUT.read(lnReader, project, ",", -1, 1, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -311,7 +303,7 @@ public class TsvCsvImporterTests { "data1,data2,data3"; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 1, 3, 2, false, true); + SUT.read(lnReader, project, ",", -1, 1, 3, 2, false, true); } catch (IOException e) { Assert.fail(); } @@ -338,10 +330,9 @@ public class TsvCsvImporterTests { "data-row1-cell1,data-row1-cell2,data-row1-cell3\n" + "data-row2-cell1,data-row2-cell2,\n" + //missing last data point of this row on purpose "data-row3-cell1,data-row3-cell2,data-row1-cell3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, 2, 2, 3, 2, false, true); + SUT.read(lnReader, project, ",", 2, 2, 3, 2, false, true); } catch (IOException e) { Assert.fail(); } @@ -354,7 +345,7 @@ public class TsvCsvImporterTests { Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data-row1-cell1"); Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data-row1-cell2"); Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data-row1-cell3"); - Assert.assertEquals(project.rows.get(1).cells.size(), 2); + Assert.assertEquals(project.rows.get(1).cells.size(), 3); Assert.assertEquals(project.rows.get(1).cells.get(0).value, "data-row2-cell1"); Assert.assertEquals(project.rows.get(1).cells.get(1).value, "data-row2-cell2"); } @@ -363,10 +354,9 @@ public class TsvCsvImporterTests { public void readWithMultiLinedQuotedData(){ String input = "col1,col2,col3\n" + "\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\",data2"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); + SUT.read(lnReader, project, null, -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } diff --git a/tests/java/src/com/metaweb/gridworks/tests/importers/parsers/CSVRowParserTests.java b/tests/java/src/com/metaweb/gridworks/tests/importers/parsers/CSVRowParserTests.java index 3cd60bc55..265e6d70e 100644 --- a/tests/java/src/com/metaweb/gridworks/tests/importers/parsers/CSVRowParserTests.java +++ b/tests/java/src/com/metaweb/gridworks/tests/importers/parsers/CSVRowParserTests.java @@ -26,8 +26,8 @@ public class CSVRowParserTests { String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water"; String ROW_WITH_QUOTED_COMMA = "01001,\"BUTTER,WITH SALT\",15.87"; String UNCLOSED_QUOTED_ROW = "\"Open quoted value, with commas"; - String LEADING_QUOTE_WITH_COMMA = "value1, \"Open quoted, with commas\" and close quote but no comma, value3"; - String QUOTED = "value1, value2 with \"quote\" in middle, value3"; + String LEADING_QUOTE_WITH_COMMA = "value1, \"\"\"Open quoted, with commas\"\" and close quote but no comma\", value3"; + String QUOTED = "value1, \"value2 with \"\"quote\"\" in middle\", value3"; String SAMPLE_CSV = SAMPLE_ROW + "\n" + ROW_WITH_QUOTED_COMMA; //Unix line endings? @@ -48,9 +48,9 @@ public class CSVRowParserTests { lineReader = null; SUT = null; } - + //------------split tests------------------------- - + @Test public void split(){ List splitLine = SUT.split(SAMPLE_ROW, lineReader); @@ -63,47 +63,46 @@ public class CSVRowParserTests { @Test public void splitWithQuotedComma(){ List splitLine = SUT.split(ROW_WITH_QUOTED_COMMA, lineReader); - Assert.assertEquals(3, splitLine.size()); - Assert.assertEquals("01001", splitLine.get(0)); - Assert.assertEquals("BUTTER,WITH SALT", splitLine.get(1)); - Assert.assertEquals("15.87", splitLine.get(2)); + Assert.assertEquals(splitLine.size(), 3); + Assert.assertEquals(splitLine.get(0), "01001"); + Assert.assertEquals(splitLine.get(1), "BUTTER,WITH SALT"); + Assert.assertEquals(splitLine.get(2), "15.87"); } - - @Test(enabled = false, groups = { "broken" }) + + @Test public void splitWithUnclosedQuote(){ try { - when(lineReader.readLine()).thenReturn("continuation of row above, with comma\",value2"); + when(lineReader.readLine()).thenReturn(" continuation of row above, with comma\",value2"); } catch (IOException e) { Assert.fail(); } List splitLine = SUT.split(UNCLOSED_QUOTED_ROW, lineReader); - Assert.assertEquals(1, splitLine.size()); - Assert.assertEquals(UNCLOSED_QUOTED_ROW, splitLine.get(0)); - Assert.assertEquals(UNCLOSED_QUOTED_ROW + "\ncontinuation of row above, with comma\"", splitLine.get(0)); - Assert.assertEquals("value2", splitLine.get(1)); - + Assert.assertEquals(splitLine.size(), 2); + Assert.assertEquals(splitLine.get(0), "Open quoted value, with commas\n continuation of row above, with comma"); + Assert.assertEquals(splitLine.get(1), "value2"); + try { verify(lineReader, times(1)).readLine(); } catch (IOException e) { Assert.fail(); } } - + @Test(enabled = false, groups = { "broken" }) public void splitWithLeadingQuoteWithComma(){ List splitLine = SUT.split(LEADING_QUOTE_WITH_COMMA, lineReader); - Assert.assertEquals(3, splitLine.size()); - Assert.assertEquals("value1", splitLine.get(0)); - Assert.assertEquals("\"Open quoted, with commas\" and close quote but no comma", splitLine.get(0)); - Assert.assertEquals("value3", splitLine.get(2)); + Assert.assertEquals(splitLine.size(), 3); + Assert.assertEquals(splitLine.get(0), "value1"); + Assert.assertEquals(splitLine.get(1), "\"Open quoted, with commas\" and close quote but no comma"); + Assert.assertEquals(splitLine.get(2), "value3"); } - + @Test(enabled = false, groups = { "broken" }) public void splitWithQuoteInsideValue(){ List splitLine = SUT.split(QUOTED, lineReader); - Assert.assertEquals(3, splitLine.size()); - Assert.assertEquals("value1", splitLine.get(0)); - Assert.assertEquals("value2 with \"quote\" in middle", splitLine.get(1)); - Assert.assertEquals("value3", splitLine.get(2)); + Assert.assertEquals(splitLine.size(), 3); + Assert.assertEquals(splitLine.get(0), "value1"); + Assert.assertEquals(splitLine.get(1), "value2 with \"quote\" in middle"); + Assert.assertEquals(splitLine.get(2), "value3"); } }