RowParser is no longer a parameter of the read method in TsvCsvImporter; the parser is now constructed inside the method itself.

CSVRowParserTests is now tidied: the actual and expected values are passed to Assert.assertEquals in the correct order (actual first, expected second).  One more test passes, but 2 tests are still failing.


git-svn-id: http://google-refine.googlecode.com/svn/trunk@792 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-05-17 06:24:58 +00:00
parent 71b08ebdd9
commit 03b1ab01f4
3 changed files with 49 additions and 59 deletions

View File

@ -30,16 +30,17 @@ public class TsvCsvImporter implements Importer {
boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true); boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true);
LineNumberReader lnReader = new LineNumberReader(reader); LineNumberReader lnReader = new LineNumberReader(reader);
RowParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ?
new SeparatorRowParser(sep) : null;
read(lnReader, project, sep,
read(parser, lnReader, project, sep, limit, skip, ignoreLines, headerLines,
limit, skip, ignoreLines, headerLines,
guessValueType, splitIntoColumns guessValueType, splitIntoColumns
); );
} }
public void read(RowParser parser, LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns ) throws IOException{ public void read(LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns ) throws IOException{
RowParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ?
new SeparatorRowParser(sep) : null;
List<String> columnNames = new ArrayList<String>(); List<String> columnNames = new ArrayList<String>();
String line = null; String line = null;
int rowsWithData = 0; int rowsWithData = 0;

View File

@ -35,6 +35,7 @@ public class TsvCsvImporterTests {
Project project = null; Project project = null;
Properties properties = null; Properties properties = null;
@BeforeMethod @BeforeMethod
public void SetUp(){ public void SetUp(){
SUT = new TsvCsvImporter(); SUT = new TsvCsvImporter();
@ -52,11 +53,10 @@ public class TsvCsvImporterTests {
@Test @Test
public void readJustColumns(){ public void readJustColumns(){
String input = "col1,col2,col3"; String input = "col1,col2,col3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); SUT.read(lnReader, project, ",", -1, 0, 0, 1, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -70,10 +70,9 @@ public class TsvCsvImporterTests {
public void readSimpleData_CSV_1Header_1Row(){ public void readSimpleData_CSV_1Header_1Row(){
String input = "col1,col2,col3\n" + String input = "col1,col2,col3\n" +
"data1,data2,data3"; "data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); SUT.read(lnReader, project, ",", -1, 0, 0, 1, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -92,10 +91,9 @@ public class TsvCsvImporterTests {
public void readSimpleData_TSV_1Header_1Row(){ public void readSimpleData_TSV_1Header_1Row(){
String input = "col1\tcol2\tcol3\n" + String input = "col1\tcol2\tcol3\n" +
"data1\tdata2\tdata3"; "data1\tdata2\tdata3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); SUT.read(lnReader, project, "\t", -1, 0, 0, 1, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -113,10 +111,9 @@ public class TsvCsvImporterTests {
@Test @Test
public void readSimpleData_0Header_1Row(){ public void readSimpleData_0Header_1Row(){
String input = "data1,data2,data3"; String input = "data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true); SUT.read(lnReader, project, ",", -1, 0, 0, 0, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -134,10 +131,9 @@ public class TsvCsvImporterTests {
@Test @Test
public void readDoesNotTrimLeadingTrailingWhitespaceWhenNotGuessingValue(){ public void readDoesNotTrimLeadingTrailingWhitespaceWhenNotGuessingValue(){
String input = " data1, data2, data3"; String input = " data1, data2, data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true); SUT.read(lnReader, project, ",", -1, 0, 0, 0, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -154,7 +150,7 @@ public class TsvCsvImporterTests {
String input = " data1, data2, data3"; String input = " data1, data2, data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true); SUT.read(lnReader, project, ",", -1, 0, 0, 0, true, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -171,7 +167,7 @@ public class TsvCsvImporterTests {
String input = " data1, , data3"; String input = " data1, , data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true); SUT.read(lnReader, project, ",", -1, 0, 0, 0, true, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -188,10 +184,9 @@ public class TsvCsvImporterTests {
String input = "col1,col2,col3\n" + String input = "col1,col2,col3\n" +
"sub1,sub2,sub3\n" + "sub1,sub2,sub3\n" +
"data1,data2,data3"; "data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 2, false, true); SUT.read(lnReader, project, ",", -1, 0, 0, 2, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -210,10 +205,9 @@ public class TsvCsvImporterTests {
public void readSimpleData_RowLongerThanHeader(){ public void readSimpleData_RowLongerThanHeader(){
String input = "col1,col2,col3\n" + String input = "col1,col2,col3\n" +
"data1,data2,data3,data4,data5,data6"; "data1,data2,data3,data4,data5,data6";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); SUT.read(lnReader, project, ",", -1, 0, 0, 1, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -238,10 +232,9 @@ public class TsvCsvImporterTests {
public void readQuotedData(){ public void readQuotedData(){
String input = "col1,col2,col3\n" + String input = "col1,col2,col3\n" +
"\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\",data2"; "\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\",data2";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); SUT.read(lnReader, project, null, -1, 0, 0, 1, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -262,7 +255,7 @@ public class TsvCsvImporterTests {
"data1,data2,data3"; "data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 1, 1, false, true); SUT.read(lnReader, project, ",", -1, 0, 1, 1, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -282,10 +275,9 @@ public class TsvCsvImporterTests {
String input = "col1,col2,col3\n" + String input = "col1,col2,col3\n" +
"skip1\n" + "skip1\n" +
"data1,data2,data3"; "data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 1, 0, 1, false, true); SUT.read(lnReader, project, ",", -1, 1, 0, 1, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -311,7 +303,7 @@ public class TsvCsvImporterTests {
"data1,data2,data3"; "data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 1, 3, 2, false, true); SUT.read(lnReader, project, ",", -1, 1, 3, 2, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -338,10 +330,9 @@ public class TsvCsvImporterTests {
"data-row1-cell1,data-row1-cell2,data-row1-cell3\n" + "data-row1-cell1,data-row1-cell2,data-row1-cell3\n" +
"data-row2-cell1,data-row2-cell2,\n" + //missing last data point of this row on purpose "data-row2-cell1,data-row2-cell2,\n" + //missing last data point of this row on purpose
"data-row3-cell1,data-row3-cell2,data-row1-cell3"; "data-row3-cell1,data-row3-cell2,data-row1-cell3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, 2, 2, 3, 2, false, true); SUT.read(lnReader, project, ",", 2, 2, 3, 2, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
@ -354,7 +345,7 @@ public class TsvCsvImporterTests {
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data-row1-cell1"); Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data-row1-cell1");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data-row1-cell2"); Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data-row1-cell2");
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data-row1-cell3"); Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data-row1-cell3");
Assert.assertEquals(project.rows.get(1).cells.size(), 2); Assert.assertEquals(project.rows.get(1).cells.size(), 3);
Assert.assertEquals(project.rows.get(1).cells.get(0).value, "data-row2-cell1"); Assert.assertEquals(project.rows.get(1).cells.get(0).value, "data-row2-cell1");
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "data-row2-cell2"); Assert.assertEquals(project.rows.get(1).cells.get(1).value, "data-row2-cell2");
} }
@ -363,10 +354,9 @@ public class TsvCsvImporterTests {
public void readWithMultiLinedQuotedData(){ public void readWithMultiLinedQuotedData(){
String input = "col1,col2,col3\n" + String input = "col1,col2,col3\n" +
"\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\",data2"; "\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\",data2";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try { try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); SUT.read(lnReader, project, null, -1, 0, 0, 1, false, true);
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }

View File

@ -26,8 +26,8 @@ public class CSVRowParserTests {
String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water"; String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water";
String ROW_WITH_QUOTED_COMMA = "01001,\"BUTTER,WITH SALT\",15.87"; String ROW_WITH_QUOTED_COMMA = "01001,\"BUTTER,WITH SALT\",15.87";
String UNCLOSED_QUOTED_ROW = "\"Open quoted value, with commas"; String UNCLOSED_QUOTED_ROW = "\"Open quoted value, with commas";
String LEADING_QUOTE_WITH_COMMA = "value1, \"Open quoted, with commas\" and close quote but no comma, value3"; String LEADING_QUOTE_WITH_COMMA = "value1, \"\"\"Open quoted, with commas\"\" and close quote but no comma\", value3";
String QUOTED = "value1, value2 with \"quote\" in middle, value3"; String QUOTED = "value1, \"value2 with \"\"quote\"\" in middle\", value3";
String SAMPLE_CSV = SAMPLE_ROW + "\n" + ROW_WITH_QUOTED_COMMA; //Unix line endings? String SAMPLE_CSV = SAMPLE_ROW + "\n" + ROW_WITH_QUOTED_COMMA; //Unix line endings?
@ -48,9 +48,9 @@ public class CSVRowParserTests {
lineReader = null; lineReader = null;
SUT = null; SUT = null;
} }
//------------split tests------------------------- //------------split tests-------------------------
@Test @Test
public void split(){ public void split(){
List<String> splitLine = SUT.split(SAMPLE_ROW, lineReader); List<String> splitLine = SUT.split(SAMPLE_ROW, lineReader);
@ -63,47 +63,46 @@ public class CSVRowParserTests {
@Test @Test
public void splitWithQuotedComma(){ public void splitWithQuotedComma(){
List<String> splitLine = SUT.split(ROW_WITH_QUOTED_COMMA, lineReader); List<String> splitLine = SUT.split(ROW_WITH_QUOTED_COMMA, lineReader);
Assert.assertEquals(3, splitLine.size()); Assert.assertEquals(splitLine.size(), 3);
Assert.assertEquals("01001", splitLine.get(0)); Assert.assertEquals(splitLine.get(0), "01001");
Assert.assertEquals("BUTTER,WITH SALT", splitLine.get(1)); Assert.assertEquals(splitLine.get(1), "BUTTER,WITH SALT");
Assert.assertEquals("15.87", splitLine.get(2)); Assert.assertEquals(splitLine.get(2), "15.87");
} }
@Test(enabled = false, groups = { "broken" }) @Test
public void splitWithUnclosedQuote(){ public void splitWithUnclosedQuote(){
try { try {
when(lineReader.readLine()).thenReturn("continuation of row above, with comma\",value2"); when(lineReader.readLine()).thenReturn(" continuation of row above, with comma\",value2");
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
List<String> splitLine = SUT.split(UNCLOSED_QUOTED_ROW, lineReader); List<String> splitLine = SUT.split(UNCLOSED_QUOTED_ROW, lineReader);
Assert.assertEquals(1, splitLine.size()); Assert.assertEquals(splitLine.size(), 2);
Assert.assertEquals(UNCLOSED_QUOTED_ROW, splitLine.get(0)); Assert.assertEquals(splitLine.get(0), "Open quoted value, with commas\n continuation of row above, with comma");
Assert.assertEquals(UNCLOSED_QUOTED_ROW + "\ncontinuation of row above, with comma\"", splitLine.get(0)); Assert.assertEquals(splitLine.get(1), "value2");
Assert.assertEquals("value2", splitLine.get(1));
try { try {
verify(lineReader, times(1)).readLine(); verify(lineReader, times(1)).readLine();
} catch (IOException e) { } catch (IOException e) {
Assert.fail(); Assert.fail();
} }
} }
@Test(enabled = false, groups = { "broken" }) @Test(enabled = false, groups = { "broken" })
public void splitWithLeadingQuoteWithComma(){ public void splitWithLeadingQuoteWithComma(){
List<String> splitLine = SUT.split(LEADING_QUOTE_WITH_COMMA, lineReader); List<String> splitLine = SUT.split(LEADING_QUOTE_WITH_COMMA, lineReader);
Assert.assertEquals(3, splitLine.size()); Assert.assertEquals(splitLine.size(), 3);
Assert.assertEquals("value1", splitLine.get(0)); Assert.assertEquals(splitLine.get(0), "value1");
Assert.assertEquals("\"Open quoted, with commas\" and close quote but no comma", splitLine.get(0)); Assert.assertEquals(splitLine.get(1), "\"Open quoted, with commas\" and close quote but no comma");
Assert.assertEquals("value3", splitLine.get(2)); Assert.assertEquals(splitLine.get(2), "value3");
} }
@Test(enabled = false, groups = { "broken" }) @Test(enabled = false, groups = { "broken" })
public void splitWithQuoteInsideValue(){ public void splitWithQuoteInsideValue(){
List<String> splitLine = SUT.split(QUOTED, lineReader); List<String> splitLine = SUT.split(QUOTED, lineReader);
Assert.assertEquals(3, splitLine.size()); Assert.assertEquals(splitLine.size(), 3);
Assert.assertEquals("value1", splitLine.get(0)); Assert.assertEquals(splitLine.get(0), "value1");
Assert.assertEquals("value2 with \"quote\" in middle", splitLine.get(1)); Assert.assertEquals(splitLine.get(1), "value2 with \"quote\" in middle");
Assert.assertEquals("value3", splitLine.get(2)); Assert.assertEquals(splitLine.get(2), "value3");
} }
} }