RowParser is no longer a parameter of the read method in TsvCsvImporter (the parser is now created inside the method itself)

CSVRowParserTests is now tidied, with the actual and expected values passed to the correct parameters of Assert.assertEquals (actual first, then expected). One more test passes, but 2 tests are still failing.


git-svn-id: http://google-refine.googlecode.com/svn/trunk@792 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-05-17 06:24:58 +00:00
parent 71b08ebdd9
commit 03b1ab01f4
3 changed files with 49 additions and 59 deletions

View File

@ -30,16 +30,17 @@ public class TsvCsvImporter implements Importer {
boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true);
LineNumberReader lnReader = new LineNumberReader(reader);
RowParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ?
new SeparatorRowParser(sep) : null;
read(parser, lnReader, project, sep,
limit, skip, ignoreLines, headerLines,
read(lnReader, project, sep,
limit, skip, ignoreLines, headerLines,
guessValueType, splitIntoColumns
);
}
public void read(RowParser parser, LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns ) throws IOException{
public void read(LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns ) throws IOException{
RowParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ?
new SeparatorRowParser(sep) : null;
List<String> columnNames = new ArrayList<String>();
String line = null;
int rowsWithData = 0;

View File

@ -35,6 +35,7 @@ public class TsvCsvImporterTests {
Project project = null;
Properties properties = null;
@BeforeMethod
public void SetUp(){
SUT = new TsvCsvImporter();
@ -52,11 +53,10 @@ public class TsvCsvImporterTests {
@Test
public void readJustColumns(){
String input = "col1,col2,col3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
SUT.read(lnReader, project, ",", -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -70,10 +70,9 @@ public class TsvCsvImporterTests {
public void readSimpleData_CSV_1Header_1Row(){
String input = "col1,col2,col3\n" +
"data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
SUT.read(lnReader, project, ",", -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -92,10 +91,9 @@ public class TsvCsvImporterTests {
public void readSimpleData_TSV_1Header_1Row(){
String input = "col1\tcol2\tcol3\n" +
"data1\tdata2\tdata3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
SUT.read(lnReader, project, "\t", -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -113,10 +111,9 @@ public class TsvCsvImporterTests {
@Test
public void readSimpleData_0Header_1Row(){
String input = "data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true);
SUT.read(lnReader, project, ",", -1, 0, 0, 0, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -134,10 +131,9 @@ public class TsvCsvImporterTests {
@Test
public void readDoesNotTrimLeadingTrailingWhitespaceWhenNotGuessingValue(){
String input = " data1, data2, data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true);
SUT.read(lnReader, project, ",", -1, 0, 0, 0, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -154,7 +150,7 @@ public class TsvCsvImporterTests {
String input = " data1, data2, data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true);
SUT.read(lnReader, project, ",", -1, 0, 0, 0, true, true);
} catch (IOException e) {
Assert.fail();
}
@ -171,7 +167,7 @@ public class TsvCsvImporterTests {
String input = " data1, , data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true);
SUT.read(lnReader, project, ",", -1, 0, 0, 0, true, true);
} catch (IOException e) {
Assert.fail();
}
@ -188,10 +184,9 @@ public class TsvCsvImporterTests {
String input = "col1,col2,col3\n" +
"sub1,sub2,sub3\n" +
"data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 2, false, true);
SUT.read(lnReader, project, ",", -1, 0, 0, 2, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -210,10 +205,9 @@ public class TsvCsvImporterTests {
public void readSimpleData_RowLongerThanHeader(){
String input = "col1,col2,col3\n" +
"data1,data2,data3,data4,data5,data6";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
SUT.read(lnReader, project, ",", -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -238,10 +232,9 @@ public class TsvCsvImporterTests {
public void readQuotedData(){
String input = "col1,col2,col3\n" +
"\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\",data2";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
SUT.read(lnReader, project, null, -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -262,7 +255,7 @@ public class TsvCsvImporterTests {
"data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 1, 1, false, true);
SUT.read(lnReader, project, ",", -1, 0, 1, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -282,10 +275,9 @@ public class TsvCsvImporterTests {
String input = "col1,col2,col3\n" +
"skip1\n" +
"data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 1, 0, 1, false, true);
SUT.read(lnReader, project, ",", -1, 1, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -311,7 +303,7 @@ public class TsvCsvImporterTests {
"data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 1, 3, 2, false, true);
SUT.read(lnReader, project, ",", -1, 1, 3, 2, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -338,10 +330,9 @@ public class TsvCsvImporterTests {
"data-row1-cell1,data-row1-cell2,data-row1-cell3\n" +
"data-row2-cell1,data-row2-cell2,\n" + //missing last data point of this row on purpose
"data-row3-cell1,data-row3-cell2,data-row1-cell3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, 2, 2, 3, 2, false, true);
SUT.read(lnReader, project, ",", 2, 2, 3, 2, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -354,7 +345,7 @@ public class TsvCsvImporterTests {
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data-row1-cell1");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data-row1-cell2");
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data-row1-cell3");
Assert.assertEquals(project.rows.get(1).cells.size(), 2);
Assert.assertEquals(project.rows.get(1).cells.size(), 3);
Assert.assertEquals(project.rows.get(1).cells.get(0).value, "data-row2-cell1");
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "data-row2-cell2");
}
@ -363,10 +354,9 @@ public class TsvCsvImporterTests {
public void readWithMultiLinedQuotedData(){
String input = "col1,col2,col3\n" +
"\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\",data2";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
SUT.read(lnReader, project, null, -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}

View File

@ -26,8 +26,8 @@ public class CSVRowParserTests {
String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water";
String ROW_WITH_QUOTED_COMMA = "01001,\"BUTTER,WITH SALT\",15.87";
String UNCLOSED_QUOTED_ROW = "\"Open quoted value, with commas";
String LEADING_QUOTE_WITH_COMMA = "value1, \"Open quoted, with commas\" and close quote but no comma, value3";
String QUOTED = "value1, value2 with \"quote\" in middle, value3";
String LEADING_QUOTE_WITH_COMMA = "value1, \"\"\"Open quoted, with commas\"\" and close quote but no comma\", value3";
String QUOTED = "value1, \"value2 with \"\"quote\"\" in middle\", value3";
String SAMPLE_CSV = SAMPLE_ROW + "\n" + ROW_WITH_QUOTED_COMMA; //Unix line endings?
@ -48,9 +48,9 @@ public class CSVRowParserTests {
lineReader = null;
SUT = null;
}
//------------split tests-------------------------
@Test
public void split(){
List<String> splitLine = SUT.split(SAMPLE_ROW, lineReader);
@ -63,47 +63,46 @@ public class CSVRowParserTests {
@Test
public void splitWithQuotedComma(){
List<String> splitLine = SUT.split(ROW_WITH_QUOTED_COMMA, lineReader);
Assert.assertEquals(3, splitLine.size());
Assert.assertEquals("01001", splitLine.get(0));
Assert.assertEquals("BUTTER,WITH SALT", splitLine.get(1));
Assert.assertEquals("15.87", splitLine.get(2));
Assert.assertEquals(splitLine.size(), 3);
Assert.assertEquals(splitLine.get(0), "01001");
Assert.assertEquals(splitLine.get(1), "BUTTER,WITH SALT");
Assert.assertEquals(splitLine.get(2), "15.87");
}
@Test(enabled = false, groups = { "broken" })
@Test
public void splitWithUnclosedQuote(){
try {
when(lineReader.readLine()).thenReturn("continuation of row above, with comma\",value2");
when(lineReader.readLine()).thenReturn(" continuation of row above, with comma\",value2");
} catch (IOException e) {
Assert.fail();
}
List<String> splitLine = SUT.split(UNCLOSED_QUOTED_ROW, lineReader);
Assert.assertEquals(1, splitLine.size());
Assert.assertEquals(UNCLOSED_QUOTED_ROW, splitLine.get(0));
Assert.assertEquals(UNCLOSED_QUOTED_ROW + "\ncontinuation of row above, with comma\"", splitLine.get(0));
Assert.assertEquals("value2", splitLine.get(1));
Assert.assertEquals(splitLine.size(), 2);
Assert.assertEquals(splitLine.get(0), "Open quoted value, with commas\n continuation of row above, with comma");
Assert.assertEquals(splitLine.get(1), "value2");
try {
verify(lineReader, times(1)).readLine();
} catch (IOException e) {
Assert.fail();
}
}
@Test(enabled = false, groups = { "broken" })
public void splitWithLeadingQuoteWithComma(){
List<String> splitLine = SUT.split(LEADING_QUOTE_WITH_COMMA, lineReader);
Assert.assertEquals(3, splitLine.size());
Assert.assertEquals("value1", splitLine.get(0));
Assert.assertEquals("\"Open quoted, with commas\" and close quote but no comma", splitLine.get(0));
Assert.assertEquals("value3", splitLine.get(2));
Assert.assertEquals(splitLine.size(), 3);
Assert.assertEquals(splitLine.get(0), "value1");
Assert.assertEquals(splitLine.get(1), "\"Open quoted, with commas\" and close quote but no comma");
Assert.assertEquals(splitLine.get(2), "value3");
}
@Test(enabled = false, groups = { "broken" })
public void splitWithQuoteInsideValue(){
List<String> splitLine = SUT.split(QUOTED, lineReader);
Assert.assertEquals(3, splitLine.size());
Assert.assertEquals("value1", splitLine.get(0));
Assert.assertEquals("value2 with \"quote\" in middle", splitLine.get(1));
Assert.assertEquals("value3", splitLine.get(2));
Assert.assertEquals(splitLine.size(), 3);
Assert.assertEquals(splitLine.get(0), "value1");
Assert.assertEquals(splitLine.get(1), "value2 with \"quote\" in middle");
Assert.assertEquals(splitLine.get(2), "value3");
}
}