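Resolved issue 19 for TSV as well, not just for CSV: CSVRowParser is generalized into TsvCsvRowParser, which takes the separator character as a constructor argument.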

Touched up TsvCsvImporterTests to pass null for both the parser and the separator, leaving the comma vs. tab guessing to the importer itself. All tests still pass.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@790 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-05-17 05:55:02 +00:00
parent 6f74bcaa26
commit bd8d214a49
4 changed files with 42 additions and 30 deletions

View File

@ -10,7 +10,7 @@ import java.util.Properties;
import org.apache.commons.lang.StringUtils;
import com.metaweb.gridworks.importers.parsers.CSVRowParser;
import com.metaweb.gridworks.importers.parsers.TsvCsvRowParser;
import com.metaweb.gridworks.importers.parsers.NonSplitRowParser;
import com.metaweb.gridworks.importers.parsers.RowParser;
import com.metaweb.gridworks.importers.parsers.SeparatorRowParser;
@ -57,10 +57,10 @@ public class TsvCsvImporter implements Importer {
int tab = line.indexOf('\t');
if (tab >= 0) {
sep = "\t";
parser = new SeparatorRowParser(sep);
parser = new TsvCsvRowParser('\t');
} else {
sep = ",";
parser = new CSVRowParser();
parser = new TsvCsvRowParser(',');
}
} else {
parser = new NonSplitRowParser();

View File

@ -11,7 +11,13 @@ import com.metaweb.gridworks.importers.ImporterUtilities;
import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Row;
public class CSVRowParser extends RowParser {
public class TsvCsvRowParser extends RowParser {
final protected char _sep;
public TsvCsvRowParser(char sep) {
_sep = sep;
}
public List<String> split(String line, LineNumberReader lineReader) {
List<String> results = new ArrayList<String>();
@ -43,8 +49,8 @@ public class CSVRowParser extends RowParser {
} else {
sb.append(line.substring(start, quote));
start = quote + 1;
if (start < line.length() && line.charAt(start) == ',') {
start++; // skip ,
if (start < line.length() && line.charAt(start) == _sep) {
start++; // skip separator
}
break;
}
@ -53,7 +59,7 @@ public class CSVRowParser extends RowParser {
text = sb.toString();
} else {
int next = line.indexOf(',', start);
int next = line.indexOf(_sep, start);
if (next < 0) {
text = line.substring(start);
start = line.length();

View File

@ -18,8 +18,6 @@ import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import com.metaweb.gridworks.importers.TsvCsvImporter;
import com.metaweb.gridworks.importers.parsers.CSVRowParser;
import com.metaweb.gridworks.importers.parsers.RowParser;
import com.metaweb.gridworks.model.Project;
@ -32,19 +30,16 @@ public class TsvCsvImporterTests {
//System Under Test
TsvCsvImporter SUT = null;
RowParser parser = null;
//mock dependencies
Project project = null;
Properties properties = null;
@BeforeMethod
public void SetUp(){
SUT = new TsvCsvImporter();
project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
properties = mock(Properties.class);
parser = new CSVRowParser();
}
@AfterMethod
@ -57,10 +52,11 @@ public class TsvCsvImporterTests {
@Test
public void readJustColumns(){
String input = "col1,col2,col3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 1, false, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -74,9 +70,10 @@ public class TsvCsvImporterTests {
public void readSimpleData_CSV_1Header_1Row(){
String input = "col1,col2,col3\n" +
"data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 1, false, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -95,9 +92,10 @@ public class TsvCsvImporterTests {
public void readSimpleData_TSV_1Header_1Row(){
String input = "col1\tcol2\tcol3\n" +
"data1\tdata2\tdata3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, "\t", -1, 0, 0, 1, false, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -115,9 +113,10 @@ public class TsvCsvImporterTests {
@Test
public void readSimpleData_0Header_1Row(){
String input = "data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, false, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -135,9 +134,10 @@ public class TsvCsvImporterTests {
@Test
public void readDoesNotTrimLeadingTrailingWhitespaceWhenNotGuessingValue(){
String input = " data1, data2, data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, false, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -154,7 +154,7 @@ public class TsvCsvImporterTests {
String input = " data1, data2, data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, true, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true);
} catch (IOException e) {
Assert.fail();
}
@ -171,7 +171,7 @@ public class TsvCsvImporterTests {
String input = " data1, , data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, true, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true);
} catch (IOException e) {
Assert.fail();
}
@ -188,9 +188,10 @@ public class TsvCsvImporterTests {
String input = "col1,col2,col3\n" +
"sub1,sub2,sub3\n" +
"data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 2, false, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 2, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -209,9 +210,10 @@ public class TsvCsvImporterTests {
public void readSimpleData_RowLongerThanHeader(){
String input = "col1,col2,col3\n" +
"data1,data2,data3,data4,data5,data6";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 1, false, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -236,9 +238,10 @@ public class TsvCsvImporterTests {
public void readQuotedData(){
String input = "col1,col2,col3\n" +
"\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\",data2";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, ",", -1, 0, 0, 1, false, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -259,7 +262,7 @@ public class TsvCsvImporterTests {
"data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 0, 1, 1, false, true);
SUT.read(null, lnReader, project, null, -1, 0, 1, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -279,9 +282,10 @@ public class TsvCsvImporterTests {
String input = "col1,col2,col3\n" +
"skip1\n" +
"data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 1, 0, 1, false, true);
SUT.read(null, lnReader, project, null, -1, 1, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -307,7 +311,7 @@ public class TsvCsvImporterTests {
"data1,data2,data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(parser, lnReader, project, ",", -1, 1, 3, 2, false, true);
SUT.read(null, lnReader, project, null, -1, 1, 3, 2, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -334,9 +338,10 @@ public class TsvCsvImporterTests {
"data-row1-cell1,data-row1-cell2,data-row1-cell3\n" +
"data-row2-cell1,data-row2-cell2,\n" + //missing last data point of this row on purpose
"data-row3-cell1,data-row3-cell2,data-row1-cell3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, ",", 2, 2, 3, 2, false, true);
SUT.read(null, lnReader, project, null, 2, 2, 3, 2, false, true);
} catch (IOException e) {
Assert.fail();
}
@ -358,9 +363,10 @@ public class TsvCsvImporterTests {
public void readWithMultiLinedQuotedData(){
String input = "col1,col2,col3\n" +
"\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\",data2";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(null, lnReader, project, ",", -1, 0, 0, 1, false, true);
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
} catch (IOException e) {
Assert.fail();
}

View File

@ -16,7 +16,7 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import com.metaweb.gridworks.importers.parsers.CSVRowParser;
import com.metaweb.gridworks.importers.parsers.TsvCsvRowParser;
public class CSVRowParserTests {
// logging
@ -32,7 +32,7 @@ public class CSVRowParserTests {
String SAMPLE_CSV = SAMPLE_ROW + "\n" + ROW_WITH_QUOTED_COMMA; //Unix line endings?
//System Under Test
CSVRowParser SUT = null;
TsvCsvRowParser SUT = null;
//mocked dependencies
LineNumberReader lineReader = null;
@ -40,7 +40,7 @@ public class CSVRowParserTests {
@BeforeMethod
public void SetUp(){
lineReader = mock(LineNumberReader.class);
SUT = new CSVRowParser();
SUT = new TsvCsvRowParser(',');
}
@AfterMethod