Resolved issue 19 for TSV as well, not just for CSV.
Touched up TsvCsvImporterTests to leave the comma vs. tab guessing to the importer itself. All tests still pass.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@790 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
6f74bcaa26
commit
bd8d214a49
@ -10,7 +10,7 @@ import java.util.Properties;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import com.metaweb.gridworks.importers.parsers.CSVRowParser;
|
||||
import com.metaweb.gridworks.importers.parsers.TsvCsvRowParser;
|
||||
import com.metaweb.gridworks.importers.parsers.NonSplitRowParser;
|
||||
import com.metaweb.gridworks.importers.parsers.RowParser;
|
||||
import com.metaweb.gridworks.importers.parsers.SeparatorRowParser;
|
||||
@ -57,10 +57,10 @@ public class TsvCsvImporter implements Importer {
|
||||
int tab = line.indexOf('\t');
|
||||
if (tab >= 0) {
|
||||
sep = "\t";
|
||||
parser = new SeparatorRowParser(sep);
|
||||
parser = new TsvCsvRowParser('\t');
|
||||
} else {
|
||||
sep = ",";
|
||||
parser = new CSVRowParser();
|
||||
parser = new TsvCsvRowParser(',');
|
||||
}
|
||||
} else {
|
||||
parser = new NonSplitRowParser();
|
||||
|
@ -11,7 +11,13 @@ import com.metaweb.gridworks.importers.ImporterUtilities;
|
||||
import com.metaweb.gridworks.model.Cell;
|
||||
import com.metaweb.gridworks.model.Row;
|
||||
|
||||
public class CSVRowParser extends RowParser {
|
||||
public class TsvCsvRowParser extends RowParser {
|
||||
final protected char _sep;
|
||||
|
||||
/**
 * Creates a row parser bound to a single separator character.
 *
 * @param sep the separator character stored in {@code _sep}; the importer
 *            constructs this parser with {@code '\t'} or {@code ','}
 */
public TsvCsvRowParser(char sep) {
|
||||
_sep = sep;
|
||||
}
|
||||
|
||||
public List<String> split(String line, LineNumberReader lineReader) {
|
||||
List<String> results = new ArrayList<String>();
|
||||
|
||||
@ -43,8 +49,8 @@ public class CSVRowParser extends RowParser {
|
||||
} else {
|
||||
sb.append(line.substring(start, quote));
|
||||
start = quote + 1;
|
||||
if (start < line.length() && line.charAt(start) == ',') {
|
||||
start++; // skip ,
|
||||
if (start < line.length() && line.charAt(start) == _sep) {
|
||||
start++; // skip separator
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -53,7 +59,7 @@ public class CSVRowParser extends RowParser {
|
||||
|
||||
text = sb.toString();
|
||||
} else {
|
||||
int next = line.indexOf(',', start);
|
||||
int next = line.indexOf(_sep, start);
|
||||
if (next < 0) {
|
||||
text = line.substring(start);
|
||||
start = line.length();
|
@ -18,8 +18,6 @@ import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.metaweb.gridworks.importers.TsvCsvImporter;
|
||||
import com.metaweb.gridworks.importers.parsers.CSVRowParser;
|
||||
import com.metaweb.gridworks.importers.parsers.RowParser;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
|
||||
|
||||
@ -32,19 +30,16 @@ public class TsvCsvImporterTests {
|
||||
|
||||
//System Under Test
|
||||
TsvCsvImporter SUT = null;
|
||||
RowParser parser = null;
|
||||
|
||||
//mock dependencies
|
||||
Project project = null;
|
||||
Properties properties = null;
|
||||
|
||||
|
||||
@BeforeMethod
|
||||
public void SetUp(){
|
||||
SUT = new TsvCsvImporter();
|
||||
project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
|
||||
properties = mock(Properties.class);
|
||||
parser = new CSVRowParser();
|
||||
}
|
||||
|
||||
@AfterMethod
|
||||
@ -57,10 +52,11 @@ public class TsvCsvImporterTests {
|
||||
@Test
|
||||
public void readJustColumns(){
|
||||
String input = "col1,col2,col3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 1, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -74,9 +70,10 @@ public class TsvCsvImporterTests {
|
||||
public void readSimpleData_CSV_1Header_1Row(){
|
||||
String input = "col1,col2,col3\n" +
|
||||
"data1,data2,data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 1, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -95,9 +92,10 @@ public class TsvCsvImporterTests {
|
||||
public void readSimpleData_TSV_1Header_1Row(){
|
||||
String input = "col1\tcol2\tcol3\n" +
|
||||
"data1\tdata2\tdata3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(null, lnReader, project, "\t", -1, 0, 0, 1, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -115,9 +113,10 @@ public class TsvCsvImporterTests {
|
||||
@Test
|
||||
public void readSimpleData_0Header_1Row(){
|
||||
String input = "data1,data2,data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -135,9 +134,10 @@ public class TsvCsvImporterTests {
|
||||
@Test
|
||||
public void readDoesNotTrimLeadingTrailingWhitespaceWhenNotGuessingValue(){
|
||||
String input = " data1, data2, data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -154,7 +154,7 @@ public class TsvCsvImporterTests {
|
||||
String input = " data1, data2, data3";
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, true, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -171,7 +171,7 @@ public class TsvCsvImporterTests {
|
||||
String input = " data1, , data3";
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, true, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -188,9 +188,10 @@ public class TsvCsvImporterTests {
|
||||
String input = "col1,col2,col3\n" +
|
||||
"sub1,sub2,sub3\n" +
|
||||
"data1,data2,data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 2, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 2, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -209,9 +210,10 @@ public class TsvCsvImporterTests {
|
||||
public void readSimpleData_RowLongerThanHeader(){
|
||||
String input = "col1,col2,col3\n" +
|
||||
"data1,data2,data3,data4,data5,data6";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 0, 0, 1, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -236,9 +238,10 @@ public class TsvCsvImporterTests {
|
||||
public void readQuotedData(){
|
||||
String input = "col1,col2,col3\n" +
|
||||
"\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\",data2";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(null, lnReader, project, ",", -1, 0, 0, 1, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -259,7 +262,7 @@ public class TsvCsvImporterTests {
|
||||
"data1,data2,data3";
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 0, 1, 1, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 1, 1, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -279,9 +282,10 @@ public class TsvCsvImporterTests {
|
||||
String input = "col1,col2,col3\n" +
|
||||
"skip1\n" +
|
||||
"data1,data2,data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 1, 0, 1, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 1, 0, 1, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -307,7 +311,7 @@ public class TsvCsvImporterTests {
|
||||
"data1,data2,data3";
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(parser, lnReader, project, ",", -1, 1, 3, 2, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 1, 3, 2, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -334,9 +338,10 @@ public class TsvCsvImporterTests {
|
||||
"data-row1-cell1,data-row1-cell2,data-row1-cell3\n" +
|
||||
"data-row2-cell1,data-row2-cell2,\n" + //missing last data point of this row on purpose
|
||||
"data-row3-cell1,data-row3-cell2,data-row1-cell3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(null, lnReader, project, ",", 2, 2, 3, 2, false, true);
|
||||
SUT.read(null, lnReader, project, null, 2, 2, 3, 2, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -358,9 +363,10 @@ public class TsvCsvImporterTests {
|
||||
public void readWithMultiLinedQuotedData(){
|
||||
String input = "col1,col2,col3\n" +
|
||||
"\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\",data2";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(null, lnReader, project, ",", -1, 0, 0, 1, false, true);
|
||||
SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true);
|
||||
} catch (IOException e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ import org.testng.annotations.AfterMethod;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.metaweb.gridworks.importers.parsers.CSVRowParser;
|
||||
import com.metaweb.gridworks.importers.parsers.TsvCsvRowParser;
|
||||
|
||||
public class CSVRowParserTests {
|
||||
// logging
|
||||
@ -32,7 +32,7 @@ public class CSVRowParserTests {
|
||||
String SAMPLE_CSV = SAMPLE_ROW + "\n" + ROW_WITH_QUOTED_COMMA; //Unix line endings?
|
||||
|
||||
//System Under Test
|
||||
CSVRowParser SUT = null;
|
||||
TsvCsvRowParser SUT = null;
|
||||
|
||||
//mocked dependencies
|
||||
LineNumberReader lineReader = null;
|
||||
@ -40,7 +40,7 @@ public class CSVRowParserTests {
|
||||
@BeforeMethod
|
||||
public void SetUp(){
|
||||
lineReader = mock(LineNumberReader.class);
|
||||
SUT = new CSVRowParser();
|
||||
SUT = new TsvCsvRowParser(',');
|
||||
}
|
||||
|
||||
@AfterMethod
|
||||
|
Loading…
Reference in New Issue
Block a user