From bd8d214a49cd6a1a274bdde36c1f0db5dc5e2e18 Mon Sep 17 00:00:00 2001 From: David Huynh Date: Mon, 17 May 2010 05:55:02 +0000 Subject: [PATCH] Resolved issue 19 for TSV as well, not just for CSV. Touched up TsvCsvImporterTests to leave the comma vs. tab guessing to the importer itself. All tests still pass. git-svn-id: http://google-refine.googlecode.com/svn/trunk@790 7d457c2a-affb-35e4-300a-418c747d4874 --- .../gridworks/importers/TsvCsvImporter.java | 6 +-- ...CSVRowParser.java => TsvCsvRowParser.java} | 14 ++++-- .../tests/importers/TsvCsvImporterTests.java | 46 +++++++++++-------- .../importers/parsers/CSVRowParserTests.java | 6 +-- 4 files changed, 42 insertions(+), 30 deletions(-) rename src/main/java/com/metaweb/gridworks/importers/parsers/{CSVRowParser.java => TsvCsvRowParser.java} (91%) diff --git a/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java b/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java index c23617a99..89f578454 100644 --- a/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java +++ b/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java @@ -10,7 +10,7 @@ import java.util.Properties; import org.apache.commons.lang.StringUtils; -import com.metaweb.gridworks.importers.parsers.CSVRowParser; +import com.metaweb.gridworks.importers.parsers.TsvCsvRowParser; import com.metaweb.gridworks.importers.parsers.NonSplitRowParser; import com.metaweb.gridworks.importers.parsers.RowParser; import com.metaweb.gridworks.importers.parsers.SeparatorRowParser; @@ -57,10 +57,10 @@ public class TsvCsvImporter implements Importer { int tab = line.indexOf('\t'); if (tab >= 0) { sep = "\t"; - parser = new SeparatorRowParser(sep); + parser = new TsvCsvRowParser('\t'); } else { sep = ","; - parser = new CSVRowParser(); + parser = new TsvCsvRowParser(','); } } else { parser = new NonSplitRowParser(); diff --git a/src/main/java/com/metaweb/gridworks/importers/parsers/CSVRowParser.java b/src/main/java/com/metaweb/gridworks/importers/parsers/TsvCsvRowParser.java similarity index 91% rename from src/main/java/com/metaweb/gridworks/importers/parsers/CSVRowParser.java rename to src/main/java/com/metaweb/gridworks/importers/parsers/TsvCsvRowParser.java index b74fe181c..e841e481a 100644 --- a/src/main/java/com/metaweb/gridworks/importers/parsers/CSVRowParser.java +++ b/src/main/java/com/metaweb/gridworks/importers/parsers/TsvCsvRowParser.java @@ -11,7 +11,13 @@ import com.metaweb.gridworks.importers.ImporterUtilities; import com.metaweb.gridworks.model.Cell; import com.metaweb.gridworks.model.Row; -public class CSVRowParser extends RowParser { +public class TsvCsvRowParser extends RowParser { + final protected char _sep; + + public TsvCsvRowParser(char sep) { + _sep = sep; + } + public List split(String line, LineNumberReader lineReader) { List results = new ArrayList(); @@ -43,8 +49,8 @@ public class CSVRowParser extends RowParser { } else { sb.append(line.substring(start, quote)); start = quote + 1; - if (start < line.length() && line.charAt(start) == ',') { - start++; // skip , + if (start < line.length() && line.charAt(start) == _sep) { + start++; // skip separator } break; } @@ -53,7 +59,7 @@ public class CSVRowParser extends RowParser { text = sb.toString(); } else { - int next = line.indexOf(',', start); + int next = line.indexOf(_sep, start); if (next < 0) { text = line.substring(start); start = line.length(); diff --git a/tests/java/src/com/metaweb/gridworks/tests/importers/TsvCsvImporterTests.java b/tests/java/src/com/metaweb/gridworks/tests/importers/TsvCsvImporterTests.java index 7ad9b5221..895fd91d9 100644 --- a/tests/java/src/com/metaweb/gridworks/tests/importers/TsvCsvImporterTests.java +++ b/tests/java/src/com/metaweb/gridworks/tests/importers/TsvCsvImporterTests.java @@ -18,8 +18,6 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; import com.metaweb.gridworks.importers.TsvCsvImporter; -import com.metaweb.gridworks.importers.parsers.CSVRowParser; -import com.metaweb.gridworks.importers.parsers.RowParser; import com.metaweb.gridworks.model.Project; @@ -32,19 +30,16 @@ public class TsvCsvImporterTests { //System Under Test TsvCsvImporter SUT = null; - RowParser parser = null; //mock dependencies Project project = null; Properties properties = null; - @BeforeMethod public void SetUp(){ SUT = new TsvCsvImporter(); project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity properties = mock(Properties.class); - parser = new CSVRowParser(); } @AfterMethod @@ -57,10 +52,11 @@ public class TsvCsvImporterTests { @Test public void readJustColumns(){ String input = "col1,col2,col3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 0, 0, 1, false, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -74,9 +70,10 @@ public class TsvCsvImporterTests { public void readSimpleData_CSV_1Header_1Row(){ String input = "col1,col2,col3\n" + "data1,data2,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 0, 0, 1, false, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -95,9 +92,10 @@ public class TsvCsvImporterTests { public void readSimpleData_TSV_1Header_1Row(){ String input = "col1\tcol2\tcol3\n" + "data1\tdata2\tdata3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, "\t", -1, 0, 0, 1, false, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -115,9 +113,10 @@ public class TsvCsvImporterTests { @Test public void readSimpleData_0Header_1Row(){ String input = "data1,data2,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, false, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true); } catch (IOException e) { Assert.fail(); } @@ -135,9 +134,10 @@ public class TsvCsvImporterTests { @Test public void readDoesNotTrimLeadingTrailingWhitespaceWhenNotGuessingValue(){ String input = " data1, data2, data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, false, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 0, false, true); } catch (IOException e) { Assert.fail(); } @@ -154,7 +154,7 @@ public class TsvCsvImporterTests { String input = " data1, data2, data3"; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, true, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true); } catch (IOException e) { Assert.fail(); } @@ -171,7 +171,7 @@ public class TsvCsvImporterTests { String input = " data1, , data3"; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 0, 0, 0, true, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 0, true, true); } catch (IOException e) { Assert.fail(); } @@ -188,9 +188,10 @@ public class TsvCsvImporterTests { String input = "col1,col2,col3\n" + "sub1,sub2,sub3\n" + "data1,data2,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 0, 0, 2, false, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 2, false, true); } catch (IOException e) { Assert.fail(); } @@ -209,9 +210,10 @@ public class TsvCsvImporterTests { public void readSimpleData_RowLongerThanHeader(){ String input = "col1,col2,col3\n" + "data1,data2,data3,data4,data5,data6"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 0, 0, 1, false, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -236,9 +238,10 @@ public class TsvCsvImporterTests { public void readQuotedData(){ String input = "col1,col2,col3\n" + "\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\",data2"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, ",", -1, 0, 0, 1, false, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -259,7 +262,7 @@ public class TsvCsvImporterTests { "data1,data2,data3"; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 0, 1, 1, false, true); + SUT.read(null, lnReader, project, null, -1, 0, 1, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -279,9 +282,10 @@ public class TsvCsvImporterTests { String input = "col1,col2,col3\n" + "skip1\n" + "data1,data2,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 1, 0, 1, false, true); + SUT.read(null, lnReader, project, null, -1, 1, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } @@ -307,7 +311,7 @@ public class TsvCsvImporterTests { "data1,data2,data3"; LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(parser, lnReader, project, ",", -1, 1, 3, 2, false, true); + SUT.read(null, lnReader, project, null, -1, 1, 3, 2, false, true); } catch (IOException e) { Assert.fail(); } @@ -334,9 +338,10 @@ public class TsvCsvImporterTests { "data-row1-cell1,data-row1-cell2,data-row1-cell3\n" + "data-row2-cell1,data-row2-cell2,\n" + //missing last data point of this row on purpose "data-row3-cell1,data-row3-cell2,data-row1-cell3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, ",", 2, 2, 3, 2, false, true); + SUT.read(null, lnReader, project, null, 2, 2, 3, 2, false, true); } catch (IOException e) { Assert.fail(); } @@ -358,9 +363,10 @@ public class TsvCsvImporterTests { public void readWithMultiLinedQuotedData(){ String input = "col1,col2,col3\n" + "\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\",data2"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); try { - SUT.read(null, lnReader, project, ",", -1, 0, 0, 1, false, true); + SUT.read(null, lnReader, project, null, -1, 0, 0, 1, false, true); } catch (IOException e) { Assert.fail(); } diff --git a/tests/java/src/com/metaweb/gridworks/tests/importers/parsers/CSVRowParserTests.java b/tests/java/src/com/metaweb/gridworks/tests/importers/parsers/CSVRowParserTests.java index cb8e24d7f..3cd60bc55 100644 --- a/tests/java/src/com/metaweb/gridworks/tests/importers/parsers/CSVRowParserTests.java +++ b/tests/java/src/com/metaweb/gridworks/tests/importers/parsers/CSVRowParserTests.java @@ -16,7 +16,7 @@ import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import com.metaweb.gridworks.importers.parsers.CSVRowParser; +import com.metaweb.gridworks.importers.parsers.TsvCsvRowParser; public class CSVRowParserTests { // logging @@ -32,7 +32,7 @@ public class CSVRowParserTests { String SAMPLE_CSV = SAMPLE_ROW + "\n" + ROW_WITH_QUOTED_COMMA; //Unix line endings? //System Under Test - CSVRowParser SUT = null; + TsvCsvRowParser SUT = null; //mocked dependencies LineNumberReader lineReader = null; @@ -40,7 +40,7 @@ public class CSVRowParserTests { @BeforeMethod public void SetUp(){ lineReader = mock(LineNumberReader.class); - SUT = new CSVRowParser(); + SUT = new TsvCsvRowParser(','); } @AfterMethod