From 5928a689e22f360ee21614dbce9809ddcc806008 Mon Sep 17 00:00:00 2001 From: David Huynh Date: Sun, 11 Apr 2010 03:42:44 +0000 Subject: [PATCH] Use RowParser for parsing the header row, too. git-svn-id: http://google-refine.googlecode.com/svn/trunk@444 7d457c2a-affb-35e4-300a-418c747d4874 --- .../gridworks/importers/TsvCsvImporter.java | 9 ++++---- .../importers/parsers/CSVRowParser.java | 21 +++++++++++++++---- .../importers/parsers/RowParser.java | 3 +++ .../importers/parsers/SeparatorRowParser.java | 13 ++++++++++++ 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java b/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java index 6eadd8676..b5a67e9d7 100644 --- a/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java +++ b/src/main/java/com/metaweb/gridworks/importers/TsvCsvImporter.java @@ -4,6 +4,7 @@ import java.io.InputStream; import java.io.LineNumberReader; import java.io.Reader; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Properties; @@ -47,12 +48,12 @@ public class TsvCsvImporter implements Importer { } if (first) { - String[] cells = StringUtils.splitPreserveAllTokens(line, sep); + List cells = parser.split(line); Map nameToIndex = new HashMap(); first = false; - for (int c = 0; c < cells.length; c++) { - String cell = cells[c]; + for (int c = 0; c < cells.size(); c++) { + String cell = cells.get(c); if (cell.startsWith("\"") && cell.endsWith("\"")) { cell = cell.substring(1, cell.length() - 1); } @@ -71,7 +72,7 @@ public class TsvCsvImporter implements Importer { project.columnModel.columns.add(column); } - cellCount = cells.length; + cellCount = cells.size(); } else { Row row = new Row(cellCount); diff --git a/src/main/java/com/metaweb/gridworks/importers/parsers/CSVRowParser.java b/src/main/java/com/metaweb/gridworks/importers/parsers/CSVRowParser.java index 1ae8e4be2..6f8daf977 100644 --- a/src/main/java/com/metaweb/gridworks/importers/parsers/CSVRowParser.java +++ b/src/main/java/com/metaweb/gridworks/importers/parsers/CSVRowParser.java @@ -1,6 +1,8 @@ package com.metaweb.gridworks.importers.parsers; import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; import com.metaweb.gridworks.expr.ExpressionUtils; import com.metaweb.gridworks.importers.ImporterUtilities; @@ -8,9 +10,8 @@ import com.metaweb.gridworks.model.Cell; import com.metaweb.gridworks.model.Row; public class CSVRowParser extends RowParser { - - public boolean parseRow(Row row, String line) { - boolean hasData = false; + public List split(String line) { + List results = new ArrayList(); int start = 0; while (start < line.length()) { @@ -53,7 +54,19 @@ public class CSVRowParser extends RowParser { } } - Serializable value = ImporterUtilities.parseCellValue(text); + results.add(text); + } + + return results; + } + + public boolean parseRow(Row row, String line) { + boolean hasData = false; + + List strings = split(line); + for (String s : strings) { + Serializable value = ImporterUtilities.parseCellValue(s); + if (ExpressionUtils.isNonBlankData(value)) { row.cells.add(new Cell(value, null)); hasData = true; diff --git a/src/main/java/com/metaweb/gridworks/importers/parsers/RowParser.java b/src/main/java/com/metaweb/gridworks/importers/parsers/RowParser.java index e424312b4..f09605989 100644 --- a/src/main/java/com/metaweb/gridworks/importers/parsers/RowParser.java +++ b/src/main/java/com/metaweb/gridworks/importers/parsers/RowParser.java @@ -1,8 +1,11 @@ package com.metaweb.gridworks.importers.parsers; +import java.util.List; + import com.metaweb.gridworks.model.Row; public abstract class RowParser { + public abstract List split(String line); public abstract boolean parseRow(Row row, String line); } diff --git a/src/main/java/com/metaweb/gridworks/importers/parsers/SeparatorRowParser.java b/src/main/java/com/metaweb/gridworks/importers/parsers/SeparatorRowParser.java index 1a1f0947c..bdc473f40 100644 --- a/src/main/java/com/metaweb/gridworks/importers/parsers/SeparatorRowParser.java +++ b/src/main/java/com/metaweb/gridworks/importers/parsers/SeparatorRowParser.java @@ -1,6 +1,8 @@ package com.metaweb.gridworks.importers.parsers; import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; import org.apache.commons.lang.StringUtils; @@ -17,6 +19,17 @@ public class SeparatorRowParser extends RowParser { this.sep = sep; } + public List split(String line) { + String[] cells = StringUtils.splitPreserveAllTokens(line, sep); + + List results = new ArrayList(); + for (int c = 0; c < cells.length; c++) { + results.add(cells[c]); + } + + return results; + } + public boolean parseRow(Row row, String line) { boolean hasData = false;