Use RowParser for parsing the header row, too.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@444 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-04-11 03:42:44 +00:00
parent 73288c5e2a
commit 5928a689e2
4 changed files with 38 additions and 8 deletions

View File

@ -4,6 +4,7 @@ import java.io.InputStream;
import java.io.LineNumberReader; import java.io.LineNumberReader;
import java.io.Reader; import java.io.Reader;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Properties; import java.util.Properties;
@ -47,12 +48,12 @@ public class TsvCsvImporter implements Importer {
} }
if (first) { if (first) {
String[] cells = StringUtils.splitPreserveAllTokens(line, sep); List<String> cells = parser.split(line);
Map<String, Integer> nameToIndex = new HashMap<String, Integer>(); Map<String, Integer> nameToIndex = new HashMap<String, Integer>();
first = false; first = false;
for (int c = 0; c < cells.length; c++) { for (int c = 0; c < cells.size(); c++) {
String cell = cells[c]; String cell = cells.get(c);
if (cell.startsWith("\"") && cell.endsWith("\"")) { if (cell.startsWith("\"") && cell.endsWith("\"")) {
cell = cell.substring(1, cell.length() - 1); cell = cell.substring(1, cell.length() - 1);
} }
@ -71,7 +72,7 @@ public class TsvCsvImporter implements Importer {
project.columnModel.columns.add(column); project.columnModel.columns.add(column);
} }
cellCount = cells.length; cellCount = cells.size();
} else { } else {
Row row = new Row(cellCount); Row row = new Row(cellCount);

View File

@ -1,6 +1,8 @@
package com.metaweb.gridworks.importers.parsers; package com.metaweb.gridworks.importers.parsers;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import com.metaweb.gridworks.expr.ExpressionUtils; import com.metaweb.gridworks.expr.ExpressionUtils;
import com.metaweb.gridworks.importers.ImporterUtilities; import com.metaweb.gridworks.importers.ImporterUtilities;
@ -8,9 +10,8 @@ import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Row; import com.metaweb.gridworks.model.Row;
public class CSVRowParser extends RowParser { public class CSVRowParser extends RowParser {
public List<String> split(String line) {
public boolean parseRow(Row row, String line) { List<String> results = new ArrayList<String>();
boolean hasData = false;
int start = 0; int start = 0;
while (start < line.length()) { while (start < line.length()) {
@ -53,7 +54,19 @@ public class CSVRowParser extends RowParser {
} }
} }
Serializable value = ImporterUtilities.parseCellValue(text); results.add(text);
}
return results;
}
public boolean parseRow(Row row, String line) {
boolean hasData = false;
List<String> strings = split(line);
for (String s : strings) {
Serializable value = ImporterUtilities.parseCellValue(s);
if (ExpressionUtils.isNonBlankData(value)) { if (ExpressionUtils.isNonBlankData(value)) {
row.cells.add(new Cell(value, null)); row.cells.add(new Cell(value, null));
hasData = true; hasData = true;

View File

@ -1,8 +1,11 @@
package com.metaweb.gridworks.importers.parsers; package com.metaweb.gridworks.importers.parsers;
import java.util.List;
import com.metaweb.gridworks.model.Row; import com.metaweb.gridworks.model.Row;
public abstract class RowParser { public abstract class RowParser {
public abstract List<String> split(String line);
public abstract boolean parseRow(Row row, String line); public abstract boolean parseRow(Row row, String line);
} }

View File

@ -1,6 +1,8 @@
package com.metaweb.gridworks.importers.parsers; package com.metaweb.gridworks.importers.parsers;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
@ -17,6 +19,17 @@ public class SeparatorRowParser extends RowParser {
this.sep = sep; this.sep = sep;
} }
/**
 * Splits a line into its raw cell strings using this parser's separator.
 *
 * Tokenizing is delegated to {@code StringUtils.splitPreserveAllTokens}
 * with the {@code sep} field; every token it returns is copied, in order,
 * into a freshly allocated list.
 *
 * @param line the line of text to split
 * @return a new list holding the tokens in their original order
 */
public List<String> split(String line) {
    String[] tokens = StringUtils.splitPreserveAllTokens(line, sep);

    List<String> fields = new ArrayList<String>();
    for (String token : tokens) {
        fields.add(token);
    }
    return fields;
}
public boolean parseRow(Row row, String line) { public boolean parseRow(Row row, String line) {
boolean hasData = false; boolean hasData = false;