Use RowParser for parsing the header row, too.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@444 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-04-11 03:42:44 +00:00
parent 73288c5e2a
commit 5928a689e2
4 changed files with 38 additions and 8 deletions

View File

@ -4,6 +4,7 @@ import java.io.InputStream;
import java.io.LineNumberReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@ -47,12 +48,12 @@ public class TsvCsvImporter implements Importer {
}
if (first) {
String[] cells = StringUtils.splitPreserveAllTokens(line, sep);
List<String> cells = parser.split(line);
Map<String, Integer> nameToIndex = new HashMap<String, Integer>();
first = false;
for (int c = 0; c < cells.length; c++) {
String cell = cells[c];
for (int c = 0; c < cells.size(); c++) {
String cell = cells.get(c);
if (cell.startsWith("\"") && cell.endsWith("\"")) {
cell = cell.substring(1, cell.length() - 1);
}
@ -71,7 +72,7 @@ public class TsvCsvImporter implements Importer {
project.columnModel.columns.add(column);
}
cellCount = cells.length;
cellCount = cells.size();
} else {
Row row = new Row(cellCount);

View File

@ -1,6 +1,8 @@
package com.metaweb.gridworks.importers.parsers;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import com.metaweb.gridworks.expr.ExpressionUtils;
import com.metaweb.gridworks.importers.ImporterUtilities;
@ -8,9 +10,8 @@ import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Row;
public class CSVRowParser extends RowParser {
public boolean parseRow(Row row, String line) {
boolean hasData = false;
public List<String> split(String line) {
List<String> results = new ArrayList<String>();
int start = 0;
while (start < line.length()) {
@ -53,7 +54,19 @@ public class CSVRowParser extends RowParser {
}
}
Serializable value = ImporterUtilities.parseCellValue(text);
results.add(text);
}
return results;
}
public boolean parseRow(Row row, String line) {
boolean hasData = false;
List<String> strings = split(line);
for (String s : strings) {
Serializable value = ImporterUtilities.parseCellValue(s);
if (ExpressionUtils.isNonBlankData(value)) {
row.cells.add(new Cell(value, null));
hasData = true;

View File

@ -1,8 +1,11 @@
package com.metaweb.gridworks.importers.parsers;
import java.util.List;
import com.metaweb.gridworks.model.Row;
/**
 * Base type for parsers that turn one line of text input into cells.
 * Concrete subclasses decide how a line is broken apart.
 */
public abstract class RowParser {
/**
 * Splits a single line of input into its raw cell strings, without
 * converting them to typed values.
 *
 * @param line the text of one input line
 * @return the cell strings extracted from {@code line}
 */
public abstract List<String> split(String line);
/**
 * Parses a single line of input into the given row.
 *
 * @param row  the row that receives the parsed cells
 * @param line the text of one input line
 * @return NOTE(review): the visible implementations appear to return
 *         {@code true} when at least one non-blank value was found on the
 *         line -- confirm against all subclasses
 */
public abstract boolean parseRow(Row row, String line);
}

View File

@ -1,6 +1,8 @@
package com.metaweb.gridworks.importers.parsers;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
@ -17,6 +19,17 @@ public class SeparatorRowParser extends RowParser {
this.sep = sep;
}
/**
 * Splits a line into cell strings using this parser's separator.
 *
 * Delegates the actual tokenizing to
 * {@code StringUtils.splitPreserveAllTokens(line, sep)} and copies the
 * resulting array, in order, into a new mutable list.
 *
 * @param line the text of one input line
 * @return a new list holding the tokens in the order they were produced
 */
public List<String> split(String line) {
    List<String> tokens = new ArrayList<String>();
    // Copy element by element so the caller gets an independent, growable list.
    for (String token : StringUtils.splitPreserveAllTokens(line, sep)) {
        tokens.add(token);
    }
    return tokens;
}
public boolean parseRow(Row row, String line) {
boolean hasData = false;