RandomSec/src/main/java/com/metaweb/gridworks/importers/parsers/SeparatorRowParser.java
Stefano Mazzocchi dced641599 - added the ability to specify the character separator for CSV or TSV files that don't use commas or tabs (this was needed to parse a dataset that we got from the BBC to try things out)
- used commons-lang split function instead of the java String.split one, this is necessary to avoid having to escape separators that might be confused for regexps


git-svn-id: http://google-refine.googlecode.com/svn/trunk@368 7d457c2a-affb-35e4-300a-418c747d4874
2010-03-31 22:34:21 +00:00

39 lines
1.0 KiB
Java

package com.metaweb.gridworks.importers.parsers;
import java.io.Serializable;
import org.apache.commons.lang.StringUtils;
import com.metaweb.gridworks.expr.ExpressionUtils;
import com.metaweb.gridworks.importers.ImporterUtilities;
import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Row;
/**
 * Row parser that splits a line of text into cells on a configurable set of
 * separator characters.
 *
 * <p>Splitting is delegated to Commons Lang
 * {@code StringUtils.splitPreserveAllTokens(String, String)}, so the separator
 * is matched literally (it is not a regular expression) and adjacent
 * separators produce empty tokens rather than being collapsed.
 *
 * <p>NOTE: that method treats its second argument as a <em>set of
 * characters</em>, each of which separates tokens on its own. A
 * multi-character separator such as {@code "||"} therefore splits at every
 * {@code '|'}; it is not matched as one whole-string delimiter.
 */
public class SeparatorRowParser extends RowParser {

    // Separator character(s) passed straight to
    // StringUtils.splitPreserveAllTokens. Per the Commons Lang contract a
    // null value means "split on whitespace".
    String sep;

    /**
     * Creates a parser that splits lines on the given separator character(s).
     *
     * @param sep the separator character(s); stored as-is without validation
     */
    public SeparatorRowParser(String sep) {
        this.sep = sep;
    }

    /**
     * Splits {@code line} into tokens and appends exactly one entry per token
     * to {@code row.cells}.
     *
     * <p>Each token is converted with
     * {@code ImporterUtilities.parseCellValue}. When
     * {@code ExpressionUtils.isNonBlankData} accepts the converted value, a
     * new {@link Cell} holding it is appended; otherwise {@code null} is
     * appended, so later tokens keep their position in the row.
     *
     * @param row  the row whose {@code cells} list receives the parsed entries
     * @param line the raw text line to split; when {@code null}, nothing is
     *             appended and {@code false} is returned
     * @return {@code true} if at least one token yielded non-blank data,
     *         {@code false} otherwise
     */
    public boolean parseRow(Row row, String line) {
        String[] cells = StringUtils.splitPreserveAllTokens(line, sep);

        // splitPreserveAllTokens returns null for a null input string; treat
        // that as a line without data instead of failing on cells.length.
        if (cells == null) {
            return false;
        }

        boolean hasData = false;
        for (String text : cells) {
            Serializable value = ImporterUtilities.parseCellValue(text);
            if (ExpressionUtils.isNonBlankData(value)) {
                row.cells.add(new Cell(value, null));
                hasData = true;
            } else {
                // Blank token: keep a placeholder so subsequent cells stay
                // aligned with their token index.
                row.cells.add(null);
            }
        }
        return hasData;
    }
}