2010-02-05 20:19:38 +01:00
|
|
|
package com.metaweb.gridworks.importers;
|
|
|
|
|
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.LineNumberReader;
|
|
|
|
import java.io.Reader;
|
2010-04-06 19:55:36 +02:00
|
|
|
import java.util.HashMap;
|
2010-04-11 05:42:44 +02:00
|
|
|
import java.util.List;
|
2010-04-06 19:55:36 +02:00
|
|
|
import java.util.Map;
|
2010-02-05 20:19:38 +01:00
|
|
|
import java.util.Properties;
|
|
|
|
|
|
|
|
import org.apache.commons.lang.NotImplementedException;
|
2010-04-01 00:34:21 +02:00
|
|
|
import org.apache.commons.lang.StringUtils;
|
2010-02-05 20:19:38 +01:00
|
|
|
|
2010-04-01 00:34:21 +02:00
|
|
|
import com.metaweb.gridworks.importers.parsers.CSVRowParser;
|
|
|
|
import com.metaweb.gridworks.importers.parsers.RowParser;
|
|
|
|
import com.metaweb.gridworks.importers.parsers.SeparatorRowParser;
|
2010-02-05 20:19:38 +01:00
|
|
|
import com.metaweb.gridworks.model.Column;
|
|
|
|
import com.metaweb.gridworks.model.Project;
|
|
|
|
import com.metaweb.gridworks.model.Row;
|
|
|
|
|
|
|
|
public class TsvCsvImporter implements Importer {
|
|
|
|
|
2010-04-11 23:54:56 +02:00
|
|
|
public void read(Reader reader, Project project, Properties options) throws Exception {
|
|
|
|
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
|
|
|
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
|
|
|
boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true);
|
|
|
|
|
2010-03-03 05:19:58 +01:00
|
|
|
LineNumberReader lnReader = new LineNumberReader(reader);
|
2010-04-04 09:48:47 +02:00
|
|
|
String sep = options.getProperty("separator"); // auto-detect if not present
|
|
|
|
String line = null;
|
|
|
|
boolean first = true;
|
|
|
|
int cellCount = 1;
|
|
|
|
RowParser parser = (sep == null || (sep.length() == 0)) ? null : new SeparatorRowParser(sep);
|
|
|
|
|
|
|
|
int rowsWithData = 0;
|
|
|
|
while ((line = lnReader.readLine()) != null) {
|
2010-04-09 02:14:11 +02:00
|
|
|
if (StringUtils.isBlank(line)) {
|
2010-04-04 09:48:47 +02:00
|
|
|
continue;
|
|
|
|
}
|
2010-03-03 05:19:58 +01:00
|
|
|
|
2010-04-04 09:48:47 +02:00
|
|
|
if (parser == null) {
|
|
|
|
int tab = line.indexOf('\t');
|
|
|
|
if (tab >= 0) {
|
|
|
|
sep = "\t";
|
|
|
|
parser = new SeparatorRowParser(sep);
|
|
|
|
} else {
|
|
|
|
sep = ",";
|
|
|
|
parser = new CSVRowParser();
|
2010-03-03 05:19:58 +01:00
|
|
|
}
|
2010-04-04 09:48:47 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (first) {
|
2010-04-11 05:42:44 +02:00
|
|
|
List<String> cells = parser.split(line);
|
2010-04-06 19:55:36 +02:00
|
|
|
Map<String, Integer> nameToIndex = new HashMap<String, Integer>();
|
2010-04-04 09:48:47 +02:00
|
|
|
|
|
|
|
first = false;
|
2010-04-11 05:42:44 +02:00
|
|
|
for (int c = 0; c < cells.size(); c++) {
|
2010-04-13 08:28:13 +02:00
|
|
|
String cell = cells.get(c).trim();
|
2010-04-04 09:48:47 +02:00
|
|
|
if (cell.startsWith("\"") && cell.endsWith("\"")) {
|
2010-04-13 08:28:13 +02:00
|
|
|
cell = cell.substring(1, cell.length() - 1).trim();
|
2010-03-03 05:19:58 +01:00
|
|
|
}
|
2010-04-04 09:48:47 +02:00
|
|
|
|
2010-04-06 19:55:36 +02:00
|
|
|
if (nameToIndex.containsKey(cell)) {
|
|
|
|
int index = nameToIndex.get(cell);
|
|
|
|
nameToIndex.put(cell, index + 1);
|
|
|
|
|
|
|
|
cell = cell.contains(" ") ? (cell + " " + index) : (cell + index);
|
|
|
|
} else {
|
|
|
|
nameToIndex.put(cell, 2);
|
|
|
|
}
|
|
|
|
|
2010-04-04 09:48:47 +02:00
|
|
|
Column column = new Column(c, cell);
|
|
|
|
|
|
|
|
project.columnModel.columns.add(column);
|
2010-03-03 05:19:58 +01:00
|
|
|
}
|
|
|
|
|
2010-04-11 05:42:44 +02:00
|
|
|
cellCount = cells.size();
|
2010-04-04 09:48:47 +02:00
|
|
|
} else {
|
|
|
|
Row row = new Row(cellCount);
|
|
|
|
|
2010-04-11 23:54:56 +02:00
|
|
|
if (parser.parseRow(row, line, guessValueType)) {
|
2010-04-04 09:48:47 +02:00
|
|
|
rowsWithData++;
|
2010-03-03 05:19:58 +01:00
|
|
|
|
2010-04-04 09:48:47 +02:00
|
|
|
if (skip <= 0 || rowsWithData > skip) {
|
|
|
|
project.rows.add(row);
|
|
|
|
project.columnModel.setMaxCellIndex(row.cells.size());
|
2010-03-03 05:19:58 +01:00
|
|
|
|
2010-04-04 09:48:47 +02:00
|
|
|
if (limit > 0 && project.rows.size() >= limit) {
|
|
|
|
break;
|
2010-03-03 05:19:58 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2010-02-05 20:19:38 +01:00
|
|
|
|
2010-04-11 23:54:56 +02:00
|
|
|
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
|
2010-03-03 05:19:58 +01:00
|
|
|
throw new NotImplementedException();
|
|
|
|
}
|
2010-02-05 20:19:38 +01:00
|
|
|
|
2010-03-03 05:19:58 +01:00
|
|
|
public boolean takesReader() {
|
|
|
|
return true;
|
|
|
|
}
|
2010-02-05 20:19:38 +01:00
|
|
|
}
|