- Issue 112: Refactor Importer API (patch from tfmorris)
- Added support for storing custom metadata in ProjectMetadata.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1138 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
00c6865d95
commit
f411dc9104
@ -1,6 +1,10 @@
|
||||
package com.google.gridworks;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
@ -23,7 +27,8 @@ public class ProjectMetadata implements Jsonizable {
|
||||
private String _encoding;
|
||||
private int _encodingConfidence;
|
||||
|
||||
private PreferenceStore _preferenceStore = new PreferenceStore();
|
||||
private Map<String, Serializable> _customMetadata = new HashMap<String, Serializable>();
|
||||
private PreferenceStore _preferenceStore = new PreferenceStore();
|
||||
|
||||
final Logger logger = LoggerFactory.getLogger("project_metadata");
|
||||
|
||||
@ -51,13 +56,20 @@ public class ProjectMetadata implements Jsonizable {
|
||||
|
||||
writer.key("encoding"); writer.value(_encoding);
|
||||
writer.key("encodingConfidence"); writer.value(_encodingConfidence);
|
||||
|
||||
writer.key("customMetadata"); writer.object();
|
||||
for (String key : _customMetadata.keySet()) {
|
||||
Serializable value = _customMetadata.get(key);
|
||||
writer.key(key);
|
||||
writer.value(value);
|
||||
}
|
||||
writer.endObject();
|
||||
|
||||
writer.key("preferences"); _preferenceStore.write(writer, options);
|
||||
}
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
public void write(JSONWriter jsonWriter) throws Exception {
|
||||
Properties options = new Properties();
|
||||
options.setProperty("mode", "save");
|
||||
@ -92,6 +104,24 @@ public class ProjectMetadata implements Jsonizable {
|
||||
}
|
||||
}
|
||||
|
||||
if (obj.has("customMetadata") && !obj.isNull("customMetadata")) {
|
||||
try {
|
||||
JSONObject obj2 = obj.getJSONObject("customMetadata");
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Iterator<String> keys = obj2.keys();
|
||||
while (keys.hasNext()) {
|
||||
String key = keys.next();
|
||||
Object value = obj2.get(key);
|
||||
if (value != null && value instanceof Serializable) {
|
||||
pm._customMetadata.put(key, (Serializable) value);
|
||||
}
|
||||
}
|
||||
} catch (JSONException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
|
||||
return pm;
|
||||
}
|
||||
|
||||
@ -153,4 +183,16 @@ public class ProjectMetadata implements Jsonizable {
|
||||
public PreferenceStore getPreferenceStore() {
|
||||
return _preferenceStore;
|
||||
}
|
||||
|
||||
public Serializable getCustomMetadata(String key) {
|
||||
return _customMetadata.get(key);
|
||||
}
|
||||
|
||||
public void setCustomMetadata(String key, Serializable value) {
|
||||
if (value == null) {
|
||||
_customMetadata.remove(key);
|
||||
} else {
|
||||
_customMetadata.put(key, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -43,7 +43,10 @@ import com.google.gridworks.ProjectManager;
|
||||
import com.google.gridworks.ProjectMetadata;
|
||||
import com.google.gridworks.commands.Command;
|
||||
import com.google.gridworks.importers.Importer;
|
||||
import com.google.gridworks.importers.ReaderImporter;
|
||||
import com.google.gridworks.importers.StreamImporter;
|
||||
import com.google.gridworks.importers.TsvCsvImporter;
|
||||
import com.google.gridworks.importers.UrlImporter;
|
||||
import com.google.gridworks.model.Project;
|
||||
import com.google.gridworks.util.IOUtils;
|
||||
import com.google.gridworks.util.ParsingUtilities;
|
||||
@ -400,39 +403,42 @@ public class CreateProjectCommand extends Command {
|
||||
return result;
|
||||
}
|
||||
|
||||
protected void internalImportURL(
|
||||
HttpServletRequest request,
|
||||
Project project,
|
||||
Properties options,
|
||||
String urlString
|
||||
) throws Exception {
|
||||
protected void internalImportURL(HttpServletRequest request,
|
||||
Project project, Properties options, String urlString)
|
||||
throws Exception {
|
||||
URL url = new URL(urlString);
|
||||
URLConnection connection = null;
|
||||
|
||||
try {
|
||||
connection = url.openConnection();
|
||||
connection.setConnectTimeout(5000);
|
||||
connection.connect();
|
||||
} catch (Exception e) {
|
||||
throw new Exception("Cannot connect to " + urlString, e);
|
||||
}
|
||||
// Try for a URL importer first
|
||||
Importer importer = guessUrlImporter(url);
|
||||
if (importer instanceof UrlImporter) {
|
||||
((UrlImporter) importer).read(url, project, options);
|
||||
return;
|
||||
} else {
|
||||
// If we couldn't find one, try opening URL and treating as a stream
|
||||
try {
|
||||
connection = url.openConnection();
|
||||
connection.setConnectTimeout(5000);
|
||||
connection.connect();
|
||||
} catch (Exception e) {
|
||||
throw new Exception("Cannot connect to " + urlString, e);
|
||||
}
|
||||
|
||||
InputStream inputStream = null;
|
||||
try {
|
||||
inputStream = connection.getInputStream();
|
||||
} catch (Exception e) {
|
||||
throw new Exception("Cannot retrieve content from " + url, e);
|
||||
}
|
||||
InputStream inputStream = null;
|
||||
try {
|
||||
inputStream = connection.getInputStream();
|
||||
} catch (Exception e) {
|
||||
throw new Exception("Cannot retrieve content from " + url, e);
|
||||
}
|
||||
|
||||
try {
|
||||
Importer importer = guessImporter(
|
||||
connection.getContentType(),
|
||||
url.getPath()
|
||||
);
|
||||
|
||||
internalInvokeImporter(project, importer, options, inputStream, connection.getContentEncoding());
|
||||
} finally {
|
||||
inputStream.close();
|
||||
try {
|
||||
importer = guessImporter(connection.getContentType(),
|
||||
url.getPath());
|
||||
internalInvokeImporter(project, importer, options, inputStream,
|
||||
connection.getContentEncoding());
|
||||
} finally {
|
||||
inputStream.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -443,7 +449,7 @@ public class CreateProjectCommand extends Command {
|
||||
InputStream rawInputStream,
|
||||
String encoding
|
||||
) throws Exception {
|
||||
if (importer.takesReader()) {
|
||||
if (importer instanceof ReaderImporter) {
|
||||
|
||||
BufferedInputStream inputStream = new BufferedInputStream(rawInputStream);
|
||||
|
||||
@ -482,28 +488,45 @@ public class CreateProjectCommand extends Command {
|
||||
new InputStreamReader(inputStream);
|
||||
}
|
||||
|
||||
importer.read(reader, project, options);
|
||||
((ReaderImporter) importer).read(reader, project, options);
|
||||
} else {
|
||||
importer.read(rawInputStream, project, options);
|
||||
((StreamImporter) importer).read(rawInputStream, project, options);
|
||||
}
|
||||
}
|
||||
|
||||
protected void internalInvokeImporter(
|
||||
Project project,
|
||||
Importer importer,
|
||||
Properties options,
|
||||
Reader reader
|
||||
Project project,
|
||||
ReaderImporter importer,
|
||||
Properties options,
|
||||
Reader reader
|
||||
) throws Exception {
|
||||
importer.read(reader, project, options);
|
||||
}
|
||||
|
||||
protected Importer guessImporter(String contentType, String fileName) {
|
||||
for(Importer i : importers.values()){
|
||||
protected Importer guessImporter(String contentType, String fileName, boolean provideDefault) {
|
||||
for (Importer i : importers.values()){
|
||||
if(i.canImportData(contentType, fileName)){
|
||||
return i;
|
||||
}
|
||||
}
|
||||
if (provideDefault) {
|
||||
return new TsvCsvImporter(); // default
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
protected Importer guessImporter(String contentType, String filename) {
|
||||
return guessImporter(contentType, filename, true);
|
||||
}
|
||||
|
||||
return new TsvCsvImporter(); //default
|
||||
protected Importer guessUrlImporter(URL url) {
|
||||
for (Importer importer : importers.values()){
|
||||
if (importer instanceof UrlImporter
|
||||
&& ((UrlImporter) importer).canImportData(url)) {
|
||||
return importer;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,6 @@ package com.google.gridworks.importers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
@ -28,18 +27,11 @@ import com.google.gridworks.model.ReconCandidate;
|
||||
import com.google.gridworks.model.Row;
|
||||
import com.google.gridworks.model.Recon.Judgment;
|
||||
|
||||
public class ExcelImporter implements Importer {
|
||||
public class ExcelImporter implements StreamImporter {
|
||||
protected boolean _xmlBased;
|
||||
|
||||
public boolean takesReader() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void read(Reader reader, Project project, Properties options) throws Exception {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
|
||||
|
||||
@Override
|
||||
public void read(InputStream inputStream, Project project, Properties options) throws ImportException {
|
||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
||||
int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
|
||||
@ -51,7 +43,7 @@ public class ExcelImporter implements Importer {
|
||||
new XSSFWorkbook(inputStream) :
|
||||
new HSSFWorkbook(new POIFSFileSystem(inputStream));
|
||||
} catch (IOException e) {
|
||||
throw new Exception(
|
||||
throw new ImportException(
|
||||
"Attempted to parse file as Excel file but failed. " +
|
||||
"Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.",
|
||||
e
|
||||
@ -94,8 +86,9 @@ public class ExcelImporter implements Importer {
|
||||
for (int c = firstCell; c <= lastCell; c++) {
|
||||
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
||||
if (cell != null) {
|
||||
String text = cell.getStringCellValue().trim();
|
||||
if (text.length() > 0) {
|
||||
Serializable value = extractCell(cell);
|
||||
String text = value != null ? value.toString() : null;
|
||||
if (text != null && text.length() > 0) {
|
||||
while (columnNames.size() < c + 1) {
|
||||
columnNames.add(null);
|
||||
}
|
||||
@ -194,7 +187,7 @@ public class ExcelImporter implements Importer {
|
||||
}
|
||||
}
|
||||
|
||||
protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
|
||||
protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) {
|
||||
int cellType = cell.getCellType();
|
||||
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR ||
|
||||
cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) {
|
||||
@ -222,6 +215,12 @@ public class ExcelImporter implements Importer {
|
||||
}
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
|
||||
Serializable value = extractCell(cell);
|
||||
|
||||
if (value != null) {
|
||||
Recon recon = null;
|
||||
|
||||
@ -273,6 +272,7 @@ public class ExcelImporter implements Importer {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
|
15
main/src/com/google/gridworks/importers/ImportException.java
Normal file
15
main/src/com/google/gridworks/importers/ImportException.java
Normal file
@ -0,0 +1,15 @@
|
||||
package com.google.gridworks.importers;
|
||||
|
||||
/**
 * Exception thrown by importers. Typically contains a nested exception
 * indicating the underlying cause of the problem.
 */
public class ImportException extends Exception {

    private static final long serialVersionUID = 7077314805989174181L;

    /**
     * Create an import failure that wraps the exception which caused it.
     *
     * @param message description of what went wrong during the import
     * @param cause the underlying exception, kept as the cause of this one
     */
    public ImportException(String message, Throwable cause) {
        super(message, cause);
    }

}
|
@ -1,16 +1,14 @@
|
||||
package com.google.gridworks.importers;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.util.Properties;
|
||||
|
||||
import com.google.gridworks.model.Project;
|
||||
|
||||
public interface Importer {
|
||||
public boolean takesReader();
|
||||
|
||||
public void read(Reader reader, Project project, Properties options) throws Exception;
|
||||
public void read(InputStream inputStream, Project project, Properties options) throws Exception;
|
||||
|
||||
/**
|
||||
* Determine whether importer can handle given contentType and filename.
|
||||
*
|
||||
* @param contentType
|
||||
* @param fileName
|
||||
* @return true if the importer can handle this
|
||||
*/
|
||||
public boolean canImportData(String contentType, String fileName);
|
||||
}
|
||||
|
@ -2,10 +2,11 @@ package com.google.gridworks.importers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.Reader;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.marc4j.MarcPermissiveStreamReader;
|
||||
@ -15,27 +16,23 @@ import org.marc4j.marc.Record;
|
||||
|
||||
import com.google.gridworks.model.Project;
|
||||
|
||||
public class MarcImporter implements Importer {
|
||||
|
||||
public boolean takesReader() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void read(Reader reader, Project project, Properties options)
|
||||
throws Exception {
|
||||
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
public class MarcImporter implements StreamImporter {
|
||||
|
||||
@Override
|
||||
public void read(
|
||||
InputStream inputStream,
|
||||
Project project,
|
||||
Properties options
|
||||
) throws Exception {
|
||||
) throws ImportException {
|
||||
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
||||
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
||||
|
||||
File tempFile = File.createTempFile("gridworks-import-", ".marc.xml");
|
||||
File tempFile;
|
||||
try {
|
||||
tempFile = File.createTempFile("gridworks-import-", ".marc.xml");
|
||||
} catch (IOException e) {
|
||||
throw new ImportException("Unexpected error creating temp file",e);
|
||||
}
|
||||
try {
|
||||
OutputStream os = new FileOutputStream(tempFile);
|
||||
try {
|
||||
@ -62,20 +59,31 @@ public class MarcImporter implements Importer {
|
||||
}
|
||||
writer.close();
|
||||
} finally {
|
||||
os.close();
|
||||
try {
|
||||
os.close();
|
||||
} catch (IOException e) {
|
||||
// Just ignore - not much we can do anyway
|
||||
}
|
||||
}
|
||||
|
||||
InputStream is = new FileInputStream(tempFile);
|
||||
try {
|
||||
new XmlImporter().read(is, project, options);
|
||||
} finally {
|
||||
is.close();
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
// Just ignore - not much we can do anyway
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ImportException("Input file not found", e);
|
||||
} finally {
|
||||
tempFile.delete();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
|
@ -1,6 +1,6 @@
|
||||
package com.google.gridworks.importers;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
@ -14,6 +14,8 @@ import org.jrdf.SortedMemoryJRDFFactory;
|
||||
import org.jrdf.collection.MemMapFactory;
|
||||
import org.jrdf.graph.Graph;
|
||||
import org.jrdf.graph.Triple;
|
||||
import org.jrdf.parser.ParseException;
|
||||
import org.jrdf.parser.StatementHandlerException;
|
||||
import org.jrdf.parser.line.GraphLineParser;
|
||||
import org.jrdf.parser.line.LineHandler;
|
||||
import org.jrdf.parser.ntriples.NTriplesParserFactory;
|
||||
@ -29,25 +31,33 @@ import com.google.gridworks.model.ModelException;
|
||||
import com.google.gridworks.model.Project;
|
||||
import com.google.gridworks.model.Row;
|
||||
|
||||
public class RdfTripleImporter implements Importer{
|
||||
JRDFFactory JrdfFactory;
|
||||
NTriplesParserFactory nTriplesParserFactory;
|
||||
MemMapFactory newMapFactory;
|
||||
public class RdfTripleImporter implements ReaderImporter{
|
||||
private JRDFFactory _jrdfFactory;
|
||||
private NTriplesParserFactory _nTriplesParserFactory;
|
||||
private MemMapFactory _newMapFactory;
|
||||
|
||||
public RdfTripleImporter(){
|
||||
JrdfFactory = SortedMemoryJRDFFactory.getFactory();
|
||||
nTriplesParserFactory = new NTriplesParserFactory();
|
||||
newMapFactory = new MemMapFactory();
|
||||
_jrdfFactory = SortedMemoryJRDFFactory.getFactory();
|
||||
_nTriplesParserFactory = new NTriplesParserFactory();
|
||||
_newMapFactory = new MemMapFactory();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void read(Reader reader, Project project, Properties options) throws Exception {
|
||||
public void read(Reader reader, Project project, Properties options) throws ImportException {
|
||||
String baseUrl = options.getProperty("base-url");
|
||||
|
||||
Graph graph = JrdfFactory.getNewGraph();
|
||||
LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory);
|
||||
Graph graph = _jrdfFactory.getNewGraph();
|
||||
LineHandler lineHandler = _nTriplesParserFactory.createParser(graph, _newMapFactory);
|
||||
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
||||
parser.parse(reader, baseUrl); // fills JRDF graph
|
||||
try {
|
||||
parser.parse(reader, baseUrl); // fills JRDF graph
|
||||
} catch (IOException e) {
|
||||
throw new ImportException("i/o error while parsing RDF",e);
|
||||
} catch (ParseException e) {
|
||||
throw new ImportException("error parsing RDF",e);
|
||||
} catch (StatementHandlerException e) {
|
||||
throw new ImportException("error parsing RDF",e);
|
||||
}
|
||||
|
||||
Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>();
|
||||
|
||||
@ -64,62 +74,53 @@ public class RdfTripleImporter implements Importer{
|
||||
|
||||
Column column = project.columnModel.getColumnByName(predicate);
|
||||
if (column == null) {
|
||||
column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
|
||||
try {
|
||||
project.columnModel.addColumn(-1, column, true);
|
||||
} catch (ModelException e) {
|
||||
// ignore
|
||||
}
|
||||
column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
|
||||
try {
|
||||
project.columnModel.addColumn(-1, column, true);
|
||||
} catch (ModelException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
|
||||
int cellIndex = column.getCellIndex();
|
||||
if (subjectToRows.containsKey(subject)) {
|
||||
List<Row> rows = subjectToRows.get(subject);
|
||||
for (Row row : rows) {
|
||||
if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) {
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
object = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
List<Row> rows = subjectToRows.get(subject);
|
||||
for (Row row : rows) {
|
||||
if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) {
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
object = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (object != null) {
|
||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||
rows.add(row);
|
||||
if (object != null) {
|
||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||
rows.add(row);
|
||||
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
}
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
}
|
||||
} else {
|
||||
List<Row> rows = new ArrayList<Row>();
|
||||
subjectToRows.put(subject, rows);
|
||||
List<Row> rows = new ArrayList<Row>();
|
||||
subjectToRows.put(subject, rows);
|
||||
|
||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||
rows.add(row);
|
||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||
rows.add(row);
|
||||
|
||||
row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
}
|
||||
}
|
||||
|
||||
for (Entry<String, List<Row>> entry : subjectToRows.entrySet()) {
|
||||
project.rows.addAll(entry.getValue());
|
||||
project.rows.addAll(entry.getValue());
|
||||
}
|
||||
} finally {
|
||||
triples.iterator().close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
|
||||
// TODO
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean takesReader() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
|
27
main/src/com/google/gridworks/importers/ReaderImporter.java
Normal file
27
main/src/com/google/gridworks/importers/ReaderImporter.java
Normal file
@ -0,0 +1,27 @@
|
||||
package com.google.gridworks.importers;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.Properties;
|
||||
|
||||
import com.google.gridworks.model.Project;
|
||||
|
||||
/**
|
||||
* Interface for importers which take a Reader as input.
|
||||
*/
|
||||
public interface ReaderImporter extends Importer {
|
||||
|
||||
/**
|
||||
* Read data from a input reader into project.
|
||||
*
|
||||
* @param reader
|
||||
* reader to import data from. It is assumed to be positioned at
|
||||
* the correct point and ready to go.
|
||||
* @param project
|
||||
* project which will contain data
|
||||
* @param options
|
||||
* set of properties with import options
|
||||
* @throws ImportException
|
||||
*/
|
||||
public void read(Reader reader, Project project, Properties options)
|
||||
throws ImportException;
|
||||
}
|
19
main/src/com/google/gridworks/importers/StreamImporter.java
Normal file
19
main/src/com/google/gridworks/importers/StreamImporter.java
Normal file
@ -0,0 +1,19 @@
|
||||
package com.google.gridworks.importers;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Properties;
|
||||
|
||||
import com.google.gridworks.model.Project;
|
||||
|
||||
public interface StreamImporter extends Importer {
|
||||
|
||||
/**
|
||||
* @param inputStream stream to be imported
|
||||
* @param project project to import stream into
|
||||
* @param options
|
||||
* @throws ImportException
|
||||
*/
|
||||
public void read(InputStream inputStream, Project project,
|
||||
Properties options) throws ImportException;
|
||||
|
||||
}
|
@ -19,8 +19,10 @@ import com.google.gridworks.model.Cell;
|
||||
import com.google.gridworks.model.Project;
|
||||
import com.google.gridworks.model.Row;
|
||||
|
||||
public class TsvCsvImporter implements Importer {
|
||||
public void read(Reader reader, Project project, Properties options) throws Exception {
|
||||
public class TsvCsvImporter implements ReaderImporter,StreamImporter {
|
||||
|
||||
@Override
|
||||
public void read(Reader reader, Project project, Properties options) throws ImportException {
|
||||
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
|
||||
|
||||
String sep = options.getProperty("separator"); // auto-detect if not present
|
||||
@ -33,11 +35,15 @@ public class TsvCsvImporter implements Importer {
|
||||
boolean ignoreQuotes = ImporterUtilities.getBooleanOption("ignore-quotes", options, false);
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(reader);
|
||||
|
||||
read(lnReader, project, sep,
|
||||
limit, skip, ignoreLines, headerLines,
|
||||
guessValueType, splitIntoColumns, ignoreQuotes
|
||||
);
|
||||
|
||||
try {
|
||||
read(lnReader, project, sep,
|
||||
limit, skip, ignoreLines, headerLines,
|
||||
guessValueType, splitIntoColumns, ignoreQuotes
|
||||
);
|
||||
} catch (IOException e) {
|
||||
throw new ImportException("Import failed",e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -170,18 +176,22 @@ public class TsvCsvImporter implements Importer {
|
||||
return cells;
|
||||
}
|
||||
|
||||
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
|
||||
@Override
|
||||
public void read(InputStream inputStream, Project project,
|
||||
Properties options) throws ImportException {
|
||||
read(new InputStreamReader(inputStream), project, options);
|
||||
}
|
||||
|
||||
public boolean takesReader() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
return false;
|
||||
return
|
||||
"text/plain".equals(contentType) ||
|
||||
"text/csv".equals(contentType) ||
|
||||
"text/x-csv".equals(contentType) ||
|
||||
"text/tab-separated-value".equals(contentType);
|
||||
|
||||
} else if (fileName != null) {
|
||||
fileName = fileName.toLowerCase();
|
||||
if (fileName.endsWith(".tsv")) {
|
||||
|
14
main/src/com/google/gridworks/importers/UrlImporter.java
Normal file
14
main/src/com/google/gridworks/importers/UrlImporter.java
Normal file
@ -0,0 +1,14 @@
|
||||
package com.google.gridworks.importers;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.Properties;
|
||||
|
||||
import com.google.gridworks.model.Project;
|
||||
|
||||
public interface UrlImporter extends Importer {
|
||||
|
||||
public void read(URL url, Project project, Properties options) throws Exception;
|
||||
|
||||
public boolean canImportData(URL url);
|
||||
|
||||
}
|
@ -1,9 +1,9 @@
|
||||
package com.google.gridworks.importers;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.io.Reader;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
@ -12,27 +12,18 @@ import org.slf4j.LoggerFactory;
|
||||
import com.google.gridworks.importers.XmlImportUtilities.ImportColumnGroup;
|
||||
import com.google.gridworks.model.Project;
|
||||
|
||||
public class XmlImporter implements Importer {
|
||||
public class XmlImporter implements StreamImporter {
|
||||
|
||||
final static Logger logger = LoggerFactory.getLogger("XmlImporter");
|
||||
|
||||
public static final int BUFFER_SIZE = 64 * 1024;
|
||||
|
||||
public boolean takesReader() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void read(Reader reader, Project project, Properties options)
|
||||
throws Exception {
|
||||
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void read(
|
||||
InputStream inputStream,
|
||||
Project project,
|
||||
Properties options
|
||||
) throws Exception {
|
||||
) throws ImportException {
|
||||
logger.trace("XmlImporter.read");
|
||||
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
||||
|
||||
@ -40,13 +31,17 @@ public class XmlImporter implements Importer {
|
||||
{
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int bytes_read = 0;
|
||||
while (bytes_read < BUFFER_SIZE) {
|
||||
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
|
||||
if (c == -1) break;
|
||||
bytes_read +=c ;
|
||||
try {
|
||||
while (bytes_read < BUFFER_SIZE) {
|
||||
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
|
||||
if (c == -1) break;
|
||||
bytes_read +=c ;
|
||||
}
|
||||
pis.unread(buffer, 0, bytes_read);
|
||||
} catch (IOException e) {
|
||||
throw new ImportException("Read error",e);
|
||||
}
|
||||
pis.unread(buffer, 0, bytes_read);
|
||||
|
||||
|
||||
if (options.containsKey("importer-record-tag")) {
|
||||
recordPath = XmlImportUtilities.detectPathFromTag(
|
||||
new ByteArrayInputStream(buffer, 0, bytes_read),
|
||||
@ -68,6 +63,7 @@ public class XmlImporter implements Importer {
|
||||
project.columnModel.update();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
|
Loading…
Reference in New Issue
Block a user