- Issue 112: Refactor Importer API (patch from tfmorris)

- Added support for storing custom metadata in ProjectMetadata.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1138 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-08-06 05:04:25 +00:00
parent 00c6865d95
commit f411dc9104
12 changed files with 316 additions and 163 deletions

View File

@ -1,6 +1,10 @@
package com.google.gridworks; package com.google.gridworks;
import java.io.Serializable;
import java.util.Date; import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties; import java.util.Properties;
import org.json.JSONException; import org.json.JSONException;
@ -23,7 +27,8 @@ public class ProjectMetadata implements Jsonizable {
private String _encoding; private String _encoding;
private int _encodingConfidence; private int _encodingConfidence;
private PreferenceStore _preferenceStore = new PreferenceStore(); private Map<String, Serializable> _customMetadata = new HashMap<String, Serializable>();
private PreferenceStore _preferenceStore = new PreferenceStore();
final Logger logger = LoggerFactory.getLogger("project_metadata"); final Logger logger = LoggerFactory.getLogger("project_metadata");
@ -51,13 +56,20 @@ public class ProjectMetadata implements Jsonizable {
writer.key("encoding"); writer.value(_encoding); writer.key("encoding"); writer.value(_encoding);
writer.key("encodingConfidence"); writer.value(_encodingConfidence); writer.key("encodingConfidence"); writer.value(_encodingConfidence);
writer.key("customMetadata"); writer.object();
for (String key : _customMetadata.keySet()) {
Serializable value = _customMetadata.get(key);
writer.key(key);
writer.value(value);
}
writer.endObject();
writer.key("preferences"); _preferenceStore.write(writer, options); writer.key("preferences"); _preferenceStore.write(writer, options);
} }
writer.endObject(); writer.endObject();
} }
public void write(JSONWriter jsonWriter) throws Exception { public void write(JSONWriter jsonWriter) throws Exception {
Properties options = new Properties(); Properties options = new Properties();
options.setProperty("mode", "save"); options.setProperty("mode", "save");
@ -92,6 +104,24 @@ public class ProjectMetadata implements Jsonizable {
} }
} }
if (obj.has("customMetadata") && !obj.isNull("customMetadata")) {
try {
JSONObject obj2 = obj.getJSONObject("customMetadata");
@SuppressWarnings("unchecked")
Iterator<String> keys = obj2.keys();
while (keys.hasNext()) {
String key = keys.next();
Object value = obj2.get(key);
if (value != null && value instanceof Serializable) {
pm._customMetadata.put(key, (Serializable) value);
}
}
} catch (JSONException e) {
// ignore
}
}
return pm; return pm;
} }
@ -153,4 +183,16 @@ public class ProjectMetadata implements Jsonizable {
public PreferenceStore getPreferenceStore() { public PreferenceStore getPreferenceStore() {
return _preferenceStore; return _preferenceStore;
} }
/**
 * Looks up a custom metadata entry attached to this project.
 *
 * @param key name of the entry to look up
 * @return the value stored under {@code key}, or {@code null} when no
 *         entry with that name exists
 */
public Serializable getCustomMetadata(String key) {
    final Serializable stored = _customMetadata.get(key);
    return stored;
}
/**
 * Stores, replaces, or clears a custom metadata entry.
 *
 * @param key   name of the entry
 * @param value value to associate with {@code key}; passing {@code null}
 *              removes any existing entry for that key
 */
public void setCustomMetadata(String key, Serializable value) {
    if (value != null) {
        _customMetadata.put(key, value);
        return;
    }
    // A null value is never stored; it drops the key instead.
    _customMetadata.remove(key);
}
} }

View File

@ -43,7 +43,10 @@ import com.google.gridworks.ProjectManager;
import com.google.gridworks.ProjectMetadata; import com.google.gridworks.ProjectMetadata;
import com.google.gridworks.commands.Command; import com.google.gridworks.commands.Command;
import com.google.gridworks.importers.Importer; import com.google.gridworks.importers.Importer;
import com.google.gridworks.importers.ReaderImporter;
import com.google.gridworks.importers.StreamImporter;
import com.google.gridworks.importers.TsvCsvImporter; import com.google.gridworks.importers.TsvCsvImporter;
import com.google.gridworks.importers.UrlImporter;
import com.google.gridworks.model.Project; import com.google.gridworks.model.Project;
import com.google.gridworks.util.IOUtils; import com.google.gridworks.util.IOUtils;
import com.google.gridworks.util.ParsingUtilities; import com.google.gridworks.util.ParsingUtilities;
@ -400,39 +403,42 @@ public class CreateProjectCommand extends Command {
return result; return result;
} }
protected void internalImportURL( protected void internalImportURL(HttpServletRequest request,
HttpServletRequest request, Project project, Properties options, String urlString)
Project project, throws Exception {
Properties options,
String urlString
) throws Exception {
URL url = new URL(urlString); URL url = new URL(urlString);
URLConnection connection = null; URLConnection connection = null;
try { // Try for a URL importer first
connection = url.openConnection(); Importer importer = guessUrlImporter(url);
connection.setConnectTimeout(5000); if (importer instanceof UrlImporter) {
connection.connect(); ((UrlImporter) importer).read(url, project, options);
} catch (Exception e) { return;
throw new Exception("Cannot connect to " + urlString, e); } else {
} // If we couldn't find one, try opening URL and treating as a stream
try {
connection = url.openConnection();
connection.setConnectTimeout(5000);
connection.connect();
} catch (Exception e) {
throw new Exception("Cannot connect to " + urlString, e);
}
InputStream inputStream = null; InputStream inputStream = null;
try { try {
inputStream = connection.getInputStream(); inputStream = connection.getInputStream();
} catch (Exception e) { } catch (Exception e) {
throw new Exception("Cannot retrieve content from " + url, e); throw new Exception("Cannot retrieve content from " + url, e);
} }
try { try {
Importer importer = guessImporter( importer = guessImporter(connection.getContentType(),
connection.getContentType(), url.getPath());
url.getPath() internalInvokeImporter(project, importer, options, inputStream,
); connection.getContentEncoding());
} finally {
internalInvokeImporter(project, importer, options, inputStream, connection.getContentEncoding()); inputStream.close();
} finally { }
inputStream.close();
} }
} }
@ -443,7 +449,7 @@ public class CreateProjectCommand extends Command {
InputStream rawInputStream, InputStream rawInputStream,
String encoding String encoding
) throws Exception { ) throws Exception {
if (importer.takesReader()) { if (importer instanceof ReaderImporter) {
BufferedInputStream inputStream = new BufferedInputStream(rawInputStream); BufferedInputStream inputStream = new BufferedInputStream(rawInputStream);
@ -482,28 +488,45 @@ public class CreateProjectCommand extends Command {
new InputStreamReader(inputStream); new InputStreamReader(inputStream);
} }
importer.read(reader, project, options); ((ReaderImporter) importer).read(reader, project, options);
} else { } else {
importer.read(rawInputStream, project, options); ((StreamImporter) importer).read(rawInputStream, project, options);
} }
} }
protected void internalInvokeImporter( protected void internalInvokeImporter(
Project project, Project project,
Importer importer, ReaderImporter importer,
Properties options, Properties options,
Reader reader Reader reader
) throws Exception { ) throws Exception {
importer.read(reader, project, options); importer.read(reader, project, options);
} }
protected Importer guessImporter(String contentType, String fileName) { protected Importer guessImporter(String contentType, String fileName, boolean provideDefault) {
for(Importer i : importers.values()){ for (Importer i : importers.values()){
if(i.canImportData(contentType, fileName)){ if(i.canImportData(contentType, fileName)){
return i; return i;
} }
} }
if (provideDefault) {
return new TsvCsvImporter(); // default
} else {
return null;
}
}
return new TsvCsvImporter(); //default protected Importer guessImporter(String contentType, String filename) {
return guessImporter(contentType, filename, true);
}
protected Importer guessUrlImporter(URL url) {
for (Importer importer : importers.values()){
if (importer instanceof UrlImporter
&& ((UrlImporter) importer).canImportData(url)) {
return importer;
}
}
return null;
} }
} }

View File

@ -2,7 +2,6 @@ package com.google.gridworks.importers;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.Reader;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
@ -28,18 +27,11 @@ import com.google.gridworks.model.ReconCandidate;
import com.google.gridworks.model.Row; import com.google.gridworks.model.Row;
import com.google.gridworks.model.Recon.Judgment; import com.google.gridworks.model.Recon.Judgment;
public class ExcelImporter implements Importer { public class ExcelImporter implements StreamImporter {
protected boolean _xmlBased; protected boolean _xmlBased;
public boolean takesReader() { @Override
return false; public void read(InputStream inputStream, Project project, Properties options) throws ImportException {
}
public void read(Reader reader, Project project, Properties options) throws Exception {
throw new UnsupportedOperationException();
}
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1); int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1); int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
int limit = ImporterUtilities.getIntegerOption("limit", options, -1); int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
@ -51,7 +43,7 @@ public class ExcelImporter implements Importer {
new XSSFWorkbook(inputStream) : new XSSFWorkbook(inputStream) :
new HSSFWorkbook(new POIFSFileSystem(inputStream)); new HSSFWorkbook(new POIFSFileSystem(inputStream));
} catch (IOException e) { } catch (IOException e) {
throw new Exception( throw new ImportException(
"Attempted to parse file as Excel file but failed. " + "Attempted to parse file as Excel file but failed. " +
"Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.", "Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.",
e e
@ -94,8 +86,9 @@ public class ExcelImporter implements Importer {
for (int c = firstCell; c <= lastCell; c++) { for (int c = firstCell; c <= lastCell; c++) {
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c); org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
if (cell != null) { if (cell != null) {
String text = cell.getStringCellValue().trim(); Serializable value = extractCell(cell);
if (text.length() > 0) { String text = value != null ? value.toString() : null;
if (text != null && text.length() > 0) {
while (columnNames.size() < c + 1) { while (columnNames.size() < c + 1) {
columnNames.add(null); columnNames.add(null);
} }
@ -194,7 +187,7 @@ public class ExcelImporter implements Importer {
} }
} }
protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) { protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) {
int cellType = cell.getCellType(); int cellType = cell.getCellType();
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR || if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR ||
cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) { cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) {
@ -222,6 +215,12 @@ public class ExcelImporter implements Importer {
} }
} }
return value;
}
protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
Serializable value = extractCell(cell);
if (value != null) { if (value != null) {
Recon recon = null; Recon recon = null;
@ -273,6 +272,7 @@ public class ExcelImporter implements Importer {
} }
} }
@Override
public boolean canImportData(String contentType, String fileName) { public boolean canImportData(String contentType, String fileName) {
if (contentType != null) { if (contentType != null) {
contentType = contentType.toLowerCase().trim(); contentType = contentType.toLowerCase().trim();

View File

@ -0,0 +1,15 @@
package com.google.gridworks.importers;
/**
 * Checked exception raised by importers when reading input fails.
 * The lower-level problem that triggered the failure is normally carried
 * along as this exception's cause.
 */
public class ImportException extends Exception {

    private static final long serialVersionUID = 7077314805989174181L;

    /**
     * Creates an import failure that wraps an underlying problem.
     *
     * @param message description of what went wrong
     * @param cause   the exception that caused the import to fail
     */
    public ImportException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@ -1,16 +1,14 @@
package com.google.gridworks.importers; package com.google.gridworks.importers;
import java.io.InputStream;
import java.io.Reader;
import java.util.Properties;
import com.google.gridworks.model.Project;
public interface Importer { public interface Importer {
public boolean takesReader();
public void read(Reader reader, Project project, Properties options) throws Exception;
public void read(InputStream inputStream, Project project, Properties options) throws Exception;
/**
* Determine whether importer can handle given contentType and filename.
*
* @param contentType
* @param fileName
* @return true if the importer can handle this
*/
public boolean canImportData(String contentType, String fileName); public boolean canImportData(String contentType, String fileName);
} }

View File

@ -2,10 +2,11 @@ package com.google.gridworks.importers;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.Reader;
import java.util.Properties; import java.util.Properties;
import org.marc4j.MarcPermissiveStreamReader; import org.marc4j.MarcPermissiveStreamReader;
@ -15,27 +16,23 @@ import org.marc4j.marc.Record;
import com.google.gridworks.model.Project; import com.google.gridworks.model.Project;
public class MarcImporter implements Importer { public class MarcImporter implements StreamImporter {
public boolean takesReader() {
return false;
}
public void read(Reader reader, Project project, Properties options)
throws Exception {
throw new UnsupportedOperationException();
}
@Override
public void read( public void read(
InputStream inputStream, InputStream inputStream,
Project project, Project project,
Properties options Properties options
) throws Exception { ) throws ImportException {
int limit = ImporterUtilities.getIntegerOption("limit",options,-1); int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
int skip = ImporterUtilities.getIntegerOption("skip",options,0); int skip = ImporterUtilities.getIntegerOption("skip",options,0);
File tempFile = File.createTempFile("gridworks-import-", ".marc.xml"); File tempFile;
try {
tempFile = File.createTempFile("gridworks-import-", ".marc.xml");
} catch (IOException e) {
throw new ImportException("Unexpected error creating temp file",e);
}
try { try {
OutputStream os = new FileOutputStream(tempFile); OutputStream os = new FileOutputStream(tempFile);
try { try {
@ -62,20 +59,31 @@ public class MarcImporter implements Importer {
} }
writer.close(); writer.close();
} finally { } finally {
os.close(); try {
os.close();
} catch (IOException e) {
// Just ignore - not much we can do anyway
}
} }
InputStream is = new FileInputStream(tempFile); InputStream is = new FileInputStream(tempFile);
try { try {
new XmlImporter().read(is, project, options); new XmlImporter().read(is, project, options);
} finally { } finally {
is.close(); try {
is.close();
} catch (IOException e) {
// Just ignore - not much we can do anyway
}
} }
} catch (FileNotFoundException e) {
throw new ImportException("Input file not found", e);
} finally { } finally {
tempFile.delete(); tempFile.delete();
} }
} }
@Override
public boolean canImportData(String contentType, String fileName) { public boolean canImportData(String contentType, String fileName) {
if (contentType != null) { if (contentType != null) {
contentType = contentType.toLowerCase().trim(); contentType = contentType.toLowerCase().trim();

View File

@ -1,6 +1,6 @@
package com.google.gridworks.importers; package com.google.gridworks.importers;
import java.io.InputStream; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
@ -14,6 +14,8 @@ import org.jrdf.SortedMemoryJRDFFactory;
import org.jrdf.collection.MemMapFactory; import org.jrdf.collection.MemMapFactory;
import org.jrdf.graph.Graph; import org.jrdf.graph.Graph;
import org.jrdf.graph.Triple; import org.jrdf.graph.Triple;
import org.jrdf.parser.ParseException;
import org.jrdf.parser.StatementHandlerException;
import org.jrdf.parser.line.GraphLineParser; import org.jrdf.parser.line.GraphLineParser;
import org.jrdf.parser.line.LineHandler; import org.jrdf.parser.line.LineHandler;
import org.jrdf.parser.ntriples.NTriplesParserFactory; import org.jrdf.parser.ntriples.NTriplesParserFactory;
@ -29,25 +31,33 @@ import com.google.gridworks.model.ModelException;
import com.google.gridworks.model.Project; import com.google.gridworks.model.Project;
import com.google.gridworks.model.Row; import com.google.gridworks.model.Row;
public class RdfTripleImporter implements Importer{ public class RdfTripleImporter implements ReaderImporter{
JRDFFactory JrdfFactory; private JRDFFactory _jrdfFactory;
NTriplesParserFactory nTriplesParserFactory; private NTriplesParserFactory _nTriplesParserFactory;
MemMapFactory newMapFactory; private MemMapFactory _newMapFactory;
public RdfTripleImporter(){ public RdfTripleImporter(){
JrdfFactory = SortedMemoryJRDFFactory.getFactory(); _jrdfFactory = SortedMemoryJRDFFactory.getFactory();
nTriplesParserFactory = new NTriplesParserFactory(); _nTriplesParserFactory = new NTriplesParserFactory();
newMapFactory = new MemMapFactory(); _newMapFactory = new MemMapFactory();
} }
@Override @Override
public void read(Reader reader, Project project, Properties options) throws Exception { public void read(Reader reader, Project project, Properties options) throws ImportException {
String baseUrl = options.getProperty("base-url"); String baseUrl = options.getProperty("base-url");
Graph graph = JrdfFactory.getNewGraph(); Graph graph = _jrdfFactory.getNewGraph();
LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory); LineHandler lineHandler = _nTriplesParserFactory.createParser(graph, _newMapFactory);
GraphLineParser parser = new GraphLineParser(graph, lineHandler); GraphLineParser parser = new GraphLineParser(graph, lineHandler);
parser.parse(reader, baseUrl); // fills JRDF graph try {
parser.parse(reader, baseUrl); // fills JRDF graph
} catch (IOException e) {
throw new ImportException("i/o error while parsing RDF",e);
} catch (ParseException e) {
throw new ImportException("error parsing RDF",e);
} catch (StatementHandlerException e) {
throw new ImportException("error parsing RDF",e);
}
Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>(); Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>();
@ -64,62 +74,53 @@ public class RdfTripleImporter implements Importer{
Column column = project.columnModel.getColumnByName(predicate); Column column = project.columnModel.getColumnByName(predicate);
if (column == null) { if (column == null) {
column = new Column(project.columnModel.allocateNewCellIndex(), predicate); column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
try { try {
project.columnModel.addColumn(-1, column, true); project.columnModel.addColumn(-1, column, true);
} catch (ModelException e) { } catch (ModelException e) {
// ignore // ignore
} }
} }
int cellIndex = column.getCellIndex(); int cellIndex = column.getCellIndex();
if (subjectToRows.containsKey(subject)) { if (subjectToRows.containsKey(subject)) {
List<Row> rows = subjectToRows.get(subject); List<Row> rows = subjectToRows.get(subject);
for (Row row : rows) { for (Row row : rows) {
if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) { if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) {
row.setCell(cellIndex, new Cell(object, null)); row.setCell(cellIndex, new Cell(object, null));
object = null; object = null;
break; break;
} }
} }
if (object != null) { if (object != null) {
Row row = new Row(project.columnModel.getMaxCellIndex() + 1); Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
rows.add(row); rows.add(row);
row.setCell(cellIndex, new Cell(object, null)); row.setCell(cellIndex, new Cell(object, null));
} }
} else { } else {
List<Row> rows = new ArrayList<Row>(); List<Row> rows = new ArrayList<Row>();
subjectToRows.put(subject, rows); subjectToRows.put(subject, rows);
Row row = new Row(project.columnModel.getMaxCellIndex() + 1); Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
rows.add(row); rows.add(row);
row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null)); row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
row.setCell(cellIndex, new Cell(object, null)); row.setCell(cellIndex, new Cell(object, null));
} }
} }
for (Entry<String, List<Row>> entry : subjectToRows.entrySet()) { for (Entry<String, List<Row>> entry : subjectToRows.entrySet()) {
project.rows.addAll(entry.getValue()); project.rows.addAll(entry.getValue());
} }
} finally { } finally {
triples.iterator().close(); triples.iterator().close();
} }
} }
@Override
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
// TODO
throw new UnsupportedOperationException();
}
@Override @Override
public boolean takesReader() {
return true;
}
public boolean canImportData(String contentType, String fileName) { public boolean canImportData(String contentType, String fileName) {
if (contentType != null) { if (contentType != null) {
contentType = contentType.toLowerCase().trim(); contentType = contentType.toLowerCase().trim();

View File

@ -0,0 +1,27 @@
package com.google.gridworks.importers;
import java.io.Reader;
import java.util.Properties;
import com.google.gridworks.model.Project;
/**
 * An {@link Importer} that consumes character data supplied through a
 * {@link Reader}.
 */
public interface ReaderImporter extends Importer {

    /**
     * Imports the data available from {@code reader} into {@code project}.
     *
     * @param reader  source of the data to import; it is assumed to already
     *                be positioned at the point where importing should start
     * @param project project that will hold the imported data
     * @param options properties carrying the import options
     * @throws ImportException if the import fails
     */
    void read(Reader reader, Project project, Properties options) throws ImportException;
}

View File

@ -0,0 +1,19 @@
package com.google.gridworks.importers;
import java.io.InputStream;
import java.util.Properties;
import com.google.gridworks.model.Project;
/**
 * An {@link Importer} that consumes raw bytes supplied through an
 * {@link InputStream}.
 */
public interface StreamImporter extends Importer {

    /**
     * Imports the data available from {@code inputStream} into {@code project}.
     *
     * @param inputStream stream to be imported
     * @param project     project to import the stream into
     * @param options     properties carrying the import options
     * @throws ImportException if the import fails
     */
    void read(InputStream inputStream, Project project, Properties options) throws ImportException;
}

View File

@ -19,8 +19,10 @@ import com.google.gridworks.model.Cell;
import com.google.gridworks.model.Project; import com.google.gridworks.model.Project;
import com.google.gridworks.model.Row; import com.google.gridworks.model.Row;
public class TsvCsvImporter implements Importer { public class TsvCsvImporter implements ReaderImporter,StreamImporter {
public void read(Reader reader, Project project, Properties options) throws Exception {
@Override
public void read(Reader reader, Project project, Properties options) throws ImportException {
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true); boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
String sep = options.getProperty("separator"); // auto-detect if not present String sep = options.getProperty("separator"); // auto-detect if not present
@ -34,10 +36,14 @@ public class TsvCsvImporter implements Importer {
LineNumberReader lnReader = new LineNumberReader(reader); LineNumberReader lnReader = new LineNumberReader(reader);
read(lnReader, project, sep, try {
limit, skip, ignoreLines, headerLines, read(lnReader, project, sep,
guessValueType, splitIntoColumns, ignoreQuotes limit, skip, ignoreLines, headerLines,
); guessValueType, splitIntoColumns, ignoreQuotes
);
} catch (IOException e) {
throw new ImportException("Import failed",e);
}
} }
/** /**
@ -170,18 +176,22 @@ public class TsvCsvImporter implements Importer {
return cells; return cells;
} }
public void read(InputStream inputStream, Project project, Properties options) throws Exception { @Override
public void read(InputStream inputStream, Project project,
Properties options) throws ImportException {
read(new InputStreamReader(inputStream), project, options); read(new InputStreamReader(inputStream), project, options);
} }
public boolean takesReader() { @Override
return true;
}
public boolean canImportData(String contentType, String fileName) { public boolean canImportData(String contentType, String fileName) {
if (contentType != null) { if (contentType != null) {
contentType = contentType.toLowerCase().trim(); contentType = contentType.toLowerCase().trim();
return false; return
"text/plain".equals(contentType) ||
"text/csv".equals(contentType) ||
"text/x-csv".equals(contentType) ||
"text/tab-separated-value".equals(contentType);
} else if (fileName != null) { } else if (fileName != null) {
fileName = fileName.toLowerCase(); fileName = fileName.toLowerCase();
if (fileName.endsWith(".tsv")) { if (fileName.endsWith(".tsv")) {

View File

@ -0,0 +1,14 @@
package com.google.gridworks.importers;
import java.net.URL;
import java.util.Properties;
import com.google.gridworks.model.Project;
/**
 * An {@link Importer} that is handed a {@link URL} directly instead of an
 * already-opened stream or reader.
 */
public interface UrlImporter extends Importer {

    /**
     * Imports the data identified by {@code url} into {@code project}.
     *
     * @param url     location of the data to import
     * @param project project to import the data into
     * @param options properties carrying the import options
     * @throws Exception if the import fails
     */
    // NOTE(review): declares the broad Exception type; confirm whether
    // implementers could be narrowed to a more specific checked exception.
    void read(URL url, Project project, Properties options) throws Exception;

    /**
     * Reports whether this importer is able to handle the given URL.
     *
     * @param url location of the candidate data
     * @return {@code true} if this importer can import from {@code url}
     */
    boolean canImportData(URL url);
}

View File

@ -1,9 +1,9 @@
package com.google.gridworks.importers; package com.google.gridworks.importers;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream; import java.io.PushbackInputStream;
import java.io.Reader;
import java.util.Properties; import java.util.Properties;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -12,27 +12,18 @@ import org.slf4j.LoggerFactory;
import com.google.gridworks.importers.XmlImportUtilities.ImportColumnGroup; import com.google.gridworks.importers.XmlImportUtilities.ImportColumnGroup;
import com.google.gridworks.model.Project; import com.google.gridworks.model.Project;
public class XmlImporter implements Importer { public class XmlImporter implements StreamImporter {
final static Logger logger = LoggerFactory.getLogger("XmlImporter"); final static Logger logger = LoggerFactory.getLogger("XmlImporter");
public static final int BUFFER_SIZE = 64 * 1024; public static final int BUFFER_SIZE = 64 * 1024;
public boolean takesReader() { @Override
return false;
}
public void read(Reader reader, Project project, Properties options)
throws Exception {
throw new UnsupportedOperationException();
}
public void read( public void read(
InputStream inputStream, InputStream inputStream,
Project project, Project project,
Properties options Properties options
) throws Exception { ) throws ImportException {
logger.trace("XmlImporter.read"); logger.trace("XmlImporter.read");
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE); PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
@ -40,12 +31,16 @@ public class XmlImporter implements Importer {
{ {
byte[] buffer = new byte[BUFFER_SIZE]; byte[] buffer = new byte[BUFFER_SIZE];
int bytes_read = 0; int bytes_read = 0;
while (bytes_read < BUFFER_SIZE) { try {
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read); while (bytes_read < BUFFER_SIZE) {
if (c == -1) break; int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
bytes_read +=c ; if (c == -1) break;
bytes_read +=c ;
}
pis.unread(buffer, 0, bytes_read);
} catch (IOException e) {
throw new ImportException("Read error",e);
} }
pis.unread(buffer, 0, bytes_read);
if (options.containsKey("importer-record-tag")) { if (options.containsKey("importer-record-tag")) {
recordPath = XmlImportUtilities.detectPathFromTag( recordPath = XmlImportUtilities.detectPathFromTag(
@ -68,6 +63,7 @@ public class XmlImporter implements Importer {
project.columnModel.update(); project.columnModel.update();
} }
@Override
public boolean canImportData(String contentType, String fileName) { public boolean canImportData(String contentType, String fileName) {
if (contentType != null) { if (contentType != null) {
contentType = contentType.toLowerCase().trim(); contentType = contentType.toLowerCase().trim();