- Issue 112: Refactor Importer API (patch from tfmorris)
- Added support for storing custom metadata in ProjectMetadata.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1138 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
00c6865d95
commit
f411dc9104
@ -1,6 +1,10 @@
|
|||||||
package com.google.gridworks;
|
package com.google.gridworks;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
@ -23,7 +27,8 @@ public class ProjectMetadata implements Jsonizable {
|
|||||||
private String _encoding;
|
private String _encoding;
|
||||||
private int _encodingConfidence;
|
private int _encodingConfidence;
|
||||||
|
|
||||||
private PreferenceStore _preferenceStore = new PreferenceStore();
|
private Map<String, Serializable> _customMetadata = new HashMap<String, Serializable>();
|
||||||
|
private PreferenceStore _preferenceStore = new PreferenceStore();
|
||||||
|
|
||||||
final Logger logger = LoggerFactory.getLogger("project_metadata");
|
final Logger logger = LoggerFactory.getLogger("project_metadata");
|
||||||
|
|
||||||
@ -51,13 +56,20 @@ public class ProjectMetadata implements Jsonizable {
|
|||||||
|
|
||||||
writer.key("encoding"); writer.value(_encoding);
|
writer.key("encoding"); writer.value(_encoding);
|
||||||
writer.key("encodingConfidence"); writer.value(_encodingConfidence);
|
writer.key("encodingConfidence"); writer.value(_encodingConfidence);
|
||||||
|
|
||||||
|
writer.key("customMetadata"); writer.object();
|
||||||
|
for (String key : _customMetadata.keySet()) {
|
||||||
|
Serializable value = _customMetadata.get(key);
|
||||||
|
writer.key(key);
|
||||||
|
writer.value(value);
|
||||||
|
}
|
||||||
|
writer.endObject();
|
||||||
|
|
||||||
writer.key("preferences"); _preferenceStore.write(writer, options);
|
writer.key("preferences"); _preferenceStore.write(writer, options);
|
||||||
}
|
}
|
||||||
writer.endObject();
|
writer.endObject();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public void write(JSONWriter jsonWriter) throws Exception {
|
public void write(JSONWriter jsonWriter) throws Exception {
|
||||||
Properties options = new Properties();
|
Properties options = new Properties();
|
||||||
options.setProperty("mode", "save");
|
options.setProperty("mode", "save");
|
||||||
@ -92,6 +104,24 @@ public class ProjectMetadata implements Jsonizable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (obj.has("customMetadata") && !obj.isNull("customMetadata")) {
|
||||||
|
try {
|
||||||
|
JSONObject obj2 = obj.getJSONObject("customMetadata");
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Iterator<String> keys = obj2.keys();
|
||||||
|
while (keys.hasNext()) {
|
||||||
|
String key = keys.next();
|
||||||
|
Object value = obj2.get(key);
|
||||||
|
if (value != null && value instanceof Serializable) {
|
||||||
|
pm._customMetadata.put(key, (Serializable) value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return pm;
|
return pm;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -153,4 +183,16 @@ public class ProjectMetadata implements Jsonizable {
|
|||||||
public PreferenceStore getPreferenceStore() {
|
public PreferenceStore getPreferenceStore() {
|
||||||
return _preferenceStore;
|
return _preferenceStore;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Serializable getCustomMetadata(String key) {
|
||||||
|
return _customMetadata.get(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCustomMetadata(String key, Serializable value) {
|
||||||
|
if (value == null) {
|
||||||
|
_customMetadata.remove(key);
|
||||||
|
} else {
|
||||||
|
_customMetadata.put(key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,7 +43,10 @@ import com.google.gridworks.ProjectManager;
|
|||||||
import com.google.gridworks.ProjectMetadata;
|
import com.google.gridworks.ProjectMetadata;
|
||||||
import com.google.gridworks.commands.Command;
|
import com.google.gridworks.commands.Command;
|
||||||
import com.google.gridworks.importers.Importer;
|
import com.google.gridworks.importers.Importer;
|
||||||
|
import com.google.gridworks.importers.ReaderImporter;
|
||||||
|
import com.google.gridworks.importers.StreamImporter;
|
||||||
import com.google.gridworks.importers.TsvCsvImporter;
|
import com.google.gridworks.importers.TsvCsvImporter;
|
||||||
|
import com.google.gridworks.importers.UrlImporter;
|
||||||
import com.google.gridworks.model.Project;
|
import com.google.gridworks.model.Project;
|
||||||
import com.google.gridworks.util.IOUtils;
|
import com.google.gridworks.util.IOUtils;
|
||||||
import com.google.gridworks.util.ParsingUtilities;
|
import com.google.gridworks.util.ParsingUtilities;
|
||||||
@ -400,39 +403,42 @@ public class CreateProjectCommand extends Command {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void internalImportURL(
|
protected void internalImportURL(HttpServletRequest request,
|
||||||
HttpServletRequest request,
|
Project project, Properties options, String urlString)
|
||||||
Project project,
|
throws Exception {
|
||||||
Properties options,
|
|
||||||
String urlString
|
|
||||||
) throws Exception {
|
|
||||||
URL url = new URL(urlString);
|
URL url = new URL(urlString);
|
||||||
URLConnection connection = null;
|
URLConnection connection = null;
|
||||||
|
|
||||||
try {
|
// Try for a URL importer first
|
||||||
connection = url.openConnection();
|
Importer importer = guessUrlImporter(url);
|
||||||
connection.setConnectTimeout(5000);
|
if (importer instanceof UrlImporter) {
|
||||||
connection.connect();
|
((UrlImporter) importer).read(url, project, options);
|
||||||
} catch (Exception e) {
|
return;
|
||||||
throw new Exception("Cannot connect to " + urlString, e);
|
} else {
|
||||||
}
|
// If we couldn't find one, try opening URL and treating as a stream
|
||||||
|
try {
|
||||||
|
connection = url.openConnection();
|
||||||
|
connection.setConnectTimeout(5000);
|
||||||
|
connection.connect();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new Exception("Cannot connect to " + urlString, e);
|
||||||
|
}
|
||||||
|
|
||||||
InputStream inputStream = null;
|
InputStream inputStream = null;
|
||||||
try {
|
try {
|
||||||
inputStream = connection.getInputStream();
|
inputStream = connection.getInputStream();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new Exception("Cannot retrieve content from " + url, e);
|
throw new Exception("Cannot retrieve content from " + url, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Importer importer = guessImporter(
|
importer = guessImporter(connection.getContentType(),
|
||||||
connection.getContentType(),
|
url.getPath());
|
||||||
url.getPath()
|
internalInvokeImporter(project, importer, options, inputStream,
|
||||||
);
|
connection.getContentEncoding());
|
||||||
|
} finally {
|
||||||
internalInvokeImporter(project, importer, options, inputStream, connection.getContentEncoding());
|
inputStream.close();
|
||||||
} finally {
|
}
|
||||||
inputStream.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -443,7 +449,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
InputStream rawInputStream,
|
InputStream rawInputStream,
|
||||||
String encoding
|
String encoding
|
||||||
) throws Exception {
|
) throws Exception {
|
||||||
if (importer.takesReader()) {
|
if (importer instanceof ReaderImporter) {
|
||||||
|
|
||||||
BufferedInputStream inputStream = new BufferedInputStream(rawInputStream);
|
BufferedInputStream inputStream = new BufferedInputStream(rawInputStream);
|
||||||
|
|
||||||
@ -482,28 +488,45 @@ public class CreateProjectCommand extends Command {
|
|||||||
new InputStreamReader(inputStream);
|
new InputStreamReader(inputStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
importer.read(reader, project, options);
|
((ReaderImporter) importer).read(reader, project, options);
|
||||||
} else {
|
} else {
|
||||||
importer.read(rawInputStream, project, options);
|
((StreamImporter) importer).read(rawInputStream, project, options);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void internalInvokeImporter(
|
protected void internalInvokeImporter(
|
||||||
Project project,
|
Project project,
|
||||||
Importer importer,
|
ReaderImporter importer,
|
||||||
Properties options,
|
Properties options,
|
||||||
Reader reader
|
Reader reader
|
||||||
) throws Exception {
|
) throws Exception {
|
||||||
importer.read(reader, project, options);
|
importer.read(reader, project, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Importer guessImporter(String contentType, String fileName) {
|
protected Importer guessImporter(String contentType, String fileName, boolean provideDefault) {
|
||||||
for(Importer i : importers.values()){
|
for (Importer i : importers.values()){
|
||||||
if(i.canImportData(contentType, fileName)){
|
if(i.canImportData(contentType, fileName)){
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (provideDefault) {
|
||||||
|
return new TsvCsvImporter(); // default
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return new TsvCsvImporter(); //default
|
protected Importer guessImporter(String contentType, String filename) {
|
||||||
|
return guessImporter(contentType, filename, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Importer guessUrlImporter(URL url) {
|
||||||
|
for (Importer importer : importers.values()){
|
||||||
|
if (importer instanceof UrlImporter
|
||||||
|
&& ((UrlImporter) importer).canImportData(url)) {
|
||||||
|
return importer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,6 @@ package com.google.gridworks.importers;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.Reader;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -28,18 +27,11 @@ import com.google.gridworks.model.ReconCandidate;
|
|||||||
import com.google.gridworks.model.Row;
|
import com.google.gridworks.model.Row;
|
||||||
import com.google.gridworks.model.Recon.Judgment;
|
import com.google.gridworks.model.Recon.Judgment;
|
||||||
|
|
||||||
public class ExcelImporter implements Importer {
|
public class ExcelImporter implements StreamImporter {
|
||||||
protected boolean _xmlBased;
|
protected boolean _xmlBased;
|
||||||
|
|
||||||
public boolean takesReader() {
|
@Override
|
||||||
return false;
|
public void read(InputStream inputStream, Project project, Properties options) throws ImportException {
|
||||||
}
|
|
||||||
|
|
||||||
public void read(Reader reader, Project project, Properties options) throws Exception {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
|
|
||||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
||||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
||||||
int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
|
int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
|
||||||
@ -51,7 +43,7 @@ public class ExcelImporter implements Importer {
|
|||||||
new XSSFWorkbook(inputStream) :
|
new XSSFWorkbook(inputStream) :
|
||||||
new HSSFWorkbook(new POIFSFileSystem(inputStream));
|
new HSSFWorkbook(new POIFSFileSystem(inputStream));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new Exception(
|
throw new ImportException(
|
||||||
"Attempted to parse file as Excel file but failed. " +
|
"Attempted to parse file as Excel file but failed. " +
|
||||||
"Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.",
|
"Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.",
|
||||||
e
|
e
|
||||||
@ -94,8 +86,9 @@ public class ExcelImporter implements Importer {
|
|||||||
for (int c = firstCell; c <= lastCell; c++) {
|
for (int c = firstCell; c <= lastCell; c++) {
|
||||||
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
||||||
if (cell != null) {
|
if (cell != null) {
|
||||||
String text = cell.getStringCellValue().trim();
|
Serializable value = extractCell(cell);
|
||||||
if (text.length() > 0) {
|
String text = value != null ? value.toString() : null;
|
||||||
|
if (text != null && text.length() > 0) {
|
||||||
while (columnNames.size() < c + 1) {
|
while (columnNames.size() < c + 1) {
|
||||||
columnNames.add(null);
|
columnNames.add(null);
|
||||||
}
|
}
|
||||||
@ -194,7 +187,7 @@ public class ExcelImporter implements Importer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
|
protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) {
|
||||||
int cellType = cell.getCellType();
|
int cellType = cell.getCellType();
|
||||||
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR ||
|
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR ||
|
||||||
cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) {
|
cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) {
|
||||||
@ -222,6 +215,12 @@ public class ExcelImporter implements Importer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
|
||||||
|
Serializable value = extractCell(cell);
|
||||||
|
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
Recon recon = null;
|
Recon recon = null;
|
||||||
|
|
||||||
@ -273,6 +272,7 @@ public class ExcelImporter implements Importer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
public boolean canImportData(String contentType, String fileName) {
|
||||||
if (contentType != null) {
|
if (contentType != null) {
|
||||||
contentType = contentType.toLowerCase().trim();
|
contentType = contentType.toLowerCase().trim();
|
||||||
|
15
main/src/com/google/gridworks/importers/ImportException.java
Normal file
15
main/src/com/google/gridworks/importers/ImportException.java
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
package com.google.gridworks.importers;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Exception thrown by importers. Typically contains a nested exception
|
||||||
|
* indicating the underlying cause of the problem.
|
||||||
|
*/
|
||||||
|
public class ImportException extends Exception {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 7077314805989174181L;
|
||||||
|
|
||||||
|
public ImportException(String message, Throwable cause) {
|
||||||
|
super(message, cause);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,16 +1,14 @@
|
|||||||
package com.google.gridworks.importers;
|
package com.google.gridworks.importers;
|
||||||
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import com.google.gridworks.model.Project;
|
|
||||||
|
|
||||||
public interface Importer {
|
public interface Importer {
|
||||||
public boolean takesReader();
|
|
||||||
|
|
||||||
public void read(Reader reader, Project project, Properties options) throws Exception;
|
|
||||||
public void read(InputStream inputStream, Project project, Properties options) throws Exception;
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determine whether this importer can handle the given content type and file name.
|
||||||
|
*
|
||||||
|
* @param contentType
|
||||||
|
* @param fileName
|
||||||
|
* @return true if the importer can handle this
|
||||||
|
*/
|
||||||
public boolean canImportData(String contentType, String fileName);
|
public boolean canImportData(String contentType, String fileName);
|
||||||
}
|
}
|
||||||
|
@ -2,10 +2,11 @@ package com.google.gridworks.importers;
|
|||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
import org.marc4j.MarcPermissiveStreamReader;
|
import org.marc4j.MarcPermissiveStreamReader;
|
||||||
@ -15,27 +16,23 @@ import org.marc4j.marc.Record;
|
|||||||
|
|
||||||
import com.google.gridworks.model.Project;
|
import com.google.gridworks.model.Project;
|
||||||
|
|
||||||
public class MarcImporter implements Importer {
|
public class MarcImporter implements StreamImporter {
|
||||||
|
|
||||||
public boolean takesReader() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void read(Reader reader, Project project, Properties options)
|
|
||||||
throws Exception {
|
|
||||||
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
@Override
|
||||||
public void read(
|
public void read(
|
||||||
InputStream inputStream,
|
InputStream inputStream,
|
||||||
Project project,
|
Project project,
|
||||||
Properties options
|
Properties options
|
||||||
) throws Exception {
|
) throws ImportException {
|
||||||
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
||||||
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
||||||
|
|
||||||
File tempFile = File.createTempFile("gridworks-import-", ".marc.xml");
|
File tempFile;
|
||||||
|
try {
|
||||||
|
tempFile = File.createTempFile("gridworks-import-", ".marc.xml");
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ImportException("Unexpected error creating temp file",e);
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
OutputStream os = new FileOutputStream(tempFile);
|
OutputStream os = new FileOutputStream(tempFile);
|
||||||
try {
|
try {
|
||||||
@ -62,20 +59,31 @@ public class MarcImporter implements Importer {
|
|||||||
}
|
}
|
||||||
writer.close();
|
writer.close();
|
||||||
} finally {
|
} finally {
|
||||||
os.close();
|
try {
|
||||||
|
os.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// Just ignore - not much we can do anyway
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
InputStream is = new FileInputStream(tempFile);
|
InputStream is = new FileInputStream(tempFile);
|
||||||
try {
|
try {
|
||||||
new XmlImporter().read(is, project, options);
|
new XmlImporter().read(is, project, options);
|
||||||
} finally {
|
} finally {
|
||||||
is.close();
|
try {
|
||||||
|
is.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// Just ignore - not much we can do anyway
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new ImportException("Input file not found", e);
|
||||||
} finally {
|
} finally {
|
||||||
tempFile.delete();
|
tempFile.delete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
public boolean canImportData(String contentType, String fileName) {
|
||||||
if (contentType != null) {
|
if (contentType != null) {
|
||||||
contentType = contentType.toLowerCase().trim();
|
contentType = contentType.toLowerCase().trim();
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
package com.google.gridworks.importers;
|
package com.google.gridworks.importers;
|
||||||
|
|
||||||
import java.io.InputStream;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -14,6 +14,8 @@ import org.jrdf.SortedMemoryJRDFFactory;
|
|||||||
import org.jrdf.collection.MemMapFactory;
|
import org.jrdf.collection.MemMapFactory;
|
||||||
import org.jrdf.graph.Graph;
|
import org.jrdf.graph.Graph;
|
||||||
import org.jrdf.graph.Triple;
|
import org.jrdf.graph.Triple;
|
||||||
|
import org.jrdf.parser.ParseException;
|
||||||
|
import org.jrdf.parser.StatementHandlerException;
|
||||||
import org.jrdf.parser.line.GraphLineParser;
|
import org.jrdf.parser.line.GraphLineParser;
|
||||||
import org.jrdf.parser.line.LineHandler;
|
import org.jrdf.parser.line.LineHandler;
|
||||||
import org.jrdf.parser.ntriples.NTriplesParserFactory;
|
import org.jrdf.parser.ntriples.NTriplesParserFactory;
|
||||||
@ -29,25 +31,33 @@ import com.google.gridworks.model.ModelException;
|
|||||||
import com.google.gridworks.model.Project;
|
import com.google.gridworks.model.Project;
|
||||||
import com.google.gridworks.model.Row;
|
import com.google.gridworks.model.Row;
|
||||||
|
|
||||||
public class RdfTripleImporter implements Importer{
|
public class RdfTripleImporter implements ReaderImporter{
|
||||||
JRDFFactory JrdfFactory;
|
private JRDFFactory _jrdfFactory;
|
||||||
NTriplesParserFactory nTriplesParserFactory;
|
private NTriplesParserFactory _nTriplesParserFactory;
|
||||||
MemMapFactory newMapFactory;
|
private MemMapFactory _newMapFactory;
|
||||||
|
|
||||||
public RdfTripleImporter(){
|
public RdfTripleImporter(){
|
||||||
JrdfFactory = SortedMemoryJRDFFactory.getFactory();
|
_jrdfFactory = SortedMemoryJRDFFactory.getFactory();
|
||||||
nTriplesParserFactory = new NTriplesParserFactory();
|
_nTriplesParserFactory = new NTriplesParserFactory();
|
||||||
newMapFactory = new MemMapFactory();
|
_newMapFactory = new MemMapFactory();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void read(Reader reader, Project project, Properties options) throws Exception {
|
public void read(Reader reader, Project project, Properties options) throws ImportException {
|
||||||
String baseUrl = options.getProperty("base-url");
|
String baseUrl = options.getProperty("base-url");
|
||||||
|
|
||||||
Graph graph = JrdfFactory.getNewGraph();
|
Graph graph = _jrdfFactory.getNewGraph();
|
||||||
LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory);
|
LineHandler lineHandler = _nTriplesParserFactory.createParser(graph, _newMapFactory);
|
||||||
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
||||||
parser.parse(reader, baseUrl); // fills JRDF graph
|
try {
|
||||||
|
parser.parse(reader, baseUrl); // fills JRDF graph
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ImportException("i/o error while parsing RDF",e);
|
||||||
|
} catch (ParseException e) {
|
||||||
|
throw new ImportException("error parsing RDF",e);
|
||||||
|
} catch (StatementHandlerException e) {
|
||||||
|
throw new ImportException("error parsing RDF",e);
|
||||||
|
}
|
||||||
|
|
||||||
Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>();
|
Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>();
|
||||||
|
|
||||||
@ -64,62 +74,53 @@ public class RdfTripleImporter implements Importer{
|
|||||||
|
|
||||||
Column column = project.columnModel.getColumnByName(predicate);
|
Column column = project.columnModel.getColumnByName(predicate);
|
||||||
if (column == null) {
|
if (column == null) {
|
||||||
column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
|
column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
|
||||||
try {
|
try {
|
||||||
project.columnModel.addColumn(-1, column, true);
|
project.columnModel.addColumn(-1, column, true);
|
||||||
} catch (ModelException e) {
|
} catch (ModelException e) {
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int cellIndex = column.getCellIndex();
|
int cellIndex = column.getCellIndex();
|
||||||
if (subjectToRows.containsKey(subject)) {
|
if (subjectToRows.containsKey(subject)) {
|
||||||
List<Row> rows = subjectToRows.get(subject);
|
List<Row> rows = subjectToRows.get(subject);
|
||||||
for (Row row : rows) {
|
for (Row row : rows) {
|
||||||
if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) {
|
if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) {
|
||||||
row.setCell(cellIndex, new Cell(object, null));
|
row.setCell(cellIndex, new Cell(object, null));
|
||||||
object = null;
|
object = null;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (object != null) {
|
if (object != null) {
|
||||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||||
rows.add(row);
|
rows.add(row);
|
||||||
|
|
||||||
row.setCell(cellIndex, new Cell(object, null));
|
row.setCell(cellIndex, new Cell(object, null));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
List<Row> rows = new ArrayList<Row>();
|
List<Row> rows = new ArrayList<Row>();
|
||||||
subjectToRows.put(subject, rows);
|
subjectToRows.put(subject, rows);
|
||||||
|
|
||||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||||
rows.add(row);
|
rows.add(row);
|
||||||
|
|
||||||
row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
|
row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
|
||||||
row.setCell(cellIndex, new Cell(object, null));
|
row.setCell(cellIndex, new Cell(object, null));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Entry<String, List<Row>> entry : subjectToRows.entrySet()) {
|
for (Entry<String, List<Row>> entry : subjectToRows.entrySet()) {
|
||||||
project.rows.addAll(entry.getValue());
|
project.rows.addAll(entry.getValue());
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
triples.iterator().close();
|
triples.iterator().close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
|
|
||||||
// TODO
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean takesReader() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
public boolean canImportData(String contentType, String fileName) {
|
||||||
if (contentType != null) {
|
if (contentType != null) {
|
||||||
contentType = contentType.toLowerCase().trim();
|
contentType = contentType.toLowerCase().trim();
|
||||||
|
27
main/src/com/google/gridworks/importers/ReaderImporter.java
Normal file
27
main/src/com/google/gridworks/importers/ReaderImporter.java
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
package com.google.gridworks.importers;
|
||||||
|
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import com.google.gridworks.model.Project;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface for importers which take a Reader as input.
|
||||||
|
*/
|
||||||
|
public interface ReaderImporter extends Importer {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read data from an input reader into the project.
|
||||||
|
*
|
||||||
|
* @param reader
|
||||||
|
* reader to import data from. It is assumed to be positioned at
|
||||||
|
* the correct point and ready to go.
|
||||||
|
* @param project
|
||||||
|
* project which will contain data
|
||||||
|
* @param options
|
||||||
|
* set of properties with import options
|
||||||
|
* @throws ImportException
|
||||||
|
*/
|
||||||
|
public void read(Reader reader, Project project, Properties options)
|
||||||
|
throws ImportException;
|
||||||
|
}
|
19
main/src/com/google/gridworks/importers/StreamImporter.java
Normal file
19
main/src/com/google/gridworks/importers/StreamImporter.java
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
package com.google.gridworks.importers;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import com.google.gridworks.model.Project;
|
||||||
|
|
||||||
|
public interface StreamImporter extends Importer {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param inputStream stream to be imported
|
||||||
|
* @param project project to import stream into
|
||||||
|
* @param options
|
||||||
|
* @throws ImportException
|
||||||
|
*/
|
||||||
|
public void read(InputStream inputStream, Project project,
|
||||||
|
Properties options) throws ImportException;
|
||||||
|
|
||||||
|
}
|
@ -19,8 +19,10 @@ import com.google.gridworks.model.Cell;
|
|||||||
import com.google.gridworks.model.Project;
|
import com.google.gridworks.model.Project;
|
||||||
import com.google.gridworks.model.Row;
|
import com.google.gridworks.model.Row;
|
||||||
|
|
||||||
public class TsvCsvImporter implements Importer {
|
public class TsvCsvImporter implements ReaderImporter,StreamImporter {
|
||||||
public void read(Reader reader, Project project, Properties options) throws Exception {
|
|
||||||
|
@Override
|
||||||
|
public void read(Reader reader, Project project, Properties options) throws ImportException {
|
||||||
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
|
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
|
||||||
|
|
||||||
String sep = options.getProperty("separator"); // auto-detect if not present
|
String sep = options.getProperty("separator"); // auto-detect if not present
|
||||||
@ -34,10 +36,14 @@ public class TsvCsvImporter implements Importer {
|
|||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(reader);
|
LineNumberReader lnReader = new LineNumberReader(reader);
|
||||||
|
|
||||||
read(lnReader, project, sep,
|
try {
|
||||||
limit, skip, ignoreLines, headerLines,
|
read(lnReader, project, sep,
|
||||||
guessValueType, splitIntoColumns, ignoreQuotes
|
limit, skip, ignoreLines, headerLines,
|
||||||
);
|
guessValueType, splitIntoColumns, ignoreQuotes
|
||||||
|
);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ImportException("Import failed",e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -170,18 +176,22 @@ public class TsvCsvImporter implements Importer {
|
|||||||
return cells;
|
return cells;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
|
@Override
|
||||||
|
public void read(InputStream inputStream, Project project,
|
||||||
|
Properties options) throws ImportException {
|
||||||
read(new InputStreamReader(inputStream), project, options);
|
read(new InputStreamReader(inputStream), project, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean takesReader() {
|
@Override
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
public boolean canImportData(String contentType, String fileName) {
|
||||||
if (contentType != null) {
|
if (contentType != null) {
|
||||||
contentType = contentType.toLowerCase().trim();
|
contentType = contentType.toLowerCase().trim();
|
||||||
return false;
|
return
|
||||||
|
"text/plain".equals(contentType) ||
|
||||||
|
"text/csv".equals(contentType) ||
|
||||||
|
"text/x-csv".equals(contentType) ||
|
||||||
|
"text/tab-separated-value".equals(contentType);
|
||||||
|
|
||||||
} else if (fileName != null) {
|
} else if (fileName != null) {
|
||||||
fileName = fileName.toLowerCase();
|
fileName = fileName.toLowerCase();
|
||||||
if (fileName.endsWith(".tsv")) {
|
if (fileName.endsWith(".tsv")) {
|
||||||
|
14
main/src/com/google/gridworks/importers/UrlImporter.java
Normal file
14
main/src/com/google/gridworks/importers/UrlImporter.java
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
package com.google.gridworks.importers;
|
||||||
|
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import com.google.gridworks.model.Project;
|
||||||
|
|
||||||
|
public interface UrlImporter extends Importer {
|
||||||
|
|
||||||
|
public void read(URL url, Project project, Properties options) throws Exception;
|
||||||
|
|
||||||
|
public boolean canImportData(URL url);
|
||||||
|
|
||||||
|
}
|
@ -1,9 +1,9 @@
|
|||||||
package com.google.gridworks.importers;
|
package com.google.gridworks.importers;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.PushbackInputStream;
|
import java.io.PushbackInputStream;
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -12,27 +12,18 @@ import org.slf4j.LoggerFactory;
|
|||||||
import com.google.gridworks.importers.XmlImportUtilities.ImportColumnGroup;
|
import com.google.gridworks.importers.XmlImportUtilities.ImportColumnGroup;
|
||||||
import com.google.gridworks.model.Project;
|
import com.google.gridworks.model.Project;
|
||||||
|
|
||||||
public class XmlImporter implements Importer {
|
public class XmlImporter implements StreamImporter {
|
||||||
|
|
||||||
final static Logger logger = LoggerFactory.getLogger("XmlImporter");
|
final static Logger logger = LoggerFactory.getLogger("XmlImporter");
|
||||||
|
|
||||||
public static final int BUFFER_SIZE = 64 * 1024;
|
public static final int BUFFER_SIZE = 64 * 1024;
|
||||||
|
|
||||||
public boolean takesReader() {
|
@Override
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void read(Reader reader, Project project, Properties options)
|
|
||||||
throws Exception {
|
|
||||||
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void read(
|
public void read(
|
||||||
InputStream inputStream,
|
InputStream inputStream,
|
||||||
Project project,
|
Project project,
|
||||||
Properties options
|
Properties options
|
||||||
) throws Exception {
|
) throws ImportException {
|
||||||
logger.trace("XmlImporter.read");
|
logger.trace("XmlImporter.read");
|
||||||
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
||||||
|
|
||||||
@ -40,12 +31,16 @@ public class XmlImporter implements Importer {
|
|||||||
{
|
{
|
||||||
byte[] buffer = new byte[BUFFER_SIZE];
|
byte[] buffer = new byte[BUFFER_SIZE];
|
||||||
int bytes_read = 0;
|
int bytes_read = 0;
|
||||||
while (bytes_read < BUFFER_SIZE) {
|
try {
|
||||||
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
|
while (bytes_read < BUFFER_SIZE) {
|
||||||
if (c == -1) break;
|
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
|
||||||
bytes_read +=c ;
|
if (c == -1) break;
|
||||||
|
bytes_read +=c ;
|
||||||
|
}
|
||||||
|
pis.unread(buffer, 0, bytes_read);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ImportException("Read error",e);
|
||||||
}
|
}
|
||||||
pis.unread(buffer, 0, bytes_read);
|
|
||||||
|
|
||||||
if (options.containsKey("importer-record-tag")) {
|
if (options.containsKey("importer-record-tag")) {
|
||||||
recordPath = XmlImportUtilities.detectPathFromTag(
|
recordPath = XmlImportUtilities.detectPathFromTag(
|
||||||
@ -68,6 +63,7 @@ public class XmlImporter implements Importer {
|
|||||||
project.columnModel.update();
|
project.columnModel.update();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
public boolean canImportData(String contentType, String fileName) {
|
||||||
if (contentType != null) {
|
if (contentType != null) {
|
||||||
contentType = contentType.toLowerCase().trim();
|
contentType = contentType.toLowerCase().trim();
|
||||||
|
Loading…
Reference in New Issue
Block a user