Refactor of CreateProjectCommand.java and Importers
The logic that decides whether an importer can handle a given file now lives in each importer rather than in CreateProjectCommand. A new method, canImportData, has been added to the Importer interface to support this. CreateProjectCommand registers importers by class name into a static map (this is a copy of Tom's code for registering commands in Gridworks Servlet), so plugging in new importers should be simpler. git-svn-id: http://google-refine.googlecode.com/svn/trunk@861 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
017a825600
commit
1c47ff476b
@ -45,12 +45,8 @@ import com.metaweb.gridworks.Gridworks;
|
|||||||
import com.metaweb.gridworks.ProjectManager;
|
import com.metaweb.gridworks.ProjectManager;
|
||||||
import com.metaweb.gridworks.ProjectMetadata;
|
import com.metaweb.gridworks.ProjectMetadata;
|
||||||
import com.metaweb.gridworks.commands.Command;
|
import com.metaweb.gridworks.commands.Command;
|
||||||
import com.metaweb.gridworks.importers.ExcelImporter;
|
|
||||||
import com.metaweb.gridworks.importers.Importer;
|
import com.metaweb.gridworks.importers.Importer;
|
||||||
import com.metaweb.gridworks.importers.MarcImporter;
|
|
||||||
import com.metaweb.gridworks.importers.RdfTripleImporter;
|
|
||||||
import com.metaweb.gridworks.importers.TsvCsvImporter;
|
import com.metaweb.gridworks.importers.TsvCsvImporter;
|
||||||
import com.metaweb.gridworks.importers.XmlImporter;
|
|
||||||
import com.metaweb.gridworks.model.Project;
|
import com.metaweb.gridworks.model.Project;
|
||||||
import com.metaweb.gridworks.util.IOUtils;
|
import com.metaweb.gridworks.util.IOUtils;
|
||||||
import com.metaweb.gridworks.util.ParsingUtilities;
|
import com.metaweb.gridworks.util.ParsingUtilities;
|
||||||
@ -58,11 +54,76 @@ import com.metaweb.gridworks.util.ParsingUtilities;
|
|||||||
public class CreateProjectCommand extends Command {
|
public class CreateProjectCommand extends Command {
|
||||||
|
|
||||||
final static Logger logger = LoggerFactory.getLogger("create-project_command");
|
final static Logger logger = LoggerFactory.getLogger("create-project_command");
|
||||||
|
|
||||||
|
/**
 * Registered importers, keyed by importer name.
 *
 * An insertion-ordered map is used on purpose: guessImporter walks
 * importers.values() and returns the first importer whose canImportData
 * accepts the input, so the lookup order should follow registration order
 * instead of depending on how the names happen to hash.
 */
static final private Map<String, Importer> importers = new java.util.LinkedHashMap<String, Importer>();

/**
 * Built-in importers as {importer name, fully-qualified class name} pairs.
 * Each class is instantiated reflectively by registerImporters.
 */
private static final String[][] importerNames = {
    {"ExcelImporter", "com.metaweb.gridworks.importers.ExcelImporter"},
    {"XmlImporter", "com.metaweb.gridworks.importers.XmlImporter"},
    {"RdfTripleImporter", "com.metaweb.gridworks.importers.RdfTripleImporter"},
    {"MarcImporter", "com.metaweb.gridworks.importers.MarcImporter"},
    {"TsvCsvImporter", "com.metaweb.gridworks.importers.TsvCsvImporter"},
};

// Register the built-in importers once, when this class is initialized.
// The map above must be declared first so it exists when this block runs.
static {
    registerImporters(importerNames);
}
|
||||||
|
|
||||||
|
static public boolean registerImporters(String[][] importers) {
|
||||||
|
boolean status = true;
|
||||||
|
for (String[] importer : importerNames) {
|
||||||
|
String importerName = importer[0];
|
||||||
|
String className = importer[1];
|
||||||
|
logger.debug("Loading command " + importerName + " class: " + className);
|
||||||
|
Importer cmd;
|
||||||
|
try {
|
||||||
|
// TODO: May need to use the servlet container's class loader here
|
||||||
|
cmd = (Importer) Class.forName(className).newInstance();
|
||||||
|
} catch (InstantiationException e) {
|
||||||
|
logger.error("Failed to load importer class " + className, e);
|
||||||
|
status = false;
|
||||||
|
continue;
|
||||||
|
} catch (IllegalAccessException e) {
|
||||||
|
logger.error("Failed to load importer class " + className, e);
|
||||||
|
status = false;
|
||||||
|
continue;
|
||||||
|
} catch (ClassNotFoundException e) {
|
||||||
|
logger.error("Failed to load importer class " + className, e);
|
||||||
|
status = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
status |= registerImporter(importerName, cmd);
|
||||||
|
}
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register a single importer.
|
||||||
|
*
|
||||||
|
* @param name
|
||||||
|
* importer verb for importer
|
||||||
|
* @param commandObject
|
||||||
|
* object implementing the importer
|
||||||
|
* @return true if importer was loaded and registered successfully
|
||||||
|
*/
|
||||||
|
static public boolean registerImporter(String name,
|
||||||
|
Importer importerObject) {
|
||||||
|
if (importers.containsKey(name)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
importers.put(name, importerObject);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Currently only for test purposes
|
||||||
|
static protected boolean unregisterImporter(String verb) {
|
||||||
|
return importers.remove(verb) != null;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
throws ServletException, IOException {
|
throws ServletException, IOException {
|
||||||
|
|
||||||
ProjectManager.singleton.setBusy(true);
|
ProjectManager.singleton.setBusy(true);
|
||||||
try {
|
try {
|
||||||
/*
|
/*
|
||||||
@ -73,9 +134,9 @@ public class CreateProjectCommand extends Command {
|
|||||||
* Don't call request.getParameter() before calling internalImport().
|
* Don't call request.getParameter() before calling internalImport().
|
||||||
*/
|
*/
|
||||||
Properties options = ParsingUtilities.parseUrlParameters(request);
|
Properties options = ParsingUtilities.parseUrlParameters(request);
|
||||||
|
|
||||||
Project project = new Project();
|
Project project = new Project();
|
||||||
|
|
||||||
internalImport(request, project, options);
|
internalImport(request, project, options);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -91,7 +152,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
ProjectManager.singleton.registerProject(project, pm);
|
ProjectManager.singleton.registerProject(project, pm);
|
||||||
|
|
||||||
project.update();
|
project.update();
|
||||||
|
|
||||||
redirect(response, "/project.html?project=" + project.id);
|
redirect(response, "/project.html?project=" + project.id);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
redirect(response, "/error.html?redirect=index.html&msg=" +
|
redirect(response, "/error.html?redirect=index.html&msg=" +
|
||||||
@ -102,7 +163,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
ProjectManager.singleton.setBusy(false);
|
ProjectManager.singleton.setBusy(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void internalImport(
|
protected void internalImport(
|
||||||
HttpServletRequest request,
|
HttpServletRequest request,
|
||||||
Project project,
|
Project project,
|
||||||
@ -111,7 +172,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
|
|
||||||
ServletFileUpload upload = new ServletFileUpload();
|
ServletFileUpload upload = new ServletFileUpload();
|
||||||
String url = null;
|
String url = null;
|
||||||
|
|
||||||
FileItemIterator iter = upload.getItemIterator(request);
|
FileItemIterator iter = upload.getItemIterator(request);
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
FileItemStream item = iter.next();
|
FileItemStream item = iter.next();
|
||||||
@ -138,32 +199,32 @@ public class CreateProjectCommand extends Command {
|
|||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (url != null && url.length() > 0) {
|
if (url != null && url.length() > 0) {
|
||||||
internalImportURL(request, project, options, url);
|
internalImportURL(request, project, options, url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class SafeInputStream extends FilterInputStream {
|
static class SafeInputStream extends FilterInputStream {
|
||||||
public SafeInputStream(InputStream stream) {
|
public SafeInputStream(InputStream stream) {
|
||||||
super(stream);
|
super(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
// some libraries attempt to close the input stream while they can't
|
// some libraries attempt to close the input stream while they can't
|
||||||
// read anymore from it... unfortunately this behavior prevents
|
// read anymore from it... unfortunately this behavior prevents
|
||||||
// the zip input stream from functioning correctly so we just have
|
// the zip input stream from functioning correctly so we just have
|
||||||
// to ignore those close() calls and just close it ourselves
|
// to ignore those close() calls and just close it ourselves
|
||||||
// forcefully later
|
// forcefully later
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reallyClose() throws IOException {
|
public void reallyClose() throws IOException {
|
||||||
super.close();
|
super.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void internalImportFile(
|
protected void internalImportFile(
|
||||||
Project project,
|
Project project,
|
||||||
Properties options,
|
Properties options,
|
||||||
@ -172,13 +233,13 @@ public class CreateProjectCommand extends Command {
|
|||||||
) throws Exception {
|
) throws Exception {
|
||||||
|
|
||||||
logger.info("Importing '{}'", fileName);
|
logger.info("Importing '{}'", fileName);
|
||||||
|
|
||||||
if (fileName.endsWith(".zip") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz") || fileName.endsWith(".tar.bz2")) {
|
if (fileName.endsWith(".zip") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz") || fileName.endsWith(".tar.bz2")) {
|
||||||
|
|
||||||
// first, save the file on disk, since we need two passes and we might
|
// first, save the file on disk, since we need two passes and we might
|
||||||
// not have enough memory to keep it all in there
|
// not have enough memory to keep it all in there
|
||||||
File file = save(inputStream);
|
File file = save(inputStream);
|
||||||
|
|
||||||
// in the first pass, gather statistics about what files are in there
|
// in the first pass, gather statistics about what files are in there
|
||||||
// unfortunately, we have to rely on files extensions, which is horrible but
|
// unfortunately, we have to rely on files extensions, which is horrible but
|
||||||
// better than nothing
|
// better than nothing
|
||||||
@ -186,9 +247,9 @@ public class CreateProjectCommand extends Command {
|
|||||||
|
|
||||||
FileInputStream fis = new FileInputStream(file);
|
FileInputStream fis = new FileInputStream(file);
|
||||||
InputStream is = getStream(fileName, fis);
|
InputStream is = getStream(fileName, fis);
|
||||||
|
|
||||||
// NOTE(SM): unfortunately, java.io does not provide any generalized class for
|
// NOTE(SM): unfortunately, java.io does not provide any generalized class for
|
||||||
// archive-like input streams so while both TarInputStream and ZipInputStream
|
// archive-like input streams so while both TarInputStream and ZipInputStream
|
||||||
// behave precisely the same, there is no polymorphic behavior so we have
|
// behave precisely the same, there is no polymorphic behavior so we have
|
||||||
// to treat each instance explicitly... one of those times you wish you had
|
// to treat each instance explicitly... one of those times you wish you had
|
||||||
// closures
|
// closures
|
||||||
@ -224,10 +285,10 @@ public class CreateProjectCommand extends Command {
|
|||||||
if (values.size() == 0) {
|
if (values.size() == 0) {
|
||||||
throw new RuntimeException("The archive contains no files.");
|
throw new RuntimeException("The archive contains no files.");
|
||||||
}
|
}
|
||||||
|
|
||||||
// this will contain the set of extensions we'll load from the archive
|
// this will contain the set of extensions we'll load from the archive
|
||||||
HashSet<String> exts = new HashSet<String>();
|
HashSet<String> exts = new HashSet<String>();
|
||||||
|
|
||||||
// find the extension that is most frequent or those who share the highest frequency value
|
// find the extension that is most frequent or those who share the highest frequency value
|
||||||
if (values.size() == 1) {
|
if (values.size() == 1) {
|
||||||
exts.add(values.get(0).getKey());
|
exts.add(values.get(0).getKey());
|
||||||
@ -245,7 +306,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("Most frequent extensions: {}", exts.toString());
|
logger.info("Most frequent extensions: {}", exts.toString());
|
||||||
|
|
||||||
// second pass, load the data for real
|
// second pass, load the data for real
|
||||||
@ -299,9 +360,9 @@ public class CreateProjectCommand extends Command {
|
|||||||
return o2.getValue() - o1.getValue();
|
return o2.getValue() - o1.getValue();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void load(Project project, Properties options, String fileName, InputStream inputStream) throws Exception {
|
private void load(Project project, Properties options, String fileName, InputStream inputStream) throws Exception {
|
||||||
Importer importer = guessImporter(options, null, fileName);
|
Importer importer = guessImporter(null, fileName);
|
||||||
internalInvokeImporter(project, importer, options, inputStream, null);
|
internalInvokeImporter(project, importer, options, inputStream, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -312,7 +373,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
is.close();
|
is.close();
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void mapExtension(String name, Map<String,Integer> ext_map) {
|
private void mapExtension(String name, Map<String,Integer> ext_map) {
|
||||||
String ext = getExtension(name)[1];
|
String ext = getExtension(name)[1];
|
||||||
if (ext_map.containsKey(ext)) {
|
if (ext_map.containsKey(ext)) {
|
||||||
@ -323,7 +384,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private InputStream getStream(String fileName, InputStream is) throws IOException {
|
private InputStream getStream(String fileName, InputStream is) throws IOException {
|
||||||
if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
|
if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
|
||||||
return new TarInputStream(new GZIPInputStream(is));
|
return new TarInputStream(new GZIPInputStream(is));
|
||||||
} else if (fileName.endsWith(".tar.bz2")) {
|
} else if (fileName.endsWith(".tar.bz2")) {
|
||||||
return new TarInputStream(new CBZip2InputStream(is));
|
return new TarInputStream(new CBZip2InputStream(is));
|
||||||
@ -331,7 +392,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
return new ZipInputStream(is);
|
return new ZipInputStream(is);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String[] getExtension(String filename) {
|
private String[] getExtension(String filename) {
|
||||||
String[] result = new String[2];
|
String[] result = new String[2];
|
||||||
int ext_index = filename.lastIndexOf('.');
|
int ext_index = filename.lastIndexOf('.');
|
||||||
@ -339,7 +400,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
result[1] = (ext_index == -1) ? "" : filename.substring(ext_index + 1);
|
result[1] = (ext_index == -1) ? "" : filename.substring(ext_index + 1);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void internalImportURL(
|
protected void internalImportURL(
|
||||||
HttpServletRequest request,
|
HttpServletRequest request,
|
||||||
Project project,
|
Project project,
|
||||||
@ -348,7 +409,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
) throws Exception {
|
) throws Exception {
|
||||||
URL url = new URL(urlString);
|
URL url = new URL(urlString);
|
||||||
URLConnection connection = null;
|
URLConnection connection = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
connection = url.openConnection();
|
connection = url.openConnection();
|
||||||
connection.setConnectTimeout(5000);
|
connection.setConnectTimeout(5000);
|
||||||
@ -356,27 +417,26 @@ public class CreateProjectCommand extends Command {
|
|||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new Exception("Cannot connect to " + urlString, e);
|
throw new Exception("Cannot connect to " + urlString, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
InputStream inputStream = null;
|
InputStream inputStream = null;
|
||||||
try {
|
try {
|
||||||
inputStream = connection.getInputStream();
|
inputStream = connection.getInputStream();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new Exception("Cannot retrieve content from " + url, e);
|
throw new Exception("Cannot retrieve content from " + url, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Importer importer = guessImporter(
|
Importer importer = guessImporter(
|
||||||
options,
|
|
||||||
connection.getContentType(),
|
connection.getContentType(),
|
||||||
url.getPath()
|
url.getPath()
|
||||||
);
|
);
|
||||||
|
|
||||||
internalInvokeImporter(project, importer, options, inputStream, connection.getContentEncoding());
|
internalInvokeImporter(project, importer, options, inputStream, connection.getContentEncoding());
|
||||||
} finally {
|
} finally {
|
||||||
inputStream.close();
|
inputStream.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void internalInvokeImporter(
|
protected void internalInvokeImporter(
|
||||||
Project project,
|
Project project,
|
||||||
Importer importer,
|
Importer importer,
|
||||||
@ -387,48 +447,48 @@ public class CreateProjectCommand extends Command {
|
|||||||
if (importer.takesReader()) {
|
if (importer.takesReader()) {
|
||||||
|
|
||||||
BufferedInputStream inputStream = new BufferedInputStream(rawInputStream);
|
BufferedInputStream inputStream = new BufferedInputStream(rawInputStream);
|
||||||
|
|
||||||
// NOTE(SM): The ICU4J char detection code requires the input stream to support mark/reset.
|
// NOTE(SM): The ICU4J char detection code requires the input stream to support mark/reset.
|
||||||
// Unfortunately, not all ServletInputStream implementations are marking, so we need do
|
// Unfortunately, not all ServletInputStream implementations are marking, so we need do
|
||||||
// this memory-expensive wrapping to make it work. It's far from ideal but I don't have
|
// this memory-expensive wrapping to make it work. It's far from ideal but I don't have
|
||||||
// a more efficient solution.
|
// a more efficient solution.
|
||||||
byte[] bytes = new byte[1024 * 4];
|
byte[] bytes = new byte[1024 * 4];
|
||||||
inputStream.mark(bytes.length);
|
inputStream.mark(bytes.length);
|
||||||
inputStream.read(bytes);
|
inputStream.read(bytes);
|
||||||
inputStream.reset();
|
inputStream.reset();
|
||||||
|
|
||||||
CharsetDetector detector = new CharsetDetector();
|
CharsetDetector detector = new CharsetDetector();
|
||||||
detector.setDeclaredEncoding("utf8"); // most of the content on the web is encoded in UTF-8 so start with that
|
detector.setDeclaredEncoding("utf8"); // most of the content on the web is encoded in UTF-8 so start with that
|
||||||
|
|
||||||
Reader reader = null;
|
Reader reader = null;
|
||||||
CharsetMatch[] charsetMatches = detector.setText(bytes).detectAll();
|
CharsetMatch[] charsetMatches = detector.setText(bytes).detectAll();
|
||||||
for (CharsetMatch charsetMatch : charsetMatches) {
|
for (CharsetMatch charsetMatch : charsetMatches) {
|
||||||
try {
|
try {
|
||||||
reader = new InputStreamReader(inputStream, charsetMatch.getName());
|
reader = new InputStreamReader(inputStream, charsetMatch.getName());
|
||||||
|
|
||||||
options.setProperty("encoding", charsetMatch.getName());
|
options.setProperty("encoding", charsetMatch.getName());
|
||||||
options.setProperty("encoding_confidence", Integer.toString(charsetMatch.getConfidence()));
|
options.setProperty("encoding_confidence", Integer.toString(charsetMatch.getConfidence()));
|
||||||
|
|
||||||
logger.info("Best encoding guess: {} [confidence: {}]", charsetMatch.getName(), charsetMatch.getConfidence());
|
logger.info("Best encoding guess: {} [confidence: {}]", charsetMatch.getName(), charsetMatch.getConfidence());
|
||||||
|
|
||||||
break;
|
break;
|
||||||
} catch (UnsupportedEncodingException e) {
|
} catch (UnsupportedEncodingException e) {
|
||||||
// silent
|
// silent
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (reader == null) { // when all else fails
|
if (reader == null) { // when all else fails
|
||||||
reader = encoding != null ?
|
reader = encoding != null ?
|
||||||
new InputStreamReader(inputStream, encoding) :
|
new InputStreamReader(inputStream, encoding) :
|
||||||
new InputStreamReader(inputStream);
|
new InputStreamReader(inputStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
importer.read(reader, project, options);
|
importer.read(reader, project, options);
|
||||||
} else {
|
} else {
|
||||||
importer.read(rawInputStream, project, options);
|
importer.read(rawInputStream, project, options);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void internalInvokeImporter(
|
protected void internalInvokeImporter(
|
||||||
Project project,
|
Project project,
|
||||||
Importer importer,
|
Importer importer,
|
||||||
@ -437,58 +497,14 @@ public class CreateProjectCommand extends Command {
|
|||||||
) throws Exception {
|
) throws Exception {
|
||||||
importer.read(reader, project, options);
|
importer.read(reader, project, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Importer guessImporter(
|
protected Importer guessImporter(String contentType, String fileName) {
|
||||||
Properties options, String contentType, String fileName) {
|
for(Importer i : importers.values()){
|
||||||
|
if(i.canImportData(contentType, fileName)){
|
||||||
if (contentType != null) {
|
return i;
|
||||||
contentType = contentType.toLowerCase().trim();
|
|
||||||
|
|
||||||
if ("application/msexcel".equals(contentType) ||
|
|
||||||
"application/x-msexcel".equals(contentType) ||
|
|
||||||
"application/x-ms-excel".equals(contentType) ||
|
|
||||||
"application/vnd.ms-excel".equals(contentType) ||
|
|
||||||
"application/x-excel".equals(contentType) ||
|
|
||||||
"application/xls".equals(contentType)) {
|
|
||||||
|
|
||||||
return new ExcelImporter(false);
|
|
||||||
} else if("application/x-xls".equals(contentType)) {
|
|
||||||
return new ExcelImporter(true);
|
|
||||||
} else if("application/xml".equals(contentType) ||
|
|
||||||
"text/xml".equals(contentType) ||
|
|
||||||
"application/rss+xml".equals(contentType) ||
|
|
||||||
"application/atom+xml".equals(contentType)) {
|
|
||||||
return new XmlImporter();
|
|
||||||
} else if("application/rdf+xml".equals(contentType)) {
|
|
||||||
return new RdfTripleImporter();
|
|
||||||
} else if ("application/marc".equals(contentType)) {
|
|
||||||
return new MarcImporter();
|
|
||||||
}
|
|
||||||
} else if (fileName != null) {
|
|
||||||
fileName = fileName.toLowerCase();
|
|
||||||
if (fileName.endsWith(".xls")) {
|
|
||||||
return new ExcelImporter(false);
|
|
||||||
} else if (fileName.endsWith(".xlsx")) {
|
|
||||||
return new ExcelImporter(true);
|
|
||||||
} else if (
|
|
||||||
fileName.endsWith(".xml") ||
|
|
||||||
fileName.endsWith(".atom") ||
|
|
||||||
fileName.endsWith(".rss")
|
|
||||||
) {
|
|
||||||
return new XmlImporter();
|
|
||||||
} else if (
|
|
||||||
fileName.endsWith(".rdf")) {
|
|
||||||
return new RdfTripleImporter();
|
|
||||||
} else if (
|
|
||||||
fileName.endsWith(".mrc") ||
|
|
||||||
fileName.endsWith(".marc") ||
|
|
||||||
fileName.contains(".mrc.") ||
|
|
||||||
fileName.contains(".marc.")
|
|
||||||
) {
|
|
||||||
return new MarcImporter();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new TsvCsvImporter();
|
return new TsvCsvImporter(); //default
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,16 +27,12 @@ import com.metaweb.gridworks.model.Row;
|
|||||||
import com.metaweb.gridworks.model.Recon.Judgment;
|
import com.metaweb.gridworks.model.Recon.Judgment;
|
||||||
|
|
||||||
public class ExcelImporter implements Importer {
|
public class ExcelImporter implements Importer {
|
||||||
final protected boolean _xmlBased;
|
protected boolean _xmlBased;
|
||||||
|
|
||||||
public ExcelImporter(boolean xmlBased) {
|
|
||||||
_xmlBased = xmlBased;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean takesReader() {
|
public boolean takesReader() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void read(Reader reader, Project project, Properties options) throws Exception {
|
public void read(Reader reader, Project project, Properties options) throws Exception {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
@ -45,11 +41,11 @@ public class ExcelImporter implements Importer {
|
|||||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
||||||
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
||||||
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
||||||
|
|
||||||
Workbook wb = null;
|
Workbook wb = null;
|
||||||
try {
|
try {
|
||||||
wb = _xmlBased ?
|
wb = _xmlBased ?
|
||||||
new XSSFWorkbook(inputStream) :
|
new XSSFWorkbook(inputStream) :
|
||||||
new HSSFWorkbook(new POIFSFileSystem(inputStream));
|
new HSSFWorkbook(new POIFSFileSystem(inputStream));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new Exception(
|
throw new Exception(
|
||||||
@ -58,16 +54,16 @@ public class ExcelImporter implements Importer {
|
|||||||
e
|
e
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
Sheet sheet = wb.getSheetAt(0);
|
Sheet sheet = wb.getSheetAt(0);
|
||||||
|
|
||||||
int firstRow = sheet.getFirstRowNum();
|
int firstRow = sheet.getFirstRowNum();
|
||||||
int lastRow = sheet.getLastRowNum();
|
int lastRow = sheet.getLastRowNum();
|
||||||
int r = firstRow;
|
int r = firstRow;
|
||||||
|
|
||||||
List<Integer> nonBlankIndices = null;
|
List<Integer> nonBlankIndices = null;
|
||||||
List<String> nonBlankHeaderStrings = null;
|
List<String> nonBlankHeaderStrings = null;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find the header row
|
* Find the header row
|
||||||
*/
|
*/
|
||||||
@ -79,13 +75,13 @@ public class ExcelImporter implements Importer {
|
|||||||
ignoreLines--;
|
ignoreLines--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
short firstCell = row.getFirstCellNum();
|
short firstCell = row.getFirstCellNum();
|
||||||
short lastCell = row.getLastCellNum();
|
short lastCell = row.getLastCellNum();
|
||||||
if (firstCell >= 0 && firstCell <= lastCell) {
|
if (firstCell >= 0 && firstCell <= lastCell) {
|
||||||
nonBlankIndices = new ArrayList<Integer>(lastCell - firstCell + 1);
|
nonBlankIndices = new ArrayList<Integer>(lastCell - firstCell + 1);
|
||||||
nonBlankHeaderStrings = new ArrayList<String>(lastCell - firstCell + 1);
|
nonBlankHeaderStrings = new ArrayList<String>(lastCell - firstCell + 1);
|
||||||
|
|
||||||
for (int c = firstCell; c <= lastCell; c++) {
|
for (int c = firstCell; c <= lastCell; c++) {
|
||||||
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
||||||
if (cell != null) {
|
if (cell != null) {
|
||||||
@ -96,18 +92,18 @@ public class ExcelImporter implements Importer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nonBlankIndices.size() > 0) {
|
if (nonBlankIndices.size() > 0) {
|
||||||
r++;
|
r++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nonBlankIndices == null || nonBlankIndices.size() == 0) {
|
if (nonBlankIndices == null || nonBlankIndices.size() == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create columns
|
* Create columns
|
||||||
*/
|
*/
|
||||||
@ -117,59 +113,59 @@ public class ExcelImporter implements Importer {
|
|||||||
if (nameToIndex.containsKey(cell)) {
|
if (nameToIndex.containsKey(cell)) {
|
||||||
int index = nameToIndex.get(cell);
|
int index = nameToIndex.get(cell);
|
||||||
nameToIndex.put(cell, index + 1);
|
nameToIndex.put(cell, index + 1);
|
||||||
|
|
||||||
cell = cell.contains(" ") ? (cell + " " + index) : (cell + index);
|
cell = cell.contains(" ") ? (cell + " " + index) : (cell + index);
|
||||||
} else {
|
} else {
|
||||||
nameToIndex.put(cell, 2);
|
nameToIndex.put(cell, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
Column column = new Column(c, cell);
|
Column column = new Column(c, cell);
|
||||||
project.columnModel.columns.add(column);
|
project.columnModel.columns.add(column);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now process the data rows
|
* Now process the data rows
|
||||||
*/
|
*/
|
||||||
int rowsWithData = 0;
|
int rowsWithData = 0;
|
||||||
Map<String, Recon> reconMap = new HashMap<String, Recon>();
|
Map<String, Recon> reconMap = new HashMap<String, Recon>();
|
||||||
|
|
||||||
for (; r <= lastRow; r++) {
|
for (; r <= lastRow; r++) {
|
||||||
org.apache.poi.ss.usermodel.Row row = sheet.getRow(r);
|
org.apache.poi.ss.usermodel.Row row = sheet.getRow(r);
|
||||||
if (row == null) {
|
if (row == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
short firstCell = row.getFirstCellNum();
|
short firstCell = row.getFirstCellNum();
|
||||||
short lastCell = row.getLastCellNum();
|
short lastCell = row.getLastCellNum();
|
||||||
if (firstCell >= 0 && firstCell <= lastCell) {
|
if (firstCell >= 0 && firstCell <= lastCell) {
|
||||||
Row newRow = new Row(nonBlankIndices.size());
|
Row newRow = new Row(nonBlankIndices.size());
|
||||||
boolean hasData = false;
|
boolean hasData = false;
|
||||||
|
|
||||||
for (int c = 0; c < nonBlankIndices.size(); c++) {
|
for (int c = 0; c < nonBlankIndices.size(); c++) {
|
||||||
if (c < firstCell || c > lastCell) {
|
if (c < firstCell || c > lastCell) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
||||||
if (cell == null) {
|
if (cell == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
int cellType = cell.getCellType();
|
int cellType = cell.getCellType();
|
||||||
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR ||
|
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR ||
|
||||||
cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) {
|
cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_FORMULA) {
|
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_FORMULA) {
|
||||||
cellType = cell.getCachedFormulaResultType();
|
cellType = cell.getCachedFormulaResultType();
|
||||||
}
|
}
|
||||||
|
|
||||||
Serializable value = null;
|
Serializable value = null;
|
||||||
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BOOLEAN) {
|
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BOOLEAN) {
|
||||||
value = cell.getBooleanCellValue();
|
value = cell.getBooleanCellValue();
|
||||||
} else if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_NUMERIC) {
|
} else if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_NUMERIC) {
|
||||||
double d = cell.getNumericCellValue();
|
double d = cell.getNumericCellValue();
|
||||||
|
|
||||||
if (HSSFDateUtil.isCellDateFormatted(cell)) {
|
if (HSSFDateUtil.isCellDateFormatted(cell)) {
|
||||||
value = HSSFDateUtil.getJavaDate(d);
|
value = HSSFDateUtil.getJavaDate(d);
|
||||||
} else {
|
} else {
|
||||||
@ -181,23 +177,23 @@ public class ExcelImporter implements Importer {
|
|||||||
value = text;
|
value = text;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
Recon recon = null;
|
Recon recon = null;
|
||||||
|
|
||||||
Hyperlink hyperlink = cell.getHyperlink();
|
Hyperlink hyperlink = cell.getHyperlink();
|
||||||
if (hyperlink != null) {
|
if (hyperlink != null) {
|
||||||
String url = hyperlink.getAddress();
|
String url = hyperlink.getAddress();
|
||||||
|
|
||||||
if (url.startsWith("http://") ||
|
if (url.startsWith("http://") ||
|
||||||
url.startsWith("https://")) {
|
url.startsWith("https://")) {
|
||||||
|
|
||||||
final String sig = "freebase.com/view";
|
final String sig = "freebase.com/view";
|
||||||
|
|
||||||
int i = url.indexOf(sig);
|
int i = url.indexOf(sig);
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
String id = url.substring(i + sig.length());
|
String id = url.substring(i + sig.length());
|
||||||
|
|
||||||
int q = id.indexOf('?');
|
int q = id.indexOf('?');
|
||||||
if (q > 0) {
|
if (q > 0) {
|
||||||
id = id.substring(0, q);
|
id = id.substring(0, q);
|
||||||
@ -206,7 +202,7 @@ public class ExcelImporter implements Importer {
|
|||||||
if (h > 0) {
|
if (h > 0) {
|
||||||
id = id.substring(0, h);
|
id = id.substring(0, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (reconMap.containsKey(id)) {
|
if (reconMap.containsKey(id)) {
|
||||||
recon = reconMap.get(id);
|
recon = reconMap.get(id);
|
||||||
recon.judgmentBatchSize++;
|
recon.judgmentBatchSize++;
|
||||||
@ -219,26 +215,26 @@ public class ExcelImporter implements Importer {
|
|||||||
recon.judgmentAction = "auto";
|
recon.judgmentAction = "auto";
|
||||||
recon.judgmentBatchSize = 1;
|
recon.judgmentBatchSize = 1;
|
||||||
recon.addCandidate(recon.match);
|
recon.addCandidate(recon.match);
|
||||||
|
|
||||||
reconMap.put(id, recon);
|
reconMap.put(id, recon);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
newRow.setCell(c, new Cell(value, recon));
|
newRow.setCell(c, new Cell(value, recon));
|
||||||
hasData = true;
|
hasData = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hasData) {
|
if (hasData) {
|
||||||
rowsWithData++;
|
rowsWithData++;
|
||||||
|
|
||||||
if (skip <= 0 || rowsWithData > skip) {
|
if (skip <= 0 || rowsWithData > skip) {
|
||||||
project.rows.add(newRow);
|
project.rows.add(newRow);
|
||||||
project.columnModel.setMaxCellIndex(newRow.cells.size());
|
project.columnModel.setMaxCellIndex(newRow.cells.size());
|
||||||
|
|
||||||
if (limit > 0 && project.rows.size() >= limit) {
|
if (limit > 0 && project.rows.size() >= limit) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -247,4 +243,32 @@ public class ExcelImporter implements Importer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean canImportData(String contentType, String fileName) {
|
||||||
|
if (contentType != null) {
|
||||||
|
contentType = contentType.toLowerCase().trim();
|
||||||
|
if ("application/msexcel".equals(contentType) ||
|
||||||
|
"application/x-msexcel".equals(contentType) ||
|
||||||
|
"application/x-ms-excel".equals(contentType) ||
|
||||||
|
"application/vnd.ms-excel".equals(contentType) ||
|
||||||
|
"application/x-excel".equals(contentType) ||
|
||||||
|
"application/xls".equals(contentType)) {
|
||||||
|
this._xmlBased = false;
|
||||||
|
return true;
|
||||||
|
} else if("application/x-xls".equals(contentType)) {
|
||||||
|
this._xmlBased = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else if (fileName != null) {
|
||||||
|
fileName = fileName.toLowerCase();
|
||||||
|
if (fileName.endsWith(".xls")) {
|
||||||
|
this._xmlBased = false;
|
||||||
|
return true;
|
||||||
|
} else if (fileName.endsWith(".xlsx")) {
|
||||||
|
this._xmlBased = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,9 @@ import com.metaweb.gridworks.model.Project;
|
|||||||
|
|
||||||
public interface Importer {
|
public interface Importer {
|
||||||
public boolean takesReader();
|
public boolean takesReader();
|
||||||
|
|
||||||
public void read(Reader reader, Project project, Properties options) throws Exception;
|
public void read(Reader reader, Project project, Properties options) throws Exception;
|
||||||
public void read(InputStream inputStream, Project project, Properties options) throws Exception;
|
public void read(InputStream inputStream, Project project, Properties options) throws Exception;
|
||||||
|
|
||||||
|
public boolean canImportData(String contentType, String fileName);
|
||||||
}
|
}
|
||||||
|
@ -20,21 +20,21 @@ public class MarcImporter implements Importer {
|
|||||||
public boolean takesReader() {
|
public boolean takesReader() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void read(Reader reader, Project project, Properties options)
|
public void read(Reader reader, Project project, Properties options)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
|
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void read(
|
public void read(
|
||||||
InputStream inputStream,
|
InputStream inputStream,
|
||||||
Project project,
|
Project project,
|
||||||
Properties options
|
Properties options
|
||||||
) throws Exception {
|
) throws Exception {
|
||||||
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
||||||
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
||||||
|
|
||||||
File tempFile = File.createTempFile("gridworks-import-", ".marc.xml");
|
File tempFile = File.createTempFile("gridworks-import-", ".marc.xml");
|
||||||
try {
|
try {
|
||||||
OutputStream os = new FileOutputStream(tempFile);
|
OutputStream os = new FileOutputStream(tempFile);
|
||||||
@ -45,7 +45,7 @@ public class MarcImporter implements Importer {
|
|||||||
true
|
true
|
||||||
);
|
);
|
||||||
MarcWriter writer = new MarcXmlWriter(os, true);
|
MarcWriter writer = new MarcXmlWriter(os, true);
|
||||||
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while (reader.hasNext()) {
|
while (reader.hasNext()) {
|
||||||
Record record = reader.next();
|
Record record = reader.next();
|
||||||
@ -64,7 +64,7 @@ public class MarcImporter implements Importer {
|
|||||||
} finally {
|
} finally {
|
||||||
os.close();
|
os.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
InputStream is = new FileInputStream(tempFile);
|
InputStream is = new FileInputStream(tempFile);
|
||||||
try {
|
try {
|
||||||
new XmlImporter().read(is, project, options);
|
new XmlImporter().read(is, project, options);
|
||||||
@ -75,4 +75,25 @@ public class MarcImporter implements Importer {
|
|||||||
tempFile.delete();
|
tempFile.delete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean canImportData(String contentType, String fileName) {
|
||||||
|
if (contentType != null) {
|
||||||
|
contentType = contentType.toLowerCase().trim();
|
||||||
|
|
||||||
|
if ("application/marc".equals(contentType)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else if (fileName != null) {
|
||||||
|
fileName = fileName.toLowerCase();
|
||||||
|
if (
|
||||||
|
fileName.endsWith(".mrc") ||
|
||||||
|
fileName.endsWith(".marc") ||
|
||||||
|
fileName.contains(".mrc.") ||
|
||||||
|
fileName.contains(".marc.")
|
||||||
|
) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,25 +43,25 @@ public class RdfTripleImporter implements Importer{
|
|||||||
@Override
|
@Override
|
||||||
public void read(Reader reader, Project project, Properties options) throws Exception {
|
public void read(Reader reader, Project project, Properties options) throws Exception {
|
||||||
String baseUrl = options.getProperty("base-url");
|
String baseUrl = options.getProperty("base-url");
|
||||||
|
|
||||||
Graph graph = JrdfFactory.getNewGraph();
|
Graph graph = JrdfFactory.getNewGraph();
|
||||||
LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory);
|
LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory);
|
||||||
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
||||||
parser.parse(reader, baseUrl); // fills JRDF graph
|
parser.parse(reader, baseUrl); // fills JRDF graph
|
||||||
|
|
||||||
Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>();
|
Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>();
|
||||||
|
|
||||||
Column subjectColumn = new Column(0, "subject");
|
Column subjectColumn = new Column(0, "subject");
|
||||||
project.columnModel.columns.add(0, subjectColumn);
|
project.columnModel.columns.add(0, subjectColumn);
|
||||||
project.columnModel.setKeyColumnIndex(0);
|
project.columnModel.setKeyColumnIndex(0);
|
||||||
|
|
||||||
ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
|
ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
|
||||||
try {
|
try {
|
||||||
for (Triple triple : triples) {
|
for (Triple triple : triples) {
|
||||||
String subject = triple.getSubject().toString();
|
String subject = triple.getSubject().toString();
|
||||||
String predicate = triple.getPredicate().toString();
|
String predicate = triple.getPredicate().toString();
|
||||||
String object = triple.getObject().toString();
|
String object = triple.getObject().toString();
|
||||||
|
|
||||||
Column column = project.columnModel.getColumnByName(predicate);
|
Column column = project.columnModel.getColumnByName(predicate);
|
||||||
if (column == null) {
|
if (column == null) {
|
||||||
column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
|
column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
|
||||||
@ -71,7 +71,7 @@ public class RdfTripleImporter implements Importer{
|
|||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int cellIndex = column.getCellIndex();
|
int cellIndex = column.getCellIndex();
|
||||||
if (subjectToRows.containsKey(subject)) {
|
if (subjectToRows.containsKey(subject)) {
|
||||||
List<Row> rows = subjectToRows.get(subject);
|
List<Row> rows = subjectToRows.get(subject);
|
||||||
@ -82,20 +82,20 @@ public class RdfTripleImporter implements Importer{
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (object != null) {
|
if (object != null) {
|
||||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||||
rows.add(row);
|
rows.add(row);
|
||||||
|
|
||||||
row.setCell(cellIndex, new Cell(object, null));
|
row.setCell(cellIndex, new Cell(object, null));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
List<Row> rows = new ArrayList<Row>();
|
List<Row> rows = new ArrayList<Row>();
|
||||||
subjectToRows.put(subject, rows);
|
subjectToRows.put(subject, rows);
|
||||||
|
|
||||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||||
rows.add(row);
|
rows.add(row);
|
||||||
|
|
||||||
row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
|
row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
|
||||||
row.setCell(cellIndex, new Cell(object, null));
|
row.setCell(cellIndex, new Cell(object, null));
|
||||||
}
|
}
|
||||||
@ -120,4 +120,21 @@ public class RdfTripleImporter implements Importer{
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean canImportData(String contentType, String fileName) {
|
||||||
|
if (contentType != null) {
|
||||||
|
contentType = contentType.toLowerCase().trim();
|
||||||
|
|
||||||
|
if("application/rdf+xml".equals(contentType)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else if (fileName != null) {
|
||||||
|
fileName = fileName.toLowerCase();
|
||||||
|
if (
|
||||||
|
fileName.endsWith(".rdf")) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@ package com.metaweb.gridworks.importers;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
import java.io.LineNumberReader;
|
import java.io.LineNumberReader;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -151,10 +152,25 @@ public class TsvCsvImporter implements Importer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
|
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
|
||||||
throw new UnsupportedOperationException();
|
read(new InputStreamReader(inputStream), project, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean takesReader() {
|
public boolean takesReader() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean canImportData(String contentType, String fileName) {
|
||||||
|
if (contentType != null) {
|
||||||
|
contentType = contentType.toLowerCase().trim();
|
||||||
|
return false;
|
||||||
|
} else if (fileName != null) {
|
||||||
|
fileName = fileName.toLowerCase();
|
||||||
|
if (fileName.endsWith(".tsv")) {
|
||||||
|
return true;
|
||||||
|
}else if (fileName.endsWith(".csv")){
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -12,24 +12,24 @@ import com.metaweb.gridworks.model.Project;
|
|||||||
public class XmlImporter implements Importer {
|
public class XmlImporter implements Importer {
|
||||||
|
|
||||||
public static final int BUFFER_SIZE = 64 * 1024;
|
public static final int BUFFER_SIZE = 64 * 1024;
|
||||||
|
|
||||||
public boolean takesReader() {
|
public boolean takesReader() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void read(Reader reader, Project project, Properties options)
|
public void read(Reader reader, Project project, Properties options)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
|
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void read(
|
public void read(
|
||||||
InputStream inputStream,
|
InputStream inputStream,
|
||||||
Project project,
|
Project project,
|
||||||
Properties options
|
Properties options
|
||||||
) throws Exception {
|
) throws Exception {
|
||||||
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
||||||
|
|
||||||
String[] recordPath = null;
|
String[] recordPath = null;
|
||||||
{
|
{
|
||||||
byte[] buffer = new byte[BUFFER_SIZE];
|
byte[] buffer = new byte[BUFFER_SIZE];
|
||||||
@ -40,10 +40,10 @@ public class XmlImporter implements Importer {
|
|||||||
bytes_read +=c ;
|
bytes_read +=c ;
|
||||||
}
|
}
|
||||||
pis.unread(buffer, 0, bytes_read);
|
pis.unread(buffer, 0, bytes_read);
|
||||||
|
|
||||||
if (options.containsKey("importer-record-tag")) {
|
if (options.containsKey("importer-record-tag")) {
|
||||||
recordPath = XmlImportUtilities.detectPathFromTag(
|
recordPath = XmlImportUtilities.detectPathFromTag(
|
||||||
new ByteArrayInputStream(buffer, 0, bytes_read),
|
new ByteArrayInputStream(buffer, 0, bytes_read),
|
||||||
options.getProperty("importer-record-tag"));
|
options.getProperty("importer-record-tag"));
|
||||||
} else {
|
} else {
|
||||||
recordPath = XmlImportUtilities.detectRecordElement(
|
recordPath = XmlImportUtilities.detectRecordElement(
|
||||||
@ -52,11 +52,34 @@ public class XmlImporter implements Importer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
||||||
|
|
||||||
XmlImportUtilities.importXml(pis, project, recordPath, rootColumnGroup);
|
XmlImportUtilities.importXml(pis, project, recordPath, rootColumnGroup);
|
||||||
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||||
|
|
||||||
project.columnModel.update();
|
project.columnModel.update();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean canImportData(String contentType, String fileName) {
|
||||||
|
if (contentType != null) {
|
||||||
|
contentType = contentType.toLowerCase().trim();
|
||||||
|
|
||||||
|
if("application/xml".equals(contentType) ||
|
||||||
|
"text/xml".equals(contentType) ||
|
||||||
|
"application/rss+xml".equals(contentType) ||
|
||||||
|
"application/atom+xml".equals(contentType)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else if (fileName != null) {
|
||||||
|
fileName = fileName.toLowerCase();
|
||||||
|
if (
|
||||||
|
fileName.endsWith(".xml") ||
|
||||||
|
fileName.endsWith(".atom") ||
|
||||||
|
fileName.endsWith(".rss")
|
||||||
|
) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user