Refactor of CreateProjectCommand.java and Importers

The code for determining if an importer is suitable to import a file is now in each respective importer rather than in CreateProjectCommand.  There is an additional method, canImportData, on the Importer interface to support this.

CreateProjectCommand registers Importers from a Hashtable (this is a copy of Tom's code for registering commands in Gridworks Servlet).  Plugging in new importers should be simpler.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@861 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-05-26 13:18:48 +00:00
parent 017a825600
commit 1c47ff476b
7 changed files with 291 additions and 172 deletions

View File

@ -45,12 +45,8 @@ import com.metaweb.gridworks.Gridworks;
import com.metaweb.gridworks.ProjectManager; import com.metaweb.gridworks.ProjectManager;
import com.metaweb.gridworks.ProjectMetadata; import com.metaweb.gridworks.ProjectMetadata;
import com.metaweb.gridworks.commands.Command; import com.metaweb.gridworks.commands.Command;
import com.metaweb.gridworks.importers.ExcelImporter;
import com.metaweb.gridworks.importers.Importer; import com.metaweb.gridworks.importers.Importer;
import com.metaweb.gridworks.importers.MarcImporter;
import com.metaweb.gridworks.importers.RdfTripleImporter;
import com.metaweb.gridworks.importers.TsvCsvImporter; import com.metaweb.gridworks.importers.TsvCsvImporter;
import com.metaweb.gridworks.importers.XmlImporter;
import com.metaweb.gridworks.model.Project; import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.util.IOUtils; import com.metaweb.gridworks.util.IOUtils;
import com.metaweb.gridworks.util.ParsingUtilities; import com.metaweb.gridworks.util.ParsingUtilities;
@ -59,6 +55,71 @@ public class CreateProjectCommand extends Command {
final static Logger logger = LoggerFactory.getLogger("create-project_command"); final static Logger logger = LoggerFactory.getLogger("create-project_command");
// Registry of importer instances keyed by importer name.
// Insertion-ordered on purpose: guessImporter walks importers.values() and returns the
// first importer whose canImportData accepts the input, so the iteration order decides
// which importer wins when more than one would accept the same file. A plain HashMap
// would make that choice depend on hash ordering.
static final private Map<String, Importer> importers = new java.util.LinkedHashMap<String, Importer>();

// Built-in importers as {name, fully-qualified class name} rows, in registration order.
private static final String[][] importerNames = {
    {"ExcelImporter", "com.metaweb.gridworks.importers.ExcelImporter"},
    {"XmlImporter", "com.metaweb.gridworks.importers.XmlImporter"},
    {"RdfTripleImporter", "com.metaweb.gridworks.importers.RdfTripleImporter"},
    {"MarcImporter", "com.metaweb.gridworks.importers.MarcImporter"},
    {"TsvCsvImporter", "com.metaweb.gridworks.importers.TsvCsvImporter"},
};

// Populate the registry once, when the class is loaded.
static {
    registerImporters(importerNames);
}
/**
 * Instantiate and register every importer listed in the given table.
 *
 * Each row is expected to hold the importer name at index 0 and the fully-qualified
 * class name at index 1. A row whose class cannot be loaded or instantiated is logged
 * and skipped; the remaining rows are still processed.
 *
 * @param importers
 *            rows of {importer name, importer class name}
 * @return true only if every listed importer was loaded and registered successfully
 */
static public boolean registerImporters(String[][] importers) {
    boolean status = true;
    // Iterate the table that was passed in, not the built-in importerNames table.
    for (String[] importer : importers) {
        String importerName = importer[0];
        String className = importer[1];
        logger.debug("Loading importer " + importerName + " class: " + className);
        Importer cmd;
        try {
            // TODO: May need to use the servlet container's class loader here
            cmd = (Importer) Class.forName(className).newInstance();
        } catch (InstantiationException e) {
            logger.error("Failed to load importer class " + className, e);
            status = false;
            continue;
        } catch (IllegalAccessException e) {
            logger.error("Failed to load importer class " + className, e);
            status = false;
            continue;
        } catch (ClassNotFoundException e) {
            logger.error("Failed to load importer class " + className, e);
            status = false;
            continue;
        }
        // AND, not OR: a single failed registration must make the overall result false.
        status &= registerImporter(importerName, cmd);
    }
    return status;
}
/**
 * Add one importer to the registry under the given name.
 *
 * An existing registration is never replaced: if the name is already taken the
 * registry is left untouched.
 *
 * @param name
 *            key under which the importer is registered
 * @param importerObject
 *            object implementing the importer
 * @return true if the importer was added, false if the name was already registered
 */
static public boolean registerImporter(String name,
        Importer importerObject) {
    final boolean nameIsFree = !importers.containsKey(name);
    if (nameIsFree) {
        importers.put(name, importerObject);
    }
    return nameIsFree;
}
// Currently only for test purposes.
// Removes the importer registered under the given name; returns true when a
// non-null importer was actually removed.
static protected boolean unregisterImporter(String verb) {
    final Importer removed = importers.remove(verb);
    return removed != null;
}
@Override @Override
public void doPost(HttpServletRequest request, HttpServletResponse response) public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException { throws ServletException, IOException {
@ -301,7 +362,7 @@ public class CreateProjectCommand extends Command {
} }
private void load(Project project, Properties options, String fileName, InputStream inputStream) throws Exception { private void load(Project project, Properties options, String fileName, InputStream inputStream) throws Exception {
Importer importer = guessImporter(options, null, fileName); Importer importer = guessImporter(null, fileName);
internalInvokeImporter(project, importer, options, inputStream, null); internalInvokeImporter(project, importer, options, inputStream, null);
} }
@ -366,7 +427,6 @@ public class CreateProjectCommand extends Command {
try { try {
Importer importer = guessImporter( Importer importer = guessImporter(
options,
connection.getContentType(), connection.getContentType(),
url.getPath() url.getPath()
); );
@ -438,57 +498,13 @@ public class CreateProjectCommand extends Command {
importer.read(reader, project, options); importer.read(reader, project, options);
} }
protected Importer guessImporter( protected Importer guessImporter(String contentType, String fileName) {
Properties options, String contentType, String fileName) { for(Importer i : importers.values()){
if(i.canImportData(contentType, fileName)){
if (contentType != null) { return i;
contentType = contentType.toLowerCase().trim();
if ("application/msexcel".equals(contentType) ||
"application/x-msexcel".equals(contentType) ||
"application/x-ms-excel".equals(contentType) ||
"application/vnd.ms-excel".equals(contentType) ||
"application/x-excel".equals(contentType) ||
"application/xls".equals(contentType)) {
return new ExcelImporter(false);
} else if("application/x-xls".equals(contentType)) {
return new ExcelImporter(true);
} else if("application/xml".equals(contentType) ||
"text/xml".equals(contentType) ||
"application/rss+xml".equals(contentType) ||
"application/atom+xml".equals(contentType)) {
return new XmlImporter();
} else if("application/rdf+xml".equals(contentType)) {
return new RdfTripleImporter();
} else if ("application/marc".equals(contentType)) {
return new MarcImporter();
}
} else if (fileName != null) {
fileName = fileName.toLowerCase();
if (fileName.endsWith(".xls")) {
return new ExcelImporter(false);
} else if (fileName.endsWith(".xlsx")) {
return new ExcelImporter(true);
} else if (
fileName.endsWith(".xml") ||
fileName.endsWith(".atom") ||
fileName.endsWith(".rss")
) {
return new XmlImporter();
} else if (
fileName.endsWith(".rdf")) {
return new RdfTripleImporter();
} else if (
fileName.endsWith(".mrc") ||
fileName.endsWith(".marc") ||
fileName.contains(".mrc.") ||
fileName.contains(".marc.")
) {
return new MarcImporter();
} }
} }
return new TsvCsvImporter(); return new TsvCsvImporter(); //default
} }
} }

View File

@ -27,11 +27,7 @@ import com.metaweb.gridworks.model.Row;
import com.metaweb.gridworks.model.Recon.Judgment; import com.metaweb.gridworks.model.Recon.Judgment;
public class ExcelImporter implements Importer { public class ExcelImporter implements Importer {
final protected boolean _xmlBased; protected boolean _xmlBased;
public ExcelImporter(boolean xmlBased) {
_xmlBased = xmlBased;
}
public boolean takesReader() { public boolean takesReader() {
return false; return false;
@ -247,4 +243,32 @@ public class ExcelImporter implements Importer {
} }
} }
} }
/**
 * Decide whether this importer handles the given input and, as a side effect,
 * record in _xmlBased which Excel flavour was detected.
 *
 * The file name is consulted only when contentType is null.
 * NOTE(review): this method mutates the importer instance; if one instance is shared
 * between concurrent requests that looks unsafe - confirm against the caller.
 *
 * @param contentType
 *            MIME type of the data, or null if unknown
 * @param fileName
 *            name of the file, or null if unknown
 * @return true if the data is recognised as an Excel document
 */
public boolean canImportData(String contentType, String fileName) {
    if (contentType != null) {
        final String mimeType = contentType.toLowerCase().trim();
        if ("application/x-xls".equals(mimeType)) {
            this._xmlBased = true;
            return true;
        }
        final boolean binaryExcel =
            "application/msexcel".equals(mimeType) ||
            "application/x-msexcel".equals(mimeType) ||
            "application/x-ms-excel".equals(mimeType) ||
            "application/vnd.ms-excel".equals(mimeType) ||
            "application/x-excel".equals(mimeType) ||
            "application/xls".equals(mimeType);
        if (binaryExcel) {
            this._xmlBased = false;
        }
        return binaryExcel;
    }

    if (fileName == null) {
        return false;
    }

    final String lowerName = fileName.toLowerCase();
    if (lowerName.endsWith(".xlsx")) {
        this._xmlBased = true;
        return true;
    }
    if (lowerName.endsWith(".xls")) {
        this._xmlBased = false;
        return true;
    }
    return false;
}
} }

View File

@ -11,4 +11,6 @@ public interface Importer {
public void read(Reader reader, Project project, Properties options) throws Exception; public void read(Reader reader, Project project, Properties options) throws Exception;
public void read(InputStream inputStream, Project project, Properties options) throws Exception; public void read(InputStream inputStream, Project project, Properties options) throws Exception;
/**
 * Report whether this importer recognises the data described by the given
 * content type and/or file name.
 *
 * @param contentType
 *            MIME type of the data; may be null
 * @param fileName
 *            name of the file the data came from; may be null
 * @return true if this importer can import the described data
 */
public boolean canImportData(String contentType, String fileName);
} }

View File

@ -75,4 +75,25 @@ public class MarcImporter implements Importer {
tempFile.delete(); tempFile.delete();
} }
} }
/**
 * Decide whether the input looks like MARC data.
 *
 * A non-null content type is decisive on its own; the file name is consulted only
 * when contentType is null.
 *
 * @param contentType
 *            MIME type of the data, or null if unknown
 * @param fileName
 *            name of the file, or null if unknown
 * @return true for the application/marc content type, or for a file name that ends
 *         with .mrc / .marc or contains ".mrc." / ".marc."
 */
public boolean canImportData(String contentType, String fileName) {
    if (contentType != null) {
        return "application/marc".equals(contentType.toLowerCase().trim());
    }
    if (fileName == null) {
        return false;
    }
    final String lowerName = fileName.toLowerCase();
    return lowerName.endsWith(".mrc")
        || lowerName.endsWith(".marc")
        || lowerName.contains(".mrc.")
        || lowerName.contains(".marc.");
}
} }

View File

@ -120,4 +120,21 @@ public class RdfTripleImporter implements Importer{
return true; return true;
} }
/**
 * Decide whether the input looks like RDF/XML.
 *
 * A non-null content type is decisive on its own; the file name is consulted only
 * when contentType is null.
 *
 * @param contentType
 *            MIME type of the data, or null if unknown
 * @param fileName
 *            name of the file, or null if unknown
 * @return true for the application/rdf+xml content type, or for a file name ending
 *         in .rdf
 */
public boolean canImportData(String contentType, String fileName) {
    if (contentType != null) {
        final String mimeType = contentType.toLowerCase().trim();
        return "application/rdf+xml".equals(mimeType);
    }
    return fileName != null && fileName.toLowerCase().endsWith(".rdf");
}
} }

View File

@ -2,6 +2,7 @@ package com.metaweb.gridworks.importers;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader; import java.io.LineNumberReader;
import java.io.Reader; import java.io.Reader;
import java.util.ArrayList; import java.util.ArrayList;
@ -151,10 +152,25 @@ public class TsvCsvImporter implements Importer {
} }
public void read(InputStream inputStream, Project project, Properties options) throws Exception { public void read(InputStream inputStream, Project project, Properties options) throws Exception {
throw new UnsupportedOperationException(); read(new InputStreamReader(inputStream), project, options);
} }
public boolean takesReader() { public boolean takesReader() {
return true; return true;
} }
/**
 * Decide whether the input looks like tab- or comma-separated text.
 *
 * No content type is ever accepted: whenever contentType is non-null the answer is
 * false. Only when contentType is null is the file name checked.
 *
 * @param contentType
 *            MIME type of the data, or null if unknown
 * @param fileName
 *            name of the file, or null if unknown
 * @return true only when contentType is null and the file name ends in .tsv or .csv
 */
public boolean canImportData(String contentType, String fileName) {
    if (contentType != null || fileName == null) {
        return false;
    }
    final String lowerName = fileName.toLowerCase();
    return lowerName.endsWith(".tsv") || lowerName.endsWith(".csv");
}
} }

View File

@ -59,4 +59,27 @@ public class XmlImporter implements Importer {
project.columnModel.update(); project.columnModel.update();
} }
/**
 * Decide whether the input looks like generic XML (including RSS and Atom feeds).
 *
 * A non-null content type is decisive on its own; the file name is consulted only
 * when contentType is null.
 *
 * @param contentType
 *            MIME type of the data, or null if unknown
 * @param fileName
 *            name of the file, or null if unknown
 * @return true for an XML, RSS or Atom content type, or for a file name ending in
 *         .xml, .atom or .rss
 */
public boolean canImportData(String contentType, String fileName) {
    if (contentType != null) {
        final String mimeType = contentType.toLowerCase().trim();
        return "application/xml".equals(mimeType)
            || "text/xml".equals(mimeType)
            || "application/rss+xml".equals(mimeType)
            || "application/atom+xml".equals(mimeType);
    }
    if (fileName == null) {
        return false;
    }
    final String lowerName = fileName.toLowerCase();
    return lowerName.endsWith(".xml")
        || lowerName.endsWith(".atom")
        || lowerName.endsWith(".rss");
}
} }