Merge pull request #1047 from lemmingapex/master

Fixed #1046: Combine the xls and xlsx formats by inspecting file header information in ExcelImporter.
This commit is contained in:
Thad Guidry 2015-09-21 20:33:05 -05:00
commit 175f4a5319
3 changed files with 24 additions and 19 deletions

View File

@ -37,12 +37,14 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLException; import org.apache.poi.POIXMLException;
import org.apache.poi.common.usermodel.Hyperlink; import org.apache.poi.common.usermodel.Hyperlink;
import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.hssf.usermodel.HSSFDateUtil;
@ -78,9 +80,6 @@ public class ExcelImporter extends TabularImportingParserBase {
ImportingJob job, List<JSONObject> fileRecords, String format) { ImportingJob job, List<JSONObject> fileRecords, String format) {
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format); JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
boolean xmlBased = "text/xml/xlsx".equals(format);
JSONUtilities.safePut(options, "xmlBased", xmlBased);
JSONArray sheetRecords = new JSONArray(); JSONArray sheetRecords = new JSONArray();
JSONUtilities.safePut(options, "sheetRecords", sheetRecords); JSONUtilities.safePut(options, "sheetRecords", sheetRecords);
try { try {
@ -88,8 +87,13 @@ public class ExcelImporter extends TabularImportingParserBase {
JSONObject firstFileRecord = fileRecords.get(0); JSONObject firstFileRecord = fileRecords.get(0);
File file = ImportingUtilities.getFile(job, firstFileRecord); File file = ImportingUtilities.getFile(job, firstFileRecord);
InputStream is = new FileInputStream(file); InputStream is = new FileInputStream(file);
if (!is.markSupported()) {
is = new PushbackInputStream(is, 8);
}
try { try {
Workbook wb = xmlBased ? Workbook wb = POIXMLDocument.hasOOXMLHeader(is) ?
new XSSFWorkbook(is) : new XSSFWorkbook(is) :
new HSSFWorkbook(new POIFSFileSystem(is)); new HSSFWorkbook(new POIFSFileSystem(is));
@ -136,10 +140,13 @@ public class ExcelImporter extends TabularImportingParserBase {
JSONObject options, JSONObject options,
List<Exception> exceptions List<Exception> exceptions
) { ) {
boolean xmlBased = JSONUtilities.getBoolean(options, "xmlBased", false);
Workbook wb = null; Workbook wb = null;
if (!inputStream.markSupported()) {
inputStream = new PushbackInputStream(inputStream, 8);
}
try { try {
wb = xmlBased ? wb = POIXMLDocument.hasOOXMLHeader(inputStream) ?
new XSSFWorkbook(inputStream) : new XSSFWorkbook(inputStream) :
new HSSFWorkbook(new POIFSFileSystem(inputStream)); new HSSFWorkbook(new POIFSFileSystem(inputStream));
} catch (IOException e) { } catch (IOException e) {

View File

@ -204,14 +204,13 @@ function registerImporting() {
IM.registerFormat("text/rdf+n3", "RDF/N3 files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfTripleImporter()); IM.registerFormat("text/rdf+n3", "RDF/N3 files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfTripleImporter());
IM.registerFormat("text/xml", "XML files", "XmlParserUI", new Packages.com.google.refine.importers.XmlImporter()); IM.registerFormat("text/xml", "XML files", "XmlParserUI", new Packages.com.google.refine.importers.XmlImporter());
IM.registerFormat("text/xml/xlsx", "Excel (.xlsx) files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter()); IM.registerFormat("binary/text/xml/xls/xlsx", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
IM.registerFormat("text/xml/ods", "Open Document Format spreadsheets (.ods)", "ExcelParserUI", new Packages.com.google.refine.importers.OdsImporter()); IM.registerFormat("text/xml/ods", "Open Document Format spreadsheets (.ods)", "ExcelParserUI", new Packages.com.google.refine.importers.OdsImporter());
IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter()); IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter());
IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter()); IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter()); IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter());
IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
IM.registerFormat("binary/xls", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
IM.registerFormat("service", "Services"); // generic format, no parser to handle it IM.registerFormat("service", "Services"); // generic format, no parser to handle it
@ -228,8 +227,8 @@ function registerImporting() {
IM.registerExtension(".json", "text/json"); IM.registerExtension(".json", "text/json");
IM.registerExtension(".js", "text/json"); IM.registerExtension(".js", "text/json");
IM.registerExtension(".xls", "binary/xls"); IM.registerExtension(".xls", "binary/text/xml/xls/xlsx");
IM.registerExtension(".xlsx", "text/xml/xlsx"); IM.registerExtension(".xlsx", "binary/text/xml/xls/xlsx");
IM.registerExtension(".ods", "text/xml/ods"); IM.registerExtension(".ods", "text/xml/ods");
@ -250,13 +249,13 @@ function registerImporting() {
IM.registerMimeType("text/rdf+n3", "text/rdf+n3"); IM.registerMimeType("text/rdf+n3", "text/rdf+n3");
IM.registerMimeType("application/msexcel", "binary/xls"); IM.registerMimeType("application/msexcel", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/x-msexcel", "binary/xls"); IM.registerMimeType("application/x-msexcel", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/x-ms-excel", "binary/xls"); IM.registerMimeType("application/x-ms-excel", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/vnd.ms-excel", "binary/xls"); IM.registerMimeType("application/vnd.ms-excel", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/x-excel", "binary/xls"); IM.registerMimeType("application/x-excel", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/xls", "binary/xls"); IM.registerMimeType("application/xls", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/x-xls", "text/xml/xlsx"); IM.registerMimeType("application/x-xls", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/vnd.oasis.opendocument.spreadsheet","text/xml/ods"); IM.registerMimeType("application/vnd.oasis.opendocument.spreadsheet","text/xml/ods");

View File

@ -62,7 +62,6 @@ Refine.ExcelParserUI.prototype.confirmReadyToCreateProject = function() {
Refine.ExcelParserUI.prototype.getOptions = function() { Refine.ExcelParserUI.prototype.getOptions = function() {
var options = { var options = {
xmlBased: this._config.xmlBased,
sheets: [] sheets: []
}; };