Fixed #1046: Combine the xls and xlsx formats by inspecting file header information in ExcelImporter.

This commit is contained in:
Scott Wiedemann 2015-07-30 16:19:26 -06:00
parent ee38f9edde
commit 5eab8893cc
3 changed files with 24 additions and 19 deletions

View File

@@ -37,12 +37,14 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLException;
import org.apache.poi.common.usermodel.Hyperlink;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
@@ -78,9 +80,6 @@ public class ExcelImporter extends TabularImportingParserBase {
ImportingJob job, List<JSONObject> fileRecords, String format) {
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
boolean xmlBased = "text/xml/xlsx".equals(format);
JSONUtilities.safePut(options, "xmlBased", xmlBased);
JSONArray sheetRecords = new JSONArray();
JSONUtilities.safePut(options, "sheetRecords", sheetRecords);
try {
@@ -88,8 +87,13 @@ public class ExcelImporter extends TabularImportingParserBase {
JSONObject firstFileRecord = fileRecords.get(0);
File file = ImportingUtilities.getFile(job, firstFileRecord);
InputStream is = new FileInputStream(file);
if (!is.markSupported()) {
is = new PushbackInputStream(is, 8);
}
try {
Workbook wb = xmlBased ?
Workbook wb = POIXMLDocument.hasOOXMLHeader(is) ?
new XSSFWorkbook(is) :
new HSSFWorkbook(new POIFSFileSystem(is));
@@ -136,10 +140,13 @@ public class ExcelImporter extends TabularImportingParserBase {
JSONObject options,
List<Exception> exceptions
) {
boolean xmlBased = JSONUtilities.getBoolean(options, "xmlBased", false);
Workbook wb = null;
if (!inputStream.markSupported()) {
inputStream = new PushbackInputStream(inputStream, 8);
}
try {
wb = xmlBased ?
wb = POIXMLDocument.hasOOXMLHeader(inputStream) ?
new XSSFWorkbook(inputStream) :
new HSSFWorkbook(new POIFSFileSystem(inputStream));
} catch (IOException e) {

View File

@@ -204,14 +204,13 @@ function registerImporting() {
IM.registerFormat("text/rdf+n3", "RDF/N3 files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfTripleImporter());
IM.registerFormat("text/xml", "XML files", "XmlParserUI", new Packages.com.google.refine.importers.XmlImporter());
IM.registerFormat("text/xml/xlsx", "Excel (.xlsx) files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
IM.registerFormat("binary/text/xml/xls/xlsx", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
IM.registerFormat("text/xml/ods", "Open Document Format spreadsheets (.ods)", "ExcelParserUI", new Packages.com.google.refine.importers.OdsImporter());
IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter());
IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter());
IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
IM.registerFormat("binary/xls", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
IM.registerFormat("service", "Services"); // generic format, no parser to handle it
@@ -228,8 +227,8 @@ function registerImporting() {
IM.registerExtension(".json", "text/json");
IM.registerExtension(".js", "text/json");
IM.registerExtension(".xls", "binary/xls");
IM.registerExtension(".xlsx", "text/xml/xlsx");
IM.registerExtension(".xls", "binary/text/xml/xls/xlsx");
IM.registerExtension(".xlsx", "binary/text/xml/xls/xlsx");
IM.registerExtension(".ods", "text/xml/ods");
@@ -250,13 +249,13 @@ function registerImporting() {
IM.registerMimeType("text/rdf+n3", "text/rdf+n3");
IM.registerMimeType("application/msexcel", "binary/xls");
IM.registerMimeType("application/x-msexcel", "binary/xls");
IM.registerMimeType("application/x-ms-excel", "binary/xls");
IM.registerMimeType("application/vnd.ms-excel", "binary/xls");
IM.registerMimeType("application/x-excel", "binary/xls");
IM.registerMimeType("application/xls", "binary/xls");
IM.registerMimeType("application/x-xls", "text/xml/xlsx");
IM.registerMimeType("application/msexcel", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/x-msexcel", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/x-ms-excel", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/vnd.ms-excel", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/x-excel", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/xls", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/x-xls", "binary/text/xml/xls/xlsx");
IM.registerMimeType("application/vnd.oasis.opendocument.spreadsheet","text/xml/ods");

View File

@@ -62,7 +62,6 @@ Refine.ExcelParserUI.prototype.confirmReadyToCreateProject = function() {
Refine.ExcelParserUI.prototype.getOptions = function() {
var options = {
xmlBased: this._config.xmlBased,
sheets: []
};