Merge pull request #1047 from lemmingapex/master
Fixed #1046: Combine xls and xlsx formats by inspecting file header information in ExcelImporter
This commit is contained in:
commit
175f4a5319
@ -37,12 +37,14 @@ import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.POIXMLException;
|
||||
import org.apache.poi.common.usermodel.Hyperlink;
|
||||
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
|
||||
@ -77,10 +79,7 @@ public class ExcelImporter extends TabularImportingParserBase {
|
||||
public JSONObject createParserUIInitializationData(
|
||||
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||
|
||||
boolean xmlBased = "text/xml/xlsx".equals(format);
|
||||
JSONUtilities.safePut(options, "xmlBased", xmlBased);
|
||||
|
||||
|
||||
JSONArray sheetRecords = new JSONArray();
|
||||
JSONUtilities.safePut(options, "sheetRecords", sheetRecords);
|
||||
try {
|
||||
@ -88,8 +87,13 @@ public class ExcelImporter extends TabularImportingParserBase {
|
||||
JSONObject firstFileRecord = fileRecords.get(0);
|
||||
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
||||
InputStream is = new FileInputStream(file);
|
||||
|
||||
if (!is.markSupported()) {
|
||||
is = new PushbackInputStream(is, 8);
|
||||
}
|
||||
|
||||
try {
|
||||
Workbook wb = xmlBased ?
|
||||
Workbook wb = POIXMLDocument.hasOOXMLHeader(is) ?
|
||||
new XSSFWorkbook(is) :
|
||||
new HSSFWorkbook(new POIFSFileSystem(is));
|
||||
|
||||
@ -136,10 +140,13 @@ public class ExcelImporter extends TabularImportingParserBase {
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
boolean xmlBased = JSONUtilities.getBoolean(options, "xmlBased", false);
|
||||
Workbook wb = null;
|
||||
if (!inputStream.markSupported()) {
|
||||
inputStream = new PushbackInputStream(inputStream, 8);
|
||||
}
|
||||
|
||||
try {
|
||||
wb = xmlBased ?
|
||||
wb = POIXMLDocument.hasOOXMLHeader(inputStream) ?
|
||||
new XSSFWorkbook(inputStream) :
|
||||
new HSSFWorkbook(new POIFSFileSystem(inputStream));
|
||||
} catch (IOException e) {
|
||||
|
@ -204,14 +204,13 @@ function registerImporting() {
|
||||
IM.registerFormat("text/rdf+n3", "RDF/N3 files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfTripleImporter());
|
||||
|
||||
IM.registerFormat("text/xml", "XML files", "XmlParserUI", new Packages.com.google.refine.importers.XmlImporter());
|
||||
IM.registerFormat("text/xml/xlsx", "Excel (.xlsx) files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
|
||||
IM.registerFormat("binary/text/xml/xls/xlsx", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
|
||||
IM.registerFormat("text/xml/ods", "Open Document Format spreadsheets (.ods)", "ExcelParserUI", new Packages.com.google.refine.importers.OdsImporter());
|
||||
IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter());
|
||||
IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
|
||||
IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter());
|
||||
|
||||
IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
|
||||
IM.registerFormat("binary/xls", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
|
||||
|
||||
IM.registerFormat("service", "Services"); // generic format, no parser to handle it
|
||||
|
||||
@ -228,8 +227,8 @@ function registerImporting() {
|
||||
IM.registerExtension(".json", "text/json");
|
||||
IM.registerExtension(".js", "text/json");
|
||||
|
||||
IM.registerExtension(".xls", "binary/xls");
|
||||
IM.registerExtension(".xlsx", "text/xml/xlsx");
|
||||
IM.registerExtension(".xls", "binary/text/xml/xls/xlsx");
|
||||
IM.registerExtension(".xlsx", "binary/text/xml/xls/xlsx");
|
||||
|
||||
IM.registerExtension(".ods", "text/xml/ods");
|
||||
|
||||
@ -250,13 +249,13 @@ function registerImporting() {
|
||||
|
||||
IM.registerMimeType("text/rdf+n3", "text/rdf+n3");
|
||||
|
||||
IM.registerMimeType("application/msexcel", "binary/xls");
|
||||
IM.registerMimeType("application/x-msexcel", "binary/xls");
|
||||
IM.registerMimeType("application/x-ms-excel", "binary/xls");
|
||||
IM.registerMimeType("application/vnd.ms-excel", "binary/xls");
|
||||
IM.registerMimeType("application/x-excel", "binary/xls");
|
||||
IM.registerMimeType("application/xls", "binary/xls");
|
||||
IM.registerMimeType("application/x-xls", "text/xml/xlsx");
|
||||
IM.registerMimeType("application/msexcel", "binary/text/xml/xls/xlsx");
|
||||
IM.registerMimeType("application/x-msexcel", "binary/text/xml/xls/xlsx");
|
||||
IM.registerMimeType("application/x-ms-excel", "binary/text/xml/xls/xlsx");
|
||||
IM.registerMimeType("application/vnd.ms-excel", "binary/text/xml/xls/xlsx");
|
||||
IM.registerMimeType("application/x-excel", "binary/text/xml/xls/xlsx");
|
||||
IM.registerMimeType("application/xls", "binary/text/xml/xls/xlsx");
|
||||
IM.registerMimeType("application/x-xls", "binary/text/xml/xls/xlsx");
|
||||
|
||||
IM.registerMimeType("application/vnd.oasis.opendocument.spreadsheet","text/xml/ods");
|
||||
|
||||
|
@ -62,7 +62,6 @@ Refine.ExcelParserUI.prototype.confirmReadyToCreateProject = function() {
|
||||
|
||||
Refine.ExcelParserUI.prototype.getOptions = function() {
|
||||
var options = {
|
||||
xmlBased: this._config.xmlBased,
|
||||
sheets: []
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user