From 5eab8893cc8a2e93f2fd5c0ba18656112fc55450 Mon Sep 17 00:00:00 2001 From: Scott Wiedemann Date: Thu, 30 Jul 2015 16:19:26 -0600 Subject: [PATCH] Fixed #1046 Combine xls and xlsx formats by inspecting file header information in ExcelImporter. --- .../refine/importers/ExcelImporter.java | 21 ++++++++++++------- .../webapp/modules/core/MOD-INF/controller.js | 21 +++++++++---------- .../parser-interfaces/excel-parser-ui.js | 1 - 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/main/src/com/google/refine/importers/ExcelImporter.java b/main/src/com/google/refine/importers/ExcelImporter.java index 802344bcc..4de1b3369 100644 --- a/main/src/com/google/refine/importers/ExcelImporter.java +++ b/main/src/com/google/refine/importers/ExcelImporter.java @@ -37,12 +37,14 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.PushbackInputStream; import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLException; import org.apache.poi.common.usermodel.Hyperlink; import org.apache.poi.hssf.usermodel.HSSFDateUtil; @@ -77,10 +79,7 @@ public class ExcelImporter extends TabularImportingParserBase { public JSONObject createParserUIInitializationData( ImportingJob job, List fileRecords, String format) { JSONObject options = super.createParserUIInitializationData(job, fileRecords, format); - - boolean xmlBased = "text/xml/xlsx".equals(format); - JSONUtilities.safePut(options, "xmlBased", xmlBased); - + JSONArray sheetRecords = new JSONArray(); JSONUtilities.safePut(options, "sheetRecords", sheetRecords); try { @@ -88,8 +87,13 @@ public class ExcelImporter extends TabularImportingParserBase { JSONObject firstFileRecord = fileRecords.get(0); File file = ImportingUtilities.getFile(job, firstFileRecord); InputStream is = new FileInputStream(file); + + if (!is.markSupported()) { + is = new PushbackInputStream(is, 8); + } + try { - Workbook wb = xmlBased ? + Workbook wb = POIXMLDocument.hasOOXMLHeader(is) ? new XSSFWorkbook(is) : new HSSFWorkbook(new POIFSFileSystem(is)); @@ -136,10 +140,13 @@ public class ExcelImporter extends TabularImportingParserBase { JSONObject options, List exceptions ) { - boolean xmlBased = JSONUtilities.getBoolean(options, "xmlBased", false); Workbook wb = null; + if (!inputStream.markSupported()) { + inputStream = new PushbackInputStream(inputStream, 8); + } + try { - wb = xmlBased ? + wb = POIXMLDocument.hasOOXMLHeader(inputStream) ? new XSSFWorkbook(inputStream) : new HSSFWorkbook(new POIFSFileSystem(inputStream)); } catch (IOException e) { diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js index 141529de9..e453097c3 100644 --- a/main/webapp/modules/core/MOD-INF/controller.js +++ b/main/webapp/modules/core/MOD-INF/controller.js @@ -204,14 +204,13 @@ function registerImporting() { IM.registerFormat("text/rdf+n3", "RDF/N3 files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfTripleImporter()); IM.registerFormat("text/xml", "XML files", "XmlParserUI", new Packages.com.google.refine.importers.XmlImporter()); - IM.registerFormat("text/xml/xlsx", "Excel (.xlsx) files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter()); + IM.registerFormat("binary/text/xml/xls/xlsx", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter()); IM.registerFormat("text/xml/ods", "Open Document Format spreadsheets (.ods)", "ExcelParserUI", new Packages.com.google.refine.importers.OdsImporter()); IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter()); IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter()); IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter()); IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it - IM.registerFormat("binary/xls", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter()); IM.registerFormat("service", "Services"); // generic format, no parser to handle it @@ -228,8 +227,8 @@ function registerImporting() { IM.registerExtension(".json", "text/json"); IM.registerExtension(".js", "text/json"); - IM.registerExtension(".xls", "binary/xls"); - IM.registerExtension(".xlsx", "text/xml/xlsx"); + IM.registerExtension(".xls", "binary/text/xml/xls/xlsx"); + IM.registerExtension(".xlsx", "binary/text/xml/xls/xlsx"); IM.registerExtension(".ods", "text/xml/ods"); @@ -250,13 +249,13 @@ function registerImporting() { IM.registerMimeType("text/rdf+n3", "text/rdf+n3"); - IM.registerMimeType("application/msexcel", "binary/xls"); - IM.registerMimeType("application/x-msexcel", "binary/xls"); - IM.registerMimeType("application/x-ms-excel", "binary/xls"); - IM.registerMimeType("application/vnd.ms-excel", "binary/xls"); - IM.registerMimeType("application/x-excel", "binary/xls"); - IM.registerMimeType("application/xls", "binary/xls"); - IM.registerMimeType("application/x-xls", "text/xml/xlsx"); + IM.registerMimeType("application/msexcel", "binary/text/xml/xls/xlsx"); + IM.registerMimeType("application/x-msexcel", "binary/text/xml/xls/xlsx"); + IM.registerMimeType("application/x-ms-excel", "binary/text/xml/xls/xlsx"); + IM.registerMimeType("application/vnd.ms-excel", "binary/text/xml/xls/xlsx"); + IM.registerMimeType("application/x-excel", "binary/text/xml/xls/xlsx"); + IM.registerMimeType("application/xls", "binary/text/xml/xls/xlsx"); + IM.registerMimeType("application/x-xls", "binary/text/xml/xls/xlsx"); IM.registerMimeType("application/vnd.oasis.opendocument.spreadsheet","text/xml/ods"); diff --git a/main/webapp/modules/core/scripts/index/parser-interfaces/excel-parser-ui.js b/main/webapp/modules/core/scripts/index/parser-interfaces/excel-parser-ui.js index e56fc3889..3b25bcee4 100644 --- a/main/webapp/modules/core/scripts/index/parser-interfaces/excel-parser-ui.js +++ b/main/webapp/modules/core/scripts/index/parser-interfaces/excel-parser-ui.js @@ -62,7 +62,6 @@ Refine.ExcelParserUI.prototype.confirmReadyToCreateProject = function() { Refine.ExcelParserUI.prototype.getOptions = function() { var options = { - xmlBased: this._config.xmlBased, sheets: [] };