POI works better when reading files directly (#2597)

This commit is contained in:
PJ Fanning 2020-04-26 21:27:09 +02:00 committed by GitHub
parent ab64303cbb
commit f047a88518
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -45,12 +45,13 @@ import java.util.List;
import org.apache.poi.ooxml.POIXMLException; import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.CellType; import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@@ -83,43 +84,39 @@ public class ExcelImporter extends TabularImportingParserBase {
for (int index = 0;index < fileRecords.size();index++) { for (int index = 0;index < fileRecords.size();index++) {
ObjectNode fileRecord = fileRecords.get(index); ObjectNode fileRecord = fileRecords.get(index);
File file = ImportingUtilities.getFile(job, fileRecord); File file = ImportingUtilities.getFile(job, fileRecord);
InputStream is = new FileInputStream(file);
if (!is.markSupported()) {
is = new BufferedInputStream(is);
}
Workbook wb = null;
try { try {
Workbook wb = FileMagic.valueOf(is) == FileMagic.OOXML ? wb = FileMagic.valueOf(file) == FileMagic.OOXML ? new XSSFWorkbook(file) :
new XSSFWorkbook(is) : new HSSFWorkbook(new POIFSFileSystem(file));
new HSSFWorkbook(new POIFSFileSystem(is));
int sheetCount = wb.getNumberOfSheets(); int sheetCount = wb.getNumberOfSheets();
for (int i = 0; i < sheetCount; i++) { for (int i = 0; i < sheetCount; i++) {
Sheet sheet = wb.getSheetAt(i); Sheet sheet = wb.getSheetAt(i);
int rows = sheet.getLastRowNum() - sheet.getFirstRowNum() + 1; int rows = sheet.getLastRowNum() - sheet.getFirstRowNum() + 1;
ObjectNode sheetRecord = ParsingUtilities.mapper.createObjectNode(); ObjectNode sheetRecord = ParsingUtilities.mapper.createObjectNode();
JSONUtilities.safePut(sheetRecord, "name", file.getName() + "#" + sheet.getSheetName()); JSONUtilities.safePut(sheetRecord, "name", file.getName() + "#" + sheet.getSheetName());
JSONUtilities.safePut(sheetRecord, "fileNameAndSheetIndex", file.getName() + "#" + i); JSONUtilities.safePut(sheetRecord, "fileNameAndSheetIndex", file.getName() + "#" + i);
JSONUtilities.safePut(sheetRecord, "rows", rows); JSONUtilities.safePut(sheetRecord, "rows", rows);
if (rows > 1) { if (rows > 1) {
JSONUtilities.safePut(sheetRecord, "selected", true); JSONUtilities.safePut(sheetRecord, "selected", true);
} else { } else {
JSONUtilities.safePut(sheetRecord, "selected", false); JSONUtilities.safePut(sheetRecord, "selected", false);
} }
JSONUtilities.append(sheetRecords, sheetRecord); JSONUtilities.append(sheetRecords, sheetRecord);
} }
wb.close();
} finally { } finally {
is.close(); if (wb != null) {
wb.close();
}
} }
} }
} catch (IOException e) { } catch (IOException e) {
logger.error("Error generating parser UI initialization data for Excel file", e); logger.error("Error generating parser UI initialization data for Excel file", e);
} catch (IllegalArgumentException e) { } catch (IllegalArgumentException e) {
logger.error("Error generating parser UI initialization data for Excel file (only Excel 97 & later supported)", e); logger.error("Error generating parser UI initialization data for Excel file (only Excel 97 & later supported)", e);
} catch (POIXMLException e) { } catch (POIXMLException|InvalidFormatException e) {
logger.error("Error generating parser UI initialization data for Excel file - invalid XML", e); logger.error("Error generating parser UI initialization data for Excel file - invalid XML", e);
} }