POI works better when reading files directly (#2597)

This commit is contained in:
PJ Fanning 2020-04-26 21:27:09 +02:00 committed by GitHub
parent ab64303cbb
commit f047a88518
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -45,12 +45,13 @@ import java.util.List;
import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -83,43 +84,39 @@ public class ExcelImporter extends TabularImportingParserBase {
for (int index = 0;index < fileRecords.size();index++) {
ObjectNode fileRecord = fileRecords.get(index);
File file = ImportingUtilities.getFile(job, fileRecord);
InputStream is = new FileInputStream(file);
if (!is.markSupported()) {
is = new BufferedInputStream(is);
}
Workbook wb = null;
try {
Workbook wb = FileMagic.valueOf(is) == FileMagic.OOXML ?
new XSSFWorkbook(is) :
new HSSFWorkbook(new POIFSFileSystem(is));
wb = FileMagic.valueOf(file) == FileMagic.OOXML ? new XSSFWorkbook(file) :
new HSSFWorkbook(new POIFSFileSystem(file));
int sheetCount = wb.getNumberOfSheets();
for (int i = 0; i < sheetCount; i++) {
Sheet sheet = wb.getSheetAt(i);
int rows = sheet.getLastRowNum() - sheet.getFirstRowNum() + 1;
int sheetCount = wb.getNumberOfSheets();
for (int i = 0; i < sheetCount; i++) {
Sheet sheet = wb.getSheetAt(i);
int rows = sheet.getLastRowNum() - sheet.getFirstRowNum() + 1;
ObjectNode sheetRecord = ParsingUtilities.mapper.createObjectNode();
JSONUtilities.safePut(sheetRecord, "name", file.getName() + "#" + sheet.getSheetName());
JSONUtilities.safePut(sheetRecord, "fileNameAndSheetIndex", file.getName() + "#" + i);
JSONUtilities.safePut(sheetRecord, "rows", rows);
if (rows > 1) {
JSONUtilities.safePut(sheetRecord, "selected", true);
} else {
JSONUtilities.safePut(sheetRecord, "selected", false);
}
JSONUtilities.append(sheetRecords, sheetRecord);
}
wb.close();
ObjectNode sheetRecord = ParsingUtilities.mapper.createObjectNode();
JSONUtilities.safePut(sheetRecord, "name", file.getName() + "#" + sheet.getSheetName());
JSONUtilities.safePut(sheetRecord, "fileNameAndSheetIndex", file.getName() + "#" + i);
JSONUtilities.safePut(sheetRecord, "rows", rows);
if (rows > 1) {
JSONUtilities.safePut(sheetRecord, "selected", true);
} else {
JSONUtilities.safePut(sheetRecord, "selected", false);
}
JSONUtilities.append(sheetRecords, sheetRecord);
}
} finally {
is.close();
if (wb != null) {
wb.close();
}
}
}
} catch (IOException e) {
logger.error("Error generating parser UI initialization data for Excel file", e);
} catch (IllegalArgumentException e) {
logger.error("Error generating parser UI initialization data for Excel file (only Excel 97 & later supported)", e);
} catch (POIXMLException e) {
} catch (POIXMLException|InvalidFormatException e) {
logger.error("Error generating parser UI initialization data for Excel file - invalid XML", e);
}