From 6e261626190e7c59fcd1b415c20c908c5b6f8744 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 9 Apr 2019 12:55:23 +0100 Subject: [PATCH 1/5] Update to using latest version of POI --- main/pom.xml | 4 ++-- main/src/com/google/refine/exporters/XlsExporter.java | 1 + main/src/com/google/refine/importers/ExcelImporter.java | 8 ++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/main/pom.xml b/main/pom.xml index 99c673bf3..bfb3b76cc 100644 --- a/main/pom.xml +++ b/main/pom.xml @@ -348,12 +348,12 @@ org.apache.poi poi - 4.0.1 + 4.1.0 org.apache.poi poi-ooxml - 3.8 + 4.1.0 org.apache.odftoolkit diff --git a/main/src/com/google/refine/exporters/XlsExporter.java b/main/src/com/google/refine/exporters/XlsExporter.java index 3dbce9de0..b1749d9d1 100644 --- a/main/src/com/google/refine/exporters/XlsExporter.java +++ b/main/src/com/google/refine/exporters/XlsExporter.java @@ -141,6 +141,7 @@ public class XlsExporter implements StreamExporter { project, engine, params, serializer); wb.write(outputStream); + wb.close(); outputStream.flush(); } diff --git a/main/src/com/google/refine/importers/ExcelImporter.java b/main/src/com/google/refine/importers/ExcelImporter.java index 4ce4d4792..cace15770 100644 --- a/main/src/com/google/refine/importers/ExcelImporter.java +++ b/main/src/com/google/refine/importers/ExcelImporter.java @@ -44,8 +44,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.poi.POIXMLDocument; -import org.apache.poi.POIXMLException; +import org.apache.poi.ooxml.POIXMLDocument; +import org.apache.poi.ooxml.POIXMLException; import org.apache.poi.common.usermodel.Hyperlink; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.hssf.usermodel.HSSFWorkbook; @@ -95,7 +95,7 @@ public class ExcelImporter extends TabularImportingParserBase { } try { - Workbook wb = POIXMLDocument.hasOOXMLHeader(is) ? + Workbook wb = FileMagic.valueOf(is) == FileMagic.OOXML ? 
new XSSFWorkbook(is) : new HSSFWorkbook(new POIFSFileSystem(is)); @@ -147,7 +147,7 @@ public class ExcelImporter extends TabularImportingParserBase { } try { - wb = POIXMLDocument.hasOOXMLHeader(inputStream) ? + wb = FileMagic.valueOf(InputStream) == FileMagic.OOXML ? new XSSFWorkbook(inputStream) : new HSSFWorkbook(new POIFSFileSystem(inputStream)); } catch (IOException e) { From 0217595275a9f2531c43f5ec75b3f2fb72f3182c Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 9 Apr 2019 13:47:08 +0100 Subject: [PATCH 2/5] FileMagic requires a stream that supports mark --- .../com/google/refine/importers/ExcelImporter.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/main/src/com/google/refine/importers/ExcelImporter.java b/main/src/com/google/refine/importers/ExcelImporter.java index cace15770..b513ff0eb 100644 --- a/main/src/com/google/refine/importers/ExcelImporter.java +++ b/main/src/com/google/refine/importers/ExcelImporter.java @@ -35,6 +35,7 @@ package com.google.refine.importers; import java.io.File; import java.io.FileInputStream; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.PushbackInputStream; @@ -44,7 +45,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.poi.ooxml.POIXMLDocument; import org.apache.poi.ooxml.POIXMLException; import org.apache.poi.common.usermodel.Hyperlink; import org.apache.poi.hssf.usermodel.HSSFDateUtil; @@ -54,6 +54,8 @@ import org.apache.poi.ss.usermodel.CellType; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.apache.poi.poifs.filesystem.FileMagic; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,7 +93,7 @@ public class ExcelImporter extends TabularImportingParserBase { InputStream is = new FileInputStream(file); if (!is.markSupported()) { - is = new PushbackInputStream(is, 8); 
+ is = new BufferedInputStream(is); } try { @@ -143,11 +145,12 @@ public class ExcelImporter extends TabularImportingParserBase { ) { Workbook wb = null; if (!inputStream.markSupported()) { - inputStream = new PushbackInputStream(inputStream, 8); + inputStream = new BufferedInputStream(inputStream); +; } try { - wb = FileMagic.valueOf(InputStream) == FileMagic.OOXML ? + wb = FileMagic.valueOf(inputStream) == FileMagic.OOXML ? new XSSFWorkbook(inputStream) : new HSSFWorkbook(new POIFSFileSystem(inputStream)); } catch (IOException e) { From 461b3520352b3d611439d9ce11716c800b64b07a Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 9 Apr 2019 13:50:42 +0100 Subject: [PATCH 3/5] Add test for reading XLSX --- .../tests/importers/ExcelImporterTests.java | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/main/tests/server/src/com/google/refine/tests/importers/ExcelImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/ExcelImporterTests.java index 9e454c678..7a93fd62c 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/ExcelImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/ExcelImporterTests.java @@ -73,6 +73,7 @@ public class ExcelImporterTests extends ImporterTest { //private static final File xlsxFile = createSpreadsheet(true); private static final File xlsFile = createSpreadsheet(false); + private static final File xlsxFile = createSpreadsheet(true); @Override @BeforeTest @@ -137,6 +138,45 @@ public class ExcelImporterTests extends ImporterTest { verify(options, times(1)).get("storeBlankCellsAsNulls"); } + @Test + public void readXlsx() throws FileNotFoundException, IOException{ + + ArrayNode sheets = ParsingUtilities.mapper.createArrayNode(); + sheets.add(ParsingUtilities.mapper.readTree("{name: \"file-source#Test Sheet 0\", fileNameAndSheetIndex: \"file-source#0\", rows: 31, selected: true}")); + whenGetArrayOption("sheets", options, sheets); + + 
whenGetIntegerOption("ignoreLines", options, 0); + whenGetIntegerOption("headerLines", options, 0); + whenGetIntegerOption("skipDataLines", options, 0); + whenGetIntegerOption("limit", options, -1); + whenGetBooleanOption("storeBlankCellsAsNulls",options,true); + + InputStream stream = new FileInputStream(xlsxFile); + + try { + parseOneFile(SUT, stream); + } catch (Exception e) { + Assert.fail(e.getMessage()); + } + + Assert.assertEquals(project.rows.size(), ROWS); + Assert.assertEquals(project.rows.get(1).cells.size(), COLUMNS); + Assert.assertEquals(((Number)project.rows.get(1).getCellValue(0)).doubleValue(),1.1, EPSILON); + Assert.assertEquals(((Number)project.rows.get(2).getCellValue(0)).doubleValue(),2.2, EPSILON); + + Assert.assertFalse((Boolean)project.rows.get(1).getCellValue(1)); + Assert.assertTrue((Boolean)project.rows.get(2).getCellValue(1)); + + Assert.assertEquals((String)project.rows.get(1).getCellValue(4)," Row 1 Col 5"); + Assert.assertNull((String)project.rows.get(1).getCellValue(5)); + + verify(options, times(1)).get("ignoreLines"); + verify(options, times(1)).get("headerLines"); + verify(options, times(1)).get("skipDataLines"); + verify(options, times(1)).get("limit"); + verify(options, times(1)).get("storeBlankCellsAsNulls"); + } + private static File createSpreadsheet(boolean xml) { final Workbook wb = xml ? 
new XSSFWorkbook() : new HSSFWorkbook(); From e2aa929908e5c5d952151adddae02da823096539 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 9 Apr 2019 15:02:10 +0100 Subject: [PATCH 4/5] Ensure workbooks are closed after reading/writing --- main/src/com/google/refine/exporters/XlsExporter.java | 2 +- main/src/com/google/refine/importers/ExcelImporter.java | 4 ++-- .../com/google/refine/tests/importers/ExcelImporterTests.java | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/main/src/com/google/refine/exporters/XlsExporter.java b/main/src/com/google/refine/exporters/XlsExporter.java index b1749d9d1..48100b0ae 100644 --- a/main/src/com/google/refine/exporters/XlsExporter.java +++ b/main/src/com/google/refine/exporters/XlsExporter.java @@ -141,8 +141,8 @@ public class XlsExporter implements StreamExporter { project, engine, params, serializer); wb.write(outputStream); - wb.close(); outputStream.flush(); + wb.close(); } } diff --git a/main/src/com/google/refine/importers/ExcelImporter.java b/main/src/com/google/refine/importers/ExcelImporter.java index b513ff0eb..421ccc66a 100644 --- a/main/src/com/google/refine/importers/ExcelImporter.java +++ b/main/src/com/google/refine/importers/ExcelImporter.java @@ -117,6 +117,7 @@ public class ExcelImporter extends TabularImportingParserBase { } JSONUtilities.append(sheetRecords, sheetRecord); } + wb.close(); } finally { is.close(); } @@ -146,7 +147,6 @@ public class ExcelImporter extends TabularImportingParserBase { Workbook wb = null; if (!inputStream.markSupported()) { inputStream = new BufferedInputStream(inputStream); -; } try { @@ -238,7 +238,7 @@ public class ExcelImporter extends TabularImportingParserBase { exceptions ); } - + super.parseOneFile(project, metadata, job, fileSource, inputStream, limit, options, exceptions); } diff --git a/main/tests/server/src/com/google/refine/tests/importers/ExcelImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/ExcelImporterTests.java 
index 7a93fd62c..bb5f5a81b 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/ExcelImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/ExcelImporterTests.java @@ -222,6 +222,7 @@ public class ExcelImporterTests extends ImporterTest { wb.write(outputStream); outputStream.flush(); outputStream.close(); + wb.close(); } catch (IOException e) { return null; } From 794a17c98e55df9fa9505c679d87ce5a837ac8ad Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Tue, 9 Apr 2019 16:58:58 +0100 Subject: [PATCH 5/5] Added XLSX exporter tests --- .../tests/exporters/XlsxExporterTests.java | 194 ++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 main/tests/server/src/com/google/refine/tests/exporters/XlsxExporterTests.java diff --git a/main/tests/server/src/com/google/refine/tests/exporters/XlsxExporterTests.java b/main/tests/server/src/com/google/refine/tests/exporters/XlsxExporterTests.java new file mode 100644 index 000000000..002c52cc1 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/exporters/XlsxExporterTests.java @@ -0,0 +1,194 @@ +/* + +Copyright 2010, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.tests.exporters; + +import java.io.ByteArrayInputStream; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFRow; +import org.apache.poi.xssf.usermodel.XSSFCell; + +import static org.mockito.Mockito.mock; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.time.OffsetDateTime; +import java.util.Properties; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.browsing.Engine; +import com.google.refine.exporters.StreamExporter; +import com.google.refine.exporters.XlsExporter; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.ModelException; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.tests.ProjectManagerStub; +import com.google.refine.tests.RefineTest; + 
+/**
+ * Exports in-memory projects with {@code new XlsExporter(true)} and reads them back as XLSX.
+ */
+public class XlsxExporterTests extends RefineTest {
+
+    private static final String TEST_PROJECT_NAME = "xlsx exporter test project";
+
+    @Override
+    @BeforeTest
+    public void init() {
+        logger = LoggerFactory.getLogger(this.getClass());
+    }
+
+    //dependencies
+    ByteArrayOutputStream stream;
+    ProjectMetadata projectMetadata;
+    Project project;
+    Engine engine;
+    Properties options;
+
+    //System Under Test
+    StreamExporter SUT;
+
+    /** Creates a fresh exporter, output buffer and registered empty project for each test. */
+    @BeforeMethod
+    public void SetUp(){
+        SUT = new XlsExporter(true);
+        stream = new ByteArrayOutputStream();
+        ProjectManager.singleton = new ProjectManagerStub();
+        projectMetadata = new ProjectMetadata();
+        project = new Project();
+        projectMetadata.setName(TEST_PROJECT_NAME);
+        ProjectManager.singleton.registerProject(project, projectMetadata);
+        engine = new Engine(project);
+        options = mock(Properties.class);
+    }
+
+    /** Deletes the test project from the project manager and clears the per-test fields. */
+    @AfterMethod
+    public void TearDown(){
+        SUT = null;
+        stream = null;
+        ProjectManager.singleton.deleteProject(project.id);
+        project = null;
+        engine = null;
+        options = null;
+    }
+
+    /** Exports a 2x2 grid of strings and checks the text of cell 0 in sheet row 1. */
+    @Test
+    public void exportSimpleXlsx(){
+        CreateGrid(2, 2);
+        Assert.assertEquals(exportAndReadFirstDataCell(), "row0cell0");
+    }
+
+    /**
+     * Exports a 2x2 grid of identical dates and checks the text of cell 0 in sheet row 1.
+     * NOTE(review): "09-Apr-2019" comes from XSSFCell.toString(); presumably locale-dependent.
+     */
+    @Test
+    public void exportXlsxDateType() throws IOException{
+        OffsetDateTime odt = OffsetDateTime.parse("2019-04-09T12:00+00:00");
+        createDateGrid(2, 2, odt);
+        Assert.assertEquals(exportAndReadFirstDataCell(), "09-Apr-2019");
+    }
+
+    //helper methods
+
+    /**
+     * Exports the project into {@link #stream}, reopens the bytes as an XLSX workbook and
+     * returns the text of cell 0 in row 1 of the first sheet. try-with-resources closes the
+     * workbook even when reading throws; an IOException fails the test with its cause.
+     */
+    private String exportAndReadFirstDataCell() {
+        try {
+            SUT.export(project, options, engine, stream);
+        } catch (IOException e) {
+            Assert.fail("Export to XLSX failed: " + e.getMessage(), e);
+        }
+
+        try (XSSFWorkbook wb = new XSSFWorkbook(new ByteArrayInputStream(stream.toByteArray()))) {
+            XSSFSheet ws = wb.getSheetAt(0);
+            XSSFRow row1 = ws.getRow(1);
+            XSSFCell cell0 = row1.getCell(0);
+            return cell0.toString();
+        } catch (IOException e) {
+            Assert.fail("Could not read exported XLSX: " + e.getMessage(), e);
+            return null; // not reached: Assert.fail always throws
+        }
+    }
+
+    /** Adds {@code noOfColumns} columns named "column0", "column1", ... to the project. */
+    protected void CreateColumns(int noOfColumns){
+        for(int i = 0; i < noOfColumns; i++){
+            try {
+                project.columnModel.addColumn(i, new Column(i, "column" + i), true);
+            } catch (ModelException e1) {
+                Assert.fail("Could not create column");
+            }
+        }
+    }
+
+    /** Fills the project with string cells of the form "row" + i + "cell" + j. */
+    protected void CreateGrid(int noOfRows, int noOfColumns){
+        CreateColumns(noOfColumns);
+        for(int i = 0; i < noOfRows; i++){
+            Row row = new Row(noOfColumns);
+            for(int j = 0; j < noOfColumns; j++){
+                row.cells.add(new Cell("row" + i + "cell" + j, null));
+            }
+            project.rows.add(row);
+        }
+    }
+
+    /** Fills the project with cells that all hold the date-time value {@code now}. */
+    private void createDateGrid(int noOfRows, int noOfColumns, OffsetDateTime now){
+        CreateColumns(noOfColumns);
+        for(int i = 0; i < noOfRows; i++){
+            Row row = new Row(noOfColumns);
+            for(int j = 0; j < noOfColumns; j++){
+                row.cells.add(new Cell(now, null));
+            }
+            project.rows.add(row);
+        }
+    }
+}