Fix Excel date import - Fixes #1908 (#2909)

* Add utility functions to check/convert dates

* Add date tests and refactor to DRY up

* Fix date import - fixes #1908

Change from java.util.Date to OpenRefine 3.0+'s OffsetDateTime
Fixes #1908

* Centralize date conversion

* Moving utility methods to ParsingUtilities

* Fix tests
This commit is contained in:
Tom Morris 2020-07-09 17:13:44 -04:00 committed by GitHub
parent a0f2d11255
commit 306b541c69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 80 additions and 65 deletions

View File

@ -202,4 +202,5 @@ public class ToDate implements Function {
public String getReturns() { public String getReturns() {
return "date"; return "date";
} }
} }

View File

@ -34,7 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers; package com.google.refine.importers;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.BufferedInputStream; import java.io.BufferedInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
@ -249,7 +248,7 @@ public class ExcelImporter extends TabularImportingParserBase {
double d = cell.getNumericCellValue(); double d = cell.getNumericCellValue();
if (DateUtil.isCellDateFormatted(cell)) { if (DateUtil.isCellDateFormatted(cell)) {
value = DateUtil.getJavaDate(d); value = ParsingUtilities.toDate(DateUtil.getJavaDate(d));
// TODO: If we had a time datatype, we could use something like the following // TODO: If we had a time datatype, we could use something like the following
// to distinguish times from dates (although Excel doesn't really make the distinction) // to distinguish times from dates (although Excel doesn't really make the distinction)
// Another alternative would be to look for values < 0.60 // Another alternative would be to look for values < 0.60

View File

@ -217,7 +217,7 @@ public class OdsImporter extends TabularImportingParserBase {
} else if ("float".equals(cellType)) { } else if ("float".equals(cellType)) {
value = cell.getDoubleValue(); value = cell.getDoubleValue();
} else if ("date".equals(cellType)) { } else if ("date".equals(cellType)) {
value = cell.getDateValue().toInstant().atOffset(ZoneOffset.UTC); value = ParsingUtilities.toDate(cell.getDateValue());
} else if ("currency".equals(cellType)) { } else if ("currency".equals(cellType)) {
value = cell.getCurrencyValue(); value = cell.getCurrencyValue();
} else if ("percentage".equals(cellType)) { } else if ("percentage".equals(cellType)) {
@ -289,5 +289,4 @@ public class OdsImporter extends TabularImportingParserBase {
return null; return null;
} }
} }
} }

View File

@ -44,6 +44,7 @@ import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException; import java.time.format.DateTimeParseException;
import java.util.Calendar; import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar; import java.util.GregorianCalendar;
import java.util.Properties; import java.util.Properties;
import java.util.TimeZone; import java.util.TimeZone;
@ -240,6 +241,18 @@ public class ParsingUtilities {
return cal; return cal;
} }
public static boolean isDate(Object o) {
return o instanceof OffsetDateTime;
}
public static OffsetDateTime toDate(Date date) {
return date.toInstant().atOffset(ZoneOffset.UTC);
}
public static OffsetDateTime toDate(Calendar date) {
return date.toInstant().atOffset(ZoneOffset.UTC);
}
public static ObjectNode evaluateJsonStringToObjectNode(String optionsString) { public static ObjectNode evaluateJsonStringToObjectNode(String optionsString) {
try { try {
JsonNode tree = mapper.readTree(optionsString); JsonNode tree = mapper.readTree(optionsString);

View File

@ -36,6 +36,7 @@ package com.google.refine.importers;
import static org.mockito.Mockito.times; import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verify;
import static org.testng.Assert.assertTrue;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
@ -49,6 +50,7 @@ import java.util.Date;
import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.Workbook;
@ -134,6 +136,9 @@ public class ExcelImporterTests extends ImporterTest {
Assert.assertEquals((String)project.rows.get(1).getCellValue(4)," Row 1 Col 5"); Assert.assertEquals((String)project.rows.get(1).getCellValue(4)," Row 1 Col 5");
Assert.assertNull((String)project.rows.get(1).getCellValue(5)); Assert.assertNull((String)project.rows.get(1).getCellValue(5));
assertTrue(ParsingUtilities.isDate(project.rows.get(1).getCellValue(2))); // Calendar
assertTrue(ParsingUtilities.isDate(project.rows.get(1).getCellValue(3))); // Date
verify(options, times(1)).get("ignoreLines"); verify(options, times(1)).get("ignoreLines");
verify(options, times(1)).get("headerLines"); verify(options, times(1)).get("headerLines");
verify(options, times(1)).get("skipDataLines"); verify(options, times(1)).get("skipDataLines");
@ -199,6 +204,9 @@ public class ExcelImporterTests extends ImporterTest {
Assert.assertFalse((Boolean)project.rows.get(1).getCellValue(1)); Assert.assertFalse((Boolean)project.rows.get(1).getCellValue(1));
Assert.assertTrue((Boolean)project.rows.get(2).getCellValue(1)); Assert.assertTrue((Boolean)project.rows.get(2).getCellValue(1));
assertTrue(ParsingUtilities.isDate(project.rows.get(1).getCellValue(2))); // Calendar
assertTrue(ParsingUtilities.isDate(project.rows.get(1).getCellValue(3))); // Date
Assert.assertEquals((String)project.rows.get(1).getCellValue(4)," Row 1 Col 5"); Assert.assertEquals((String)project.rows.get(1).getCellValue(4)," Row 1 Col 5");
Assert.assertNull((String)project.rows.get(1).getCellValue(5)); Assert.assertNull((String)project.rows.get(1).getCellValue(5));
@ -258,6 +266,9 @@ public class ExcelImporterTests extends ImporterTest {
Assert.assertFalse((Boolean)project.rows.get(1).getCellValue(1)); Assert.assertFalse((Boolean)project.rows.get(1).getCellValue(1));
Assert.assertTrue((Boolean)project.rows.get(2).getCellValue(1)); Assert.assertTrue((Boolean)project.rows.get(2).getCellValue(1));
assertTrue(ParsingUtilities.isDate(project.rows.get(1).getCellValue(2))); // Calendar
assertTrue(ParsingUtilities.isDate(project.rows.get(1).getCellValue(3))); // Date
Assert.assertEquals((String)project.rows.get(1).getCellValue(4)," Row 1 Col 5"); Assert.assertEquals((String)project.rows.get(1).getCellValue(4)," Row 1 Col 5");
Assert.assertNull((String)project.rows.get(1).getCellValue(5)); Assert.assertNull((String)project.rows.get(1).getCellValue(5));
@ -302,10 +313,12 @@ public class ExcelImporterTests extends ImporterTest {
Assert.assertEquals(((Number)project.rows.get(ROWS).getCellValue(0)).doubleValue(),0.0, EPSILON); Assert.assertEquals(((Number)project.rows.get(ROWS).getCellValue(0)).doubleValue(),0.0, EPSILON);
Assert.assertEquals(((Number)project.rows.get(ROWS).getCellValue(COLUMNS)).doubleValue(),1.0, EPSILON); Assert.assertEquals(((Number)project.rows.get(ROWS).getCellValue(COLUMNS)).doubleValue(),1.0, EPSILON);
Assert.assertFalse((Boolean)project.rows.get(1).getCellValue(1)); Assert.assertFalse((Boolean)project.rows.get(1).getCellValue(1));
Assert.assertTrue((Boolean)project.rows.get(2).getCellValue(1)); Assert.assertTrue((Boolean)project.rows.get(2).getCellValue(1));
assertTrue(ParsingUtilities.isDate(project.rows.get(1).getCellValue(2))); // Calendar
assertTrue(ParsingUtilities.isDate(project.rows.get(1).getCellValue(3))); // Date
Assert.assertEquals((String)project.rows.get(1).getCellValue(4)," Row 1 Col 5"); Assert.assertEquals((String)project.rows.get(1).getCellValue(4)," Row 1 Col 5");
Assert.assertNull((String)project.rows.get(1).getCellValue(5)); Assert.assertNull((String)project.rows.get(1).getCellValue(5));
@ -321,37 +334,15 @@ public class ExcelImporterTests extends ImporterTest {
final Workbook wb = xml ? new XSSFWorkbook() : new HSSFWorkbook(); final Workbook wb = xml ? new XSSFWorkbook() : new HSSFWorkbook();
CellStyle dateStyle = wb.createCellStyle();
short dateFormat = wb.createDataFormat().getFormat("yyyy-MM-dd");
dateStyle.setDataFormat(dateFormat);
for (int s = 0; s < SHEETS; s++) { for (int s = 0; s < SHEETS; s++) {
Sheet sheet = wb.createSheet("Test Sheet " + s); Sheet sheet = wb.createSheet("Test Sheet " + s);
for (int row = 0; row < ROWS; row++) { for (int row = 0; row < ROWS; row++) {
int col = 0; createDataRow(sheet, row, dateStyle, 0);
Row r = sheet.createRow(row);
Cell c;
c = r.createCell(col++);
c.setCellValue(row * 1.1); // double
c = r.createCell(col++);
c.setCellValue(row % 2 == 0); // boolean
c = r.createCell(col++);
c.setCellValue(Calendar.getInstance()); // calendar
c = r.createCell(col++);
c.setCellValue(new Date()); // date
c = r.createCell(col++);
c.setCellValue(" Row " + row + " Col " + col); // string
c = r.createCell(col++);
c.setCellValue(""); // string
// HSSFHyperlink hl = new HSSFHyperlink(HSSFHyperlink.LINK_URL);
// hl.setLabel(cellData.text);
// hl.setAddress(cellData.link);
} }
} }
File file = null; File file = null;
@ -367,48 +358,23 @@ public class ExcelImporterTests extends ImporterTest {
return null; return null;
} }
return file; return file;
} }
private static File createSheetsWithDifferentColumns(boolean xml) { private static File createSheetsWithDifferentColumns(boolean xml) {
final Workbook wb = xml ? new XSSFWorkbook() : new HSSFWorkbook(); final Workbook wb = xml ? new XSSFWorkbook() : new HSSFWorkbook();
CellStyle dateStyle = wb.createCellStyle();
short dateFormat = wb.createDataFormat().getFormat("yyyy-MM-dd");
dateStyle.setDataFormat(dateFormat);
for (int s = 0; s < SHEETS; s++) { for (int s = 0; s < SHEETS; s++) {
Sheet sheet = wb.createSheet("Test Sheet " + s); Sheet sheet = wb.createSheet("Test Sheet " + s);
for (int row = 0; row < ROWS; row++) { for (int row = 0; row < ROWS; row++) {
int col = 0; createDataRow(sheet, row, dateStyle, s);
Row r = sheet.createRow(row);
Cell c;
c = r.createCell(col++);
c.setCellValue(row * 1.1); // double
c = r.createCell(col++);
c.setCellValue(row % 2 == 0); // boolean
c = r.createCell(col++);
c.setCellValue(Calendar.getInstance()); // calendar
c = r.createCell(col++);
c.setCellValue(new Date()); // date
c = r.createCell(col++);
c.setCellValue(" Row " + row + " Col " + col); // string
c = r.createCell(col++);
c.setCellValue(""); // string
// Create extra columns to ensure sheet(i+1) has more columns than sheet(i)
for(int i = 0; i < s; i++){
c = r.createCell(col++);
c.setCellValue(i + s);
} }
} }
}
File file = null; File file = null;
try { try {
file = File.createTempFile("openrefine-importer-test", xml ? ".xlsx" : ".xls"); file = File.createTempFile("openrefine-importer-test", xml ? ".xlsx" : ".xls");
@ -424,4 +390,41 @@ public class ExcelImporterTests extends ImporterTest {
return file; return file;
} }
private static void createDataRow(Sheet sheet, int row, CellStyle dateCellStyle, int extra_columns) {
int col = 0;
Row r = sheet.createRow(row);
Cell c;
c = r.createCell(col++);
c.setCellValue(row * 1.1); // double
c = r.createCell(col++);
c.setCellValue(row % 2 == 0); // boolean
c = r.createCell(col++);
c.setCellValue(Calendar.getInstance()); // calendar
c.setCellStyle(dateCellStyle);
c = r.createCell(col++);
c.setCellValue(new Date()); // date
c.setCellStyle(dateCellStyle);
c = r.createCell(col++);
c.setCellValue(" Row " + row + " Col " + col); // string
c = r.createCell(col++);
c.setCellValue(""); // string
// HSSFHyperlink hl = new HSSFHyperlink(HSSFHyperlink.LINK_URL);
// hl.setLabel(cellData.text);
// hl.setAddress(cellData.link);
// Create extra columns to ensure sheet(i+1) has more columns than sheet(i)
for(int i = 0; i < extra_columns; i++){
c = r.createCell(col++);
c.setCellValue(i + extra_columns);
}
}
} }