Fix ToDate test failure & inefficiency - fixes #3026 (#3027)

* Fix ToDate test failure - fixes #3026

Instead of computing offset from UTC at current
point in time, use the offset from the parsed
date so that we're not affected by crossing
a daylight savings time boundary.

* Fix date parsing with locale as first format string

Also refactors for simpicity, restore some dropped tests,
and restores previous behavior of considering a bad
format string an error instead of silently ignoring it.

It does NOT address another issue which was introduced
in May 2018 of treating date/times without timzone
information as UTC instead of local.

* Restore error checking and messages

* Save & restore default timezone for tests

Also add some ToDos for places where LocalDate is being misused.
This commit is contained in:
Tom Morris 2020-08-09 07:53:43 -04:00 committed by GitHub
parent 6bcc2bfbe9
commit 55edae2b7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 144 additions and 106 deletions

View File

@ -37,10 +37,11 @@ import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
@ -61,7 +62,7 @@ public class ToDate implements Function {
public Object call(Properties bindings, Object[] args) {
String o1;
Boolean month_first = null;
List<String> formats = new ArrayList<String>();
List<String> formats = new ArrayList<>();
OffsetDateTime date = null;
//Check there is at least one argument
@ -83,7 +84,11 @@ public class ToDate implements Function {
}
if(args.length==1) {
date = parse(o1,true,formats);
try {
date = parse(o1, true, formats);
} catch (DateFormatException e) {
// Should never happen since we're using an empty format list
}
} else if (args.length > 1) {
if(args[1] instanceof Boolean) {
month_first = (Boolean) args[1];
@ -92,19 +97,21 @@ public class ToDate implements Function {
} else {
return new EvalError("Invalid argument");
}
for(int i=2;i<args.length;i++) {
for (int i = 2; i < args.length; i++) {
if (!(args[i] instanceof String)) {
// skip formats that aren't strings
continue;
return new EvalError("Invalid non-string format argument " + args[i].toString());
}
formats.add(StringUtils.trim((String) args[i]));
}
if(month_first != null) {
date = parse(o1,month_first,formats);
} else {
date = parse(o1,formats);
try {
if (month_first != null) {
date = parse(o1, month_first, formats);
} else {
date = parse(o1, formats);
}
} catch (DateFormatException e) {
return new EvalError(e.getMessage());
}
}
if(date != null) {
return date;
@ -112,7 +119,7 @@ public class ToDate implements Function {
return new EvalError("Unable to convert to a date");
}
private OffsetDateTime parse(String o1, Boolean month_first, List<String> formats) {
private OffsetDateTime parse(String o1, Boolean month_first, List<String> formats) throws DateFormatException {
if(month_first != null) {
try {
return CalendarParser.parseAsOffsetDateTime( o1, (month_first) ? CalendarParser.MM_DD_YY : CalendarParser.DD_MM_YY);
@ -121,31 +128,30 @@ public class ToDate implements Function {
}
return parse(o1,formats);
}
private OffsetDateTime parse(String o1, List<String> formats) {
if(formats.size()>0) {
String f1 = formats.get(0);
formats.remove(0);
return parse(o1,f1,formats);
} else {
return parse(o1,Locale.getDefault(),formats);
}
}
private OffsetDateTime parse(String o1, String f1, List<String> formats) {
private Locale getLocale(List<String> formats) {
Locale locale = Locale.getDefault();
Locale possibleLocale = Locale.forLanguageTag(f1); // Java 1.7+
for (Locale l : DateFormat.getAvailableLocales()) {
if (l.equals(possibleLocale)) {
locale = possibleLocale;
} else {
formats.add(0,f1);
if (formats.size() > 0) {
String possibleLanguageTag = formats.get(0);
try {
Locale possibleLocale = new Locale.Builder().setLanguageTag(possibleLanguageTag).build();
// Check if it's in our list of supported date locales
for (Locale l : DateFormat.getAvailableLocales()) {
if (l.equals(possibleLocale)) {
locale = possibleLocale;
formats.remove(0);
}
}
} catch (IllformedLocaleException e) {
// We ignore this. It PROBABLY means we got a date format string, not a language code
// although it could be a malformed language tag like zh_TW instead of zh-TW
}
}
return parse(o1,locale,formats);
return locale;
}
private OffsetDateTime parse(String o1, Locale locale, List<String> formats) {
private OffsetDateTime parse(String o1, List<String> formats) throws DateFormatException {
Locale locale = getLocale(formats);
DateFormat formatter;
OffsetDateTime date;
//need to try using each format in the formats list!
@ -154,7 +160,7 @@ public class ToDate implements Function {
try {
formatter = new SimpleDateFormat(formats.get(i),locale);
} catch (IllegalArgumentException e) {
continue;
throw new DateFormatException("Unable to parse date format " + formats.get(i));
}
date = parse(o1, formatter);
if (date != null) {
@ -167,9 +173,9 @@ public class ToDate implements Function {
return date;
} else {
try {
return javax.xml.bind.DatatypeConverter.parseDateTime(o1).getTime().toInstant()
.plusSeconds(ZonedDateTime.now().getOffset().getTotalSeconds())
.atOffset(ZoneOffset.of("Z"));
Calendar parsedDate = javax.xml.bind.DatatypeConverter.parseDateTime(o1);
int offsetMillis = parsedDate.getTimeZone().getOffset(parsedDate.getTimeInMillis());
return parsedDate.toInstant().plusMillis(offsetMillis).atOffset(ZoneOffset.of("Z"));
} catch (IllegalArgumentException e2) {
return null;
}
@ -203,4 +209,12 @@ public class ToDate implements Function {
return "date";
}
class DateFormatException extends Exception {
private static final long serialVersionUID = -6506736145451835731L;
public DateFormatException(String string) {
super(string);
}
}
}

View File

@ -182,7 +182,7 @@ public class ParsingUtilities {
static public String localDateToString(LocalDateTime d) {
OffsetDateTime odt = OffsetDateTime.of(d,
OffsetDateTime.now().getOffset());
// FIXME: A LocalDate has no timezone, by definition.
return odt.withOffsetSameInstant(ZoneOffset.of("Z")).format(ISO8601);
}

View File

@ -35,6 +35,7 @@ package com.google.refine.expr.functions.strings;
import java.time.OffsetDateTime;
import java.util.Properties;
import java.util.TimeZone;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
@ -49,7 +50,6 @@ import com.google.refine.expr.util.CalendarParser;
import com.google.refine.expr.util.CalendarParserException;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
import com.google.refine.util.ParsingUtilities;
/**
@ -117,15 +117,13 @@ public class ToFromConversionTests extends RefineTest {
Assert.assertEquals(invoke("toString", Long.valueOf(100)),"100");
Assert.assertEquals(invoke("toString", Double.valueOf(100.0)),"100.0");
Assert.assertEquals(invoke("toString", Double.valueOf(100.0),"%.0f"),"100");
String inputDate = "2013-06-01";
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate)),
"2013-06-01T00:00:00Z");
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate), "yyyy-MM-dd"),
"2013-06-01");
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate)), "2013-06-01T00:00:00Z");
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate), "yyyy-MM-dd"), "2013-06-01");
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate), "yyyy/dd/MM"), "2013/01/06");
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate), "yyyy-MM-dd hh:mm:ss"), "2013-06-01 12:00:00");
String inputDateTime = "2013-06-01 13:12:11";
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDateTime)), "2013-06-01T13:12:11Z");
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDateTime), "yyyy-MM-dd"), "2013-06-01");
@ -135,42 +133,63 @@ public class ToFromConversionTests extends RefineTest {
@Test
public void testToDate() throws CalendarParserException {
Assert.assertTrue(invoke("toDate") instanceof EvalError);
Assert.assertTrue(invoke("toDate", (Object) null) instanceof EvalError);
Assert.assertTrue(invoke("toDate", "") instanceof EvalError);
Assert.assertTrue(invoke("toDate", 1.0) instanceof EvalError);
//Assert.assertTrue(invoke("toDate", "2012-03-01","xxx") instanceof EvalError); // bad format string
Assert.assertTrue(invoke("toDate", "2012-03-01") instanceof OffsetDateTime);
Assert.assertEquals(invoke("toDate", "2012-03-01"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//parse as 'month first' date with and without explicit 'true' parameter
Assert.assertEquals(invoke("toDate", "01/03/2012"),CalendarParser.parseAsOffsetDateTime("2012-01-03"));
Assert.assertEquals(invoke("toDate", "01/03/2012",true),CalendarParser.parseAsOffsetDateTime("2012-01-03"));
//parse as 'month first' date with 'false' parameter
Assert.assertEquals(invoke("toDate", "01/03/2012",false),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//parse as 'month first' date without 'false' parameter but with format specified
Assert.assertEquals(invoke("toDate", "01/03/2012","dd/MM/yyyy"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
Assert.assertEquals(invoke("toDate", "2012-03-01","yyyy-MM-dd"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//Two digit year
Assert.assertEquals(invoke("toDate", "02-02-01"),CalendarParser.parseAsOffsetDateTime("2001-02-02"));
// Multiple format strings should get tried sequentially until one succeeds or all are exhausted
Assert.assertEquals(invoke("toDate", "2012-03-01","MMM","yyyy-MM-dd"), CalendarParser.parseAsOffsetDateTime("2012-03-01"));
// Boolean argument combined with Multiple format strings
Assert.assertEquals(invoke("toDate", "01/03/2012",false, "MMM","yyyy-MM-dd","MM/dd/yyyy"), CalendarParser.parseAsOffsetDateTime("2012-03-01"));
// First string can be a locale identifier instead of a format string
Assert.assertEquals(invoke("toDate", "01-六月-2013","zh","dd-MMM-yyyy"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
//if invalid format/locale strings are passed, ignore them
Assert.assertEquals(invoke("toDate", "2012-03-01","XXX"), invoke("toDate", "2012-03-01"));
TimeZone originalTimeZone = TimeZone.getDefault();
try {
// Inject a fixed non-UTC timezone
TimeZone.setDefault(TimeZone.getTimeZone("JST"));
// If a long, convert to string
Assert.assertEquals(invoke("toDate", (long) 2012), invoke("toDate", "2012-01-01"));
Assert.assertTrue(invoke("toDate") instanceof EvalError);
Assert.assertTrue(invoke("toDate", (Object) null) instanceof EvalError);
Assert.assertTrue(invoke("toDate", "") instanceof EvalError);
Assert.assertTrue(invoke("toDate", 1.0) instanceof EvalError);
Assert.assertTrue(invoke("toDate", "2012-03-01", "xxx") instanceof EvalError); // bad format string
Assert.assertTrue(invoke("toDate", "2012-03-01", 1L) instanceof EvalError); // non-string format arg
Assert.assertTrue(invoke("toDate", "P1M") instanceof EvalError); // Durations aren't supported
// If already a date, leave it alone
Assert.assertEquals(invoke("toDate", CalendarParser.parseAsOffsetDateTime("2012-03-01")),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
Assert.assertTrue(invoke("toDate", "2012-03-01") instanceof OffsetDateTime);
Assert.assertEquals(invoke("toDate", "2012-03-01"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//parse as 'month first' date with and without explicit 'true' parameter
Assert.assertEquals(invoke("toDate", "01/03/2012"),CalendarParser.parseAsOffsetDateTime("2012-01-03"));
Assert.assertEquals(invoke("toDate", "01/03/2012",true),CalendarParser.parseAsOffsetDateTime("2012-01-03"));
//parse as 'month first' date with 'false' parameter
Assert.assertEquals(invoke("toDate", "01/03/2012",false),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//parse as 'month first' date without 'false' parameter but with format specified
Assert.assertEquals(invoke("toDate", "01/03/2012","dd/MM/yyyy"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
Assert.assertEquals(invoke("toDate", "2012-03-01","yyyy-MM-dd"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//Two digit year
Assert.assertEquals(invoke("toDate", "02-02-01"),CalendarParser.parseAsOffsetDateTime("2001-02-02"));
// Multiple format strings should get tried sequentially until one succeeds or all are exhausted
Assert.assertEquals(invoke("toDate", "2012-03-01","MMM","yyyy-MM-dd"), CalendarParser.parseAsOffsetDateTime("2012-03-01"));
// Boolean argument combined with Multiple format strings
Assert.assertEquals(invoke("toDate", "01/03/2012",false, "MMM","yyyy-MM-dd","MM/dd/yyyy"), CalendarParser.parseAsOffsetDateTime("2012-03-01"));
// First string can be a locale identifier instead of a format string
Assert.assertEquals(invoke("toDate", "2013-06-01","zh"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
Assert.assertEquals(invoke("toDate", "01-六月-2013","zh","dd-MMM-yyyy"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
Assert.assertEquals(invoke("toDate", "01-六月-2013", "zh-CN", "dd-MMM-yyyy"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
Assert.assertEquals(invoke("toDate", "01-六月-2013", "zh", "MMM-dd-yyyy", "dd-MMM-yyyy"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
// If a long, convert to string
Assert.assertEquals(invoke("toDate", (long) 2012), invoke("toDate", "2012-01-01"));
// If already a date, leave it alone
Assert.assertEquals(invoke("toDate", CalendarParser.parseAsOffsetDateTime("2012-03-01")),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
// FIXME: Date/times without timezone info were interpreted as local up until May 2018 at which point they switch to UTC
// Assert.assertEquals(invoke("toDate", "2013-06-01T13:12:11"), CalendarParser.parseAsOffsetDateTime("2013-06-01 13:12:11"));
// These match current behavior, but would fail with the historic (pre-2018) behavior
Assert.assertEquals(invoke("toDate", "2013-06-01T13:12:11Z"), CalendarParser.parseAsOffsetDateTime("2013-06-01 13:12:11"));
Assert.assertEquals(invoke("toDate", "2013-06-01Z"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
// TODO: more tests for datetimes with timezones and/or offsets
// Assert.assertEquals(invoke("toDate", "2013-06-01T13:12:11+06:00"), CalendarParser.parseAsOffsetDateTime("2013-06-01 13:12:11"));
} finally {
TimeZone.setDefault(originalTimeZone);
}
}
@Test
public void testEscape() {
Assert.assertNull(invoke("escape"));

View File

@ -41,10 +41,10 @@ import com.fasterxml.jackson.databind.JsonMappingException;
import com.google.refine.ProjectMetadata;
public class ProjectMetadataTests {
private String jsonSaveMode = null;
private String jsonNonSaveMode = null;
@BeforeSuite
public void setUpJson() throws IOException {
InputStream f = ProjectMetadataTests.class.getClassLoader().getResourceAsStream("example_project_metadata.json");
@ -59,16 +59,17 @@ public class ProjectMetadataTests {
TestUtils.isSerializedTo(metadata, jsonNonSaveMode);
TestUtils.isSerializedTo(metadata, jsonSaveMode, true);
}
@Test
public void serializeProjectMetadataInDifferentTimezone() throws JsonParseException, JsonMappingException, IOException {
TimeZone.setDefault(TimeZone.getTimeZone("JST"));
try {
ProjectMetadata metadata = ParsingUtilities.mapper.readValue(jsonSaveMode, ProjectMetadata.class);
TestUtils.isSerializedTo(metadata, jsonNonSaveMode);
TestUtils.isSerializedTo(metadata, jsonSaveMode, true);
} finally {
TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
}
}
@Test
public void serializeProjectMetadataInDifferentTimezone() throws JsonParseException, JsonMappingException, IOException {
TimeZone originalTimeZone = TimeZone.getDefault();
try {
TimeZone.setDefault(TimeZone.getTimeZone("JST"));
ProjectMetadata metadata = ParsingUtilities.mapper.readValue(jsonSaveMode, ProjectMetadata.class);
TestUtils.isSerializedTo(metadata, jsonNonSaveMode);
TestUtils.isSerializedTo(metadata, jsonSaveMode, true);
} finally {
TimeZone.setDefault(originalTimeZone);
}
}
}

View File

@ -33,7 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.util;
import java.io.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.GZIPOutputStream;
import java.time.OffsetDateTime;
@ -49,7 +51,7 @@ import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.RefineTest;
import com.google.refine.util.ParsingUtilities;
public class ParsingUtilitiesTests extends RefineTest {
@ -92,24 +94,26 @@ public class ParsingUtilitiesTests extends RefineTest {
Assert.assertEquals(2017, ParsingUtilities.stringToLocalDate("2017-04-03T08:09:43.123Z").getYear());
Assert.assertEquals(2017, ParsingUtilities.stringToLocalDate("2017-04-03T08:09:43+00:00").getYear());
}
/**
* Converting between string and local time must be reversible, no matter the timezone.
*/
@Test
public void stringToLocalDateNonUTC() {
TimeZone.setDefault(TimeZone.getTimeZone("JST"));
try {
Assert.assertEquals(ParsingUtilities.stringToLocalDate("2001-08-12T00:00:00Z").getHour(), 9);
Assert.assertEquals(ParsingUtilities.localDateToString(
ParsingUtilities.stringToLocalDate("2001-08-12T00:00:00Z")),
"2001-08-12T00:00:00Z");
} finally {
TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
}
TimeZone originalTimeZone = TimeZone.getDefault();
try {
TimeZone.setDefault(TimeZone.getTimeZone("JST"));
Assert.assertEquals(ParsingUtilities.stringToLocalDate("2001-08-12T00:00:00Z").getHour(), 9);
// TODO: This doesn't really make sense since a LocalDate, by definition, doesn't have timezone info
Assert.assertEquals(ParsingUtilities.localDateToString(
ParsingUtilities.stringToLocalDate("2001-08-12T00:00:00Z")),
"2001-08-12T00:00:00Z");
} finally {
TimeZone.setDefault(originalTimeZone);
}
}
@Test
public void parseProjectModifiedBeforeJDK8() {
String modified = "2014-01-15T21:46:25Z";