Fix ToDate test failure & inefficiency - fixes #3026 (#3027)

* Fix ToDate test failure - fixes #3026

Instead of computing offset from UTC at current
point in time, use the offset from the parsed
date so that we're not affected by crossing
a daylight savings time boundary.

* Fix date parsing with locale as first format string

Also refactors for simpicity, restore some dropped tests,
and restores previous behavior of considering a bad
format string an error instead of silently ignoring it.

It does NOT address another issue which was introduced
in May 2018 of treating date/times without timzone
information as UTC instead of local.

* Restore error checking and messages

* Save & restore default timezone for tests

Also add some ToDos for places where LocalDate is being misused.
This commit is contained in:
Tom Morris 2020-08-09 07:53:43 -04:00 committed by GitHub
parent 6bcc2bfbe9
commit 55edae2b7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 144 additions and 106 deletions

View File

@ -37,10 +37,11 @@ import java.text.DateFormat;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.time.ZoneOffset; import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date; import java.util.Date;
import java.util.GregorianCalendar; import java.util.GregorianCalendar;
import java.util.IllformedLocaleException;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Properties; import java.util.Properties;
@ -61,7 +62,7 @@ public class ToDate implements Function {
public Object call(Properties bindings, Object[] args) { public Object call(Properties bindings, Object[] args) {
String o1; String o1;
Boolean month_first = null; Boolean month_first = null;
List<String> formats = new ArrayList<String>(); List<String> formats = new ArrayList<>();
OffsetDateTime date = null; OffsetDateTime date = null;
//Check there is at least one argument //Check there is at least one argument
@ -83,7 +84,11 @@ public class ToDate implements Function {
} }
if(args.length==1) { if(args.length==1) {
date = parse(o1,true,formats); try {
date = parse(o1, true, formats);
} catch (DateFormatException e) {
// Should never happen since we're using an empty format list
}
} else if (args.length > 1) { } else if (args.length > 1) {
if(args[1] instanceof Boolean) { if(args[1] instanceof Boolean) {
month_first = (Boolean) args[1]; month_first = (Boolean) args[1];
@ -92,19 +97,21 @@ public class ToDate implements Function {
} else { } else {
return new EvalError("Invalid argument"); return new EvalError("Invalid argument");
} }
for(int i=2;i<args.length;i++) { for (int i = 2; i < args.length; i++) {
if (!(args[i] instanceof String)) { if (!(args[i] instanceof String)) {
// skip formats that aren't strings return new EvalError("Invalid non-string format argument " + args[i].toString());
continue;
} }
formats.add(StringUtils.trim((String) args[i])); formats.add(StringUtils.trim((String) args[i]));
} }
if(month_first != null) { try {
date = parse(o1,month_first,formats); if (month_first != null) {
} else { date = parse(o1, month_first, formats);
date = parse(o1,formats); } else {
date = parse(o1, formats);
}
} catch (DateFormatException e) {
return new EvalError(e.getMessage());
} }
} }
if(date != null) { if(date != null) {
return date; return date;
@ -112,7 +119,7 @@ public class ToDate implements Function {
return new EvalError("Unable to convert to a date"); return new EvalError("Unable to convert to a date");
} }
private OffsetDateTime parse(String o1, Boolean month_first, List<String> formats) { private OffsetDateTime parse(String o1, Boolean month_first, List<String> formats) throws DateFormatException {
if(month_first != null) { if(month_first != null) {
try { try {
return CalendarParser.parseAsOffsetDateTime( o1, (month_first) ? CalendarParser.MM_DD_YY : CalendarParser.DD_MM_YY); return CalendarParser.parseAsOffsetDateTime( o1, (month_first) ? CalendarParser.MM_DD_YY : CalendarParser.DD_MM_YY);
@ -122,30 +129,29 @@ public class ToDate implements Function {
return parse(o1,formats); return parse(o1,formats);
} }
private OffsetDateTime parse(String o1, List<String> formats) { private Locale getLocale(List<String> formats) {
if(formats.size()>0) {
String f1 = formats.get(0);
formats.remove(0);
return parse(o1,f1,formats);
} else {
return parse(o1,Locale.getDefault(),formats);
}
}
private OffsetDateTime parse(String o1, String f1, List<String> formats) {
Locale locale = Locale.getDefault(); Locale locale = Locale.getDefault();
Locale possibleLocale = Locale.forLanguageTag(f1); // Java 1.7+ if (formats.size() > 0) {
for (Locale l : DateFormat.getAvailableLocales()) { String possibleLanguageTag = formats.get(0);
if (l.equals(possibleLocale)) { try {
locale = possibleLocale; Locale possibleLocale = new Locale.Builder().setLanguageTag(possibleLanguageTag).build();
} else { // Check if it's in our list of supported date locales
formats.add(0,f1); for (Locale l : DateFormat.getAvailableLocales()) {
if (l.equals(possibleLocale)) {
locale = possibleLocale;
formats.remove(0);
}
}
} catch (IllformedLocaleException e) {
// We ignore this. It PROBABLY means we got a date format string, not a language code
// although it could be a malformed language tag like zh_TW instead of zh-TW
} }
} }
return parse(o1,locale,formats); return locale;
} }
private OffsetDateTime parse(String o1, Locale locale, List<String> formats) { private OffsetDateTime parse(String o1, List<String> formats) throws DateFormatException {
Locale locale = getLocale(formats);
DateFormat formatter; DateFormat formatter;
OffsetDateTime date; OffsetDateTime date;
//need to try using each format in the formats list! //need to try using each format in the formats list!
@ -154,7 +160,7 @@ public class ToDate implements Function {
try { try {
formatter = new SimpleDateFormat(formats.get(i),locale); formatter = new SimpleDateFormat(formats.get(i),locale);
} catch (IllegalArgumentException e) { } catch (IllegalArgumentException e) {
continue; throw new DateFormatException("Unable to parse date format " + formats.get(i));
} }
date = parse(o1, formatter); date = parse(o1, formatter);
if (date != null) { if (date != null) {
@ -167,9 +173,9 @@ public class ToDate implements Function {
return date; return date;
} else { } else {
try { try {
return javax.xml.bind.DatatypeConverter.parseDateTime(o1).getTime().toInstant() Calendar parsedDate = javax.xml.bind.DatatypeConverter.parseDateTime(o1);
.plusSeconds(ZonedDateTime.now().getOffset().getTotalSeconds()) int offsetMillis = parsedDate.getTimeZone().getOffset(parsedDate.getTimeInMillis());
.atOffset(ZoneOffset.of("Z")); return parsedDate.toInstant().plusMillis(offsetMillis).atOffset(ZoneOffset.of("Z"));
} catch (IllegalArgumentException e2) { } catch (IllegalArgumentException e2) {
return null; return null;
} }
@ -203,4 +209,12 @@ public class ToDate implements Function {
return "date"; return "date";
} }
class DateFormatException extends Exception {
private static final long serialVersionUID = -6506736145451835731L;
public DateFormatException(String string) {
super(string);
}
}
} }

View File

@ -182,7 +182,7 @@ public class ParsingUtilities {
static public String localDateToString(LocalDateTime d) { static public String localDateToString(LocalDateTime d) {
OffsetDateTime odt = OffsetDateTime.of(d, OffsetDateTime odt = OffsetDateTime.of(d,
OffsetDateTime.now().getOffset()); OffsetDateTime.now().getOffset());
// FIXME: A LocalDate has no timezone, by definition.
return odt.withOffsetSameInstant(ZoneOffset.of("Z")).format(ISO8601); return odt.withOffsetSameInstant(ZoneOffset.of("Z")).format(ISO8601);
} }

View File

@ -35,6 +35,7 @@ package com.google.refine.expr.functions.strings;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.util.Properties; import java.util.Properties;
import java.util.TimeZone;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.testng.Assert; import org.testng.Assert;
@ -49,7 +50,6 @@ import com.google.refine.expr.util.CalendarParser;
import com.google.refine.expr.util.CalendarParserException; import com.google.refine.expr.util.CalendarParserException;
import com.google.refine.grel.ControlFunctionRegistry; import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function; import com.google.refine.grel.Function;
import com.google.refine.util.ParsingUtilities;
/** /**
@ -119,10 +119,8 @@ public class ToFromConversionTests extends RefineTest {
Assert.assertEquals(invoke("toString", Double.valueOf(100.0),"%.0f"),"100"); Assert.assertEquals(invoke("toString", Double.valueOf(100.0),"%.0f"),"100");
String inputDate = "2013-06-01"; String inputDate = "2013-06-01";
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate)), Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate)), "2013-06-01T00:00:00Z");
"2013-06-01T00:00:00Z"); Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate), "yyyy-MM-dd"), "2013-06-01");
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate), "yyyy-MM-dd"),
"2013-06-01");
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate), "yyyy/dd/MM"), "2013/01/06"); Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate), "yyyy/dd/MM"), "2013/01/06");
Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate), "yyyy-MM-dd hh:mm:ss"), "2013-06-01 12:00:00"); Assert.assertEquals(invoke("toString", CalendarParser.parseAsOffsetDateTime(inputDate), "yyyy-MM-dd hh:mm:ss"), "2013-06-01 12:00:00");
@ -135,40 +133,61 @@ public class ToFromConversionTests extends RefineTest {
@Test @Test
public void testToDate() throws CalendarParserException { public void testToDate() throws CalendarParserException {
Assert.assertTrue(invoke("toDate") instanceof EvalError); TimeZone originalTimeZone = TimeZone.getDefault();
Assert.assertTrue(invoke("toDate", (Object) null) instanceof EvalError); try {
Assert.assertTrue(invoke("toDate", "") instanceof EvalError); // Inject a fixed non-UTC timezone
Assert.assertTrue(invoke("toDate", 1.0) instanceof EvalError); TimeZone.setDefault(TimeZone.getTimeZone("JST"));
//Assert.assertTrue(invoke("toDate", "2012-03-01","xxx") instanceof EvalError); // bad format string
Assert.assertTrue(invoke("toDate", "2012-03-01") instanceof OffsetDateTime);
Assert.assertEquals(invoke("toDate", "2012-03-01"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//parse as 'month first' date with and without explicit 'true' parameter
Assert.assertEquals(invoke("toDate", "01/03/2012"),CalendarParser.parseAsOffsetDateTime("2012-01-03"));
Assert.assertEquals(invoke("toDate", "01/03/2012",true),CalendarParser.parseAsOffsetDateTime("2012-01-03"));
//parse as 'month first' date with 'false' parameter
Assert.assertEquals(invoke("toDate", "01/03/2012",false),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//parse as 'month first' date without 'false' parameter but with format specified
Assert.assertEquals(invoke("toDate", "01/03/2012","dd/MM/yyyy"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
Assert.assertEquals(invoke("toDate", "2012-03-01","yyyy-MM-dd"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//Two digit year
Assert.assertEquals(invoke("toDate", "02-02-01"),CalendarParser.parseAsOffsetDateTime("2001-02-02"));
// Multiple format strings should get tried sequentially until one succeeds or all are exhausted
Assert.assertEquals(invoke("toDate", "2012-03-01","MMM","yyyy-MM-dd"), CalendarParser.parseAsOffsetDateTime("2012-03-01"));
// Boolean argument combined with Multiple format strings Assert.assertTrue(invoke("toDate") instanceof EvalError);
Assert.assertEquals(invoke("toDate", "01/03/2012",false, "MMM","yyyy-MM-dd","MM/dd/yyyy"), CalendarParser.parseAsOffsetDateTime("2012-03-01")); Assert.assertTrue(invoke("toDate", (Object) null) instanceof EvalError);
Assert.assertTrue(invoke("toDate", "") instanceof EvalError);
Assert.assertTrue(invoke("toDate", 1.0) instanceof EvalError);
Assert.assertTrue(invoke("toDate", "2012-03-01", "xxx") instanceof EvalError); // bad format string
Assert.assertTrue(invoke("toDate", "2012-03-01", 1L) instanceof EvalError); // non-string format arg
Assert.assertTrue(invoke("toDate", "P1M") instanceof EvalError); // Durations aren't supported
// First string can be a locale identifier instead of a format string Assert.assertTrue(invoke("toDate", "2012-03-01") instanceof OffsetDateTime);
Assert.assertEquals(invoke("toDate", "01-六月-2013","zh","dd-MMM-yyyy"), CalendarParser.parseAsOffsetDateTime("2013-06-01")); Assert.assertEquals(invoke("toDate", "2012-03-01"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//parse as 'month first' date with and without explicit 'true' parameter
Assert.assertEquals(invoke("toDate", "01/03/2012"),CalendarParser.parseAsOffsetDateTime("2012-01-03"));
Assert.assertEquals(invoke("toDate", "01/03/2012",true),CalendarParser.parseAsOffsetDateTime("2012-01-03"));
//parse as 'month first' date with 'false' parameter
Assert.assertEquals(invoke("toDate", "01/03/2012",false),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//parse as 'month first' date without 'false' parameter but with format specified
Assert.assertEquals(invoke("toDate", "01/03/2012","dd/MM/yyyy"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
Assert.assertEquals(invoke("toDate", "2012-03-01","yyyy-MM-dd"),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//Two digit year
Assert.assertEquals(invoke("toDate", "02-02-01"),CalendarParser.parseAsOffsetDateTime("2001-02-02"));
// Multiple format strings should get tried sequentially until one succeeds or all are exhausted
Assert.assertEquals(invoke("toDate", "2012-03-01","MMM","yyyy-MM-dd"), CalendarParser.parseAsOffsetDateTime("2012-03-01"));
//if invalid format/locale strings are passed, ignore them // Boolean argument combined with Multiple format strings
Assert.assertEquals(invoke("toDate", "2012-03-01","XXX"), invoke("toDate", "2012-03-01")); Assert.assertEquals(invoke("toDate", "01/03/2012",false, "MMM","yyyy-MM-dd","MM/dd/yyyy"), CalendarParser.parseAsOffsetDateTime("2012-03-01"));
// If a long, convert to string // First string can be a locale identifier instead of a format string
Assert.assertEquals(invoke("toDate", (long) 2012), invoke("toDate", "2012-01-01")); Assert.assertEquals(invoke("toDate", "2013-06-01","zh"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
Assert.assertEquals(invoke("toDate", "01-六月-2013","zh","dd-MMM-yyyy"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
Assert.assertEquals(invoke("toDate", "01-六月-2013", "zh-CN", "dd-MMM-yyyy"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
Assert.assertEquals(invoke("toDate", "01-六月-2013", "zh", "MMM-dd-yyyy", "dd-MMM-yyyy"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
// If already a date, leave it alone // If a long, convert to string
Assert.assertEquals(invoke("toDate", CalendarParser.parseAsOffsetDateTime("2012-03-01")),CalendarParser.parseAsOffsetDateTime("2012-03-01")); Assert.assertEquals(invoke("toDate", (long) 2012), invoke("toDate", "2012-01-01"));
// If already a date, leave it alone
Assert.assertEquals(invoke("toDate", CalendarParser.parseAsOffsetDateTime("2012-03-01")),CalendarParser.parseAsOffsetDateTime("2012-03-01"));
// FIXME: Date/times without timezone info were interpreted as local up until May 2018 at which point they switch to UTC
// Assert.assertEquals(invoke("toDate", "2013-06-01T13:12:11"), CalendarParser.parseAsOffsetDateTime("2013-06-01 13:12:11"));
// These match current behavior, but would fail with the historic (pre-2018) behavior
Assert.assertEquals(invoke("toDate", "2013-06-01T13:12:11Z"), CalendarParser.parseAsOffsetDateTime("2013-06-01 13:12:11"));
Assert.assertEquals(invoke("toDate", "2013-06-01Z"), CalendarParser.parseAsOffsetDateTime("2013-06-01"));
// TODO: more tests for datetimes with timezones and/or offsets
// Assert.assertEquals(invoke("toDate", "2013-06-01T13:12:11+06:00"), CalendarParser.parseAsOffsetDateTime("2013-06-01 13:12:11"));
} finally {
TimeZone.setDefault(originalTimeZone);
}
} }
@Test @Test

View File

@ -60,15 +60,16 @@ public class ProjectMetadataTests {
TestUtils.isSerializedTo(metadata, jsonSaveMode, true); TestUtils.isSerializedTo(metadata, jsonSaveMode, true);
} }
@Test @Test
public void serializeProjectMetadataInDifferentTimezone() throws JsonParseException, JsonMappingException, IOException { public void serializeProjectMetadataInDifferentTimezone() throws JsonParseException, JsonMappingException, IOException {
TimeZone.setDefault(TimeZone.getTimeZone("JST")); TimeZone originalTimeZone = TimeZone.getDefault();
try { try {
ProjectMetadata metadata = ParsingUtilities.mapper.readValue(jsonSaveMode, ProjectMetadata.class); TimeZone.setDefault(TimeZone.getTimeZone("JST"));
TestUtils.isSerializedTo(metadata, jsonNonSaveMode); ProjectMetadata metadata = ParsingUtilities.mapper.readValue(jsonSaveMode, ProjectMetadata.class);
TestUtils.isSerializedTo(metadata, jsonSaveMode, true); TestUtils.isSerializedTo(metadata, jsonNonSaveMode);
} finally { TestUtils.isSerializedTo(metadata, jsonSaveMode, true);
TimeZone.setDefault(TimeZone.getTimeZone("UTC")); } finally {
} TimeZone.setDefault(originalTimeZone);
} }
}
} }

View File

@ -33,7 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.util; package com.google.refine.util;
import java.io.*; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.GZIPOutputStream; import java.util.zip.GZIPOutputStream;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
@ -49,7 +51,7 @@ import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import com.google.refine.RefineTest; import com.google.refine.RefineTest;
import com.google.refine.util.ParsingUtilities;
public class ParsingUtilitiesTests extends RefineTest { public class ParsingUtilitiesTests extends RefineTest {
@ -98,16 +100,18 @@ public class ParsingUtilitiesTests extends RefineTest {
*/ */
@Test @Test
public void stringToLocalDateNonUTC() { public void stringToLocalDateNonUTC() {
TimeZone.setDefault(TimeZone.getTimeZone("JST")); TimeZone originalTimeZone = TimeZone.getDefault();
try { try {
Assert.assertEquals(ParsingUtilities.stringToLocalDate("2001-08-12T00:00:00Z").getHour(), 9); TimeZone.setDefault(TimeZone.getTimeZone("JST"));
Assert.assertEquals(ParsingUtilities.localDateToString( Assert.assertEquals(ParsingUtilities.stringToLocalDate("2001-08-12T00:00:00Z").getHour(), 9);
ParsingUtilities.stringToLocalDate("2001-08-12T00:00:00Z")), // TODO: This doesn't really make sense since a LocalDate, by definition, doesn't have timezone info
"2001-08-12T00:00:00Z"); Assert.assertEquals(ParsingUtilities.localDateToString(
ParsingUtilities.stringToLocalDate("2001-08-12T00:00:00Z")),
"2001-08-12T00:00:00Z");
} finally { } finally {
TimeZone.setDefault(TimeZone.getTimeZone("UTC")); TimeZone.setDefault(originalTimeZone);
} }
} }
@Test @Test