From cc12098828810b915750da9b42b0db873494425c Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Thu, 8 Nov 2018 09:11:49 +0000 Subject: [PATCH 1/3] Initial attempt to fix toDate function --- .../google/refine/expr/functions/ToDate.java | 166 ++++++++++-------- .../strings/ToFromConversionTests.java | 11 ++ 2 files changed, 106 insertions(+), 71 deletions(-) diff --git a/main/src/com/google/refine/expr/functions/ToDate.java b/main/src/com/google/refine/expr/functions/ToDate.java index 7204bf7f9..8cf9daf5f 100644 --- a/main/src/com/google/refine/expr/functions/ToDate.java +++ b/main/src/com/google/refine/expr/functions/ToDate.java @@ -59,10 +59,18 @@ public class ToDate implements Function { @Override public Object call(Properties bindings, Object[] args) { String o1; + Boolean month_first = null; + Locale locale = Locale.getDefault(); + Integer arg_pointer = 0; //pointer used to keep track of which argument we are parsing + DateFormat formatter; + OffsetDateTime date; + + //Check there is at least one argument if (args.length == 0) { return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects at least one argument"); } else { - Object arg0 = args[0]; + Object arg0 = args[arg_pointer]; + //check the first argument is something that can be parsed as a date if (arg0 instanceof OffsetDateTime) { return arg0; } else if (arg0 instanceof Long) { @@ -73,79 +81,95 @@ public class ToDate implements Function { // ignore cell values that aren't Date, Calendar, Long or String return new EvalError("Unable to parse as date"); } + arg_pointer++; //increment arg_pointer to 1 } - - // "o, boolean month_first (optional)" - if (args.length == 1 || (args.length == 2 && args[1] instanceof Boolean)) { - boolean month_first = true; - if (args.length == 2) { - month_first = (Boolean) args[1]; - } - try { - return CalendarParser.parseAsOffsetDateTime( o1, (month_first) ? CalendarParser.MM_DD_YY : CalendarParser.DD_MM_YY); - } catch (CalendarParserException e) { - OffsetDateTime d = ParsingUtilities.stringToDate(o1); - if (d != null) { - return d; - } else { - try { - return javax.xml.bind.DatatypeConverter.parseDateTime(o1).getTime().toInstant().atOffset(ZoneOffset.of("Z")); - } catch (IllegalArgumentException e2) { - } - } - return new EvalError("Unable to parse as date"); - } - } else if (args.length>=2) { - // "o, format1, format2 (optional), ..." - Locale locale = Locale.getDefault(); - for (int i=1;iarg_pointer) { + //is the first argument a boolean? If so use it as the month_first option + if(args[arg_pointer] instanceof Boolean) { + month_first = (Boolean) args[arg_pointer]; + arg_pointer++; //increment arg_pointer to 2 + } + //if first argument isn't Boolean, do nothing + } + + //month first helper maybe set by now so try a parse + if(month_first != null) { + try { + return CalendarParser.parseAsOffsetDateTime( o1, (month_first) ? CalendarParser.MM_DD_YY : CalendarParser.DD_MM_YY); + } catch (CalendarParserException e) { + if(args.length-arg_pointer<1) { //no more arguments to try + OffsetDateTime d = ParsingUtilities.stringToDate(o1); + if (d != null) { + return d; + } else { + try { + return javax.xml.bind.DatatypeConverter.parseDateTime(o1).getTime().toInstant().atOffset(ZoneOffset.of("Z")); + } catch (IllegalArgumentException e2) { + return new EvalError("Unable to parse as date"); + } + } + // if arguments >2 do nothing - there are still things we can try + } + } + } + + //check if we have still have arguments to parse + if (args.length-arg_pointer>=1) { + if(args[arg_pointer] instanceof String); + String localeString = StringUtils.trim((String) args[arg_pointer]); + Locale possibleLocale = Locale.forLanguageTag(localeString); // Java 1.7+ + for (Locale l : DateFormat.getAvailableLocales()) { + if (l.equals(possibleLocale.toLanguageTag())) { + locale = possibleLocale; + arg_pointer++; + break; + } + } + } + + while (arg_pointer < args.length) { + if (!(args[arg_pointer] instanceof String)) { + // skip formats that aren't strings + continue; + } + String format = StringUtils.trim((String) args[arg_pointer]); + try { + formatter = new SimpleDateFormat(format,locale); + } catch (IllegalArgumentException e) { + return new EvalError("Unknown date format"); + } + date = parse(o1, formatter); + if (date != null) { + return date; + } + arg_pointer++; + } + + //no formats, or not managed to convert to date using provided formats, so try default format + formatter = DateFormat.getDateInstance(DateFormat.DEFAULT, locale); + date = parse(o1, formatter); + if (date != null) { + return date; + } + //if we get here without successfully getting a date, try just a basic parse + date = ParsingUtilities.stringToDate(o1); + if (date != null) { + return date; + } else { //trying one last way + try { + return javax.xml.bind.DatatypeConverter.parseDateTime(o1).getTime().toInstant().atOffset(ZoneOffset.of("Z")); + } catch (IllegalArgumentException e2) { + return new EvalError("Unable to parse as date"); + } + } } private OffsetDateTime parse(String o1, DateFormat formatter) { diff --git a/main/tests/server/src/com/google/refine/tests/expr/functions/strings/ToFromConversionTests.java b/main/tests/server/src/com/google/refine/tests/expr/functions/strings/ToFromConversionTests.java index b6b7e2cb7..37722da00 100644 --- a/main/tests/server/src/com/google/refine/tests/expr/functions/strings/ToFromConversionTests.java +++ b/main/tests/server/src/com/google/refine/tests/expr/functions/strings/ToFromConversionTests.java @@ -141,14 +141,25 @@ public class ToFromConversionTests extends RefineTest { Assert.assertTrue(invoke("toDate", "2012-03-01","xxx") instanceof EvalError); // bad format string Assert.assertTrue(invoke("toDate", "2012-03-01") instanceof OffsetDateTime); Assert.assertEquals(invoke("toDate", "2012-03-01"),CalendarParser.parseAsOffsetDateTime("2012-03-01")); + //parse as 'month first' date with and without explicit 'true' parameter + Assert.assertEquals(invoke("toDate", "01/03/2012"),CalendarParser.parseAsOffsetDateTime("2012-01-03")); + Assert.assertEquals(invoke("toDate", "01/03/2012",true),CalendarParser.parseAsOffsetDateTime("2012-01-03")); + //parse as 'month first' date with 'false' parameter + Assert.assertEquals(invoke("toDate", "01/03/2012",false),CalendarParser.parseAsOffsetDateTime("2012-03-01")); + //parse as 'month first' date without 'false' parameter but with format specified + Assert.assertEquals(invoke("toDate", "01/03/2012","dd/MM/yyyy"),CalendarParser.parseAsOffsetDateTime("2012-03-01")); Assert.assertEquals(invoke("toDate", "2012-03-01","yyyy-MM-dd"),CalendarParser.parseAsOffsetDateTime("2012-03-01")); // Multiple format strings should get tried sequentially until one succeeds or all are exhausted Assert.assertEquals(invoke("toDate", "2012-03-01","MMM","yyyy-MM-dd"), CalendarParser.parseAsOffsetDateTime("2012-03-01")); + // Boolean argument combined with Multiple format strings + Assert.assertEquals(invoke("toDate", "01/03/2012",false, "MMM","yyyy-MM-dd","MM/dd/yyyy"), CalendarParser.parseAsOffsetDateTime("2012-03-01")); + // First string can be a locale identifier instead of a format string Assert.assertEquals(invoke("toDate", "2013-06-01","zh"), CalendarParser.parseAsOffsetDateTime("2013-06-01")); Assert.assertEquals(invoke("toDate", "01-六月-2013","zh","dd-MMM-yyyy"), CalendarParser.parseAsOffsetDateTime("2013-06-01")); + } @Test From 1627a5107585d29715a8a78a0fb0b4df9fa9ef91 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Thu, 15 Nov 2018 18:20:55 +0000 Subject: [PATCH 2/3] Reorganises ToDate function, adds tests and fixes #1759 --- .../google/refine/expr/functions/ToDate.java | 165 +++++++++--------- .../strings/ToFromConversionTests.java | 13 +- 2 files changed, 94 insertions(+), 84 deletions(-) diff --git a/main/src/com/google/refine/expr/functions/ToDate.java b/main/src/com/google/refine/expr/functions/ToDate.java index 8cf9daf5f..1a20e8cdf 100644 --- a/main/src/com/google/refine/expr/functions/ToDate.java +++ b/main/src/com/google/refine/expr/functions/ToDate.java @@ -37,8 +37,10 @@ import java.text.DateFormat; import java.text.SimpleDateFormat; import java.time.OffsetDateTime; import java.time.ZoneOffset; +import java.util.ArrayList; import java.util.Date; import java.util.GregorianCalendar; +import java.util.List; import java.util.Locale; import java.util.Properties; import java.util.TimeZone; @@ -60,16 +62,14 @@ public class ToDate implements Function { public Object call(Properties bindings, Object[] args) { String o1; Boolean month_first = null; - Locale locale = Locale.getDefault(); - Integer arg_pointer = 0; //pointer used to keep track of which argument we are parsing - DateFormat formatter; - OffsetDateTime date; + List formats = new ArrayList(); + OffsetDateTime date = null; //Check there is at least one argument if (args.length == 0) { return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects at least one argument"); } else { - Object arg0 = args[arg_pointer]; + Object arg0 = args[0]; //check the first argument is something that can be parsed as a date if (arg0 instanceof OffsetDateTime) { return arg0; @@ -81,95 +81,98 @@ public class ToDate implements Function { // ignore cell values that aren't Date, Calendar, Long or String return new EvalError("Unable to parse as date"); } - arg_pointer++; //increment arg_pointer to 1 } - // - if(args.length==arg_pointer) { - //if there is just one valid argument, we treat as if month_first set to true - month_first = true; - } - - //if there are two or more arguments work out what type of arguments they are - if (args.length>arg_pointer) { - //is the first argument a boolean? If so use it as the month_first option - if(args[arg_pointer] instanceof Boolean) { - month_first = (Boolean) args[arg_pointer]; - arg_pointer++; //increment arg_pointer to 2 + if(args.length==1) { + date = parse(o1,true,formats); + } else if (args.length > 1) { + if(args[1] instanceof Boolean) { + month_first = (Boolean) args[1]; + } else if (args[1] instanceof String) { + formats.add(StringUtils.trim((String) args[1])); + } else { + return new EvalError("Invalid argument"); } - //if first argument isn't Boolean, do nothing + for(int i=2;i formats) { if(month_first != null) { try { return CalendarParser.parseAsOffsetDateTime( o1, (month_first) ? CalendarParser.MM_DD_YY : CalendarParser.DD_MM_YY); - } catch (CalendarParserException e) { - if(args.length-arg_pointer<1) { //no more arguments to try - OffsetDateTime d = ParsingUtilities.stringToDate(o1); - if (d != null) { - return d; - } else { - try { - return javax.xml.bind.DatatypeConverter.parseDateTime(o1).getTime().toInstant().atOffset(ZoneOffset.of("Z")); - } catch (IllegalArgumentException e2) { - return new EvalError("Unable to parse as date"); - } - } - // if arguments >2 do nothing - there are still things we can try - } + } catch (CalendarParserException e) { } - } - - //check if we have still have arguments to parse - if (args.length-arg_pointer>=1) { - if(args[arg_pointer] instanceof String); - String localeString = StringUtils.trim((String) args[arg_pointer]); - Locale possibleLocale = Locale.forLanguageTag(localeString); // Java 1.7+ - for (Locale l : DateFormat.getAvailableLocales()) { - if (l.equals(possibleLocale.toLanguageTag())) { - locale = possibleLocale; - arg_pointer++; - break; + } + return parse(o1,formats); + } + + private OffsetDateTime parse(String o1, List formats) { + if(formats.size()>0) { + String f1 = formats.get(0); + formats.remove(0); + return parse(o1,f1,formats); + } else { + return parse(o1,Locale.getDefault(),formats); + } + } + + private OffsetDateTime parse(String o1, String f1, List formats) { + Locale locale = Locale.getDefault(); + Locale possibleLocale = Locale.forLanguageTag(f1); // Java 1.7+ + for (Locale l : DateFormat.getAvailableLocales()) { + if (l.equals(possibleLocale)) { + locale = possibleLocale; + } else { + formats.add(0,f1); + } + } + return parse(o1,locale,formats); + } + + private OffsetDateTime parse(String o1, Locale locale, List formats) { + DateFormat formatter; + OffsetDateTime date; + //need to try using each format in the formats list! + if(formats.size()>0) { + for(int i=0;i Date: Thu, 15 Nov 2018 23:47:22 +0000 Subject: [PATCH 3/3] Add test for parsing a string with a two digit year --- .../tests/expr/functions/strings/ToFromConversionTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main/tests/server/src/com/google/refine/tests/expr/functions/strings/ToFromConversionTests.java b/main/tests/server/src/com/google/refine/tests/expr/functions/strings/ToFromConversionTests.java index dab2021b7..7d64488fd 100644 --- a/main/tests/server/src/com/google/refine/tests/expr/functions/strings/ToFromConversionTests.java +++ b/main/tests/server/src/com/google/refine/tests/expr/functions/strings/ToFromConversionTests.java @@ -149,7 +149,8 @@ public class ToFromConversionTests extends RefineTest { //parse as 'month first' date without 'false' parameter but with format specified Assert.assertEquals(invoke("toDate", "01/03/2012","dd/MM/yyyy"),CalendarParser.parseAsOffsetDateTime("2012-03-01")); Assert.assertEquals(invoke("toDate", "2012-03-01","yyyy-MM-dd"),CalendarParser.parseAsOffsetDateTime("2012-03-01")); - + //Two digit year + Assert.assertEquals(invoke("toDate", "02-02-01"),CalendarParser.parseAsOffsetDateTime("2001-02-02")); // Multiple format strings should get tried sequentially until one succeeds or all are exhausted Assert.assertEquals(invoke("toDate", "2012-03-01","MMM","yyyy-MM-dd"), CalendarParser.parseAsOffsetDateTime("2012-03-01"));