diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json
index a2365bdb4..0a1717be4 100644
--- a/extensions/wikidata/module/langs/translation-en.json
+++ b/extensions/wikidata/module/langs/translation-en.json
@@ -154,5 +154,7 @@
"warnings-messages/no-unit-provided/title": "Unit missing for {property_entity}",
"warnings-messages/no-unit-provided/body": "Values such as {example_value} on {example_item_entity} are expected to have units.",
"warnings-messages/invalid-entity-type/title": "{property_entity} used on items",
- "warnings-messages/invalid-entity-type/body": "Uses of {property_entity} on items such as {example_entity} are invalid."
+ "warnings-messages/invalid-entity-type/body": "Uses of {property_entity} on items such as {example_entity} are invalid.",
+ "warnings-messages/early-gregorian-date/title": "Early dates in the Gregorian calendar",
+ "warnings-messages/early-gregorian-date/body": "Dates earlier than October 1582 (such as in year {example_year}) are unlikely to be expressed using the Gregorian calendar. See the manual to specify the appropriate calendar for your dates."
}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
index 86b876b8c..531894588 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
@@ -28,6 +28,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
+import org.openrefine.wikidata.qa.scrutinizers.CalendarScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.EntityTypeScrutinizer;
@@ -79,6 +80,7 @@ public class EditInspector {
register(new QuantityScrutinizer());
register(new RestrictedValuesScrutinizer());
register(new EntityTypeScrutinizer());
+ register(new CalendarScrutinizer());
}
/**
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizer.java
new file mode 100644
index 000000000..928cfc542
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizer.java
@@ -0,0 +1,31 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+public class CalendarScrutinizer extends ValueScrutinizer {
+
+ public static final String earlyGregorianDateType = "early-gregorian-date";
+
+ public static final TimeValue earliestGregorian = Datamodel.makeTimeValue(
+ 1582, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
+
+ @Override
+ public void scrutinize(Value value) {
+ if(TimeValue.class.isInstance(value)) {
+ TimeValue time = (TimeValue)value;
+ if(time.getPreferredCalendarModel().equals(earliestGregorian.getPreferredCalendarModel()) &&
+ time.getPrecision() >= 10 &&
+ (time.getYear() < earliestGregorian.getYear() ||
+ time.getYear() == earliestGregorian.getYear() && time.getMonth() < earliestGregorian.getMonth() ||
+ time.getYear() == earliestGregorian.getYear() && time.getMonth() == earliestGregorian.getMonth() && time.getDay() < earliestGregorian.getDay())) {
+ QAWarning warning = new QAWarning(earlyGregorianDateType, null, QAWarning.Severity.WARNING, 1);
+ warning.setProperty("example_year", Long.toString(time.getYear()));
+ addIssue(warning);
+ }
+ }
+ }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java
index 8c7909bad..75268b6ca 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java
@@ -30,6 +30,8 @@ import java.util.Calendar;
import java.util.Date;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.jsoup.helper.Validate;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
@@ -61,6 +63,8 @@ public class WbDateConstant implements WbExpression {
.put(new SimpleDateFormat("yyyy-MM"), 10)
.put(new SimpleDateFormat("yyyy-MM-dd"), 11)
.build();
+
+    /**
+     * Matches the optional calendar suffix of a datestamp: an underscore
+     * followed by a Qid (captured as group 1), at the very end of the input.
+     * Declared final so that no other code can swap the pattern used for parsing.
+     */
+    public static final Pattern calendarSuffixPattern = Pattern.compile("_(Q[1-9][0-9]*)$");
private TimeValue parsed;
private String origDatestamp;
@@ -100,15 +104,46 @@ public class WbDateConstant implements WbExpression {
Date bestDate = null;
int precision = 0; // default precision (will be overridden if successfully parsed)
int maxLength = 0; // the maximum length parsed
+ String calendarIri = TimeValue.CM_GREGORIAN_PRO; // Gregorian calendar is assumed by default
+
+ String trimmedDatestamp = datestamp.trim();
+
+        // "TODAY" stands for the current date, at day precision, in the Gregorian calendar
+        if("TODAY".equals(trimmedDatestamp)) {
+            Calendar calendar = Calendar.getInstance();
+            TimeValue todaysDate = Datamodel.makeTimeValue(
+                    calendar.get(Calendar.YEAR),
+                    // Calendar.MONTH is zero-based (January is 0) whereas the month of a TimeValue starts at 1
+                    (byte)(calendar.get(Calendar.MONTH) + 1),
+                    (byte)calendar.get(Calendar.DAY_OF_MONTH),
+                    (byte)0, (byte)0, (byte)0, (byte)11, 0,0,0, TimeValue.CM_GREGORIAN_PRO);
+            return todaysDate;
+        }
+
+
for (Entry entry : acceptedFormats.entrySet()) {
ParsePosition position = new ParsePosition(0);
- String trimmedDatestamp = datestamp.trim();
Date date = entry.getKey().parse(trimmedDatestamp, position);
+
+ if (date == null) {
+ continue;
+ }
+
+            // Potentially parse the calendar Qid after the date. The suffix is only accepted if it
+            // directly follows the parsed date, or if it follows the ISO time part ("T...") of a
+            // day-precision date. Otherwise any text standing between the date and the suffix
+            // would be silently swallowed (for instance "2018-partly valid_Q5").
+            int consumedUntil = position.getIndex();
+            if(consumedUntil < trimmedDatestamp.length()) {
+                CharSequence remainder = trimmedDatestamp.subSequence(consumedUntil, trimmedDatestamp.length());
+                Matcher matcher = calendarSuffixPattern.matcher(remainder);
+                boolean isoTimeFollows = entry.getValue() == 11 && remainder.charAt(0) == 'T';
+                if(matcher.find() && (matcher.start() == 0 || isoTimeFollows)) {
+                    String calendarQid = matcher.group(1);
+                    calendarIri = Datamodel.SITE_WIKIDATA + calendarQid;
+                    consumedUntil = trimmedDatestamp.length();
+                }
+            }
// Ignore parses which failed or do not consume all the input
if (date != null && position.getIndex() > maxLength
- // only allow to partially consume the input if the precision is more than a year
- && (entry.getValue() > 9 || position.getIndex() == trimmedDatestamp.length())) {
+ // only allow to partially consume the input if the precision is day and followed by a T (as in ISO)
+ && (consumedUntil == trimmedDatestamp.length()
+ || (entry.getValue() == 11 && trimmedDatestamp.charAt(consumedUntil) == 'T'))) {
precision = entry.getValue();
bestDate = date;
maxLength = position.getIndex();
@@ -123,7 +158,7 @@ public class WbDateConstant implements WbExpression {
return Datamodel.makeTimeValue(calendar.get(Calendar.YEAR), (byte) (calendar.get(Calendar.MONTH) + 1),
(byte) calendar.get(Calendar.DAY_OF_MONTH), (byte) calendar.get(Calendar.HOUR_OF_DAY),
(byte) calendar.get(Calendar.MINUTE), (byte) calendar.get(Calendar.SECOND), (byte) precision, 0, 0,
- 0, TimeValue.CM_GREGORIAN_PRO);
+ 0, calendarIri);
}
}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizerTest.java
new file mode 100644
index 000000000..5acb2b254
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizerTest.java
@@ -0,0 +1,37 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.testng.annotations.Test;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
+
+/**
+ * Tests for {@link CalendarScrutinizer}: Gregorian dates before 15 October
+ * 1582 are reported, unless their precision is coarser than a month.
+ */
+public class CalendarScrutinizerTest extends ValueScrutinizerTest {
+
+    @Override
+    public EditScrutinizer getScrutinizer() {
+        return new CalendarScrutinizer();
+    }
+
+    /**
+     * Builds a Gregorian date with an explicit precision (9 = year, 10 = month, 11 = day).
+     *
+     * The eight-argument overload of Datamodel.makeTimeValue is deliberately
+     * avoided: its integer argument is a timezone offset and it always uses
+     * second precision, so passing 11 there does not request day precision.
+     */
+    private static TimeValue gregorianDate(long year, int month, int day, int precision) {
+        return Datamodel.makeTimeValue(year, (byte) month, (byte) day, (byte) 0, (byte) 0, (byte) 0,
+                (byte) precision, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
+    }
+
+    /** A modern date raises nothing. */
+    @Test
+    public void testScrutinizeRecentValue() {
+        scrutinize(gregorianDate(1978L, 3, 4, 11));
+        assertNoWarningRaised();
+    }
+
+    /** A date shortly after the introduction of the calendar raises nothing. */
+    @Test
+    public void testScrutinizeCloseValue() {
+        scrutinize(gregorianDate(1582L, 10, 17, 11));
+        assertNoWarningRaised();
+    }
+
+    /** The first day of the Gregorian calendar itself raises nothing. */
+    @Test
+    public void testScrutinizeFirstGregorianDay() {
+        scrutinize(gregorianDate(1582L, 10, 15, 11));
+        assertNoWarningRaised();
+    }
+
+    /** The day right before the introduction of the calendar is reported. */
+    @Test
+    public void testScrutinizeDayBeforeFirstGregorianDay() {
+        scrutinize(gregorianDate(1582L, 10, 14, 11));
+        assertWarningsRaised(CalendarScrutinizer.earlyGregorianDateType);
+    }
+
+    /** An early date of year precision is too coarse to be reported. */
+    @Test
+    public void testScrutinizeEarlyYear() {
+        scrutinize(gregorianDate(1400L, 1, 1, 9));
+        assertNoWarningRaised();
+    }
+
+    /** An early date of day precision is reported. */
+    @Test
+    public void testScrutinizeEarlyDay() {
+        scrutinize(gregorianDate(1440L, 10, 17, 11));
+        assertWarningsRaised(CalendarScrutinizer.earlyGregorianDateType);
+    }
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateConstantTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateConstantTest.java
index d7d777034..1df4d3c8c 100644
--- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateConstantTest.java
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateConstantTest.java
@@ -23,6 +23,8 @@
******************************************************************************/
package org.openrefine.wikidata.schema;
+import java.util.Calendar;
+
import org.openrefine.wikidata.testing.JacksonSerializationTest;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
@@ -40,6 +42,12 @@ public class WbDateConstantTest extends WbExpressionTest {
private WbDateConstant second = new WbDateConstant("2017-01-03T04:12:45");
private WbDateConstant secondz = new WbDateConstant("2017-01-03T04:12:45Z");
+ // Datestamps carrying a calendar suffix ("_" followed by a Qid), at day, month, year and decade granularity.
+ // The evaluation test expects the Q1985786 suffix to yield TimeValue.CM_JULIAN_PRO as calendar model.
+ private WbDateConstant julianDay = new WbDateConstant("1324-02-27_Q1985786");
+ private WbDateConstant julianMonth = new WbDateConstant("1324-02_Q1985786");
+ private WbDateConstant julianYear = new WbDateConstant("1324_Q1985786");
+ private WbDateConstant julianDecade = new WbDateConstant("1320D_Q1985786");
+
+
@Test
public void testSerialize() {
JacksonSerializationTest.canonicalSerialization(WbExpression.class, year,
@@ -70,6 +78,26 @@ public class WbDateConstantTest extends WbExpressionTest {
evaluatesTo(Datamodel.makeTimeValue(2018, (byte) 2, (byte) 27, (byte) 0, (byte) 0, (byte) 0, (byte) 11, 0, 0, 0,
TimeValue.CM_GREGORIAN_PRO), whitespace);
+
+ evaluatesTo(Datamodel.makeTimeValue(1320, (byte) 1, (byte) 1, (byte) 0, (byte) 0, (byte) 0, (byte) 8, 0, 0, 0,
+ TimeValue.CM_JULIAN_PRO), julianDecade);
+ evaluatesTo(Datamodel.makeTimeValue(1324, (byte) 1, (byte) 1, (byte) 0, (byte) 0, (byte) 0, (byte) 9, 0, 0, 0,
+ TimeValue.CM_JULIAN_PRO), julianYear);
+ evaluatesTo(Datamodel.makeTimeValue(1324, (byte) 2, (byte) 1, (byte) 0, (byte) 0, (byte) 0, (byte) 10, 0, 0, 0,
+ TimeValue.CM_JULIAN_PRO), julianMonth);
+ evaluatesTo(Datamodel.makeTimeValue(1324, (byte) 2, (byte) 27, (byte) 0, (byte) 0, (byte) 0, (byte) 11, 0, 0, 0,
+ TimeValue.CM_JULIAN_PRO), julianDay);
+ }
+
+    /**
+     * The "TODAY" datestamp evaluates to the current date, at day precision,
+     * in the Gregorian calendar.
+     */
+    @Test
+    public void testToday() {
+        Calendar calendar = Calendar.getInstance();
+        TimeValue expectedDate = Datamodel.makeTimeValue(
+                calendar.get(Calendar.YEAR),
+                // Calendar.MONTH is zero-based (January is 0) whereas the month of a TimeValue starts at 1
+                (byte)(calendar.get(Calendar.MONTH) + 1),
+                (byte)calendar.get(Calendar.DAY_OF_MONTH),
+                (byte)0, (byte)0, (byte)0, (byte)11, 0,0,0, TimeValue.CM_GREGORIAN_PRO);
+        evaluatesTo(expectedDate, new WbDateConstant("TODAY"));
     }
@Test(expectedExceptions = IllegalArgumentException.class)
@@ -81,4 +109,9 @@ public class WbDateConstantTest extends WbExpressionTest {
public void testPartlyValid() {
new WbDateConstant("2018-partly valid");
}
+
+ @Test(expectedExceptions = IllegalArgumentException.class)
+ public void testInvalidCalendar() {
+ new WbDateConstant("2018-01-02_P234");
+ }
}