diff --git a/extensions/wikidata/module/langs/translation-en.json b/extensions/wikidata/module/langs/translation-en.json
index a2365bdb4..0a1717be4 100644
--- a/extensions/wikidata/module/langs/translation-en.json
+++ b/extensions/wikidata/module/langs/translation-en.json
@@ -154,5 +154,7 @@
     "warnings-messages/no-unit-provided/title": "Unit missing for {property_entity}",
     "warnings-messages/no-unit-provided/body": "Values such as {example_value} on {example_item_entity} are expected to have units.",
     "warnings-messages/invalid-entity-type/title": "{property_entity} used on items",
-    "warnings-messages/invalid-entity-type/body": "Uses of {property_entity} on items such as {example_entity} are invalid."
+    "warnings-messages/invalid-entity-type/body": "Uses of {property_entity} on items such as {example_entity} are invalid.",
+    "warnings-messages/early-gregorian-date/title": "Early dates in the Gregorian calendar",
+    "warnings-messages/early-gregorian-date/body": "Dates earlier than October 1582 (such as in year {example_year}) are unlikely to be expressed using the Gregorian calendar. See the manual to specify the appropriate calendar for your dates."
 }
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
index 86b876b8c..531894588 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
@@ -28,6 +28,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 
+import org.openrefine.wikidata.qa.scrutinizers.CalendarScrutinizer;
 import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
 import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
 import org.openrefine.wikidata.qa.scrutinizers.EntityTypeScrutinizer;
@@ -79,6 +80,7 @@ public class EditInspector {
         register(new QuantityScrutinizer());
         register(new RestrictedValuesScrutinizer());
         register(new EntityTypeScrutinizer());
+        register(new CalendarScrutinizer());
     }
 
     /**
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizer.java
new file mode 100644
index 000000000..928cfc542
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizer.java
@@ -0,0 +1,42 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+/**
+ * Raises a warning for time values expressed in the Gregorian calendar model
+ * that are earlier than 15 October 1582.
+ */
+public class CalendarScrutinizer extends ValueScrutinizer {
+
+    /** Type of the warning raised by this scrutinizer. */
+    public static final String earlyGregorianDateType = "early-gregorian-date";
+
+    /** Gregorian date (15 October 1582) below which values are flagged. */
+    public static final TimeValue earliestGregorian = Datamodel.makeTimeValue(
+            1582, (byte)10, (byte)15, (byte)0, (byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO);
+
+    /**
+     * Flags {@link TimeValue}s that use the same calendar model as
+     * {@link #earliestGregorian}, have a precision of at least 10, and are
+     * strictly earlier than it; any other value is ignored.
+     */
+    @Override
+    public void scrutinize(Value value) {
+        if(TimeValue.class.isInstance(value)) {
+            TimeValue time = (TimeValue)value;
+            if(time.getPreferredCalendarModel().equals(earliestGregorian.getPreferredCalendarModel()) &&
+                    time.getPrecision() >= 10 &&
+                    (time.getYear() < earliestGregorian.getYear() ||
+                    time.getYear() == earliestGregorian.getYear() && time.getMonth() < earliestGregorian.getMonth() ||
+                    time.getYear() == earliestGregorian.getYear() && time.getMonth() == earliestGregorian.getMonth() && time.getDay() < earliestGregorian.getDay())) {
+                QAWarning warning = new QAWarning(earlyGregorianDateType, null, QAWarning.Severity.WARNING, 1);
+                warning.setProperty("example_year", Long.toString(time.getYear()));
+                addIssue(warning);
+            }
+        }
+    }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java
index 8c7909bad..75268b6ca 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java
@@ -30,6 +30,8 @@ import java.util.Calendar;
 import java.util.Date;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.jsoup.helper.Validate;
 import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
@@ -61,6 +63,8 @@ public class WbDateConstant implements WbExpression {
             .put(new SimpleDateFormat("yyyy-MM"), 10)
             .put(new SimpleDateFormat("yyyy-MM-dd"), 11)
             .build();
+
+    public static final Pattern calendarSuffixPattern = Pattern.compile("_(Q[1-9][0-9]*)$");
 
     private TimeValue parsed;
     private String origDatestamp;
@@ -100,15 +104,46 @@ public class WbDateConstant implements WbExpression {
         Date bestDate = null;
         int precision = 0; // default precision (will be overridden if successfully parsed)
         int maxLength = 0; // the maximum length parsed
+        String calendarIri = TimeValue.CM_GREGORIAN_PRO; // Gregorian calendar is assumed by default
+
+        String trimmedDatestamp = datestamp.trim();
+
+        if("TODAY".equals(trimmedDatestamp)) {
+            Calendar calendar = Calendar.getInstance();
+            TimeValue todaysDate = Datamodel.makeTimeValue(
+                    calendar.get(Calendar.YEAR),
+                    (byte)(calendar.get(Calendar.MONTH) + 1), // Calendar.MONTH is zero-based
+                    (byte)calendar.get(Calendar.DAY_OF_MONTH),
+                    (byte)0, (byte)0, (byte)0, (byte)11, 0,0,0, TimeValue.CM_GREGORIAN_PRO);
+            return todaysDate;
+        }
+
+
         for (Entry<SimpleDateFormat, Integer> entry : acceptedFormats.entrySet()) {
             ParsePosition position = new ParsePosition(0);
-            String trimmedDatestamp = datestamp.trim();
             Date date = entry.getKey().parse(trimmedDatestamp, position);
+
+            if (date == null) {
+                continue;
+            }
+
+            // Potentially parse the calendar Qid, which must directly follow the date
+            int consumedUntil = position.getIndex();
+            if(consumedUntil < trimmedDatestamp.length()) {
+                Matcher matcher = calendarSuffixPattern.matcher(
+                        trimmedDatestamp.subSequence(position.getIndex(), trimmedDatestamp.length()));
+                if(matcher.matches()) {
+                    String calendarQid = matcher.group(1);
+                    calendarIri = Datamodel.SITE_WIKIDATA + calendarQid;
+                    consumedUntil = trimmedDatestamp.length();
+                }
+            }
 
             // Ignore parses which failed or do not consume all the input
             if (date != null && position.getIndex() > maxLength
-                    // only allow to partially consume the input if the precision is more than a year
-                    && (entry.getValue() > 9 || position.getIndex() == trimmedDatestamp.length())) {
+                    // only allow to partially consume the input if the precision is day and followed by a T (as in ISO)
+                    && (consumedUntil == trimmedDatestamp.length()
+                            || (entry.getValue() == 11 && trimmedDatestamp.charAt(consumedUntil) == 'T'))) {
                 precision = entry.getValue();
                 bestDate = date;
                 maxLength = position.getIndex();
@@ -123,7 +158,7 @@ public class WbDateConstant implements WbExpression {
             return Datamodel.makeTimeValue(calendar.get(Calendar.YEAR), (byte) (calendar.get(Calendar.MONTH) + 1),
                     (byte) calendar.get(Calendar.DAY_OF_MONTH), (byte) calendar.get(Calendar.HOUR_OF_DAY),
                     (byte) calendar.get(Calendar.MINUTE), (byte) calendar.get(Calendar.SECOND), (byte) precision, 0, 0,
-                    0, TimeValue.CM_GREGORIAN_PRO);
+                    0, calendarIri);
         }
 
     }
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizerTest.java
new file mode 100644
index 000000000..5acb2b254
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/CalendarScrutinizerTest.java
@@ -0,0 +1,37 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.testng.annotations.Test;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
+
+public class CalendarScrutinizerTest extends ValueScrutinizerTest {
+
+    @Override
+    public EditScrutinizer getScrutinizer() {
+        return new CalendarScrutinizer();
+    }
+
+    @Test
+    public void testScrutinizeRecentValue() {
+        scrutinize(Datamodel.makeTimeValue(1978L, (byte)3, (byte)4, (byte)0, (byte)0, (byte)0, 11, TimeValue.CM_GREGORIAN_PRO));
+        assertNoWarningRaised();
+    }
+
+    @Test
+    public void testScrutinizeCloseValue() {
+        scrutinize(Datamodel.makeTimeValue(1582L, (byte)10, (byte)17, (byte)0, (byte)0, (byte)0, 11, TimeValue.CM_GREGORIAN_PRO));
+        assertNoWarningRaised();
+    }
+
+    @Test
+    public void testScrutinizeEarlyYear() {
+        scrutinize(Datamodel.makeTimeValue(1400L, (byte)1, (byte)1, (byte)0, (byte)0, (byte)0, (byte)9, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO));
+        assertNoWarningRaised();
+    }
+
+    @Test
+    public void testScrutinizeEarlyDay() {
+        scrutinize(Datamodel.makeTimeValue(1440L, (byte)10, (byte)17, (byte)0, (byte)0, (byte)0, 11, TimeValue.CM_GREGORIAN_PRO));
+        assertWarningsRaised(CalendarScrutinizer.earlyGregorianDateType);
+    }
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateConstantTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateConstantTest.java
index d7d777034..1df4d3c8c 100644
--- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateConstantTest.java
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateConstantTest.java
@@ -23,6 +23,8 @@
  ******************************************************************************/
 package org.openrefine.wikidata.schema;
 
+import java.util.Calendar;
+
 import org.openrefine.wikidata.testing.JacksonSerializationTest;
 import org.testng.annotations.Test;
 import org.wikidata.wdtk.datamodel.helpers.Datamodel;
@@ -40,6 +42,12 @@ public class WbDateConstantTest extends WbExpressionTest {
     private WbDateConstant second = new WbDateConstant("2017-01-03T04:12:45");
     private WbDateConstant secondz = new WbDateConstant("2017-01-03T04:12:45Z");
 
+    private WbDateConstant julianDay = new WbDateConstant("1324-02-27_Q1985786");
+    private WbDateConstant julianMonth = new WbDateConstant("1324-02_Q1985786");
+    private WbDateConstant julianYear = new WbDateConstant("1324_Q1985786");
+    private WbDateConstant julianDecade = new WbDateConstant("1320D_Q1985786");
+
+
     @Test
     public void testSerialize() {
         JacksonSerializationTest.canonicalSerialization(WbExpression.class, year,
@@ -70,6 +78,26 @@ public class WbDateConstantTest extends WbExpressionTest {
 
         evaluatesTo(Datamodel.makeTimeValue(2018, (byte) 2, (byte) 27, (byte) 0, (byte) 0, (byte) 0, (byte) 11, 0, 0, 0,
                 TimeValue.CM_GREGORIAN_PRO), whitespace);
+
+        evaluatesTo(Datamodel.makeTimeValue(1320, (byte) 1, (byte) 1, (byte) 0, (byte) 0, (byte) 0, (byte) 8, 0, 0, 0,
+                TimeValue.CM_JULIAN_PRO), julianDecade);
+        evaluatesTo(Datamodel.makeTimeValue(1324, (byte) 1, (byte) 1, (byte) 0, (byte) 0, (byte) 0, (byte) 9, 0, 0, 0,
+                TimeValue.CM_JULIAN_PRO), julianYear);
+        evaluatesTo(Datamodel.makeTimeValue(1324, (byte) 2, (byte) 1, (byte) 0, (byte) 0, (byte) 0, (byte) 10, 0, 0, 0,
+                TimeValue.CM_JULIAN_PRO), julianMonth);
+        evaluatesTo(Datamodel.makeTimeValue(1324, (byte) 2, (byte) 27, (byte) 0, (byte) 0, (byte) 0, (byte) 11, 0, 0, 0,
+                TimeValue.CM_JULIAN_PRO), julianDay);
+    }
+
+    @Test
+    public void testToday() {
+        Calendar calendar = Calendar.getInstance();
+        TimeValue expectedDate = Datamodel.makeTimeValue(
+                calendar.get(Calendar.YEAR),
+                (byte)(calendar.get(Calendar.MONTH) + 1), // Calendar.MONTH is zero-based
+                (byte)calendar.get(Calendar.DAY_OF_MONTH),
+                (byte)0, (byte)0, (byte)0, (byte)11, 0,0,0, TimeValue.CM_GREGORIAN_PRO);
+        evaluatesTo(expectedDate, new WbDateConstant("TODAY"));
     }
 
     @Test(expectedExceptions = IllegalArgumentException.class)
@@ -81,4 +109,14 @@ public class WbDateConstantTest extends WbExpressionTest {
     public void testPartlyValid() {
         new WbDateConstant("2018-partly valid");
     }
+
+    @Test(expectedExceptions = IllegalArgumentException.class)
+    public void testInvalidCalendar() {
+        new WbDateConstant("2018-01-02_P234");
+    }
+
+    @Test(expectedExceptions = IllegalArgumentException.class)
+    public void testGarbageBeforeCalendar() {
+        new WbDateConstant("2018-partly valid_Q1985786");
+    }
 }