").addClass(css.item_name).append(type).append(label));
+ return li;
+ },
+
+ get_native_name: function(lang_code) {
+ var language = $.uls.data.languages[lang_code];
+ if (language) {
+ return language[2];
+ }
+ },
+});
+
diff --git a/extensions/wikidata/module/scripts/menu-bar-extension.js b/extensions/wikidata/module/scripts/menu-bar-extension.js
new file mode 100644
index 000000000..0c69d2a3f
--- /dev/null
+++ b/extensions/wikidata/module/scripts/menu-bar-extension.js
@@ -0,0 +1,90 @@
+// Load the localization file
+var dictionary = {};
+$.ajax({
+ url : "command/core/load-language?",
+ type : "POST",
+ async : false,
+ data : {
+ module : "wikidata",
+// lang : lang
+ },
+ success : function(data) {
+ dictionary = data;
+ }
+});
+$.i18n.setDictionary(dictionary);
+
+
+
+ExporterManager.MenuItems.push({});
+ExporterManager.MenuItems.push(
+ {
+ "id" : "exportQuickStatements",
+ "label": $.i18n._('wikidata-extension')["quickstatements-export-name"],
+ "click": function() { WikibaseExporterMenuBar.exportTo("quickstatements"); }
+ }
+);
+
+WikibaseExporterMenuBar = {};
+
+WikibaseExporterMenuBar.exportTo = function(format) { /*
+ * Exports the project by POSTing a temporary hidden form to
+ * command/core/export-rows/statements.txt, targeted at the window named
+ * "gridworks-export". The form carries three fields: "engine" (the JSON
+ * serialization of ui.browsingEngine.getJSON()), "project" (theProject.id)
+ * and "format" (the `format` argument). The form is attached to the
+ * document body only for the duration of the submission.
+ * NOTE(review): the markup strings passed to $() below look truncated in
+ * this copy -- presumably they created input elements; confirm.
+ */
+ var form = document.createElement("form");
+ $(form).css("display", "none")
+ .attr("method", "post")
+ .attr("action", "command/core/export-rows/statements.txt")
+ .attr("target", "gridworks-export");
+ $('
')
+ .attr("name", "engine")
+ .attr("value", JSON.stringify(ui.browsingEngine.getJSON()))
+ .appendTo(form);
+ $('
')
+ .attr("name", "project")
+ .attr("value", theProject.id)
+ .appendTo(form);
+ $('
')
+ .attr("name", "format")
+ .attr("value", format)
+ .appendTo(form);
+
+ document.body.appendChild(form);
+
+ window.open("about:blank", "gridworks-export"); // open the window the form is targeted at
+ form.submit();
+
+ document.body.removeChild(form);
+};
+
+//extend the column header menu
+$(function(){
+
+ ExtensionBar.MenuItems.push(
+ {
+ "id":"reconcile",
+ "label": $.i18n._('wikidata-extension')["menu-label"],
+ "submenu" : [
+ {
+ id: "wikidata/edit-schema",
+ label: $.i18n._('wikidata-extension')["edit-wikidata-schema"],
+ click: function() { SchemaAlignmentDialog.launch(false); }
+ },
+ {
+ id:"wikidata/manage-account",
+ label: $.i18n._('wikidata-extension')["manage-wikidata-account"],
+ click: function() { ManageAccountDialog.checkAndLaunch(); }
+ },
+ {
+ id:"wikidata/perform-edits",
+ label: $.i18n._('wikidata-extension')["perform-edits-on-wikidata"],
+ click: function() { PerformEditsDialog.checkAndLaunch(); }
+ },
+ {
+ id:"wikidata/export-qs",
+ label: $.i18n._('wikidata-extension')["export-to-qs"],
+ click: function() { WikibaseExporterMenuBar.exportTo("quickstatements"); }
+ },
+
+ ]
+ }
+ );
+});
+
diff --git a/extensions/wikidata/module/scripts/preview-tab.html b/extensions/wikidata/module/scripts/preview-tab.html
new file mode 100644
index 000000000..12a917323
--- /dev/null
+++ b/extensions/wikidata/module/scripts/preview-tab.html
@@ -0,0 +1,6 @@
+
+
diff --git a/extensions/wikidata/module/scripts/previewrenderer.js b/extensions/wikidata/module/scripts/previewrenderer.js
new file mode 100644
index 000000000..454d3d63f
--- /dev/null
+++ b/extensions/wikidata/module/scripts/previewrenderer.js
@@ -0,0 +1,256 @@
+/**
+ * renders an item update (an edit on an item) in HTML.
+ */
+
+var EditRenderer = {};
+
+// settings
+EditRenderer.maxTerms = 15; // max number of terms displayed
+EditRenderer.maxStatements = 25; // max number of statements per statement group
+
+// main method: takes a DOM element and a list
+// of edits to render there.
+EditRenderer.renderEdits = function(edits, container) {
+ for(var i = 0; i != edits.length; i++) {
+ EditRenderer._renderItem(edits[i], container);
+ }
+}
+
+/**************/
+/*** ITEMS ****/
+/**************/
+
+EditRenderer._renderItem = function(json, container) {
+ var subject = json;
+ var statementGroups = null;
+ var nameDescs = null;
+ if (json) {
+ subject = json.subject;
+ statementGroups = json.statementGroups;
+ nameDescs = json.nameDescs;
+ }
+
+ var item = $('
').addClass('wbs-item').appendTo(container);
+ var inputContainer = $('
').addClass('wbs-item-input').appendTo(item);
+ EditRenderer._renderEntity(json.subject, inputContainer);
+ var right = $('
').addClass('wbs-item-contents').appendTo(item);
+
+ // Terms
+ if ((json.labels && json.labels.length) ||
+ (json.descriptions && json.descriptions.length) ||
+ (json.addedAliases && json.addedAliases.length)) {
+ var termsContainer = $('
').addClass('wbs-namedesc-container')
+ .appendTo(right);
+
+ this._renderTermsList(json.labels, "label", termsContainer);
+ this._renderTermsList(json.descriptions, "description", termsContainer);
+ this._renderTermsList(json.aliases, "alias", termsContainer);
+
+ // Clear the float
+ $('
').attr('style', 'clear: right').appendTo(right);
+ }
+
+ // Statements
+ if (json.addedStatementGroups && json.addedStatementGroups.length) {
+ // $('
').addClass('wbs-statements-header')
+ // .text($.i18n._('wikidata-schema')['statements-header']).appendTo(right);
+ var statementsGroupContainer = $('
').addClass('wbs-statement-group-container')
+ .appendTo(right);
+ for(var i = 0; i != json.addedStatementGroups.length; i++) {
+ EditRenderer._renderStatementGroup(json.addedStatementGroups[i], statementsGroupContainer);
+ }
+ }
+}
+
+/**************************
+ * NAMES AND DESCRIPTIONS *
+ **************************/
+
+EditRenderer._renderTermsList = function(termList, termType, termsContainer) { /*
+ * Renders at most `this.maxTerms` entries of `termList` into
+ * `termsContainer`, passing `termType` through to EditRenderer._renderTerm
+ * for each of them. Does nothing when `termList` is missing. When the list
+ * is longer than the limit, one extra element with the text '...' is
+ * appended after the rendered terms.
+ */
+ if(!termList) {
+ return;
+ }
+ for(var i = 0; i != Math.min(termList.length, this.maxTerms); i++) {
+ EditRenderer._renderTerm(termType, termList[i], termsContainer);
+ }
+ if(termList.length > this.maxTerms) { // the list was truncated: add an ellipsis entry
+ $('
').addClass('wbs-namedesc').text('...').appendTo(termsContainer);
+ }
+}
+
+EditRenderer._renderTerm = function(termType, json, container) { /*
+ * Renders a single term into `container`: a type cell showing the
+ * 'wikidata-schema' localization string for `termType`, and a value cell
+ * in which `json` is rendered as a 'monolingualtext' value through
+ * EditRenderer._renderValue.
+ * NOTE(review): `type_span` and `right` are never read after creation; the
+ * value cell is appended to `namedesc`, not to `right` -- confirm intent.
+ */
+ var namedesc = $('
').addClass('wbs-namedesc').appendTo(container);
+ var type_container = $('
').addClass('wbs-namedesc-type').appendTo(namedesc);
+ var type_span = $('
').appendTo(type_container)
+ .text($.i18n._('wikidata-schema')[termType]);
+
+ var right = $('
').addClass('wbs-right').appendTo(namedesc);
+ var value_container = $('
').addClass('wbs-namedesc-value').appendTo(namedesc);
+ EditRenderer._renderValue({datavalue:json,datatype:'monolingualtext'}, value_container);
+}
+
+/********************
+ * STATEMENT GROUPS *
+ ********************/
+
+EditRenderer._renderStatementGroup = function(json, container) {
+
+ var statementGroup = $('
').addClass('wbs-statement-group').appendTo(container);
+ var inputContainer = $('
').addClass('wbs-prop-input').appendTo(statementGroup);
+ var right = $('
').addClass('wbs-right').appendTo(statementGroup);
+ EditRenderer._renderEntity(json.property, inputContainer);
+
+ var statementContainer = $('
').addClass('wbs-statement-container').appendTo(right);
+ for (var i = 0; i != json.statements.length; i++) {
+ EditRenderer._renderStatement(json.statements[i], statementContainer);
+ }
+ if(json.statements.length > EditRenderer.maxStatements) {
+ $('
')
+ .text('...')
+ .addClass('wbs-statement')
+ .appendTo(statementContainer);
+ }
+}
+
+/**************
+ * STATEMENTS *
+ **************/
+
+EditRenderer._renderStatement = function(json, container) { /*
+ * Renders one statement into `container`:
+ *  - the main value (`json.mainsnak`) through EditRenderer._renderValue,
+ *    with a rank icon element prepended to the same input container;
+ *  - every qualifier snak of `json.qualifiers` (an object keyed by
+ *    property id, each entry a list of snaks) through _renderSnak;
+ *  - a references toggle whose click handler flips the triangle between
+ *    the 'pointing-right' and 'pointing-down' classes and toggles the
+ *    reference container, which holds one rendered entry per element of
+ *    `json.references`.
+ * `json.qualifiers` and `json.references` are optional.
+ * NOTE(review): the markup strings passed to $() below look truncated in
+ * this copy; `rank` is never read after creation.
+ */
+
+ var statement = $('
').addClass('wbs-statement').appendTo(container);
+ var inputContainer = $('
').addClass('wbs-target-input').appendTo(statement);
+ EditRenderer._renderValue(json.mainsnak, inputContainer);
+
+ // add rank
+ var rank = $('
').addClass('wbs-rank-selector-icon').prependTo(inputContainer);
+
+ // add qualifiers...
+ var right = $('
').addClass('wbs-right').appendTo(statement);
+ var qualifierContainer = $('
').addClass('wbs-qualifier-container').appendTo(right);
+
+ if (json.qualifiers) {
+ for (var pid in json.qualifiers) {
+ if (json.qualifiers.hasOwnProperty(pid)) {
+ var qualifiers = json.qualifiers[pid];
+ for (var i = 0; i != qualifiers.length; i++) {
+ EditRenderer._renderSnak(qualifiers[i], qualifierContainer);
+ }
+ }
+ }
+ }
+
+ // and references
+ $('
').attr('style', 'clear: right').appendTo(statement);
+ var referencesToggleContainer = $('
').addClass('wbs-references-toggle').appendTo(statement);
+ var triangle = $('
').addClass('triangle-icon').addClass('pointing-right').appendTo(referencesToggleContainer);
+ var referencesToggle = $('
').appendTo(referencesToggleContainer);
+ right = $('
').addClass('wbs-right').appendTo(statement);
+ var referenceContainer = $('
').addClass('wbs-reference-container').appendTo(right);
+ referencesToggle.click(function () {
+ triangle.toggleClass('pointing-down');
+ triangle.toggleClass('pointing-right');
+ referenceContainer.toggle(100);
+ });
+ referenceContainer.hide(); // references start collapsed
+
+ if (json.references) {
+ for (var i = 0; i != json.references.length; i++) {
+ EditRenderer._renderReference(json.references[i], referenceContainer);
+ }
+ }
+ EditRenderer._updateReferencesNumber(referenceContainer); // set the toggle text from the number of rendered references
+}
+
+/*********************************
+ * QUALIFIER AND REFERENCE SNAKS *
+ *********************************/
+
+EditRenderer._renderSnak = function(json, container) { /*
+ * Renders one qualifier or reference snak into `container`: the property
+ * entity (`json.full_property`) through EditRenderer._renderEntity and the
+ * snak itself as a value through EditRenderer._renderValue.
+ * NOTE(review): the markup strings passed to $() below look truncated in
+ * this copy; `toolbar1` is never read after creation.
+ */
+
+ var qualifier = $('
').addClass('wbs-qualifier').appendTo(container);
+ var toolbar1 = $('
').addClass('wbs-toolbar').appendTo(qualifier);
+ var inputContainer = $('
').addClass('wbs-prop-input').appendTo(qualifier);
+ var right = $('
').addClass('wbs-right').appendTo(qualifier);
+ var statementContainer = $('
').addClass('wbs-statement-container').appendTo(right);
+ EditRenderer._renderEntity(json.full_property, inputContainer);
+ EditRenderer._renderValue(json, statementContainer);
+}
+
+/**************
+ * REFERENCES *
+ **************/
+
+EditRenderer._renderReference = function(json, container) { /*
+ * Renders one reference into `container`: a header element followed by
+ * every snak of `json.snaks` (an object keyed by property id, each entry a
+ * list of snaks), each rendered through EditRenderer._renderSnak.
+ * NOTE(review): the markup strings passed to $() below look truncated in
+ * this copy; `referenceHeader` is never read after creation.
+ */
+ var reference = $('
').addClass('wbs-reference').appendTo(container);
+ var referenceHeader = $('
').addClass('wbs-reference-header').appendTo(reference);
+ var right = $('
').addClass('wbs-right').appendTo(reference);
+ var qualifierContainer = $('
').addClass('wbs-qualifier-container').appendTo(right);
+
+ for (var pid in json.snaks) {
+ if (json.snaks.hasOwnProperty(pid)) { // skip inherited keys
+ var snaks = json.snaks[pid];
+ for(var i = 0; i != snaks.length; i++) {
+ EditRenderer._renderSnak(snaks[i], qualifierContainer);
+ }
+ }
+ }
+}
+
+EditRenderer._updateReferencesNumber = function(container) {
+ var childrenCount = container.children().length;
+ var statement = container.parents('.wbs-statement');
+ var a = statement.find('.wbs-references-toggle a').first();
+ a.html(childrenCount+$.i18n._('wikidata-schema')["nb-references"]);
+}
+
+/*******************
+ * VALUE RENDERING *
+ *******************/
+
+EditRenderer.renderedValueCache = {};
+
+EditRenderer._renderEntity = function(json, container) {
+ var html = WarningsRenderer._renderEntity(json);
+ $(html).appendTo(container);
+}
+
+EditRenderer._renderValue = function(json, container) {
+ var input = $('
').appendTo(container);
+ var mode = json.datatype;
+
+ if (mode === "wikibase-item" || mode === "wikibase-property") {
+ EditRenderer._renderEntity(json.datavalue, container);
+ } else {
+ var jsonValue = JSON.stringify(json.datavalue);
+ if (jsonValue in EditRenderer.renderedValueCache) {
+ $('
'+EditRenderer.renderedValueCache[jsonValue]+' ').appendTo(container);
+ } else {
+ var params = {
+ action: 'wbformatvalue',
+ generate: 'text/html',
+ datavalue: jsonValue,
+ options: '{"lang":"'+$.i18n._('core-recon')["wd-recon-lang"]+'"}',
+ format: 'json'
+ };
+ if ('property' in json) {
+ params.property = json.property;
+ } else {
+ params.datatype = json.datatype;
+ }
+ $.get(
+ 'https://www.wikidata.org/w/api.php',
+ params,
+ function (data) {
+ if('result' in data) {
+ EditRenderer.renderedValueCache[jsonValue] = data.result;
+ $('
'+data.result+' ').appendTo(container);
+ }
+ },
+ 'jsonp'
+ );
+ }
+ }
+}
+
+
diff --git a/extensions/wikidata/module/scripts/schema-alignment-tab.html b/extensions/wikidata/module/scripts/schema-alignment-tab.html
new file mode 100644
index 000000000..5bff5236c
--- /dev/null
+++ b/extensions/wikidata/module/scripts/schema-alignment-tab.html
@@ -0,0 +1,17 @@
+
+
diff --git a/extensions/wikidata/module/scripts/warningsrenderer.js b/extensions/wikidata/module/scripts/warningsrenderer.js
new file mode 100644
index 000000000..bab750a76
--- /dev/null
+++ b/extensions/wikidata/module/scripts/warningsrenderer.js
@@ -0,0 +1,79 @@
+var WarningsRenderer = {};
+
+// renders a Wikibase entity into a link
+WarningsRenderer._renderEntity = function(entity) { /*
+ * Builds and returns a string for `entity`, displayed as "label (id)" when
+ * the entity has a label and as the bare id otherwise.
+ *  - When `entity.id` is missing but `entity.value` exists, the id is
+ *    copied from `entity.value.id` onto `entity` (the argument is
+ *    modified).
+ *  - An entity whose `siteIri` equals "http://localhost/entity/" is treated
+ *    as new: its id is replaced by the localized 'new-id' string and a
+ *    different return branch is used.
+ *  - `url` is `entity.iri`, or a www.wikidata.org entity URL built from
+ *    `entity.value.id` when no iri is present.
+ * NOTE(review): the returned markup literals look truncated in this copy
+ * (`url` is not referenced in what remains) -- presumably they wrapped the
+ * label in a link/span; confirm. The label is concatenated into the
+ * returned string without escaping.
+ */
+ if (!entity.id && entity.value) {
+ entity.id = entity.value.id;
+ }
+ var id = entity.id;
+ var is_new = entity.siteIri == "http://localhost/entity/";
+ if (is_new) {
+ id = $.i18n._('wikidata-preview')['new-id'];
+ }
+ var fullLabel = id;
+ if (entity.label) {
+ fullLabel = entity.label + ' (' + id + ')';
+ }
+
+ var url = entity.iri;
+ if (!url && entity.value) {
+ url = 'http://www.wikidata.org/entity/'+entity.value.id;
+ }
+
+ if (is_new) {
+ return '
'+fullLabel+' ';
+ } else {
+ return '
'+fullLabel+' ';
+ }
+}
+
+// replaces the issue properties in localization template
+WarningsRenderer._replaceIssueProperties = function(template, properties) {
+ if (!properties) {
+ return template;
+ }
+ var expanded = template;
+ for (var key in properties) {
+ if (properties.hasOwnProperty(key)) {
+ var rendered = properties[key];
+ if (key.endsWith('_entity')) {
+ rendered = WarningsRenderer._renderEntity(properties[key]);
+ }
+ expanded = expanded.replace(new RegExp('{'+key+'}', 'g'), rendered);
+ }
+ }
+ return expanded;
+}
+
+WarningsRenderer._renderWarning = function(warning) { /*
+ * Builds and returns (without attaching it anywhere) the element for one
+ * warning, made of three cells: a severity cell carrying the class
+ * 'wb-warning-severity-' + warning.severity, a body cell with a title and
+ * a body text, and a count cell showing warning.count.
+ * Title and body come from the 'warnings-messages' localization entry for
+ * warning.type, with warning.properties substituted in; when no entry
+ * exists the title falls back to warning.type and the body is empty.
+ * Title and body are inserted with .html(), i.e. interpreted as markup.
+ * NOTE(review): the markup strings passed to $() below look truncated in
+ * this copy.
+ */
+ var localized = $.i18n._('warnings-messages')[warning.type];
+ var title = warning.type;
+ var body = "";
+ if (localized) {
+ title = WarningsRenderer._replaceIssueProperties(localized.title, warning.properties);
+ body = WarningsRenderer._replaceIssueProperties(localized.body, warning.properties);
+ }
+ var tr = $('
').addClass('wb-warning');
+ var severityTd = $('
')
+ .addClass('wb-warning-severity')
+ .addClass('wb-warning-severity-'+warning.severity)
+ .appendTo(tr);
+ var bodyTd = $('
')
+ .addClass('wb-warning-body')
+ .appendTo(tr);
+ var h1 = $('
')
+ .html(title)
+ .appendTo(bodyTd);
+ var p = $('
')
+ .html(body)
+ .appendTo(bodyTd);
+ var countTd = $('
')
+ .addClass('wb-warning-count')
+ .appendTo(tr);
+ var countSpan = $('
')
+ .text(warning.count)
+ .appendTo(countTd);
+ return tr;
+}
+
+
diff --git a/extensions/wikidata/module/styles/dialogs/manage-account-dialog.less b/extensions/wikidata/module/styles/dialogs/manage-account-dialog.less
new file mode 100644
index 000000000..024fc8ed0
--- /dev/null
+++ b/extensions/wikidata/module/styles/dialogs/manage-account-dialog.less
@@ -0,0 +1,64 @@
+/*
+
+Copyright 2010, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+@import-less url("../theme.less");
+
+.wikibase-login-form {
+ text-align: center;
+}
+
+.wikibase-login-buttons {
+ text-align: right;
+}
+
+.wikibase-invalid-credentials {
+ color: red;
+}
+
+.wikibase-user-login tr td:first-child {
+ text-align: right;
+}
+
+.wikibase-user-login tr td:last-child {
+ padding-left: 10px;
+ text-align: left;
+}
+
+.wikidata-logo {
+ float: left;
+ margin: -10px;
+}
+
+.right-of-logo {
+ margin-left: 110px;
+}
diff --git a/extensions/wikidata/module/styles/dialogs/perform-edits.less b/extensions/wikidata/module/styles/dialogs/perform-edits.less
new file mode 100644
index 000000000..8ff3d4f77
--- /dev/null
+++ b/extensions/wikidata/module/styles/dialogs/perform-edits.less
@@ -0,0 +1,39 @@
+/*
+
+Copyright 2010, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+@import-less url("../theme.less");
+
+.edit-summary {
+ width: 300px;
+}
+
diff --git a/extensions/wikidata/module/styles/dialogs/schema-alignment-dialog.css b/extensions/wikidata/module/styles/dialogs/schema-alignment-dialog.css
new file mode 100644
index 000000000..03561fede
--- /dev/null
+++ b/extensions/wikidata/module/styles/dialogs/schema-alignment-dialog.css
@@ -0,0 +1,516 @@
+@import-less url("../theme.less");
+
+.main-view-panel-tab-header#summary-bar {
+ font-size: 1.3em;
+ font-weight: normal;
+ position: initial;
+}
+
+#wikidata-schema-panel, #wikidata-issues-panel, #wikidata-preview-panel {
+ display: block;
+ overflow: hidden;
+ height: 100%;
+ background-color: white;
+ font-size: 1.2em;
+}
+
+#schema-preview-tab, #schema-issues-tab, #schema-alignment-tab {
+ overflow: auto;
+ height: 100%;
+}
+
+#schema-alignment-tab {
+ overflow: hidden;
+}
+
+.main-view-panel-tab-header {
+ margin-top: 9px;
+ margin-left: 7px;
+ font-size: 1.3em;
+ display: inline-block;
+ background-color: transparent;
+ color: #11c;
+ padding: 3px 7px;
+ cursor: pointer;
+}
+
+.main-view-panel-tab-header.active {
+ background-color: white;
+ color: black;
+ border: 1px solid #818fb7;
+ border-bottom: none;
+}
+
+.main-view-panel-tab-header.active:first-child {
+ background-color: #f2f2f2;
+}
+
+.schema-alignment-dialog-canvas {
+ background: white;
+ overflow-y: auto;
+}
+
+.schema-alignment-dialog-statements-area {
+ padding: 10px;
+ max-width: 900px;
+ overflow-y: auto;
+}
+
+.panel-explanation {
+ margin: 1em;
+}
+
+.schema-alignment-save {
+ float: right;
+ padding: 1em;
+}
+
+.schema-alignment-save button:disabled {
+ color: gray;
+}
+
+.schema-alignment-save button {
+ font-weight: normal;
+}
+
+.schema-alignment-dialog-columns-area {
+ border: 1px solid #bcf;
+ border-left: 0;
+ border-right: 0;
+ padding: 5px;
+ max-height: 100px;
+ overflow-y: auto;
+}
+
+.wbs-draggable-column {
+ border: 1px solid #aaa;
+ padding: 2px;
+ margin: 2px;
+ background-color: #eee;
+ display: inline-block;
+ padding-bottom: 3px;
+ max-width: 95%;
+}
+
+.ui-droppable .wbs-draggable-column {
+ margin: 0px;
+ padding: 0px;
+ padding-left: 2px;
+}
+
+.wbs-reconciled-column {
+ border-bottom: 3px solid #282;
+ padding-bottom: 1px;
+}
+
+.wbs-restricted-column-name {
+ /* Cap the width at the container width minus 20px; the prefixed calc()
+ variants are kept as fallbacks. */
+ max-width: -moz-calc(100% - 20px);
+ max-width: -webkit-calc(100% - 20px);
+ max-width: -o-calc(100% - 20px);
+ max-width: calc(100% - 20px);
+ /* Keep the name on one line and cut it with an ellipsis when too long. */
+ white-space: nowrap;
+ overflow: hidden;
+ text-overflow: ellipsis;
+ display: inline-block;
+ vertical-align: middle;
+}
+
+.wbs-draggable-column .wbs-remove {
+ float: right;
+}
+
+.wbs-accepting-input {
+ box-shadow: inset 0 0 10px #a7cdff;
+}
+
+.wbs-validated-input {
+ border: 1px solid green !important;
+}
+
+.wbs-unvalidated-input {
+ border: 1px solid red !important;
+ background-color: #ffbfbf;
+}
+
+.wbs-icon {
+ background-position: center;
+ background-size: 20px 20px;
+ width: 20px;
+ height: 20px;
+ display: inline-block;
+ vertical-align: middle;
+ opacity: 0.7;
+}
+
+.wbs-remove .wbs-icon {
+ background-image:linear-gradient(transparent,transparent),url("data:image/svg+xml,%3Csvg%20xmlns=%22http://www.w3.org/2000/svg%22%20width=%2224%22%20height=%2224%22%20viewBox=%220%200%2024%2024%22%3E%20%3Cg%20id=%22remove%22%3E%20%3Cpath%20id=%22trash-can%22%20d=%22M12%2010h-1v6h1v-6zm-2%200H9v6h1v-6zm4%200h-1v6h1v-6zm0-4V5H9v1H6v3h1v7.966l1%201.03v-.073V18h6.984l.016-.018v.015l1-1.03V9h1V6h-3zm1%2011H8V9h7v8zm1-9H7V7h9v1z%22/%3E%20%3C/g%3E%20%3C/svg%3E");
+}
+
+.wbs-remove span:last-child:hover {
+ text-decoration: underline;
+}
+
+.wbs-rank-selector-icon {
+ background-position: -36px;
+ width: 8px;
+ flex: 0 0 8px;
+ height: 20px;
+ margin-right: 2px;
+ background-image:linear-gradient(transparent,transparent),url("data:image/svg+xml,%3Csvg%20xmlns=%22http://www.w3.org/2000/svg%22%20xmlns:xlink=%22http://www.w3.org/1999/xlink%22%20width=%22107%22%20height=%2220%22%3E%20%3Cdefs%3E%20%3Cpath%20d=%22M3.1%2C0%200%2C3.8%200%2C6%208%2C6%208%2C3.8%204.9%2C0zm8.2%2C7%20-2.3%2C2%200%2C2%202.3%2C2%203.4%2C0%202.3%2C-2%200%2C-2%20-2.3%2C-2zm6.7%2C7%200%2C2.2%203.1%2C3.8%201.8%2C0%203.1%2C-3.8%200%2C-2.2z%22%20id=%22a%22/%3E%20%3Cpath%20d=%22m18.5%2C10.75%200%2C-1.5%202%2C-1.75%203%2C0%202%2C1.75%200%2C1.5%20-2%2C1.75%20-3%2C0zm0%2C-6.75%200%2C1.5%207%2C0%200%2C-1.5%20-2.875%2C-3.5%20-1.25%2C0zm-9%2C12%200%2C-1.5%207%2C0%200%2C1.5%20-2.875%2C3.5%20-1.25%2C0zm0%2C-12%200%2C1.5%207%2C0%200%2C-1.5%20-2.875%2C-3.5%20-1.25%2C0zm-9%2C12%200%2C-1.5%207%2C0%200%2C1.5%20-2.875%2C3.5%20-1.25%2C0zm0%2C-5.25%200%2C-1.5%202%2C-1.75%203%2C0%202%2C1.75%200%2C1.5%20-2%2C1.75%20-3%2C0z%22%20id=%22b%22%20fill=%22none%22/%3E%20%3C/defs%3E%20%3Cuse%20fill=%22%23000%22%20x=%220%22%20y=%220%22%20xlink:href=%22%23a%22/%3E%20%3Cuse%20stroke=%22%23000%22%20x=%220%22%20y=%220%22%20xlink:href=%22%23b%22/%3E%20%3Cuse%20fill=%22%2372777d%22%20x=%2227%22%20y=%220%22%20xlink:href=%22%23a%22/%3E%20%3Cuse%20stroke=%22%2372777d%22%20x=%2227%22%20y=%220%22%20xlink:href=%22%23b%22/%3E%20%3Cuse%20fill=%22%2336c%22%20x=%2254%22%20y=%220%22%20xlink:href=%22%23a%22/%3E%20%3Cuse%20stroke=%22%2336c%22%20x=%2254%22%20y=%220%22%20xlink:href=%22%23b%22/%3E%20%3Cuse%20fill=%22%23447ff5%22%20x=%2281%22%20y=%220%22%20xlink:href=%22%23a%22/%3E%20%3Cuse%20stroke=%22%23447ff5%22%20x=%2281%22%20y=%220%22%20xlink:href=%22%23b%22/%3E%20%3C/svg%3E");
+}
+
+.wbs-item-input, .wbs-prop-input, .wbs-target-input {
+ width: 250px;
+ min-height: 20px;
+ display: inline-block;
+ margin: 5px;
+ white-space: normal;
+ word-wrap: break-word;
+}
+
+.wbs-target-input {
+ display: inline-flex;
+}
+
+.wbs-prop-input {
+ width: 120px;
+}
+
+.wbs-qualifier .wbs-prop-input {
+ width: 85px;
+}
+
+.wbs-qualifier .wbs-target-input {
+ width: 120px;
+}
+
+/* Fix input rendering for Firefox */
+
+
+.wbs-item-input input, .wbs-prop-input input, .wbs-target-input input,
+.wbs-monolingual-container input {
+ width: 100%;
+ border: solid #b5b5b5 1px;
+ padding: 2px;
+}
+
+.wbs-draggable-column img {
+ float: right;
+}
+
+.wbs-toolbar {
+ float: right;
+ width: 100px;
+ padding: 2px;
+}
+
+.wbs-toolbar a {
+ cursor: pointer;
+}
+
+.wbs-remove,
+.wbs-add-item, .wbs-add-statement-group, .wbs-add-statement,
+.wbs-add-qualifier, .wbs-add-reference, .wbs-add-namedesc {
+ color: #0645ad !important;
+ font-size: 0.9em;
+ cursor: pointer;
+}
+
+.wbs-add-item:hover, .wbs-add-statement-group:hover, .wbs-add-statement:hover,
+.wbs-add-qualifier:hover, .wbs-add-reference:hover, .wbs-add-namedesc:hover {
+ text-decoration: none;
+}
+
+.wbs-add-item span:hover, .wbs-add-statement-group span:hover, .wbs-add-statement span:hover,
+.wbs-add-qualifier span:hover, .wbs-add-reference span:hover, .wbs-add-namedesc span:hover {
+ text-decoration: underline;
+}
+
+.wbs-remove {
+ display: inline-block;
+}
+
+.wbs-item-contents {
+ padding-left: 10px;
+ margin-left: 20px;
+ border-left: 3px solid gray;
+}
+
+.wbs-add-item b, .wbs-add-statement-group b, .wbs-add-statement b,
+.wbs-add-qualifier b, .wbs-add-reference b, .wbs-add-namedesc b {
+ font-size: 1em;
+ color: grey;
+ width: 20px;
+ height: 20px;
+}
+
+.wbs-statement-group-container:empty:before {
+ content: attr(data-emptyplaceholder);
+ color: gray;
+}
+
+.schema-alignment-dialog-preview .wbs-qualifier .wbs-statement-container {
+ display: table-cell;
+ vertical-align: bottom;
+ padding-left: 10px;
+ height: 20px;
+}
+
+.wbs-statement-group-container, .wbs-statement-container, .wbs-qualifier-container, .wbs-reference-container {
+ width: 100%;
+ display: block;
+ overflow: auto;
+}
+
+.wbs-item, .wbs-statement-group, .wbs-statement, .wbs-qualifier, .wbs-reference {
+ display: block;
+ overflow: auto;
+}
+
+.wbs-item {
+ margin-bottom: 5px;
+ padding-right: 5px;
+}
+
+.wbs-statement-group {
+ background: #eaecf0;
+ margin-bottom: 5px;
+ border: 1px solid #c8ccd1;
+}
+
+.wbs-statement {
+ background: white;
+ border-bottom: 1px solid #eaecf0;
+}
+
+.wbs-statement:last-child {
+ border-bottom: 0;
+}
+
+.wbs-right {
+ float: right;
+ width: 85%;
+}
+
+.wbs-statement-group .wbs-right {
+ width: 75%;
+}
+
+.wbs-statement .wbs-right {
+ width: 90%;
+}
+
+.wbs-qualifier, .wbs-reference {
+ position: relative;
+ overflow-x: hidden;
+}
+
+.wbs-qualifier .wbs-right {
+ width: auto;
+ position: absolute;
+ top: 0px;
+ margin-left: 100px;
+}
+
+.wbs-reference > .wbs-right {
+ width: 100%;
+}
+
+.wbs-references-toggle {
+ width: 12em;
+ margin: 5px;
+}
+
+.wbs-references-toggle a {
+ color: #0645ad !important;
+}
+
+
+.wbs-references-toggle .triangle-icon {
+ background: transparent url(data:image/svg+xml,%3C%3Fxml%20version%3D%221.0%22%20encoding%3D%22UTF-8%22%20standalone%3D%22no%22%3F%3E%0A%3Csvg%0A%20%20%20xmlns%3Adc%3D%22http%3A//purl.org/dc/elements/1.1/%22%0A%20%20%20xmlns%3Acc%3D%22http%3A//creativecommons.org/ns%23%22%0A%20%20%20xmlns%3Ardf%3D%22http%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%22%0A%20%20%20xmlns%3Asvg%3D%22http%3A//www.w3.org/2000/svg%22%0A%20%20%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%0A%20%20%20id%3D%22svg8%22%0A%20%20%20version%3D%221.1%22%0A%20%20%20viewBox%3D%220%200%202.6458332%202.6458332%22%0A%20%20%20height%3D%2210%22%0A%20%20%20width%3D%2210%22%3E%0A%20%20%3Cdefs%0A%20%20%20%20%20id%3D%22defs2%22%20/%3E%0A%20%20%3Cmetadata%0A%20%20%20%20%20id%3D%22metadata5%22%3E%0A%20%20%20%20%3Crdf%3ARDF%3E%0A%20%20%20%20%20%20%3Ccc%3AWork%0A%20%20%20%20%20%20%20%20%20rdf%3Aabout%3D%22%22%3E%0A%20%20%20%20%20%20%20%20%3Cdc%3Aformat%3Eimage/svg+xml%3C/dc%3Aformat%3E%0A%20%20%20%20%20%20%20%20%3Cdc%3Atype%0A%20%20%20%20%20%20%20%20%20%20%20rdf%3Aresource%3D%22http%3A//purl.org/dc/dcmitype/StillImage%22%20/%3E%0A%20%20%20%20%20%20%20%20%3Cdc%3Atitle%3E%3C/dc%3Atitle%3E%0A%20%20%20%20%20%20%3C/cc%3AWork%3E%0A%20%20%20%20%3C/rdf%3ARDF%3E%0A%20%20%3C/metadata%3E%0A%20%20%3Cg%0A%20%20%20%20%20transform%3D%22translate%280%2C-294.35416%29%22%0A%20%20%20%20%20id%3D%22layer1%22%3E%0A%20%20%20%20%3Cpath%0A%20%20%20%20%20%20%20id%3D%22path12%22%0A%20%20%20%20%20%20%20d%3D%22m%200.21601809%2C294.5035%202.28763941%2C1.14744%20-2.29850705%2C1.20871%20z%22%0A%20%20%20%20%20%20%20style%3D%22fill%3A%23645eea%3Bfill-opacity%3A1%3Bfill-rule%3Aevenodd%3Bstroke%3A%23645eea%3Bstroke-width%3A0.06161711px%3Bstroke-linecap%3Abutt%3Bstroke-linejoin%3Amiter%3Bstroke-opacity%3A1%22%20/%3E%0A%20%20%3C/g%3E%0A%3C/svg%3E%0A) no-repeat center center;
+ background-size: 100% auto;
+ height: 8px;
+ width: 8px;
+ margin-right: 4px;
+ display: inline-block;
+}
+
+.pointing-right {
+ transform: rotate(0deg);
+ transition: .3s cubic-bezier(.17,.67,.21,1.69);
+}
+.pointing-down {
+ transform: rotate(90deg);
+ transition: .3s cubic-bezier(.17,.67,.21,1.69);
+}
+
+.wbs-qualifier .wbs-statement {
+ overflow: hidden;
+ background: transparent;
+}
+
+.wbs-monolingual-container, .wbs-quantity-container {
+ display: inline-block;
+}
+
+.wbs-reference-header {
+ height: 22px;
+ overflow: hidden;
+ display: block;
+ background-color: #b6c8ec;
+}
+
+.wbs-reference {
+ background-color: #eaf3ff;
+ margin-bottom: 5px;
+}
+
+.wbs-reference .wbs-statement {
+ background-color: #eaf3ff;
+}
+
+.wbs-namedesc-header, .wbs-statements-header {
+ font-size: 1.2em;
+}
+
+.wbs-namedesc {
+ border-left: 1px solid #c8ccd1;
+ border-right: 1px solid #c8ccd1;
+}
+
+.wbs-namedesc:first-child {
+ border-top: 1px solid #c8ccd1;
+}
+
+.wbs-namedesc:last-child {
+ border-bottom: 1px solid #c8ccd1;
+}
+
+
+.wbs-namedesc-container:empty:before {
+ content: attr(data-emptyplaceholder);
+ color: gray;
+ padding: 5px;
+ border: none;
+}
+
+.wbs-namedesc {
+ background-color: #eaecf0;
+ padding: 5px;
+}
+
+.wbs-namedesc-type, .wbs-namedesc-value {
+ display: inline-block;
+}
+
+.wbs-namedesc-value {
+ padding-left: 20px;
+ width: 300px;
+}
+
+.wbs-language-input {
+ width: 100%;
+}
+
+.schema-alignment-columns-header {
+ margin-bottom: 0.3em;
+}
+
+/*** Warnings rendering ****/
+
+#wikidata-issues-panel table {
+ width: 100%;
+}
+
+.wb-warning h1 {
+ font-size: 1.2em;
+}
+
+.schema-issues-area table,
+.perform-edits-warnings-area table {
+ width: 100%;
+}
+
+tr.wb-warning:nth-of-type(odd) {
+ background-color: #f2f2f2;
+}
+
+.wb-warning-count span,
+.schema-alignment-total-warning-count {
+ color: white;
+ background-color: #777;
+ padding: 0px 5px;
+ border-radius: 0.5em;
+}
+
+.wb-warning-severity {
+ width: 60px;
+ height: 60px;
+ background-repeat: no-repeat;
+ background-position: center;
+}
+
+.wb-warning-severity-INFO {
+ background-image: url('../../images/Information.png');
+}
+
+.wb-warning-severity-WARNING {
+ background-image: url('../../images/Warning.png');
+}
+
+.wb-warning-severity-IMPORTANT {
+ background-image: url('../../images/Important.png');
+}
+
+.wb-warning-severity-CRITICAL {
+ background-image: url('../../images/Critical.png');
+}
+
+.wb-warning-body {
+ padding: 5px;
+ vertical-align: top;
+}
+
+.wb-warning-count {
+ padding: 5px;
+ vertical-align: middle;
+ text-align: center;
+}
+
+.wb-warning-end {
+ /* `clear` is not a valid value of the `float` property (the declaration
+ was ignored); ending a float is done with the `clear` property. */
+ clear: both;
+}
+
+div.perform-edits-warnings-area {
+ min-height: 340px;
+ max-height: 400px;
+ overflow-x: hidden;
+ overflow-y: auto;
+ border: 1px solid #bbb;
+}
+
+.wb-issue-preformat {
+ border: 1px solid #eaecf0;
+ background-color: #f8f9fa;
+ padding: 1px 3px;
+ border-radius: 2px;
+ font-family: monospace;
+}
+
+.wb-preview-new-entity {
+ color: #11c;
+}
+
+/*** QuickStatements Preview ***/
+
+div.schema-alignment-dialog-preview {
+ min-height: 340px;
+ max-width: 900px;
+ overflow: auto;
+ background: white;
+ padding: 10px;
+ margin-top: 3px;
+ white-space: pre;
+ font-size: 9pt;
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/commands/CommandUtilities.java b/extensions/wikidata/src/org/openrefine/wikidata/commands/CommandUtilities.java
new file mode 100644
index 000000000..74f1cbdfd
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/commands/CommandUtilities.java
@@ -0,0 +1,32 @@
+package org.openrefine.wikidata.commands;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import javax.servlet.http.HttpServletResponse;
+
+import org.json.JSONWriter;
+
+public class CommandUtilities {
+
+    /**
+     * Helper introduced to ease returning error messages from a response. It
+     * writes a JSON object with a "code" key set to "error" and a "message" key
+     * set to the given message, then flushes and closes the response writer.
+     *
+     * Curiously this is not part of {@link Command}: the respond method uses the
+     * "status" JSON key instead of the "code" one required by the JS code.
+     *
+     * @param response
+     *            the servlet response the error object is written to
+     * @param errorMessage
+     *            the text stored under the "message" key
+     * @throws IOException
+     *             if the response writer cannot be obtained or written to
+     */
+    public static void respondError(HttpServletResponse response, String errorMessage)
+            throws IOException {
+        // Declare the payload type before the writer is obtained, so that
+        // callers which have not set any header still send well-formed JSON.
+        response.setCharacterEncoding("UTF-8");
+        response.setHeader("Content-Type", "application/json");
+
+        Writer w = response.getWriter();
+        JSONWriter writer = new JSONWriter(w);
+        writer.object();
+        writer.key("code"); writer.value("error");
+        writer.key("message"); writer.value(errorMessage);
+        writer.endObject();
+        w.flush();
+        w.close();
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/commands/LoginCommand.java b/extensions/wikidata/src/org/openrefine/wikidata/commands/LoginCommand.java
new file mode 100644
index 000000000..951741ddf
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/commands/LoginCommand.java
@@ -0,0 +1,77 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.commands;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.json.JSONException;
+import org.json.JSONWriter;
+import org.openrefine.wikidata.editing.ConnectionManager;
+
+import com.google.refine.commands.Command;
+
+public class LoginCommand extends Command {
+
+ @Override
+ public void doPost(HttpServletRequest request, HttpServletResponse response)
+ throws ServletException, IOException {
+ String username = request.getParameter("wb-username");
+ String password = request.getParameter("wb-password");
+ String remember = request.getParameter("remember-credentials");
+ ConnectionManager manager = ConnectionManager.getInstance();
+ if (username != null && password != null) {
+ manager.login(username, password, "on".equals(remember));
+ } else if ("true".equals(request.getParameter("logout"))) {
+ manager.logout();
+ }
+ response.setCharacterEncoding("UTF-8");
+ response.setHeader("Content-Type", "application/json");
+
+ StringWriter sb = new StringWriter(2048);
+ JSONWriter writer = new JSONWriter(sb);
+
+ try {
+ writer.object();
+ writer.key("logged_in");
+ writer.value(manager.isLoggedIn());
+ writer.key("username");
+ writer.value(manager.getUsername());
+ writer.endObject();
+ } catch (JSONException e) {
+ logger.error(e.getMessage());
+ }
+ respond(response, sb.toString());
+ }
+
+ @Override
+ public void doGet(HttpServletRequest request, HttpServletResponse response)
+ throws ServletException, IOException {
+ doPost(request, response);
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/commands/PerformWikibaseEditsCommand.java b/extensions/wikidata/src/org/openrefine/wikidata/commands/PerformWikibaseEditsCommand.java
new file mode 100644
index 000000000..f49a374b4
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/commands/PerformWikibaseEditsCommand.java
@@ -0,0 +1,44 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.commands;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.json.JSONObject;
+import org.openrefine.wikidata.operations.PerformWikibaseEditsOperation;
+
+import com.google.refine.commands.EngineDependentCommand;
+import com.google.refine.model.AbstractOperation;
+import com.google.refine.model.Project;
+
+public class PerformWikibaseEditsCommand extends EngineDependentCommand {
+
+    /**
+     * Builds the operation performing the edits from the engine configuration
+     * and the value of the "summary" request parameter.
+     */
+    @Override
+    protected AbstractOperation createOperation(Project project, HttpServletRequest request, JSONObject engineConfig)
+            throws Exception {
+        return new PerformWikibaseEditsOperation(engineConfig, request.getParameter("summary"));
+    }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java b/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java
new file mode 100644
index 000000000..28ed72400
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java
@@ -0,0 +1,140 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+
+package org.openrefine.wikidata.commands;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.util.List;
+import java.util.Properties;
+import java.util.stream.Collectors;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+import org.openrefine.wikidata.qa.EditInspector;
+import org.openrefine.wikidata.qa.QAWarning;
+import org.openrefine.wikidata.qa.QAWarningStore;
+import org.openrefine.wikidata.schema.WikibaseSchema;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
+
+import static org.openrefine.wikidata.commands.CommandUtilities.respondError;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import com.google.refine.browsing.Engine;
+import com.google.refine.commands.Command;
+import com.google.refine.model.Project;
+
+public class PreviewWikibaseSchemaCommand extends Command {
+
+    /**
+     * Evaluates a Wikibase schema against the project and responds with a JSON
+     * preview of the outcome.
+     *
+     * The schema is read from the "schema" request parameter when present, and
+     * from the project's "wikibaseSchema" overlay model otherwise. The response
+     * object holds the keys "warnings", "max_severity", "nb_warnings",
+     * "edit_count" and "edits_preview". A missing or unparsable schema is
+     * reported through {@code respondError}; any other failure is reported
+     * through {@code respondException}.
+     */
+    @Override
+    public void doPost(HttpServletRequest request, HttpServletResponse response)
+            throws ServletException, IOException {
+
+        try {
+            Project project = getProject(request);
+
+            response.setCharacterEncoding("UTF-8");
+            response.setHeader("Content-Type", "application/json");
+
+            String jsonString = request.getParameter("schema");
+
+            WikibaseSchema schema = null;
+            if (jsonString != null) {
+                try {
+                    schema = WikibaseSchema.reconstruct(jsonString);
+                } catch (JSONException e) {
+                    respondError(response, "Wikibase schema could not be parsed.");
+                    return;
+                }
+            } else {
+                schema = (WikibaseSchema) project.overlayModels.get("wikibaseSchema");
+            }
+            if (schema == null) {
+                respondError(response, "No Wikibase schema provided.");
+                return;
+            }
+
+            QAWarningStore warningStore = new QAWarningStore();
+
+            // Evaluate project
+            Engine engine = getEngine(request, project);
+            List<ItemUpdate> editBatch = schema.evaluate(project, engine, warningStore);
+
+            StringWriter sb = new StringWriter(2048);
+            JSONWriter writer = new JSONWriter(sb);
+            writer.object();
+
+            // Inspect the edits and generate warnings
+            EditInspector inspector = new EditInspector(warningStore);
+            inspector.inspect(editBatch);
+            writer.key("warnings");
+            writer.array();
+            for (QAWarning warning : warningStore.getWarnings()) {
+                warning.write(writer, new Properties());
+            }
+            writer.endArray();
+
+            // Add max warning level
+            writer.key("max_severity");
+            writer.value(warningStore.getMaxSeverity().toString());
+
+            // this is not the length of the warnings array written before,
+            // but the total number of issues raised (before deduplication)
+            writer.key("nb_warnings");
+            writer.value(warningStore.getNbWarnings());
+
+            // Dump the first 10 edits, scheduled with the default scheduler.
+            // The element type must be spelled out: with a raw List the lambda
+            // parameter below would be an Object and isNull() would not resolve.
+            WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
+            List<ItemUpdate> nonNullEdits = scheduler.schedule(editBatch).stream()
+                    .filter(e -> !e.isNull())
+                    .collect(Collectors.toList());
+            writer.key("edit_count");
+            writer.value(nonNullEdits.size());
+            List<ItemUpdate> firstEdits = nonNullEdits.stream()
+                    .limit(10)
+                    .collect(Collectors.toList());
+            // The edits are serialized with Jackson and re-parsed so that they
+            // can be embedded as an array by the org.json writer.
+            ObjectMapper mapper = new ObjectMapper();
+            String firstEditsJson = mapper.writeValueAsString(firstEdits);
+
+            writer.key("edits_preview");
+            writer.value(new JSONArray(firstEditsJson));
+
+            writer.endObject();
+
+            respond(response, sb.toString());
+        } catch (Exception e) {
+            respondException(response, e);
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/commands/SaveWikibaseSchemaCommand.java b/extensions/wikidata/src/org/openrefine/wikidata/commands/SaveWikibaseSchemaCommand.java
new file mode 100644
index 000000000..9196fcef5
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/commands/SaveWikibaseSchemaCommand.java
@@ -0,0 +1,78 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.commands;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.openrefine.wikidata.operations.SaveWikibaseSchemaOperation;
+import org.openrefine.wikidata.schema.WikibaseSchema;
+import static org.openrefine.wikidata.commands.CommandUtilities.respondError;
+
+import com.google.refine.commands.Command;
+import com.google.refine.model.AbstractOperation;
+import com.google.refine.model.Project;
+import com.google.refine.process.Process;
+import com.google.refine.util.ParsingUtilities;
+
+public class SaveWikibaseSchemaCommand extends Command {
+
+    /**
+     * Parses the schema passed in the "schema" request parameter and runs the
+     * process created by a {@link SaveWikibaseSchemaOperation} for it on the
+     * project. A missing or unparsable schema is answered with an error object
+     * instead.
+     */
+    @Override
+    public void doPost(HttpServletRequest request, HttpServletResponse response)
+            throws ServletException, IOException {
+
+        try {
+            Project project = getProject(request);
+
+            String rawSchema = request.getParameter("schema");
+            if (rawSchema == null) {
+                respondError(response, "No Wikibase schema provided.");
+                return;
+            }
+
+            JSONObject schemaJson = ParsingUtilities.evaluateJsonStringToObject(rawSchema);
+            AbstractOperation saveOperation = new SaveWikibaseSchemaOperation(WikibaseSchema.reconstruct(schemaJson));
+            Process saveProcess = saveOperation.createProcess(project, new Properties());
+
+            performProcessAndRespond(request, response, project, saveProcess);
+
+        } catch (JSONException e) {
+            // Expected whenever a user tries to save an incomplete schema, so
+            // respondException is deliberately avoided here: the exception
+            // should not be logged.
+            respondError(response, "Wikibase schema could not be parsed.");
+        } catch (Exception e) {
+            // Anything else is unexpected, so we log it.
+            respondException(response, e);
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/ConnectionManager.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/ConnectionManager.java
new file mode 100644
index 000000000..f631be707
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/ConnectionManager.java
@@ -0,0 +1,148 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.editing;
+
+import java.io.IOException;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.wikidata.wdtk.wikibaseapi.ApiConnection;
+import org.wikidata.wdtk.wikibaseapi.LoginFailedException;
+
+import com.google.refine.ProjectManager;
+import com.google.refine.preference.PreferenceStore;
+
+/**
+ * Manages a connection to Wikidata, with login credentials stored in the
+ * preferences.
+ *
+ * Ideally, we should store only the cookies and not the password. But
+ * Wikidata-Toolkit does not allow for that as cookies are kept private.
+ *
+ * This class is also hard-coded for Wikidata: generalization to other Wikibase
+ * instances should be feasible though.
+ *
+ * @author Antonin Delpeuch
+ */
+
+public class ConnectionManager {
+
+    // Declared before the singleton instance below: the constructor may log.
+    final static Logger logger = LoggerFactory.getLogger("connection_manager");
+
+    public static final String PREFERENCE_STORE_KEY = "wikidata_credentials";
+
+    private PreferenceStore prefStore;
+    private ApiConnection connection;
+
+    private static final ConnectionManager instance = new ConnectionManager();
+
+    /**
+     * @return the single shared instance of this class
+     */
+    public static ConnectionManager getInstance() {
+        return instance;
+    }
+
+    private ConnectionManager() {
+        prefStore = ProjectManager.singleton.getPreferenceStore();
+        connection = null;
+        restoreSavedConnection();
+    }
+
+    /**
+     * Attempts to log in with the given credentials. On success the new
+     * connection replaces the current one; on failure the manager ends up
+     * logged out.
+     *
+     * @param username
+     *            the user name to log in with
+     * @param password
+     *            the password to log in with
+     * @param rememberCredentials
+     *            whether the credentials should be saved in the preferences;
+     *            they are only saved once the login has succeeded
+     */
+    public void login(String username, String password, boolean rememberCredentials) {
+        ApiConnection newConnection = ApiConnection.getWikidataApiConnection();
+        try {
+            newConnection.login(username, password);
+        } catch (LoginFailedException e) {
+            logger.warn("Login to Wikidata failed: {}", e.getMessage());
+            connection = null;
+            return;
+        }
+        connection = newConnection;
+
+        // Persist only credentials that are known to work, so that a mistyped
+        // password is not stored and retried by restoreSavedConnection().
+        if (rememberCredentials) {
+            try {
+                JSONArray array = new JSONArray();
+                JSONObject obj = new JSONObject();
+                obj.put("username", username);
+                obj.put("password", password);
+                array.put(obj);
+                prefStore.put(PREFERENCE_STORE_KEY, array);
+            } catch (JSONException e) {
+                logger.error(e.getMessage());
+            }
+        }
+    }
+
+    /**
+     * Logs in with the credentials saved in the preferences, if there are any.
+     * The manager stays logged out when the login fails or when the saved
+     * credentials are incomplete.
+     */
+    public void restoreSavedConnection() {
+        JSONObject savedCredentials = getStoredCredentials();
+        if (savedCredentials == null) {
+            return;
+        }
+        ApiConnection restored = ApiConnection.getWikidataApiConnection();
+        try {
+            restored.login(savedCredentials.getString("username"), savedCredentials.getString("password"));
+            connection = restored;
+        } catch (LoginFailedException e) {
+            logger.warn("Login to Wikidata with the saved credentials failed: {}", e.getMessage());
+            connection = null;
+        } catch (JSONException e) {
+            logger.warn("The saved Wikidata credentials are incomplete: {}", e.getMessage());
+            connection = null;
+        }
+    }
+
+    /**
+     * @return the first entry of the array stored under
+     *         {@link #PREFERENCE_STORE_KEY}, or null if nothing usable is stored
+     */
+    public JSONObject getStoredCredentials() {
+        Object stored = prefStore.get(PREFERENCE_STORE_KEY);
+        // Guard the cast: this runs while the singleton is being initialized,
+        // where a ClassCastException would make the whole class unusable.
+        if (!(stored instanceof JSONArray)) {
+            return null;
+        }
+        JSONArray array = (JSONArray) stored;
+        if (array.length() > 0) {
+            try {
+                return array.getJSONObject(0);
+            } catch (JSONException e) {
+                logger.error(e.getMessage());
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Forgets the saved credentials and closes the current connection, if any.
+     */
+    public void logout() {
+        prefStore.put(PREFERENCE_STORE_KEY, new JSONArray());
+        if (connection != null) {
+            try {
+                connection.logout();
+            } catch (IOException e) {
+                logger.error(e.getMessage());
+            } finally {
+                // Drop the connection even when the logout request failed, so
+                // that isLoggedIn() does not keep reporting true after the
+                // credentials have been cleared.
+                connection = null;
+            }
+        }
+    }
+
+    /**
+     * @return the current connection, or null when logged out
+     */
+    public ApiConnection getConnection() {
+        return connection;
+    }
+
+    /**
+     * @return true if a connection is currently held
+     */
+    public boolean isLoggedIn() {
+        return connection != null;
+    }
+
+    /**
+     * @return the user reported by the current connection, or null when logged
+     *         out
+     */
+    public String getUsername() {
+        if (connection != null) {
+            return connection.getCurrentUser();
+        } else {
+            return null;
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java
new file mode 100644
index 000000000..b4afae964
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/EditBatchProcessor.java
@@ -0,0 +1,210 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.editing;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
+import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor;
+import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
+import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
+
+/**
+ * Schedules and performs a list of updates to items via the API.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class EditBatchProcessor {
+
+    static final Logger logger = LoggerFactory.getLogger(EditBatchProcessor.class);
+
+    private WikibaseDataFetcher fetcher;
+    private WikibaseDataEditor editor;
+    private NewItemLibrary library;
+    // all updates, in the order chosen by the scheduler
+    private List<ItemUpdate> scheduled;
+    private String summary;
+
+    // updates that have not been completed in an earlier batch
+    private List<ItemUpdate> remainingUpdates;
+    // updates whose current documents have been fetched together
+    private List<ItemUpdate> currentBatch;
+    // index of the next update to perform within currentBatch
+    private int batchCursor;
+    // number of updates contained in the batches completed so far
+    private int globalCursor;
+    // current documents of the existing items of currentBatch, by item id
+    private Map<String, EntityDocument> currentDocs;
+    private int batchSize;
+
+    /**
+     * Initiates the process of pushing a batch of updates to Wikibase. This
+     * schedules the updates and is a prerequisite for calling
+     * {@link #performEdit()}.
+     *
+     * @param fetcher
+     *            the fetcher to use to retrieve the current state of items
+     * @param editor
+     *            the object to use to perform the edits
+     * @param updates
+     *            the list of item updates to perform
+     * @param library
+     *            the library to use to keep track of new item creation
+     * @param summary
+     *            the summary to append to all edits
+     * @param batchSize
+     *            the number of items that should be retrieved in one go from the
+     *            API
+     */
+    public EditBatchProcessor(WikibaseDataFetcher fetcher, WikibaseDataEditor editor, List<ItemUpdate> updates,
+            NewItemLibrary library, String summary, int batchSize) {
+        this.fetcher = fetcher;
+        this.editor = editor;
+        editor.setEditAsBot(true); // this will not do anything if the user does not
+        // have a bot flag, and this is generally wanted if they have one.
+        this.library = library;
+        this.summary = summary;
+        this.batchSize = batchSize;
+
+        // Schedule the edit batch
+        WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
+        this.scheduled = scheduler.schedule(updates);
+        this.globalCursor = 0;
+
+        this.batchCursor = 0;
+        this.remainingUpdates = new ArrayList<>(scheduled);
+        this.currentBatch = Collections.emptyList();
+        this.currentDocs = Collections.emptyMap();
+    }
+
+    /**
+     * Performs the next edit in the batch, fetching a new batch of documents
+     * first when the current one is exhausted. Does nothing when no edit
+     * remains. Errors raised by the API while editing are logged and the edit
+     * is skipped.
+     *
+     * @throws InterruptedException
+     *             if the documents of a new batch could not be fetched, or if
+     *             the thread was interrupted while waiting to retry
+     */
+    public void performEdit()
+            throws InterruptedException {
+        if (remainingEdits() == 0) {
+            return;
+        }
+        if (batchCursor == currentBatch.size()) {
+            prepareNewBatch();
+        }
+        ItemUpdate update = currentBatch.get(batchCursor);
+
+        // Rewrite mentions to new items
+        ReconEntityRewriter rewriter = new ReconEntityRewriter(library, update.getItemId());
+        update = rewriter.rewrite(update);
+
+        try {
+            // New item
+            if (update.isNew()) {
+                ReconEntityIdValue newCell = (ReconEntityIdValue) update.getItemId();
+                update = update.normalizeLabelsAndAliases();
+
+                ItemDocument itemDocument = Datamodel.makeItemDocument(update.getItemId(),
+                        update.getLabels().stream().collect(Collectors.toList()),
+                        update.getDescriptions().stream().collect(Collectors.toList()),
+                        update.getAliases().stream().collect(Collectors.toList()), update.getAddedStatementGroups(),
+                        Collections.emptyMap());
+
+                ItemDocument createdDoc = editor.createItemDocument(itemDocument, summary);
+                library.setQid(newCell.getReconInternalId(), createdDoc.getItemId().getId());
+            } else {
+                // Existing item
+                String qid = update.getItemId().getId();
+                ItemDocument currentDocument = (ItemDocument) currentDocs.get(qid);
+                if (currentDocument == null) {
+                    // The fetch returned nothing for this id, so there is no
+                    // document to update: skip the edit rather than hand a
+                    // null document to the editor.
+                    logger.warn("No current document was retrieved for {}, skipping its update", qid);
+                } else {
+                    editor.updateTermsStatements(currentDocument,
+                            update.getLabels().stream().collect(Collectors.toList()),
+                            update.getDescriptions().stream().collect(Collectors.toList()),
+                            update.getAliases().stream().collect(Collectors.toList()),
+                            new ArrayList<MonolingualTextValue>(),
+                            update.getAddedStatements().stream().collect(Collectors.toList()),
+                            update.getDeletedStatements().stream().collect(Collectors.toList()), summary);
+                }
+            }
+        } catch (MediaWikiApiErrorException e) {
+            // TODO find a way to report these errors to the user in a nice way
+            logger.warn("The API rejected an edit", e);
+        } catch (IOException e) {
+            logger.warn("An edit could not be sent", e);
+        }
+
+        batchCursor++;
+    }
+
+    /**
+     * @return the number of scheduled edits that have not been attempted yet
+     *         (over all batches, not only the current one)
+     */
+    public int remainingEdits() {
+        return scheduled.size() - (globalCursor + batchCursor);
+    }
+
+    /**
+     * @return the progress, measured as a percentage; 100 when nothing was
+     *         scheduled
+     */
+    public int progress() {
+        if (scheduled.isEmpty()) {
+            // nothing to do: avoid dividing by zero and report completion
+            return 100;
+        }
+        return (100 * (globalCursor + batchCursor)) / scheduled.size();
+    }
+
+    /**
+     * Moves on to the next batch of updates and fetches the current documents
+     * of the existing items it contains, trying up to three times.
+     *
+     * @throws InterruptedException
+     *             if all attempts to fetch the documents failed, or if the
+     *             thread was interrupted while waiting between attempts
+     */
+    protected void prepareNewBatch()
+            throws InterruptedException {
+        // remove the previous batch from the remainingUpdates: when
+        // currentBatch is a sub-list view, clearing it removes its elements
+        // from the backing list
+        globalCursor += currentBatch.size();
+        currentBatch.clear();
+
+        if (remainingUpdates.size() < batchSize) {
+            currentBatch = remainingUpdates;
+            remainingUpdates = Collections.emptyList();
+        } else {
+            currentBatch = remainingUpdates.subList(0, batchSize);
+        }
+        // new items have no document to fetch yet
+        List<String> qidsToFetch = currentBatch.stream().filter(u -> !u.isNew()).map(u -> u.getItemId().getId())
+                .collect(Collectors.toList());
+
+        // Get the current documents for this batch of updates
+        logger.info("Requesting documents");
+        currentDocs = null;
+        int retries = 3;
+        while (currentDocs == null && retries > 0) {
+            try {
+                currentDocs = fetcher.getEntityDocuments(qidsToFetch);
+            } catch (MediaWikiApiErrorException e) {
+                logger.warn("Fetching current documents failed", e);
+                Thread.sleep(5000);
+            }
+            retries--;
+        }
+        if (currentDocs == null) {
+            throw new InterruptedException("Fetching current documents failed.");
+        }
+        batchCursor = 0;
+    }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java
new file mode 100644
index 000000000..b84af31cb
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/NewItemLibrary.java
@@ -0,0 +1,159 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.editing;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import com.google.refine.model.Cell;
+import com.google.refine.model.Column;
+import com.google.refine.model.Project;
+import com.google.refine.model.Recon;
+import com.google.refine.model.ReconCandidate;
+import com.google.refine.model.ReconStats;
+import com.google.refine.model.Row;
+
+/**
+ * This keeps track of the new items that we have created for each internal
+ * reconciliation id.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class NewItemLibrary {
+
+    // maps the internal reconciliation id of a new cell to its allocated Qid
+    private Map<Long, String> map;
+
+    /**
+     * Creates an empty library.
+     */
+    public NewItemLibrary() {
+        map = new HashMap<>();
+    }
+
+    /**
+     * Creates a library backed by the given map (used by Jackson).
+     *
+     * @param map
+     *            the Qids allocated so far, by internal reconciliation id
+     */
+    @JsonCreator
+    public NewItemLibrary(@JsonProperty("qidMap") Map<Long, String> map) {
+        this.map = map;
+    }
+
+    /**
+     * Retrieves the Qid allocated to a given new cell
+     *
+     * @param id
+     *            the internal reconciliation id of the new cell
+     * @return the qid (or null if unallocated yet)
+     */
+    public String getQid(long id) {
+        return map.get(id);
+    }
+
+    /**
+     * Stores a Qid associated to a new cell
+     *
+     * @param id
+     *            the internal reconciliation id of the new cell
+     * @param qid
+     *            the associated Qid returned by Wikibase
+     */
+    public void setQid(long id, String qid) {
+        map.put(id, qid);
+    }
+
+    /**
+     * Changes the "new" reconciled cells to their allocated qids for later use.
+     * The reconciliation statistics of every column holding a changed cell are
+     * recomputed afterwards.
+     *
+     * @param project
+     *            the project whose rows are scanned and updated in place
+     * @param reset
+     *            set to true to revert the operation (set cells to "new")
+     */
+    public void updateReconciledCells(Project project, boolean reset) {
+
+        Set<Integer> impactedColumns = new HashSet<>();
+
+        /*
+         * Note that there is a slight violation of OpenRefine's model here: if we
+         * reconcile multiple cells to the same new item, and then perform this
+         * operation on a subset of the corresponding rows, we are going to modify cells
+         * that are outside the facet (because they are reconciled to the same cell).
+         * But I think this is the right thing to do.
+         */
+
+        for (Row row : project.rows) {
+            for (int i = 0; i != row.cells.size(); i++) {
+                Cell cell = row.cells.get(i);
+                if (cell == null || cell.recon == null) {
+                    continue;
+                }
+                Recon recon = cell.recon;
+                // only cells whose reconciliation id is known to this library
+                // are touched, in either direction
+                if (Recon.Judgment.New.equals(recon.judgment) && !reset
+                        && map.containsKey(recon.id)) {
+                    recon.judgment = Recon.Judgment.Matched;
+                    recon.match = new ReconCandidate(map.get(recon.id), cell.value.toString(),
+                            new String[0], 100);
+                    impactedColumns.add(i);
+                } else if (Recon.Judgment.Matched.equals(recon.judgment) && reset
+                        && map.containsKey(recon.id)) {
+                    recon.judgment = Recon.Judgment.New;
+                    recon.match = null;
+                    impactedColumns.add(i);
+                }
+            }
+        }
+        // Update reconciliation statistics for impacted columns
+        for (Integer colId : impactedColumns) {
+            Column column = project.columnModel.getColumnByCellIndex(colId);
+            column.setReconStats(ReconStats.create(project, colId));
+        }
+    }
+
+    /**
+     * Getter, only meant to be used by Jackson
+     *
+     * @return the underlying map
+     */
+    @JsonProperty("qidMap")
+    public Map<Long, String> getQidMap() {
+        return map;
+    }
+
+    /**
+     * Two libraries are equal when their underlying maps are equal.
+     */
+    @Override
+    public boolean equals(Object other) {
+        // instanceof is false for null, so no separate null check is needed
+        if (!(other instanceof NewItemLibrary)) {
+            return false;
+        }
+        NewItemLibrary otherLibrary = (NewItemLibrary) other;
+        return map.equals(otherLibrary.getQidMap());
+    }
+
+    @Override
+    public int hashCode() {
+        return map.hashCode();
+    }
+
+    @Override
+    public String toString() {
+        return map.toString();
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java
new file mode 100644
index 000000000..36938a3ee
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/ReconEntityRewriter.java
@@ -0,0 +1,102 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.editing;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.helpers.DatamodelConverter;
+import org.wikidata.wdtk.datamodel.implementation.DataObjectFactoryImpl;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+/**
+ * A class that rewrites an {@link ItemUpdate}, replacing reconciled entity id
+ * values by their concrete values after creation of all the new items involved.
+ *
+ * If an item has not been created yet, an {@link IllegalArgumentException} will
+ * be raised.
+ *
+ * The subject is treated as a special case: it is returned unchanged. This is
+ * because it is guaranteed not to appear in the update (but it does appear in
+ * the datamodel representation as the subject is passed around to the Claim
+ * objects its document contains).
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class ReconEntityRewriter extends DatamodelConverter {
+
+    private NewItemLibrary library;
+    private ItemIdValue subject;
+
+    /**
+     * Sets up a rewriter.
+     *
+     * @param library the library used to look up the qids of new items
+     * @param subject the subject of the update, which should not be rewritten
+     */
+    public ReconEntityRewriter(NewItemLibrary library, ItemIdValue subject) {
+        super(new DataObjectFactoryImpl());
+        this.library = library;
+        this.subject = subject;
+    }
+
+    /** Swaps a new reconciled item for the qid stored in the library; anything else goes to the superclass. */
+    @Override
+    public ItemIdValue copy(ItemIdValue value) {
+        if (subject.equals(value)) {
+            return value;
+        }
+        if (value instanceof ReconItemIdValue) {
+            ReconItemIdValue recon = (ReconItemIdValue) value;
+            if (recon.isNew()) {
+                String newId = library.getQid(recon.getReconInternalId());
+                if (newId == null) {
+                    throw new IllegalArgumentException("Trying to rewrite an update where a new item was not created yet.");
+                }
+                return Datamodel.makeItemIdValue(newId, recon.getRecon().identifierSpace);
+            }
+        }
+        return super.copy(value);
+    }
+
+    /** Copies all terms and statements of the update through this rewriter, keeping its item id as is. */
+    public ItemUpdate rewrite(ItemUpdate update) {
+        Set<MonolingualTextValue> labels = update.getLabels().stream().map(l -> copy(l)).collect(Collectors.toSet());
+        Set<MonolingualTextValue> descriptions = update.getDescriptions().stream().map(l -> copy(l))
+                .collect(Collectors.toSet());
+        Set<MonolingualTextValue> aliases = update.getAliases().stream().map(l -> copy(l)).collect(Collectors.toSet());
+        List<Statement> addedStatements = update.getAddedStatements().stream().map(l -> copy(l))
+                .collect(Collectors.toList());
+        Set<Statement> deletedStatements = update.getDeletedStatements().stream().map(l -> copy(l))
+                .collect(Collectors.toSet());
+        return new ItemUpdate(update.getItemId(), addedStatements, deletedStatements, labels, descriptions, aliases);
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/editing/WikibaseCredentials.java b/extensions/wikidata/src/org/openrefine/wikidata/editing/WikibaseCredentials.java
new file mode 100644
index 000000000..b8de3581f
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/editing/WikibaseCredentials.java
@@ -0,0 +1,86 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.editing;
+
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+import com.google.refine.Jsonizable;
+
+/**
+ * This is just the necessary bits to store Wikidata credentials in OpenRefine's
+ * preference store.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+class WikibaseCredentials implements Jsonizable {
+
+ private String username;
+ private String password;
+
+ public WikibaseCredentials() {
+ username = null;
+ password = null;
+ }
+
+ public WikibaseCredentials(String username, String password) {
+ this.username = username;
+ this.password = password;
+ }
+
+ public String getUsername() {
+ return username;
+ }
+
+ public String getPassword() {
+ return password;
+ }
+
+ public boolean isNonNull() {
+ return username != null && password != null && !"null".equals(username) && !"null".equals(password);
+ }
+
+ @Override
+ public void write(JSONWriter writer, Properties options)
+ throws JSONException {
+ writer.object();
+ writer.key("class");
+ writer.value(this.getClass().getName());
+ writer.key("username");
+ writer.value(username);
+ writer.key("password");
+ writer.value(password);
+ writer.endObject();
+ }
+
+ public static WikibaseCredentials load(JSONObject obj)
+ throws JSONException {
+ return new WikibaseCredentials(obj.getString("username"), obj.getString("password"));
+ }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/exporters/QSValuePrinter.java b/extensions/wikidata/src/org/openrefine/wikidata/exporters/QSValuePrinter.java
new file mode 100644
index 000000000..4a4153f57
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/exporters/QSValuePrinter.java
@@ -0,0 +1,106 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.exporters;
+
+import java.math.BigDecimal;
+import java.util.Locale;
+
+import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
+import org.openrefine.wikidata.updates.scheduler.QuickStatementsUpdateScheduler;
+import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
+import org.wikidata.wdtk.datamodel.interfaces.StringValue;
+import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
+import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
+
+/**
+ * Prints a Wikibase value as a string as required by QuickStatements. Format
+ * documentation: https://www.wikidata.org/wiki/Help:QuickStatements
+ *
+ * Any new entity id will be assumed to be the last one created, represented
+ * with "LAST". It is fine to do this assumption because we are working on edit
+ * batches previously scheduled by {@link QuickStatementsUpdateScheduler}.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class QSValuePrinter implements ValueVisitor<String> {
+
+    @Override
+    public String visit(DatatypeIdValue value) {
+        // impossible case (this is actually a bug in WDTK, DatatypeIdValue should not subclass Value)
+        throw new IllegalArgumentException();
+    }
+
+    @Override
+    public String visit(EntityIdValue value) {
+        if (ReconEntityIdValue.class.isInstance(value) && ((ReconEntityIdValue) value).isNew()) {
+            return "LAST"; // a new item has no id yet; it is assumed to be the last one created
+        }
+        return value.getId();
+    }
+
+    @Override
+    public String visit(GlobeCoordinatesValue value) {
+        return String.format(Locale.US, "@%f/%f", value.getLatitude(), value.getLongitude());
+    }
+
+    @Override
+    public String visit(MonolingualTextValue value) {
+        return String.format("%s:\"%s\"", value.getLanguageCode(), value.getText());
+    }
+
+    @Override
+    public String visit(QuantityValue value) {
+        String unitPrefix = "http://www.wikidata.org/entity/Q";
+        String unitIri = value.getUnit();
+        String unitRepresentation = "", boundsRepresentation = "";
+        if (!unitIri.isEmpty()) {
+            if (!unitIri.startsWith(unitPrefix)) return null; // QuickStatements only accepts Qids as units
+            unitRepresentation = "U" + unitIri.substring(unitPrefix.length());
+        }
+        if (value.getLowerBound() != null) {
+            // bounds are always null at the same time so we know they are both not null
+            BigDecimal lowerBound = value.getLowerBound();
+            BigDecimal upperBound = value.getUpperBound();
+            boundsRepresentation = String.format(Locale.US, "[%s,%s]", lowerBound.toString(), upperBound.toString());
+        }
+        return value.getNumericValue().toString() + boundsRepresentation + unitRepresentation;
+    }
+
+    @Override
+    public String visit(StringValue value) {
+        return "\"" + value.getString() + "\"";
+    }
+
+    @Override
+    public String visit(TimeValue value) {
+        // Locale.US keeps the digits ASCII whatever the default locale is, as the other numeric printers do
+        return String.format(Locale.US, "+%04d-%02d-%02dT%02d:%02d:%02dZ/%d", value.getYear(), value.getMonth(),
+                value.getDay(), value.getHour(), value.getMinute(), value.getSecond(), value.getPrecision());
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/exporters/QuickStatementsExporter.java b/extensions/wikidata/src/org/openrefine/wikidata/exporters/QuickStatementsExporter.java
new file mode 100644
index 000000000..6bc650bf8
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/exporters/QuickStatementsExporter.java
@@ -0,0 +1,191 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.exporters;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+
+import org.openrefine.wikidata.schema.WikibaseSchema;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.scheduler.ImpossibleSchedulingException;
+import org.openrefine.wikidata.updates.scheduler.QuickStatementsUpdateScheduler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.wikidata.wdtk.datamodel.interfaces.Claim;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.Reference;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
+
+import com.google.refine.browsing.Engine;
+import com.google.refine.exporters.WriterExporter;
+import com.google.refine.model.Project;
+
+public class QuickStatementsExporter implements WriterExporter {
+
+    final static Logger logger = LoggerFactory.getLogger("QuickStatementsExporter");
+
+    public static final String impossibleSchedulingErrorMessage = "This edit batch cannot be performed with QuickStatements due to the structure of its new items.";
+    public static final String noSchemaErrorMessage = "No schema was provided. You need to align your project with Wikidata first.";
+
+    public QuickStatementsExporter() {
+    }
+
+    @Override
+    public String getContentType() {
+        return "text/plain";
+    }
+
+    /** Writes the QuickStatements for the project's Wikibase schema, or an error message if it has none. */
+    @Override
+    public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException {
+        WikibaseSchema schema = (WikibaseSchema) project.overlayModels.get("wikibaseSchema");
+        if (schema == null) {
+            writer.write(noSchemaErrorMessage);
+        } else {
+            translateSchema(project, engine, schema, writer);
+        }
+    }
+
+    /**
+     * Exports a project and a schema to a QuickStatements file
+     *
+     * @param project
+     *            the project to translate
+     * @param engine
+     *            the engine used for evaluation of the edits
+     * @param schema
+     *            the WikibaseSchema used for translation of tabular data to edits
+     * @param writer
+     *            the writer to which the QS should be written
+     * @throws IOException when the writer cannot be written to
+     */
+    public void translateSchema(Project project, Engine engine, WikibaseSchema schema, Writer writer)
+            throws IOException {
+        List<ItemUpdate> items = schema.evaluate(project, engine);
+        translateItemList(items, writer);
+    }
+
+    /** Schedules the updates for QuickStatements and writes them, or writes the scheduling error message. */
+    public void translateItemList(List<ItemUpdate> updates, Writer writer)
+            throws IOException {
+        QuickStatementsUpdateScheduler scheduler = new QuickStatementsUpdateScheduler();
+        try {
+            List<ItemUpdate> scheduled = scheduler.schedule(updates);
+            for (ItemUpdate item : scheduled) {
+                translateItem(item, writer);
+            }
+        } catch (ImpossibleSchedulingException e) {
+            writer.write(impossibleSchedulingErrorMessage);
+        }
+    }
+
+    /** Writes one line per value: qid, prefix + language code, quoted text (tab-separated). The id is unused. */
+    protected void translateNameDescr(String qid, Set<MonolingualTextValue> values, String prefix,
+            ItemIdValue id, Writer writer) throws IOException {
+        for (MonolingualTextValue value : values) {
+            writer.write(qid + "\t");
+            writer.write(prefix);
+            writer.write(value.getLanguageCode());
+            writer.write("\t\"");
+            writer.write(value.getText());
+            writer.write("\"\n");
+        }
+    }
+
+    /** Writes an item: a CREATE line if it is new, then its terms, added and deleted statements. */
+    protected void translateItem(ItemUpdate item, Writer writer) throws IOException {
+        String qid = item.getItemId().getId();
+        if (item.isNew()) {
+            writer.write("CREATE\n");
+            qid = "LAST";
+            item = item.normalizeLabelsAndAliases();
+        }
+
+        translateNameDescr(qid, item.getLabels(), "L", item.getItemId(), writer);
+        translateNameDescr(qid, item.getDescriptions(), "D", item.getItemId(), writer);
+        translateNameDescr(qid, item.getAliases(), "A", item.getItemId(), writer);
+
+        for (Statement s : item.getAddedStatements()) {
+            translateStatement(qid, s, s.getClaim().getMainSnak().getPropertyId().getId(), true, writer);
+        }
+        for (Statement s : item.getDeletedStatements()) {
+            translateStatement(qid, s, s.getClaim().getMainSnak().getPropertyId().getId(), false, writer);
+        }
+    }
+
+    /** Writes a statement on one line, prefixed with "- " for a deletion; writes nothing if its value prints to null. */
+    protected void translateStatement(String qid, Statement statement, String pid, boolean add, Writer writer)
+            throws IOException {
+        Claim claim = statement.getClaim();
+        Value val = claim.getValue();
+        ValueVisitor<String> vv = new QSValuePrinter();
+        String targetValue = val.accept(vv);
+        if (targetValue != null) {
+            if (!add) {
+                writer.write("- ");
+            }
+            writer.write(qid + "\t" + pid + "\t" + targetValue);
+            for (SnakGroup q : claim.getQualifiers()) {
+                translateSnakGroup(q, false, writer);
+            }
+            for (Reference r : statement.getReferences()) {
+                for (SnakGroup g : r.getSnakGroups()) {
+                    translateSnakGroup(g, true, writer);
+                }
+                break; // QS only supports one reference
+            }
+            writer.write("\n");
+        }
+    }
+
+    /** Writes every snak of the group in turn, passing the reference flag through unchanged. */
+    protected void translateSnakGroup(SnakGroup sg, boolean reference, Writer writer) throws IOException {
+        for (Snak s : sg.getSnaks()) {
+            translateSnak(s, reference, writer);
+        }
+    }
+
+    /** Appends a tab-separated property/value pair; in a reference the P of the property id becomes S. */
+    protected void translateSnak(Snak s, boolean reference, Writer writer) throws IOException {
+        String pid = s.getPropertyId().getId();
+        if (reference) {
+            pid = pid.replace('P', 'S');
+        }
+        Value val = s.getValue();
+        ValueVisitor<String> vv = new QSValuePrinter();
+        String valStr = val.accept(vv);
+        if (valStr != null) {
+            writer.write("\t" + pid + "\t" + valStr);
+        }
+    }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java b/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java
new file mode 100644
index 000000000..b3df354a3
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/operations/PerformWikibaseEditsOperation.java
@@ -0,0 +1,237 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.operations;
+
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.Writer;
+import java.util.List;
+import java.util.Properties;
+import java.util.Random;
+
+import org.apache.commons.lang.Validate;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+import org.openrefine.wikidata.editing.ConnectionManager;
+import org.openrefine.wikidata.editing.EditBatchProcessor;
+import org.openrefine.wikidata.editing.NewItemLibrary;
+import org.openrefine.wikidata.schema.WikibaseSchema;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.wikidata.wdtk.util.WebResourceFetcherImpl;
+import org.wikidata.wdtk.wikibaseapi.ApiConnection;
+import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor;
+import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import com.google.refine.browsing.Engine;
+import com.google.refine.history.Change;
+import com.google.refine.history.HistoryEntry;
+import com.google.refine.model.AbstractOperation;
+import com.google.refine.model.Project;
+import com.google.refine.operations.EngineDependentOperation;
+import com.google.refine.operations.OperationRegistry;
+import com.google.refine.process.LongRunningProcess;
+import com.google.refine.process.Process;
+import com.google.refine.util.Pool;
+
+public class PerformWikibaseEditsOperation extends EngineDependentOperation {
+
+    static final Logger logger = LoggerFactory.getLogger(PerformWikibaseEditsOperation.class);
+
+    private String summary;
+
+    public PerformWikibaseEditsOperation(JSONObject engineConfig, String summary) {
+        super(engineConfig);
+        Validate.notNull(summary, "An edit summary must be provided.");
+        Validate.notEmpty(summary, "An edit summary must be provided.");
+        this.summary = summary;
+    }
+
+    /** Rebuilds the operation from its JSON form; a missing "summary" reaches the constructor as null. */
+    static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
+        JSONObject engineConfig = obj.getJSONObject("engineConfig");
+        String summary = null;
+        if (obj.has("summary")) {
+            summary = obj.getString("summary");
+        }
+        return new PerformWikibaseEditsOperation(engineConfig, summary);
+    }
+
+    @Override
+    public void write(JSONWriter writer, Properties options) throws JSONException {
+        writer.object();
+        writer.key("op");
+        writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
+        writer.key("description");
+        writer.value("Perform Wikibase edits");
+        writer.key("summary");
+        writer.value(summary);
+        writer.key("engineConfig");
+        writer.value(getEngineConfig());
+        writer.endObject();
+    }
+
+    @Override
+    protected String getBriefDescription(Project project) {
+        return "Perform edits on Wikidata";
+    }
+
+    @Override
+    public Process createProcess(Project project, Properties options) throws Exception {
+        return new PerformEditsProcess(project, createEngine(project), getBriefDescription(project), summary);
+    }
+
+    static public class PerformWikibaseEditsChange implements Change {
+
+        private NewItemLibrary newItemLibrary;
+
+        public PerformWikibaseEditsChange(NewItemLibrary library) {
+            newItemLibrary = library;
+        }
+
+        @Override
+        public void apply(Project project) {
+            // we don't re-run changes on Wikidata; a null library (which save() tolerates) leaves nothing to update
+            if (newItemLibrary != null) {
+                newItemLibrary.updateReconciledCells(project, false);
+            }
+        }
+
+        @Override
+        public void revert(Project project) {
+            // this does not do anything on Wikibase side - (we don't revert changes on Wikidata either)
+            if (newItemLibrary != null) {
+                newItemLibrary.updateReconciledCells(project, true);
+            }
+        }
+
+        @Override
+        public void save(Writer writer, Properties options) throws IOException {
+            if (newItemLibrary != null) {
+                writer.write("newItems=");
+                ObjectMapper mapper = new ObjectMapper();
+                writer.write(mapper.writeValueAsString(newItemLibrary) + "\n");
+            }
+            writer.write("/ec/\n"); // end of change
+        }
+
+        static public Change load(LineNumberReader reader, Pool pool) throws Exception {
+            NewItemLibrary library = new NewItemLibrary();
+            String line = null;
+            while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
+                int equal = line.indexOf('=');
+                if (equal < 0) {
+                    continue; // not a field=value line: skip it rather than fail on a negative index
+                }
+                String field = line.substring(0, equal);
+                String value = line.substring(equal + 1);
+                if ("newItems".equals(field)) {
+                    library = new ObjectMapper().readValue(value, NewItemLibrary.class);
+                }
+            }
+            return new PerformWikibaseEditsChange(library);
+        }
+
+    }
+
+    public class PerformEditsProcess extends LongRunningProcess implements Runnable {
+
+        protected Project _project;
+        protected Engine _engine;
+        protected WikibaseSchema _schema;
+        protected String _summary;
+        protected final long _historyEntryID;
+
+        protected PerformEditsProcess(Project project, Engine engine, String description, String summary) {
+            super(description);
+            this._project = project;
+            this._engine = engine;
+            this._schema = (WikibaseSchema) project.overlayModels.get("wikibaseSchema");
+            this._summary = summary;
+            this._historyEntryID = HistoryEntry.allocateID();
+        }
+
+        /** Performs the edits generated by the schema; unless canceled, records them in the project history. */
+        @Override
+        public void run() {
+            WebResourceFetcherImpl.setUserAgent("OpenRefine Wikidata extension");
+            ConnectionManager manager = ConnectionManager.getInstance();
+            if (!manager.isLoggedIn()) {
+                return; // no edit is attempted without a logged-in connection
+            }
+            ApiConnection connection = manager.getConnection();
+
+            WikibaseDataFetcher wbdf = new WikibaseDataFetcher(connection, _schema.getBaseIri());
+            WikibaseDataEditor wbde = new WikibaseDataEditor(connection, _schema.getBaseIri());
+
+            // Generate batch token (zero-padded to 16 hex digits, so taking 7 of them can never overrun)
+            long token = (new Random()).nextLong();
+            String summary = _summary + String.format(" ([[:toollabs:editgroups/b/OR/%s|details]])",
+                    String.format("%016x", token).substring(0, 7));
+
+            // Evaluate the schema
+            List<ItemUpdate> itemDocuments = _schema.evaluate(_project, _engine);
+
+            // Prepare the edits
+            NewItemLibrary newItemLibrary = new NewItemLibrary();
+            EditBatchProcessor processor = new EditBatchProcessor(wbdf, wbde, itemDocuments, newItemLibrary, summary,
+                    50);
+
+            // Perform edits
+            logger.info("Performing edits");
+            while (processor.remainingEdits() > 0) {
+                try {
+                    processor.performEdit();
+                } catch (InterruptedException e) {
+                    _canceled = true;
+                }
+                _progress = processor.progress();
+                if (_canceled) {
+                    break;
+                }
+            }
+
+            _progress = 100;
+
+            if (!_canceled) {
+                Change change = new PerformWikibaseEditsChange(newItemLibrary);
+
+                HistoryEntry historyEntry = new HistoryEntry(_historyEntryID, _project, _description,
+                        PerformWikibaseEditsOperation.this, change);
+
+                _project.history.addEntry(historyEntry);
+                _project.processManager.onDoneProcess(this);
+            }
+        }
+
+        @Override
+        protected Runnable getRunnable() {
+            return this;
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/operations/SaveWikibaseSchemaOperation.java b/extensions/wikidata/src/org/openrefine/wikidata/operations/SaveWikibaseSchemaOperation.java
new file mode 100644
index 000000000..8303469c6
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/operations/SaveWikibaseSchemaOperation.java
@@ -0,0 +1,161 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.operations;
+
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.Writer;
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+import org.openrefine.wikidata.schema.WikibaseSchema;
+
+import com.google.refine.history.Change;
+import com.google.refine.history.HistoryEntry;
+import com.google.refine.model.AbstractOperation;
+import com.google.refine.model.Project;
+import com.google.refine.operations.OperationRegistry;
+import com.google.refine.util.ParsingUtilities;
+import com.google.refine.util.Pool;
+
+public class SaveWikibaseSchemaOperation extends AbstractOperation {
+
+    final public String operationDescription = "Save Wikibase schema";
+    final protected WikibaseSchema _schema;
+
+    /** @param schema the schema that the change created by this operation installs in the project */
+    public SaveWikibaseSchemaOperation(WikibaseSchema schema) {
+        this._schema = schema;
+    }
+
+    /** Rebuilds the operation from JSON, reading the schema from the "schema" key. */
+    static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
+        return new SaveWikibaseSchemaOperation(WikibaseSchema.reconstruct(obj.getJSONObject("schema")));
+    }
+
+    /** Serializes the operation as an object with the keys "op", "description" and "schema". */
+    public void write(JSONWriter writer, Properties options)
+            throws JSONException {
+        writer.object();
+        writer.key("op");
+        writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
+        writer.key("description");
+        writer.value(operationDescription);
+        writer.key("schema");
+        _schema.write(writer, options);
+        writer.endObject();
+    }
+
+    @Override
+    protected String getBriefDescription(Project project) {
+        return "Save Wikibase schema skeleton";
+    }
+
+    @Override
+    protected HistoryEntry createHistoryEntry(Project project, long historyEntryID)
+            throws Exception {
+        String description = operationDescription;
+
+        Change change = new WikibaseSchemaChange(_schema);
+
+        return new HistoryEntry(historyEntryID, project, description, SaveWikibaseSchemaOperation.this, change);
+    }
+
+    static public class WikibaseSchemaChange implements Change {
+
+        final protected WikibaseSchema _newSchema;
+        protected WikibaseSchema _oldSchema = null;
+        public final static String overlayModelKey = "wikibaseSchema";
+
+        public WikibaseSchemaChange(WikibaseSchema newSchema) {
+            _newSchema = newSchema;
+        }
+
+        public void apply(Project project) {
+            synchronized (project) {
+                _oldSchema = (WikibaseSchema) project.overlayModels.get(overlayModelKey); // kept for revert()
+                project.overlayModels.put(overlayModelKey, _newSchema);
+            }
+        }
+
+        public void revert(Project project) {
+            synchronized (project) {
+                if (_oldSchema == null) {
+                    project.overlayModels.remove(overlayModelKey);
+                } else {
+                    project.overlayModels.put(overlayModelKey, _oldSchema);
+                }
+            }
+        }
+
+        /** Writes "newSchema=" and "oldSchema=" lines (empty value when null), then the "/ec/" end marker. */
+        public void save(Writer writer, Properties options) throws IOException {
+            writer.write("newSchema=");
+            writeWikibaseSchema(_newSchema, writer);
+            writer.write('\n');
+            writer.write("oldSchema=");
+            writeWikibaseSchema(_oldSchema, writer);
+            writer.write('\n');
+            writer.write("/ec/\n"); // end of change marker
+        }
+
+        static public Change load(LineNumberReader reader, Pool pool) throws Exception {
+            WikibaseSchema oldSchema = null;
+            WikibaseSchema newSchema = null;
+            String line;
+            while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
+                int equal = line.indexOf('=');
+                if (equal < 0) {
+                    continue; // not a field=value line: skip it rather than fail on a negative index
+                }
+                String field = line.substring(0, equal);
+                String value = line.substring(equal + 1);
+                if ("oldSchema".equals(field) && value.length() > 0) {
+                    oldSchema = WikibaseSchema.reconstruct(ParsingUtilities.evaluateJsonStringToObject(value));
+                } else if ("newSchema".equals(field) && value.length() > 0) {
+                    newSchema = WikibaseSchema.reconstruct(ParsingUtilities.evaluateJsonStringToObject(value));
+                }
+            }
+
+            WikibaseSchemaChange change = new WikibaseSchemaChange(newSchema);
+            change._oldSchema = oldSchema;
+
+            return change;
+        }
+
+        /** Writes the JSON form of the schema, or nothing at all when it is null. */
+        static protected void writeWikibaseSchema(WikibaseSchema s, Writer writer) throws IOException {
+            if (s != null) {
+                JSONWriter jsonWriter = new JSONWriter(writer);
+                try {
+                    s.write(jsonWriter, new Properties());
+                } catch (JSONException e) {
+                    throw new IOException(e); // a half-written schema must fail the save, not pass unnoticed
+                }
+            }
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java
new file mode 100644
index 000000000..9933bb02d
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java
@@ -0,0 +1,94 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa;
+
+import java.util.Set;
+
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+
+/**
+ * An object that fetches constraints about properties. Each method answers
+ * one question about the constraints attached to a given property.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public interface ConstraintFetcher {
+
+ /**
+ * Retrieves the regular expression for formatting a property, or null if there
+ * is no such constraint
+ *
+ * @param pid
+ *            the property to retrieve the format constraint for
+ * @return the source of a regular expression which should be compatible
+ * with java.util.regex, or null
+ */
+ String getFormatRegex(PropertyIdValue pid);
+
+ /**
+ * Retrieves the property that is the inverse of a given property
+ *
+ * @param pid
+ *            the property to retrieve the inverse for
+ * @return the pid of the inverse property (the Wikidata-backed implementation
+ * returns null when no inverse is declared)
+ */
+ PropertyIdValue getInversePid(PropertyIdValue pid);
+
+ /**
+ * Is this property for values only?
+ *
+ * @param pid
+ *            the property to check
+ */
+ boolean isForValuesOnly(PropertyIdValue pid);
+
+ /**
+ * Is this property for qualifiers only?
+ *
+ * @param pid
+ *            the property to check
+ */
+ boolean isForQualifiersOnly(PropertyIdValue pid);
+
+ /**
+ * Is this property for references only?
+ *
+ * @param pid
+ *            the property to check
+ */
+ boolean isForReferencesOnly(PropertyIdValue pid);
+
+ /**
+ * Get the set of allowed qualifiers (as property ids) for this property, or
+ * null if there is no such constraint (that is, any qualifier is allowed)
+ *
+ * @param pid
+ *            the property to retrieve the allowed qualifiers for
+ */
+ Set allowedQualifiers(PropertyIdValue pid);
+
+ /**
+ * Get the set of mandatory qualifiers (as property ids) for this property,
+ * or null if there is no such constraint
+ *
+ * @param pid
+ *            the property to retrieve the mandatory qualifiers for
+ */
+ Set mandatoryQualifiers(PropertyIdValue pid);
+
+ /**
+ * Is this property expected to have at most one value per item?
+ *
+ * @param pid
+ *            the property to check
+ */
+ boolean hasSingleValue(PropertyIdValue pid);
+
+ /**
+ * Is this property expected to have distinct values?
+ *
+ * @param pid
+ *            the property to check
+ */
+ boolean hasDistinctValues(PropertyIdValue pid);
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
new file mode 100644
index 000000000..83bfa37e9
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java
@@ -0,0 +1,124 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.FormatScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.NoEditsMadeScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.QualifierCompatibilityScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.RestrictedPositionScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.SelfReferentialScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.SingleValueScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.UnsourcedScrutinizer;
+import org.openrefine.wikidata.qa.scrutinizers.WhitespaceScrutinizer;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+
+/**
+ * Runs a collection of edit scrutinizers on an edit batch and collects the
+ * warnings they emit in a shared {@link QAWarningStore}.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class EditInspector {
+
+    // Registered scrutinizers, keyed by their class name
+    private Map<String, EditScrutinizer> scrutinizers;
+    private QAWarningStore warningStore;
+    private ConstraintFetcher fetcher;
+
+    /**
+     * Creates an inspector with all known scrutinizers registered.
+     *
+     * @param warningStore
+     *            the store in which the scrutinizers record their warnings
+     */
+    public EditInspector(QAWarningStore warningStore) {
+        this.scrutinizers = new HashMap<>();
+        this.fetcher = new WikidataConstraintFetcher();
+        this.warningStore = warningStore;
+
+        // Register all known scrutinizers here
+        register(new NewItemScrutinizer());
+        register(new FormatScrutinizer());
+        register(new InverseConstraintScrutinizer());
+        register(new SelfReferentialScrutinizer());
+        register(new UnsourcedScrutinizer());
+        register(new RestrictedPositionScrutinizer());
+        register(new QualifierCompatibilityScrutinizer());
+        register(new SingleValueScrutinizer());
+        register(new DistinctValuesScrutinizer());
+        register(new NoEditsMadeScrutinizer());
+        register(new WhitespaceScrutinizer());
+    }
+
+    /**
+     * Adds a new scrutinizer to the inspector and wires it to this
+     * inspector's warning store and constraint fetcher. Registering a second
+     * scrutinizer of the same class replaces the first one.
+     *
+     * @param scrutinizer
+     *            the scrutinizer to register
+     */
+    public void register(EditScrutinizer scrutinizer) {
+        String key = scrutinizer.getClass().getName();
+        scrutinizers.put(key, scrutinizer);
+        scrutinizer.setStore(warningStore);
+        scrutinizer.setFetcher(fetcher);
+    }
+
+    /**
+     * Inspect a batch of edits with the registered scrutinizers. If no
+     * warning has been recorded at the end, a single "no-issue-detected"
+     * INFO warning is added to the store.
+     *
+     * @param editBatch
+     *            the edits to inspect
+     */
+    public void inspect(List<ItemUpdate> editBatch) {
+        // First, schedule them with some scheduler,
+        // so that all newly created entities appear in the batch
+        WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
+        List<ItemUpdate> scheduled = scheduler.schedule(editBatch);
+
+        // Merge the updates that share the same subject
+        Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(scheduled);
+        List<ItemUpdate> mergedUpdates = updates.values().stream().collect(Collectors.toList());
+
+        for (EditScrutinizer scrutinizer : scrutinizers.values()) {
+            scrutinizer.batchIsBeginning();
+        }
+
+        for (ItemUpdate update : mergedUpdates) {
+            if (!update.isNull()) {
+                for (EditScrutinizer scrutinizer : scrutinizers.values()) {
+                    scrutinizer.scrutinize(update);
+                }
+            }
+        }
+
+        for (EditScrutinizer scrutinizer : scrutinizers.values()) {
+            scrutinizer.batchIsFinished();
+        }
+
+        if (warningStore.getNbWarnings() == 0) {
+            warningStore.addWarning(new QAWarning("no-issue-detected", null, QAWarning.Severity.INFO, 0));
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarning.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarning.java
new file mode 100644
index 000000000..a5b98dd09
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarning.java
@@ -0,0 +1,167 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.jsoup.helper.Validate;
+import org.openrefine.wikidata.utils.JacksonJsonizable;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * A class to represent a QA warning emitted by the Wikidata schema. This could
+ * probably be reused at a broader scale, for instance for Data Package
+ * validation.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning> {
+
+    public enum Severity {
+        INFO, // We just report something to the user but it is probably fine
+        WARNING, // Edits that look wrong but in some cases they are actually fine
+        IMPORTANT, // There is almost surely something wrong about the edit but in rare cases we
+                   // might want to allow it
+        CRITICAL, // We should never edit if there is a critical issue
+    }
+
+    // The type of QA warning emitted
+    private final String type;
+    // The key for aggregation of other QA warnings together - this specializes the
+    // type, and may be null
+    private final String bucketId;
+    // The severity of the issue
+    private final Severity severity;
+    // The number of times this issue was found
+    private final int count;
+    // Other details about the warning, that can be displayed to the user
+    private final Map<String, Object> properties;
+
+    /**
+     * @param type
+     *            the type of warning, must not be null
+     * @param bucketId
+     *            an optional key refining the type for aggregation, may be null
+     * @param severity
+     *            the severity of the issue, must not be null
+     * @param count
+     *            the number of times the issue was found
+     */
+    public QAWarning(String type, String bucketId, Severity severity, int count) {
+        Validate.notNull(type);
+        this.type = type;
+        this.bucketId = bucketId;
+        Validate.notNull(severity);
+        this.severity = severity;
+        this.count = count;
+        this.properties = new HashMap<>();
+    }
+
+    /**
+     * @return the full key for aggregation of QA warnings: the type alone, or
+     *         the type and the bucket id joined by "_" when a bucket id is set
+     */
+    @JsonIgnore
+    public String getAggregationId() {
+        if (this.bucketId != null) {
+            return this.type + "_" + this.bucketId;
+        } else {
+            return this.type;
+        }
+    }
+
+    /**
+     * Aggregates another QA warning of the same aggregation id. Neither
+     * warning is modified: a new warning is returned, whose count is the sum
+     * of both counts, whose severity is the highest of the two, and whose
+     * properties are the union of both (the other warning's values win when a
+     * key is present in both).
+     *
+     * @param other
+     *            the warning to merge with this one
+     * @return the merged warning
+     */
+    public QAWarning aggregate(QAWarning other) {
+        assert other.getAggregationId().equals(getAggregationId());
+        int newCount = count + other.getCount();
+        Severity newSeverity = severity;
+        if (other.getSeverity().compareTo(severity) > 0) {
+            newSeverity = other.getSeverity();
+        }
+        QAWarning merged = new QAWarning(getType(), getBucketId(), newSeverity, newCount);
+        for (Entry<String, Object> entry : properties.entrySet()) {
+            merged.setProperty(entry.getKey(), entry.getValue());
+        }
+        for (Entry<String, Object> entry : other.getProperties().entrySet()) {
+            merged.setProperty(entry.getKey(), entry.getValue());
+        }
+        return merged;
+    }
+
+    /**
+     * Sets a property of the QA warning, to be used by the front-end for display.
+     *
+     * @param key
+     *            the name of the property
+     * @param value
+     *            should be Jackson-serializable
+     */
+    public void setProperty(String key, Object value) {
+        this.properties.put(key, value);
+    }
+
+    @JsonProperty("type")
+    public String getType() {
+        return type;
+    }
+
+    @JsonProperty("bucketId")
+    public String getBucketId() {
+        return bucketId;
+    }
+
+    @JsonProperty("severity")
+    public Severity getSeverity() {
+        return severity;
+    }
+
+    @JsonProperty("count")
+    public int getCount() {
+        return count;
+    }
+
+    @JsonProperty("properties")
+    @JsonInclude(JsonInclude.Include.NON_EMPTY)
+    public Map<String, Object> getProperties() {
+        return properties;
+    }
+
+    /**
+     * Warnings are sorted by decreasing severity. Note that this ordering
+     * only looks at the severity, so it is not consistent with equals.
+     */
+    @Override
+    public int compareTo(QAWarning other) {
+        return -severity.compareTo(other.getSeverity());
+    }
+
+    /**
+     * Two warnings are equal when their type, bucket id (possibly null),
+     * severity, count and properties are all equal.
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+        if (!(other instanceof QAWarning)) {
+            return false;
+        }
+        QAWarning otherWarning = (QAWarning) other;
+        // bucketId is optional, so it must be compared null-safely
+        boolean sameBucket = bucketId == null ? otherWarning.getBucketId() == null
+                : bucketId.equals(otherWarning.getBucketId());
+        return type.equals(otherWarning.getType()) && sameBucket
+                && severity.equals(otherWarning.getSeverity()) && count == otherWarning.getCount()
+                && properties.equals(otherWarning.getProperties());
+    }
+
+    /**
+     * Hash code consistent with equals. The mutable properties map is left
+     * out so that adding a property does not change the hash of a warning.
+     */
+    @Override
+    public int hashCode() {
+        int result = type.hashCode();
+        result = 31 * result + (bucketId == null ? 0 : bucketId.hashCode());
+        result = 31 * result + severity.hashCode();
+        result = 31 * result + count;
+        return result;
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarningStore.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarningStore.java
new file mode 100644
index 000000000..f458e7021
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarningStore.java
@@ -0,0 +1,99 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * A store for QA warnings which aggregates them by aggregation id (the
+ * warning type, refined by its bucket id when it has one).
+ *
+ * @author Antonin Delpeuch
+ */
+public class QAWarningStore {
+
+    // Aggregated warnings, keyed by aggregation id
+    @JsonIgnore
+    private Map<String, QAWarning> map;
+    // Highest severity among the warnings added so far
+    @JsonIgnore
+    private QAWarning.Severity maxSeverity;
+    // Sum of the counts of all the warnings added so far
+    @JsonIgnore
+    private int totalWarnings;
+
+    public QAWarningStore() {
+        this.map = new HashMap<>();
+        this.maxSeverity = QAWarning.Severity.INFO;
+    }
+
+    /**
+     * Stores a warning, aggregating it with any existing warning that has the
+     * same aggregation id. The maximum severity and the total count are
+     * updated accordingly.
+     *
+     * @param warning
+     *            the warning to store
+     */
+    public void addWarning(QAWarning warning) {
+        QAWarning.Severity severity = warning.getSeverity();
+        if (severity.compareTo(maxSeverity) > 0) {
+            maxSeverity = severity;
+        }
+        totalWarnings += warning.getCount();
+        // Inserts the warning, or replaces the existing one by
+        // existing.aggregate(warning) when the key is already present
+        map.merge(warning.getAggregationId(), warning, QAWarning::aggregate);
+    }
+
+    /**
+     * Returns the list of aggregated warnings, ordered by decreasing severity
+     */
+    @JsonProperty("warnings")
+    public List<QAWarning> getWarnings() {
+        List<QAWarning> result = new ArrayList<>(map.values());
+        Collections.sort(result);
+        return result;
+    }
+
+    /**
+     * Returns the maximum severity of the stored warnings (INFO if empty)
+     */
+    @JsonProperty("max_severity")
+    public QAWarning.Severity getMaxSeverity() {
+        return maxSeverity;
+    }
+
+    /**
+     * Returns the total number of warnings, that is the sum of the counts of
+     * all the warnings that were added
+     */
+    @JsonProperty("nb_warnings")
+    public int getNbWarnings() {
+        return totalWarnings;
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java
new file mode 100644
index 000000000..8b301a530
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java
@@ -0,0 +1,221 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.openrefine.wikidata.utils.EntityCache;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
+import org.wikidata.wdtk.datamodel.interfaces.StringValue;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+/**
+ * This class provides an abstraction over the way constraint definitions are
+ * stored in Wikidata: as statements of the constraint property on the
+ * property's own document, whose qualifiers carry the constraint parameters.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WikidataConstraintFetcher implements ConstraintFetcher {
+
+    public static String WIKIDATA_CONSTRAINT_PID = "P2302";
+
+    public static String FORMAT_CONSTRAINT_QID = "Q21502404";
+    public static String FORMAT_REGEX_PID = "P1793";
+
+    public static String INVERSE_CONSTRAINT_QID = "Q21510855";
+    public static String INVERSE_PROPERTY_PID = "P2306";
+
+    public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958";
+
+    public static String USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID = "Q21510863";
+
+    public static String USED_ONLY_AS_REFERENCE_CONSTRAINT_QID = "Q21528959";
+
+    public static String ALLOWED_QUALIFIERS_CONSTRAINT_QID = "Q21510851";
+    public static String ALLOWED_QUALIFIERS_CONSTRAINT_PID = "P2306";
+
+    public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856";
+    public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306";
+
+    public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
+    public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
+
+    // The following constraints still need to be implemented:
+
+    public static String TYPE_CONSTRAINT_QID = "Q21503250";
+
+    /**
+     * @return the first regular expression of the format constraint of the
+     *         property, or null if there is no such constraint or it has no
+     *         regular expression
+     */
+    @Override
+    public String getFormatRegex(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
+        if (specs != null) {
+            List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
+            if (!regexes.isEmpty()) {
+                return ((StringValue) regexes.get(0)).getString();
+            }
+        }
+        return null;
+    }
+
+    /**
+     * @return the first property listed in the inverse constraint of the
+     *         property, or null if there is none
+     */
+    @Override
+    public PropertyIdValue getInversePid(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID);
+
+        if (specs != null) {
+            List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID);
+            if (!inverses.isEmpty()) {
+                return (PropertyIdValue) inverses.get(0);
+            }
+        }
+        return null;
+    }
+
+    @Override
+    public boolean isForValuesOnly(PropertyIdValue pid) {
+        return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null;
+    }
+
+    @Override
+    public boolean isForQualifiersOnly(PropertyIdValue pid) {
+        return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null;
+    }
+
+    @Override
+    public boolean isForReferencesOnly(PropertyIdValue pid) {
+        return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null;
+    }
+
+    /**
+     * @return the properties listed in the allowed qualifiers constraint, or
+     *         null if the property has no such constraint
+     */
+    @Override
+    public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_QUALIFIERS_CONSTRAINT_QID);
+
+        if (specs != null) {
+            List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID);
+            return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
+        }
+        return null;
+    }
+
+    /**
+     * @return the properties listed in the mandatory qualifiers constraint,
+     *         or null if the property has no such constraint
+     */
+    @Override
+    public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, MANDATORY_QUALIFIERS_CONSTRAINT_QID);
+
+        if (specs != null) {
+            List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID);
+            return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
+        }
+        return null;
+    }
+
+    @Override
+    public boolean hasSingleValue(PropertyIdValue pid) {
+        return getSingleConstraint(pid, SINGLE_VALUE_CONSTRAINT_QID) != null;
+    }
+
+    @Override
+    public boolean hasDistinctValues(PropertyIdValue pid) {
+        return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
+    }
+
+    /**
+     * Returns a single constraint for a particular type and a property, or null if
+     * there is no such constraint. When several constraints of that type
+     * exist, the first one is used.
+     *
+     * @param pid
+     *            the property to retrieve the constraints for
+     * @param qid
+     *            the type of the constraints
+     * @return the list of qualifiers for the constraint, or null if it does not
+     *         exist
+     */
+    protected List<SnakGroup> getSingleConstraint(PropertyIdValue pid, String qid) {
+        Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null);
+        if (statement != null) {
+            return statement.getClaim().getQualifiers();
+        }
+        return null;
+    }
+
+    /**
+     * Gets the constraints of a particular type for a property
+     *
+     * @param pid
+     *            the property to retrieve the constraints for
+     * @param qid
+     *            the type of the constraints
+     * @return the stream of matching constraint statements
+     */
+    protected Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
+        // The instanceof check skips statements whose main value is missing
+        // or is not an entity id, instead of failing on the cast.
+        return getConstraintStatements(pid).stream()
+                .filter(s -> s.getValue() instanceof EntityIdValue
+                        && ((EntityIdValue) s.getValue()).getId().equals(qid));
+    }
+
+    /**
+     * Gets all the constraint statements for a given property
+     *
+     * @param pid
+     *            the id of the property to retrieve the constraints for
+     * @return the list of constraint statements, empty if the property has none
+     */
+    protected List<Statement> getConstraintStatements(PropertyIdValue pid) {
+        PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
+        StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
+        if (group != null) {
+            return group.getStatements();
+        } else {
+            return new ArrayList<>();
+        }
+    }
+
+    /**
+     * Returns the values of a given property in qualifiers. Snaks that carry
+     * no value are skipped.
+     *
+     * @param groups
+     *            the qualifiers
+     * @param pid
+     *            the property to filter on
+     * @return the values found, in the order of the qualifiers
+     */
+    protected List<Value> findValues(List<SnakGroup> groups, String pid) {
+        List<Value> results = new ArrayList<>();
+        for (SnakGroup group : groups) {
+            if (group.getProperty().getId().equals(pid)) {
+                for (Snak snak : group.getSnaks()) {
+                    Value value = snak.getValue();
+                    if (value != null) {
+                        results.add(value);
+                    }
+                }
+            }
+        }
+        return results;
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java
new file mode 100644
index 000000000..03a5761d1
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java
@@ -0,0 +1,75 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+/**
+ * A scrutinizer that checks for properties using the same value on different
+ * items.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class DistinctValuesScrutinizer extends StatementScrutinizer {
+
+    public final static String type = "identical-values-for-distinct-valued-property";
+
+    // For each distinct-valued property, the values seen so far and the
+    // entity on which each value was first seen
+    private Map<PropertyIdValue, Map<Value, EntityIdValue>> _seenValues;
+
+    public DistinctValuesScrutinizer() {
+        _seenValues = new HashMap<>();
+    }
+
+    /**
+     * Records the main value of the statement when its property is expected
+     * to have distinct values, and emits an IMPORTANT warning if the same
+     * value was already seen for that property.
+     *
+     * @param statement
+     *            the statement to check
+     * @param entityId
+     *            the entity the statement belongs to
+     * @param added
+     *            not used by this scrutinizer
+     */
+    @Override
+    public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
+        PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
+        if (!_fetcher.hasDistinctValues(pid)) {
+            return;
+        }
+        Value mainSnakValue = statement.getClaim().getMainSnak().getValue();
+        Map<Value, EntityIdValue> seen = _seenValues.computeIfAbsent(pid, k -> new HashMap<>());
+        if (seen.containsKey(mainSnakValue)) {
+            // Keep the first entity recorded for this value and report the clash
+            EntityIdValue otherId = seen.get(mainSnakValue);
+            QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
+            issue.setProperty("property_entity", pid);
+            issue.setProperty("item1_entity", entityId);
+            issue.setProperty("item2_entity", otherId);
+            addIssue(issue);
+        } else {
+            seen.put(mainSnakValue, entityId);
+        }
+    }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EditScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EditScrutinizer.java
new file mode 100644
index 000000000..44ab9cff5
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EditScrutinizer.java
@@ -0,0 +1,128 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.qa.ConstraintFetcher;
+import org.openrefine.wikidata.qa.QAWarning;
+import org.openrefine.wikidata.qa.QAWarning.Severity;
+import org.openrefine.wikidata.qa.QAWarningStore;
+import org.openrefine.wikidata.updates.ItemUpdate;
+
+/**
+ * Inspects an edit batch and emits warnings.
+ *
+ * @author Antonin Delpeuch
+ */
+public abstract class EditScrutinizer {
+
+ protected QAWarningStore _store;
+ protected ConstraintFetcher _fetcher;
+
+ public EditScrutinizer() {
+ _fetcher = null;
+ _store = null;
+ }
+
+ public void setStore(QAWarningStore store) {
+ _store = store;
+ }
+
+ public void setFetcher(ConstraintFetcher fetcher) {
+ _fetcher = fetcher;
+ }
+
+ /**
+ * Called before an edit batch is scrutinized.
+ */
+ public void batchIsBeginning() {
+
+ }
+
+ /**
+ * Reads the candidate edits and emits warnings in the store
+ *
+ * @param edit:
+ * the list of ItemUpdates to scrutinize
+ */
+ public abstract void scrutinize(ItemUpdate edit);
+
+ /**
+ * Method called once the edit batch has been read entirely
+ */
+ public void batchIsFinished() {
+
+ }
+
+ /**
+ * Emits an issue that will be reported to the user,
+ * after merging with other issues of the same kind.
+ *
+ * @param warning
+ * the issue to report
+ */
+ protected void addIssue(QAWarning warning) {
+ _store.addWarning(warning);
+ }
+
+ protected void addIssue(String type, String aggregationId, Severity severity, int count) {
+ addIssue(new QAWarning(type, aggregationId, severity, count));
+ }
+
+ /**
+ * Helper to be used by subclasses to emit simple INFO warnings
+ *
+ * @param warning
+ */
+ protected void info(String type) {
+ addIssue(type, null, QAWarning.Severity.INFO, 1);
+
+ }
+
+ /**
+ * Helper to be used by subclasses to emit simple warnings
+ *
+ * @param warning
+ */
+ protected void warning(String type) {
+ addIssue(type, null, QAWarning.Severity.WARNING, 1);
+ }
+
+ /**
+ * Helper to be used by subclasses to emit simple important warnings
+ *
+ * @param warning
+ */
+ protected void important(String type) {
+ addIssue(type, null, QAWarning.Severity.IMPORTANT, 1);
+ }
+
+ /**
+ * Helper to be used by subclasses to emit simple critical warnings
+ *
+ * @param warning
+ */
+ protected void critical(String type) {
+ addIssue(type, null, QAWarning.Severity.CRITICAL, 1);
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizer.java
new file mode 100644
index 000000000..c61b90a06
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizer.java
@@ -0,0 +1,100 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.StringValue;
+
+/**
+ * A scrutinizer that detects incorrect formats in text values (mostly
+ * identifiers).
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class FormatScrutinizer extends SnakScrutinizer {
+
+    public static final String type = "add-statements-with-invalid-format";
+
+    private Map<PropertyIdValue, Pattern> _patterns;
+
+    public FormatScrutinizer() {
+        _patterns = new HashMap<>();
+    }
+
+    /**
+     * Loads the regex for a property and compiles it to a pattern (this is cached
+     * upstream, plus we are doing it only once per property and batch).
+     *
+     * @param pid
+     *            the id of the property to fetch the constraints for
+     * @return the compiled pattern, or null when no usable regex is available
+     */
+    protected Pattern getPattern(PropertyIdValue pid) {
+        if (_patterns.containsKey(pid)) {
+            return _patterns.get(pid);
+        }
+        String regex = _fetcher.getFormatRegex(pid);
+        Pattern pattern = null;
+        if (regex != null) {
+            try {
+                pattern = Pattern.compile(regex);
+            } catch (java.util.regex.PatternSyntaxException e) {
+                // Fetched regex is not valid java.util.regex syntax: skip the check, keep the batch alive.
+            }
+        }
+        _patterns.put(pid, pattern);
+        return pattern;
+    }
+
+    @Override
+    public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
+        if (!(snak.getValue() instanceof StringValue)) {
+            return; // the format check only applies to string values
+        }
+        String value = ((StringValue) snak.getValue()).getString();
+        PropertyIdValue pid = snak.getPropertyId();
+        Pattern pattern = getPattern(pid);
+        if (pattern == null || pattern.matcher(value).matches()) {
+            return;
+        }
+        if (added) {
+            QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
+            issue.setProperty("property_entity", pid);
+            issue.setProperty("regex", pattern.toString());
+            issue.setProperty("example_value", value);
+            issue.setProperty("example_item_entity", entityId);
+            addIssue(issue);
+        } else {
+            info("remove-statements-with-invalid-format");
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java
new file mode 100644
index 000000000..6a1a7b619
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java
@@ -0,0 +1,122 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+/**
+ * A scrutinizer that checks for missing inverse statements in edit batches.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class InverseConstraintScrutinizer extends StatementScrutinizer {
+
+    public static final String type = "missing-inverse-statements";
+
+    private Map<PropertyIdValue, PropertyIdValue> _inverse;
+    private Map<PropertyIdValue, Map<EntityIdValue, Set<EntityIdValue>>> _statements;
+
+    public InverseConstraintScrutinizer() {
+        _inverse = new HashMap<>();
+        _statements = new HashMap<>();
+    }
+
+    /**
+     * Returns the inverse property declared for pid (null if none), caching the
+     * answer and registering an empty statement index for each property seen.
+     */
+    protected PropertyIdValue getInverseConstraint(PropertyIdValue pid) {
+        if (_inverse.containsKey(pid)) {
+            return _inverse.get(pid);
+        }
+        PropertyIdValue inversePid = _fetcher.getInversePid(pid);
+        _inverse.put(pid, inversePid);
+        _statements.put(pid, new HashMap<>());
+
+        // We are doing this check because we do not have any guarantee that
+        // the inverse constraints are consistent on Wikidata.
+        if (inversePid != null && !_inverse.containsKey(inversePid)) {
+            _inverse.put(inversePid, pid);
+            _statements.put(inversePid, new HashMap<>());
+        }
+        return inversePid;
+    }
+
+    @Override
+    public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
+        if (!added) {
+            return; // TODO support for deleted statements
+        }
+
+        Value mainSnakValue = statement.getClaim().getMainSnak().getValue();
+        if (!(mainSnakValue instanceof ItemIdValue)) {
+            return; // only item-valued statements are tracked
+        }
+        PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
+        if (getInverseConstraint(pid) == null) {
+            return;
+        }
+        // Record the link entityId --pid--> target for the end-of-batch check.
+        Set<EntityIdValue> targets = _statements.get(pid).computeIfAbsent(entityId, k -> new HashSet<>());
+        targets.add((EntityIdValue) mainSnakValue);
+    }
+
+    @Override
+    public void batchIsFinished() {
+        // For each pair of inverse properties (in each direction)
+        for (Entry<PropertyIdValue, PropertyIdValue> propertyPair : _inverse.entrySet()) {
+            // Get the statements made for the first
+            PropertyIdValue ourProperty = propertyPair.getKey();
+            PropertyIdValue missingProperty = propertyPair.getValue();
+            for (Entry<EntityIdValue, Set<EntityIdValue>> itemLinks : _statements.get(ourProperty).entrySet()) {
+                // For each outgoing link
+                for (EntityIdValue idValue : itemLinks.getValue()) {
+                    // Check that they are in the statements made for the second
+                    Set<EntityIdValue> reciprocalLinks = _statements.get(missingProperty).get(idValue);
+                    if (reciprocalLinks == null || !reciprocalLinks.contains(itemLinks.getKey())) {
+                        QAWarning issue = new QAWarning(type, ourProperty.getId(), QAWarning.Severity.IMPORTANT, 1);
+                        issue.setProperty("added_property_entity", ourProperty);
+                        issue.setProperty("inverse_property_entity", missingProperty);
+                        issue.setProperty("source_entity", itemLinks.getKey());
+                        issue.setProperty("target_entity", idValue);
+                        addIssue(issue);
+                    }
+                }
+            }
+        }
+    }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java
new file mode 100644
index 000000000..335e00caf
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java
@@ -0,0 +1,83 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
+
+/**
+ * A scrutinizer that inspects new items.
+ *
+ * @author Antonin Delpeuch
+ */
+public class NewItemScrutinizer extends EditScrutinizer {
+
+    public static final String noLabelType = "new-item-without-labels-or-aliases";
+    public static final String noDescType = "new-item-without-descriptions";
+    public static final String deletedStatementsType = "new-item-with-deleted-statements";
+    public static final String noTypeType = "new-item-without-P31-or-P279";
+    public static final String newItemType = "new-item-created";
+
+    @Override
+    public void scrutinize(ItemUpdate update) {
+        if (!update.isNew()) {
+            return; // updates to existing items are not inspected here
+        }
+        info(newItemType);
+        if (update.getLabels().isEmpty() && update.getAliases().isEmpty()) {
+            report(noLabelType, QAWarning.Severity.CRITICAL, update);
+        }
+        if (update.getDescriptions().isEmpty()) {
+            report(noDescType, QAWarning.Severity.WARNING, update);
+        }
+        if (!update.getDeletedStatements().isEmpty()) {
+            report(deletedStatementsType, QAWarning.Severity.WARNING, update);
+        }
+
+        // Look for an "instance of" or "subclass of" claim among the added statements
+        boolean hasTypeClaim = false;
+        for (StatementGroup group : update.getAddedStatementGroups()) {
+            String pid = group.getProperty().getId();
+            if ("P31".equals(pid) || "P279".equals(pid)) {
+                hasTypeClaim = true;
+                break;
+            }
+        }
+        if (!hasTypeClaim) {
+            report(noTypeType, QAWarning.Severity.WARNING, update);
+        }
+    }
+
+    /**
+     * Emits a warning of the given type and severity, using the updated item as
+     * the example entity.
+     */
+    private void report(String warningType, QAWarning.Severity severity, ItemUpdate update) {
+        QAWarning issue = new QAWarning(warningType, null, severity, 1);
+        issue.setProperty("example_entity", update.getItemId());
+        addIssue(issue);
+    }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NoEditsMadeScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NoEditsMadeScrutinizer.java
new file mode 100644
index 000000000..27e653b03
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NoEditsMadeScrutinizer.java
@@ -0,0 +1,51 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.updates.ItemUpdate;
+
+public class NoEditsMadeScrutinizer extends EditScrutinizer {
+    // Emits an INFO notice when a batch finishes without any edit having been scrutinized.
+    public static final String type = "no-edit-generated";
+
+    private boolean editSeen = false;
+
+    @Override
+    public void batchIsBeginning() {
+        this.editSeen = false; // every batch starts from a clean slate
+    }
+
+    @Override
+    public void scrutinize(ItemUpdate edit) {
+        this.editSeen = true; // any call at all means the batch is not empty
+    }
+
+    @Override
+    public void batchIsFinished() {
+        if (editSeen) {
+            return;
+        }
+        info(type);
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QualifierCompatibilityScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QualifierCompatibilityScrutinizer.java
new file mode 100644
index 000000000..1e153a493
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QualifierCompatibilityScrutinizer.java
@@ -0,0 +1,110 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+/**
+ * A scrutinizer that checks the compatibility of the qualifiers and the
+ * property of a statement, and looks for mandatory qualifiers.
+ *
+ * @author Antonin Delpeuch
+ */
+public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
+
+    public static final String missingMandatoryQualifiersType = "missing-mandatory-qualifiers";
+    public static final String disallowedQualifiersType = "disallowed-qualifiers";
+
+    private Map<PropertyIdValue, Set<PropertyIdValue>> _allowedQualifiers;
+    private Map<PropertyIdValue, Set<PropertyIdValue>> _mandatoryQualifiers;
+
+    public QualifierCompatibilityScrutinizer() {
+        _allowedQualifiers = new HashMap<>();
+        _mandatoryQualifiers = new HashMap<>();
+    }
+
+    /**
+     * Tells whether a qualifier may be used on statements of the given property.
+     * A null set from the fetcher is cached as-is and accepts every qualifier.
+     */
+    protected boolean qualifierIsAllowed(PropertyIdValue statementProperty, PropertyIdValue qualifierProperty) {
+        if (!_allowedQualifiers.containsKey(statementProperty)) {
+            _allowedQualifiers.put(statementProperty, _fetcher.allowedQualifiers(statementProperty));
+        }
+        Set<PropertyIdValue> allowed = _allowedQualifiers.get(statementProperty);
+        return allowed == null || allowed.contains(qualifierProperty);
+    }
+
+    /**
+     * Returns the mandatory qualifiers of a property; never null (a null fetcher answer becomes an empty set).
+     */
+    protected Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue statementProperty) {
+        if (_mandatoryQualifiers.containsKey(statementProperty)) {
+            return _mandatoryQualifiers.get(statementProperty);
+        }
+        Set<PropertyIdValue> mandatory = _fetcher.mandatoryQualifiers(statementProperty);
+        if (mandatory == null) {
+            mandatory = new HashSet<>();
+        }
+        _mandatoryQualifiers.put(statementProperty, mandatory);
+        return mandatory;
+    }
+
+    @Override
+    public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
+        PropertyIdValue statementProperty = statement.getClaim().getMainSnak().getPropertyId();
+        Set<PropertyIdValue> qualifiers = statement.getClaim().getQualifiers().stream().map(e -> e.getProperty())
+                .collect(Collectors.toSet());
+
+        Set<PropertyIdValue> missingQualifiers = mandatoryQualifiers(statementProperty).stream()
+                .filter(p -> !qualifiers.contains(p)).collect(Collectors.toSet());
+        Set<PropertyIdValue> disallowedQualifiers = qualifiers.stream()
+                .filter(p -> !qualifierIsAllowed(statementProperty, p)).collect(Collectors.toSet());
+        for (PropertyIdValue missing : missingQualifiers) {
+            QAWarning issue = new QAWarning(missingMandatoryQualifiersType,
+                    statementProperty.getId() + "-" + missing.getId(), QAWarning.Severity.WARNING, 1);
+            issue.setProperty("statement_property_entity", statementProperty);
+            issue.setProperty("missing_property_entity", missing);
+            issue.setProperty("example_item_entity", entityId);
+            addIssue(issue);
+        }
+        for (PropertyIdValue disallowed : disallowedQualifiers) {
+            QAWarning issue = new QAWarning(disallowedQualifiersType,
+                    statementProperty.getId() + "-" + disallowed.getId(), QAWarning.Severity.WARNING, 1);
+            issue.setProperty("statement_property_entity", statementProperty);
+            issue.setProperty("disallowed_property_entity", disallowed);
+            issue.setProperty("example_item_entity", entityId);
+            addIssue(issue);
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/RestrictedPositionScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/RestrictedPositionScrutinizer.java
new file mode 100644
index 000000000..fa95b12c0
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/RestrictedPositionScrutinizer.java
@@ -0,0 +1,114 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Reference;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+public class RestrictedPositionScrutinizer extends StatementScrutinizer {
+
+    protected enum SnakPosition {
+        MAINSNAK, QUALIFIER, REFERENCE
+    }
+
+    private Map<PropertyIdValue, SnakPosition> _restrictedPids;
+    private Set<PropertyIdValue> _unrestrictedPids;
+
+    public RestrictedPositionScrutinizer() {
+        _restrictedPids = new HashMap<>();
+        _unrestrictedPids = new HashSet<>();
+    }
+
+    /**
+     * Returns the only position allowed for the property, or null if unrestricted (cached per property).
+     */
+    SnakPosition positionRestriction(PropertyIdValue pid) {
+        if (_unrestrictedPids.contains(pid)) {
+            return null;
+        }
+        SnakPosition restriction = _restrictedPids.get(pid);
+        if (restriction != null) {
+            return restriction;
+        }
+        if (_fetcher.isForValuesOnly(pid)) {
+            restriction = SnakPosition.MAINSNAK;
+        } else if (_fetcher.isForQualifiersOnly(pid)) {
+            restriction = SnakPosition.QUALIFIER;
+        } else if (_fetcher.isForReferencesOnly(pid)) {
+            restriction = SnakPosition.REFERENCE;
+        }
+        // Cache these results:
+        if (restriction != null) {
+            _restrictedPids.put(pid, restriction);
+        } else {
+            _unrestrictedPids.add(pid);
+        }
+        return restriction;
+    }
+
+    @Override
+    public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
+        // Main snak
+        scrutinize(statement.getClaim().getMainSnak(), entityId, SnakPosition.MAINSNAK, added);
+
+        // Qualifiers
+        scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), entityId, SnakPosition.QUALIFIER, added);
+
+        // References
+        for (Reference ref : statement.getReferences()) {
+            scrutinizeSnakSet(ref.getAllSnaks(), entityId, SnakPosition.REFERENCE, added);
+        }
+    }
+
+    protected void scrutinizeSnakSet(Iterator<Snak> snaks, EntityIdValue entityId, SnakPosition position,
+            boolean added) {
+        while (snaks.hasNext()) {
+            scrutinize(snaks.next(), entityId, position, added);
+        }
+    }
+
+    public void scrutinize(Snak snak, EntityIdValue entityId, SnakPosition position, boolean added) {
+        SnakPosition restriction = positionRestriction(snak.getPropertyId());
+        if (restriction != null && position != restriction) {
+            // Locale.ROOT keeps the warning type stable whatever the default locale is
+            // (a Turkish default locale would lower-case "I" to a dotless i).
+            String positionStr = position.toString().toLowerCase(java.util.Locale.ROOT);
+            String restrictionStr = restriction.toString().toLowerCase(java.util.Locale.ROOT);
+            QAWarning issue = new QAWarning("property-restricted-to-" + restrictionStr + "-found-in-" + positionStr,
+                    snak.getPropertyId().getId(), QAWarning.Severity.IMPORTANT, 1);
+            issue.setProperty("property_entity", snak.getPropertyId());
+            addIssue(issue);
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java
new file mode 100644
index 000000000..c23f0606e
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java
@@ -0,0 +1,50 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+
+/**
+ * A scrutinizer that flags snaks whose value is the very entity they are
+ * attached to. Wikibase treats such self-referential statements as suspicious.
+ *
+ * @author Antonin Delpeuch
+ */
+public class SelfReferentialScrutinizer extends SnakScrutinizer {
+
+    public static final String type = "self-referential-statements";
+
+    @Override
+    public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
+        boolean pointsToItself = entityId.equals(snak.getValue());
+        if (!pointsToItself) {
+            return;
+        }
+        QAWarning selfReference = new QAWarning(type, null, QAWarning.Severity.WARNING, 1);
+        selfReference.setProperty("example_entity", entityId);
+        addIssue(selfReference);
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SingleValueScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SingleValueScrutinizer.java
new file mode 100644
index 000000000..bdd7bb3f4
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SingleValueScrutinizer.java
@@ -0,0 +1,63 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+/**
+ * Warns when a single-valued property is added more than once on one item.
+ * For now this scrutinizer only checks for uniqueness at the item level (it
+ * ignores qualifiers and references).
+ *
+ * @author Antonin Delpeuch
+ */
+public class SingleValueScrutinizer extends EditScrutinizer {
+
+    public static final String type = "single-valued-property-added-more-than-once";
+
+    @Override
+    public void scrutinize(ItemUpdate update) {
+        // Single-valued properties already met among this item's added statements
+        Set<PropertyIdValue> seenSingleProperties = new HashSet<>();
+
+        for (Statement statement : update.getAddedStatements()) {
+            PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
+            if (seenSingleProperties.contains(pid)) {
+                QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.WARNING, 1);
+                issue.setProperty("property_entity", pid);
+                issue.setProperty("example_entity", update.getItemId());
+                addIssue(issue);
+            } else if (_fetcher.hasSingleValue(pid)) {
+                seenSingleProperties.add(pid);
+            }
+        }
+    }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java
new file mode 100644
index 000000000..7e7e3afe7
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java
@@ -0,0 +1,74 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.Iterator;
+
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Reference;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+/**
+ * A scrutinizer that inspects snaks individually, no matter whether they appear
+ * as main snaks, qualifiers or references.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public abstract class SnakScrutinizer extends StatementScrutinizer {
+
+    /**
+     * This is the method that subclasses should override to implement their checks.
+     *
+     * @param snak
+     *            the snak to inspect
+     * @param entityId
+     *            the item on which it is going to (dis)appear
+     * @param added
+     *            whether this snak is going to be added or deleted
+     */
+    public abstract void scrutinize(Snak snak, EntityIdValue entityId, boolean added);
+
+    @Override
+    public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
+        // Main snak
+        scrutinize(statement.getClaim().getMainSnak(), entityId, added);
+
+        // Qualifiers
+        scrutinizeSnakSet(statement.getClaim().getAllQualifiers(), entityId, added);
+
+        // References
+        for (Reference ref : statement.getReferences()) {
+            scrutinizeSnakSet(ref.getAllSnaks(), entityId, added);
+        }
+    }
+
+    // Applies the per-snak check to every snak produced by the iterator.
+    protected void scrutinizeSnakSet(Iterator<Snak> snaks, EntityIdValue entityId, boolean added) {
+        while (snaks.hasNext()) {
+            scrutinize(snaks.next(), entityId, added);
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementScrutinizer.java
new file mode 100644
index 000000000..847ccaf07
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementScrutinizer.java
@@ -0,0 +1,55 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+public abstract class StatementScrutinizer extends EditScrutinizer {
+
+    /** Scrutinizes each added statement of the update, then each deleted one. */
+    @Override
+    public void scrutinize(ItemUpdate update) {
+        EntityIdValue subjectId = update.getItemId();
+        for (Statement addition : update.getAddedStatements()) {
+            scrutinize(addition, subjectId, true);
+        }
+        for (Statement deletion : update.getDeletedStatements()) {
+            scrutinize(deletion, subjectId, false);
+        }
+    }
+
+    /**
+     * Checks a single statement; this is the method subclasses implement.
+     *
+     * @param statement
+     *            the statement to scrutinize
+     * @param entityId
+     *            the id of the entity on which this statement is made or removed
+     * @param added
+     *            whether this statement was added or deleted
+     */
+    public abstract void scrutinize(Statement statement, EntityIdValue entityId, boolean added);
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/UnsourcedScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/UnsourcedScrutinizer.java
new file mode 100644
index 000000000..40a638d9e
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/UnsourcedScrutinizer.java
@@ -0,0 +1,46 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+/**
+ * A scrutinizer checking for unsourced statements
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class UnsourcedScrutinizer extends StatementScrutinizer {
+    public static final String type = "unsourced-statements";
+
+    /** Calls {@code warning(type)} for each added statement whose list of references is empty. */
+    @Override
+    public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
+        boolean unsourced = statement.getReferences().isEmpty();
+        if (unsourced && added) {
+            warning(type);
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizer.java
new file mode 100644
index 000000000..1a70302a9
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizer.java
@@ -0,0 +1,62 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+/**
+ * A scrutinizer that inspects the values of snaks and terms
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public abstract class ValueScrutinizer extends SnakScrutinizer {
+
+    /** After the inherited checks, scrutinizes the update's labels, aliases and descriptions, in that order. */
+    @Override
+    public void scrutinize(ItemUpdate update) {
+        super.scrutinize(update);
+        for (MonolingualTextValue labelTerm : update.getLabels()) {
+            scrutinize(labelTerm);
+        }
+        for (MonolingualTextValue aliasTerm : update.getAliases()) {
+            scrutinize(aliasTerm);
+        }
+        for (MonolingualTextValue descriptionTerm : update.getDescriptions()) {
+            scrutinize(descriptionTerm);
+        }
+    }
+
+    /** The per-value check that subclasses implement; receives snak values as well as terms. */
+    public abstract void scrutinize(Value value);
+
+    @Override
+    public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
+        scrutinize(snak.getValue());
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java
new file mode 100644
index 000000000..a5685384e
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java
@@ -0,0 +1,85 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.regex.Pattern;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.StringValue;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+/**
+ * Scrutinizes strings for trailing / leading whitespace, and others
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WhitespaceScrutinizer extends ValueScrutinizer {
+
+    private Map<String, Pattern> _issuesMap;
+
+    public static final String leadingWhitespaceType = "leading-whitespace";
+    public static final String trailingWhitespaceType = "trailing-whitespace";
+    public static final String duplicateWhitespaceType = "duplicate-whitespace";
+    public static final String nonPrintableCharsType = "non-printable-characters";
+
+    public WhitespaceScrutinizer() {
+        _issuesMap = new HashMap<>();
+        _issuesMap.put(leadingWhitespaceType, Pattern.compile("^\\s"));
+        _issuesMap.put(trailingWhitespaceType, Pattern.compile("\\s$"));
+        _issuesMap.put(duplicateWhitespaceType, Pattern.compile("\\s\\s"));
+        // https://stackoverflow.com/questions/14565934/regular-expression-to-remove-all-non-printable-characters
+        _issuesMap.put(nonPrintableCharsType, Pattern.compile("[\\x00\\x03\\x08\\x0B\\x0C\\x0E-\\x1F]"));
+    }
+
+    /** Emits a warning for each registered pattern found in a string value or monolingual text value. */
+    @Override
+    public void scrutinize(Value value) {
+        String str = null;
+        if (value instanceof MonolingualTextValue) {
+            str = ((MonolingualTextValue) value).getText();
+        } else if (value instanceof StringValue) {
+            str = ((StringValue) value).getString();
+        }
+        if (str == null) {
+            return; // not a textual value (or no value at all): nothing to check
+        }
+        for (Entry<String, Pattern> entry : _issuesMap.entrySet()) {
+            if (entry.getValue().matcher(str).find()) {
+                emitWarning(entry.getKey(), str);
+            }
+        }
+    }
+
+    // Records a warning of the given type, attaching the offending string as "example_string".
+    private void emitWarning(String type, String example) {
+        QAWarning warning = new QAWarning(type, null, QAWarning.Severity.WARNING, 1);
+        warning.setProperty("example_string", example);
+        addIssue(warning);
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/ExpressionContext.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/ExpressionContext.java
new file mode 100644
index 000000000..fd2efc7c5
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/ExpressionContext.java
@@ -0,0 +1,107 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.apache.commons.lang.Validate;
+import org.openrefine.wikidata.qa.QAWarning;
+import org.openrefine.wikidata.qa.QAWarningStore;
+
+import com.google.refine.model.Cell;
+import com.google.refine.model.Column;
+import com.google.refine.model.ColumnModel;
+import com.google.refine.model.Row;
+
+/**
+ * A class holding all the necessary information about the context in which a
+ * schema expression is evaluated.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class ExpressionContext {
+
+    private String baseIRI;
+    private int rowId;
+    private Row row;
+    private ColumnModel columnModel;
+    private QAWarningStore warningStore;
+
+    /**
+     * Creates the context in which a schema is evaluated against one row.
+     *
+     * @param baseIRI
+     *            the siteIRI of the schema (must not be null)
+     * @param rowId
+     *            the id of the row currently visited
+     * @param row
+     *            the row itself (must not be null)
+     * @param columnModel
+     *            used to look up cells by column name (must not be null)
+     * @param warningStore
+     *            receives the issues raised during evaluation; null discards them
+     */
+    public ExpressionContext(String baseIRI, int rowId, Row row, ColumnModel columnModel, QAWarningStore warningStore) {
+        Validate.notNull(baseIRI);
+        Validate.notNull(row);
+        Validate.notNull(columnModel);
+        this.baseIRI = baseIRI;
+        this.rowId = rowId;
+        this.row = row;
+        this.columnModel = columnModel;
+        this.warningStore = warningStore;
+    }
+
+    /** @return the base IRI supplied at construction */
+    public String getBaseIRI() {
+        return baseIRI;
+    }
+
+    /**
+     * Looks up the cell of the current row that sits in the column bearing the
+     * given name.
+     *
+     * @param name
+     *            the name of the column to retrieve the cell from
+     * @return the cell the row holds for that column, or null if no such column exists
+     */
+    public Cell getCellByName(String name) {
+        Column column = columnModel.getColumnByName(name);
+        if (column == null) {
+            return null;
+        }
+        return row.getCell(column.getCellIndex());
+    }
+
+    /** @return the id of the row supplied at construction */
+    public int getRowId() {
+        return rowId;
+    }
+
+    /** Forwards the warning to the warning store, or drops it when no store was supplied. */
+    public void addWarning(QAWarning warning) {
+        if (warningStore != null) {
+            warningStore.addWarning(warning);
+        }
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java
new file mode 100644
index 000000000..1a5209d39
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateConstant.java
@@ -0,0 +1,158 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import java.text.ParseException;
+import java.text.ParsePosition;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.jsoup.helper.Validate;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * A constant for a time value, accepting a number of formats which determine
+ * the precision of the parsed value.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WbDateConstant implements WbExpression<TimeValue> {
+
+    /**
+     * Map of formats accepted by the parser. Each format is associated to the time
+     * precision it induces (an integer according to Wikibase's data model).
+     * Formats are tried in insertion order.
+     */
+    public static Map<SimpleDateFormat, Integer> acceptedFormats = ImmutableMap.<SimpleDateFormat, Integer> builder()
+            .put(new SimpleDateFormat("yyyy"), 9).put(new SimpleDateFormat("yyyy-MM"), 10)
+            .put(new SimpleDateFormat("yyyy-MM-dd"), 11).put(new SimpleDateFormat("yyyy-MM-dd'T'HH"), 12)
+            .put(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm"), 13)
+            .put(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"), 14).build();
+
+    private TimeValue parsed;
+    private String origDatestamp;
+
+    /**
+     * Constructor. Used for deserialization from JSON. The datestamp is parsed
+     * eagerly, so construction fails with an {@link IllegalArgumentException}
+     * when it matches none of the accepted formats.
+     *
+     * @param origDatestamp
+     *            the date value as a string
+     */
+    @JsonCreator
+    public WbDateConstant(@JsonProperty("value") String origDatestamp) {
+        Validate.notNull(origDatestamp);
+        this.setOrigDatestamp(origDatestamp);
+    }
+
+    /** Returns the value parsed at construction time; the context is not consulted. */
+    @Override
+    public TimeValue evaluate(ExpressionContext ctxt)
+            throws SkipSchemaExpressionException {
+        return parsed;
+    }
+
+    /**
+     * Parses a timestamp into a Wikibase {@link TimeValue}. The precision is
+     * inferred from the first accepted format that parses the whole input,
+     * once leading and trailing whitespace has been trimmed.
+     *
+     * @param datestamp
+     *            the time to parse
+     * @return the parsed time value
+     * @throws ParseException
+     *             if no accepted format matches the entire input
+     */
+    public static TimeValue parse(String datestamp)
+            throws ParseException {
+        String trimmedDatestamp = datestamp.trim();
+        Date date = null;
+        int precision = 9; // placeholder, always overwritten by a successful parse
+        for (Entry<SimpleDateFormat, Integer> entry : acceptedFormats.entrySet()) {
+            ParsePosition position = new ParsePosition(0);
+            Date candidate = entry.getKey().parse(trimmedDatestamp, position);
+            // Only keep a parse that succeeded and consumed all the input; a partial
+            // match must not leak out of the loop as if it were a valid result.
+            if (candidate != null && position.getIndex() == trimmedDatestamp.length()) {
+                date = candidate;
+                precision = entry.getValue();
+                break;
+            }
+        }
+        if (date == null) {
+            throw new ParseException("Invalid date.", 0);
+        }
+        Calendar calendar = Calendar.getInstance();
+        calendar.setTime(date);
+        // Calendar.MONTH starts at 0, hence the + 1
+        return Datamodel.makeTimeValue(calendar.get(Calendar.YEAR), (byte) (calendar.get(Calendar.MONTH) + 1),
+                (byte) calendar.get(Calendar.DAY_OF_MONTH), (byte) calendar.get(Calendar.HOUR_OF_DAY),
+                (byte) calendar.get(Calendar.MINUTE), (byte) calendar.get(Calendar.SECOND), (byte) precision, 0, 1,
+                0, TimeValue.CM_GREGORIAN_PRO);
+    }
+
+    /**
+     * @return the original datestamp
+     */
+    @JsonProperty("value")
+    public String getOrigDatestamp() {
+        return origDatestamp;
+    }
+
+    private void setOrigDatestamp(String origDatestamp) {
+        this.origDatestamp = origDatestamp;
+        try {
+            this.parsed = parse(origDatestamp);
+        } catch (ParseException e) {
+            throw new IllegalArgumentException("Invalid datestamp provided: " + origDatestamp, e);
+        }
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (!(other instanceof WbDateConstant)) {
+            return false;
+        }
+        WbDateConstant otherConstant = (WbDateConstant) other;
+        return origDatestamp.equals(otherConstant.getOrigDatestamp());
+    }
+
+    @Override
+    public int hashCode() {
+        return origDatestamp.hashCode();
+    }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateVariable.java
new file mode 100644
index 000000000..3e3675266
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateVariable.java
@@ -0,0 +1,68 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import java.text.ParseException;
+
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+
+import com.google.refine.model.Cell;
+
+/**
+ * An expression that represents a time value, extracted from a string. A number
+ * of formats are recognized, see {@link WbDateConstant} for details.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WbDateVariable extends WbVariableExpr<TimeValue> {
+
+    @JsonCreator
+    public WbDateVariable() {
+    }
+
+    public WbDateVariable(String columnName) {
+        setColumnName(columnName);
+    }
+
+    /** Parses the cell's string form as a date; throws SkipSchemaExpressionException if it cannot be parsed. */
+    @Override
+    public TimeValue fromCell(Cell cell, ExpressionContext ctxt)
+            throws SkipSchemaExpressionException {
+        try {
+            // TODO accept parsed dates (without converting them to strings)
+            return WbDateConstant.parse(cell.value.toString());
+        } catch (ParseException e) {
+            throw new SkipSchemaExpressionException();
+        }
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        return equalAsVariables(other, WbDateVariable.class);
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbExpression.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbExpression.java
new file mode 100644
index 000000000..eec20eae1
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbExpression.java
@@ -0,0 +1,60 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonSubTypes.Type;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+
+/**
+ * The base interface for all expressions, which evaluate to a particular type T
+ * in an ExpressionContext.
+ */
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type")
+@JsonSubTypes({ @Type(value = WbStringConstant.class, name = "wbstringconstant"),
+        @Type(value = WbStringVariable.class, name = "wbstringvariable"),
+        @Type(value = WbLocationConstant.class, name = "wblocationconstant"),
+        @Type(value = WbLocationVariable.class, name = "wblocationvariable"),
+        @Type(value = WbItemConstant.class, name = "wbitemconstant"),
+        @Type(value = WbItemVariable.class, name = "wbitemvariable"),
+        @Type(value = WbLanguageConstant.class, name = "wblanguageconstant"),
+        @Type(value = WbLanguageVariable.class, name = "wblanguagevariable"),
+        @Type(value = WbDateConstant.class, name = "wbdateconstant"),
+        @Type(value = WbDateVariable.class, name = "wbdatevariable"),
+        @Type(value = WbMonolingualExpr.class, name = "wbmonolingualexpr"),
+        @Type(value = WbPropConstant.class, name = "wbpropconstant"),
+        @Type(value = WbQuantityExpr.class, name = "wbquantityexpr") })
+public interface WbExpression<T> {
+
+    /**
+     * Evaluates the value expression in a given context, returns a Wikibase value
+     * suitable to be the target of a claim.
+     *
+     * @param ctxt the context (row, columns, base IRI) the expression is evaluated in
+     * @throws SkipSchemaExpressionException raised by implementations that cannot produce a value
+     */
+    public T evaluate(ExpressionContext ctxt) throws SkipSchemaExpressionException;
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemConstant.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemConstant.java
new file mode 100644
index 000000000..19ee070a7
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemConstant.java
@@ -0,0 +1,77 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.jsoup.helper.Validate;
+import org.openrefine.wikidata.schema.entityvalues.SuggestedItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Represents an item that does not vary, it is independent of the row.
+ */
+public class WbItemConstant implements WbExpression<ItemIdValue> {
+    private String qid;
+    private String label;
+
+    @JsonCreator
+    public WbItemConstant(@JsonProperty("qid") String qid, @JsonProperty("label") String label) {
+        Validate.notNull(qid);
+        Validate.notNull(label);
+        this.qid = qid;
+        this.label = label;
+    }
+
+    /** Builds the item value from the stored qid and label and the base IRI of the context. */
+    @Override
+    public ItemIdValue evaluate(ExpressionContext ctxt) {
+        return new SuggestedItemIdValue(qid, ctxt.getBaseIRI(), label);
+    }
+
+    @JsonProperty("qid")
+    public String getQid() {
+        return qid;
+    }
+
+    @JsonProperty("label")
+    public String getLabel() {
+        return label;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (!(other instanceof WbItemConstant)) {
+            return false;
+        }
+        WbItemConstant otherConstant = (WbItemConstant) other;
+        return (qid.equals(otherConstant.getQid()) && label.equals(otherConstant.getLabel()));
+    }
+
+    @Override
+    public int hashCode() {
+        return qid.hashCode() + label.hashCode();
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemDocumentExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemDocumentExpr.java
new file mode 100644
index 000000000..dc198adba
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemDocumentExpr.java
@@ -0,0 +1,122 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.jsoup.helper.Validate;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.openrefine.wikidata.utils.JacksonJsonizable;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+
+/**
+ * The representation of an item document, which can contain variables both for
+ * its own id and in its contents.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+@JsonTypeInfo(use = JsonTypeInfo.Id.NONE)
+public class WbItemDocumentExpr extends JacksonJsonizable implements WbExpression<ItemUpdate> {
+
+    private WbExpression<? extends ItemIdValue> subject;
+    private List<WbNameDescExpr> nameDescs;
+    private List<WbStatementGroupExpr> statementGroups;
+
+    /**
+     * Builds the expression from its JSON properties.
+     *
+     * @param subjectExpr
+     *            the expression yielding the id of the item, must not be null
+     * @param nameDescExprs
+     *            the term expressions; null is treated as an empty list
+     * @param statementGroupExprs
+     *            the statement group expressions; null is treated as an empty
+     *            list
+     */
+    @JsonCreator
+    public WbItemDocumentExpr(@JsonProperty("subject") WbExpression<? extends ItemIdValue> subjectExpr,
+            @JsonProperty("nameDescs") List<WbNameDescExpr> nameDescExprs,
+            @JsonProperty("statementGroups") List<WbStatementGroupExpr> statementGroupExprs) {
+        Validate.notNull(subjectExpr);
+        this.subject = subjectExpr;
+        if (nameDescExprs == null) {
+            nameDescExprs = Collections.emptyList();
+        }
+        this.nameDescs = nameDescExprs;
+        if (statementGroupExprs == null) {
+            statementGroupExprs = Collections.emptyList();
+        }
+        this.statementGroups = statementGroupExprs;
+    }
+
+    /**
+     * Evaluates the subject, then gathers the statements and terms produced by
+     * the sub-expressions into a single update on that subject.
+     *
+     * @param ctxt
+     *            the evaluation context
+     * @return the update built for the evaluated subject
+     * @throws SkipSchemaExpressionException
+     *             if the subject itself cannot be evaluated
+     */
+    @Override
+    public ItemUpdate evaluate(ExpressionContext ctxt)
+            throws SkipSchemaExpressionException {
+        ItemIdValue subjectId = getSubject().evaluate(ctxt);
+        ItemUpdateBuilder update = new ItemUpdateBuilder(subjectId);
+        for (WbStatementGroupExpr expr : getStatementGroups()) {
+            try {
+                for (Statement s : expr.evaluate(ctxt, subjectId).getStatements()) {
+                    update.addStatement(s);
+                }
+            } catch (SkipSchemaExpressionException e) {
+                // A statement group that cannot be evaluated is left out; the
+                // remaining groups are still processed.
+                continue;
+            }
+        }
+        for (WbNameDescExpr expr : getNameDescs()) {
+            expr.contributeTo(update, ctxt);
+        }
+        return update.build();
+    }
+
+    /**
+     * @return the expression yielding the id of the item
+     */
+    @JsonProperty("subject")
+    public WbExpression<? extends ItemIdValue> getSubject() {
+        return subject;
+    }
+
+    /**
+     * @return the term expressions, never null
+     */
+    @JsonProperty("nameDescs")
+    public List<WbNameDescExpr> getNameDescs() {
+        return nameDescs;
+    }
+
+    /**
+     * @return the statement group expressions, never null
+     */
+    @JsonProperty("statementGroups")
+    public List<WbStatementGroupExpr> getStatementGroups() {
+        return statementGroups;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (other == null || !WbItemDocumentExpr.class.isInstance(other)) {
+            return false;
+        }
+        WbItemDocumentExpr otherExpr = (WbItemDocumentExpr) other;
+        return subject.equals(otherExpr.getSubject()) && nameDescs.equals(otherExpr.getNameDescs())
+                && statementGroups.equals(otherExpr.getStatementGroups());
+    }
+
+    @Override
+    public int hashCode() {
+        return subject.hashCode() + nameDescs.hashCode() + statementGroups.hashCode();
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemVariable.java
new file mode 100644
index 000000000..00c634580
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbItemVariable.java
@@ -0,0 +1,81 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.openrefine.wikidata.qa.QAWarning;
+import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+
+import com.google.refine.model.Cell;
+import com.google.refine.model.Recon.Judgment;
+
+/**
+ * An item that depends on a reconciled value in a column.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WbItemVariable extends WbVariableExpr {
+
+    @JsonCreator
+    public WbItemVariable() {
+
+    }
+
+    /**
+     * Constructs a variable and sets the column it is bound to. Mostly used as a
+     * convenience method for testing.
+     *
+     * @param columnName
+     *            the name of the column the expression should draw its value from
+     */
+    public WbItemVariable(String columnName) {
+        setColumnName(columnName);
+    }
+
+    /**
+     * Turns a reconciled cell into an item id.
+     *
+     * @param cell
+     *            the cell to read the reconciliation data from
+     * @param ctxt
+     *            the evaluation context, which receives a warning when the cell
+     *            is reconciled against another identifier space
+     * @return the item designated by the cell's recon
+     * @throws SkipSchemaExpressionException
+     *             if the cell has no recon, if its judgment is neither Matched
+     *             nor New, or if its identifier space is not
+     *             {@link Datamodel#SITE_WIKIDATA}
+     */
+    @Override
+    public ItemIdValue fromCell(Cell cell, ExpressionContext ctxt)
+            throws SkipSchemaExpressionException {
+        if (cell.recon != null
+                && (Judgment.Matched.equals(cell.recon.judgment) || Judgment.New.equals(cell.recon.judgment))) {
+            // Compare from the constant side so that a recon without an
+            // identifier space is reported like any other foreign recon
+            // instead of failing with a NullPointerException.
+            if (!Datamodel.SITE_WIKIDATA.equals(cell.recon.identifierSpace)) {
+                QAWarning warning = new QAWarning("invalid-identifier-space", null, QAWarning.Severity.INFO, 1);
+                warning.setProperty("example_cell", cell.value.toString());
+                ctxt.addWarning(warning);
+                throw new SkipSchemaExpressionException();
+            }
+            return new ReconItemIdValue(cell.recon, cell.value.toString());
+        }
+        throw new SkipSchemaExpressionException();
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        return equalAsVariables(other, WbItemVariable.class);
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLanguageConstant.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLanguageConstant.java
new file mode 100644
index 000000000..a6259b30d
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLanguageConstant.java
@@ -0,0 +1,105 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.apache.commons.lang.Validate;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.interfaces.WikimediaLanguageCodes;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * A constant that represents a Wikimedia language code.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WbLanguageConstant implements WbExpression {
+
+ protected String _langId;
+ protected String _langLabel;
+
+ @JsonCreator
+ public WbLanguageConstant(@JsonProperty("id") String langId, @JsonProperty("label") String langLabel) {
+ _langId = normalizeLanguageCode(langId);
+ Validate.notNull(_langId, "A valid language code must be provided.");
+ Validate.notNull(langLabel);
+ _langLabel = langLabel;
+ }
+
+ /**
+ * Checks that a language code is valid and returns its preferred version
+ * (converting deprecated language codes to their better values).
+ *
+ * @param lang
+ * a Wikimedia language code
+ * @return the normalized code, or null if the code is invalid.
+ */
+ public static String normalizeLanguageCode(String lang) {
+ try {
+ WikimediaLanguageCodes.getLanguageCode(lang);
+ return WikimediaLanguageCodes.fixLanguageCodeIfDeprecated(lang);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ }
+
+ @Override
+ public String evaluate(ExpressionContext ctxt)
+ throws SkipSchemaExpressionException {
+ return _langId;
+ }
+
+ /**
+ * @return the language code for this language
+ */
+ @JsonProperty("id")
+ public String getLang() {
+ return _langId;
+ }
+
+ /**
+ * @return the name of the language in itself
+ */
+ @JsonProperty("label")
+ public String getLabel() {
+ return _langLabel;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null || !WbLanguageConstant.class.isInstance(other)) {
+ return false;
+ }
+ WbLanguageConstant otherConstant = (WbLanguageConstant) other;
+ return _langId.equals(otherConstant.getLang()) && _langLabel.equals(otherConstant.getLabel());
+ }
+
+ @Override
+ public int hashCode() {
+ return _langId.hashCode();
+ }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLanguageVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLanguageVariable.java
new file mode 100644
index 000000000..fa46f562a
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLanguageVariable.java
@@ -0,0 +1,71 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+
+import com.google.refine.model.Cell;
+
+/**
+ * A variable that reads a language code from a cell. The cell content is
+ * checked and normalized with
+ * {@link WbLanguageConstant#normalizeLanguageCode(String)}, so deprecated codes
+ * are replaced on the fly and invalid ones are skipped.
+ */
+public class WbLanguageVariable extends WbVariableExpr {
+
+    @JsonCreator
+    public WbLanguageVariable() {
+    }
+
+    /**
+     * Creates a variable bound to the given column. Mostly useful in tests.
+     *
+     * @param columnName
+     *            the name of the column the expression should draw its value from
+     */
+    public WbLanguageVariable(String columnName) {
+        setColumnName(columnName);
+    }
+
+    /**
+     * Extracts a normalized language code from a cell.
+     *
+     * @param cell
+     *            the cell holding the language code
+     * @param ctxt
+     *            the evaluation context (not used here)
+     * @return the normalized language code
+     * @throws SkipSchemaExpressionException
+     *             if the cell value is null or empty, or if it is not a valid
+     *             language code
+     */
+    @Override
+    public String fromCell(Cell cell, ExpressionContext ctxt)
+            throws SkipSchemaExpressionException {
+        if (cell.value == null || cell.value.toString().isEmpty()) {
+            throw new SkipSchemaExpressionException();
+        }
+        String trimmed = cell.value.toString().trim();
+        String languageCode = WbLanguageConstant.normalizeLanguageCode(trimmed);
+        if (languageCode == null) {
+            throw new SkipSchemaExpressionException();
+        }
+        return languageCode;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        return equalAsVariables(other, WbLanguageVariable.class);
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLocationConstant.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLocationConstant.java
new file mode 100644
index 000000000..fd64a56d5
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLocationConstant.java
@@ -0,0 +1,114 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import java.text.ParseException;
+
+import org.apache.commons.lang.Validate;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * A constant for a geographical location. The accepted format is lat,lng or
+ * lat/lng, optionally followed by a third component giving the precision.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WbLocationConstant implements WbExpression {
+
+    public static final double defaultPrecision = GlobeCoordinatesValue.PREC_TEN_MICRO_DEGREE;
+
+    private String value;
+    private GlobeCoordinatesValue parsed;
+
+    /**
+     * Builds the constant from its textual form, which is parsed eagerly.
+     *
+     * @param origValue
+     *            the textual coordinates, must not be null
+     * @throws ParseException
+     *             if the text cannot be parsed by {@link #parse(String)}
+     */
+    @JsonCreator
+    public WbLocationConstant(@JsonProperty("value") String origValue) throws ParseException {
+        Validate.notNull(origValue);
+        GlobeCoordinatesValue coordinates = parse(origValue);
+        Validate.notNull(coordinates);
+        this.value = origValue;
+        this.parsed = coordinates;
+    }
+
+    /**
+     * Parses a string to a location. The string is split on commas and slashes
+     * into latitude, longitude and an optional precision; when the precision is
+     * absent, {@link #defaultPrecision} is used.
+     *
+     * @param expr
+     *            the string to parse
+     * @return the parsed location
+     * @throws ParseException
+     *             if the string does not have two or three components, or if a
+     *             component is not a number
+     */
+    public static GlobeCoordinatesValue parse(String expr)
+            throws ParseException {
+        String[] components = expr.split("[,/]");
+        if (components.length < 2 || components.length > 3) {
+            throw new ParseException("Invalid globe coordinates", 0);
+        }
+        try {
+            double latitude = Double.parseDouble(components[0]);
+            double longitude = Double.parseDouble(components[1]);
+            double precision = components.length == 3 ? Double.parseDouble(components[2]) : defaultPrecision;
+            return Datamodel.makeGlobeCoordinatesValue(latitude, longitude, precision,
+                    GlobeCoordinatesValue.GLOBE_EARTH);
+        } catch (NumberFormatException e) {
+            throw new ParseException("Invalid globe coordinates", 0);
+        }
+    }
+
+    /**
+     * Returns the coordinates parsed at construction time, independently of the
+     * context.
+     */
+    @Override
+    public GlobeCoordinatesValue evaluate(ExpressionContext ctxt)
+            throws SkipSchemaExpressionException {
+        return parsed;
+    }
+
+    /**
+     * @return the textual form this constant was built from
+     */
+    @JsonProperty("value")
+    public String getValue() {
+        return value;
+    }
+
+    /**
+     * Two constants are equal when they were built from equal strings.
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (!(other instanceof WbLocationConstant)) {
+            return false;
+        }
+        WbLocationConstant that = (WbLocationConstant) other;
+        return value.equals(that.getValue());
+    }
+
+    @Override
+    public int hashCode() {
+        return value.hashCode();
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLocationVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLocationVariable.java
new file mode 100644
index 000000000..7423d6268
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLocationVariable.java
@@ -0,0 +1,61 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import java.text.ParseException;
+
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+
+import com.google.refine.model.Cell;
+
+/**
+ * A variable that reads geographical coordinates from a cell, using the format
+ * understood by {@link WbLocationConstant#parse(String)}.
+ */
+public class WbLocationVariable extends WbVariableExpr {
+
+    @JsonCreator
+    public WbLocationVariable() {
+
+    }
+
+    /**
+     * Constructs a variable and sets the column it is bound to.
+     *
+     * @param columnName
+     *            the name of the column the expression should draw its value from
+     */
+    public WbLocationVariable(String columnName) {
+        setColumnName(columnName);
+    }
+
+    /**
+     * Parses the content of a cell as coordinates.
+     *
+     * @param cell
+     *            the cell holding the coordinates
+     * @param ctxt
+     *            the evaluation context (not used here)
+     * @return the parsed coordinates
+     * @throws SkipSchemaExpressionException
+     *             if the cell has no value or if its value cannot be parsed
+     */
+    @Override
+    public GlobeCoordinatesValue fromCell(Cell cell, ExpressionContext ctxt)
+            throws SkipSchemaExpressionException {
+        // A cell without a value is skipped rather than dereferenced, in the
+        // same way WbLanguageVariable treats it.
+        if (cell.value == null) {
+            throw new SkipSchemaExpressionException();
+        }
+        String expr = cell.value.toString();
+        try {
+            return WbLocationConstant.parse(expr);
+        } catch (ParseException e) {
+            throw new SkipSchemaExpressionException();
+        }
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        return equalAsVariables(other, WbLocationVariable.class);
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java
new file mode 100644
index 000000000..ef7622f2d
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbMonolingualExpr.java
@@ -0,0 +1,89 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.apache.commons.lang.Validate;
+import org.openrefine.wikidata.qa.QAWarning;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.StringValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * An expression that combines a language expression and a text expression into
+ * a monolingual text value.
+ */
+public class WbMonolingualExpr implements WbExpression<MonolingualTextValue> {
+
+    private WbExpression<? extends String> languageExpr;
+    private WbExpression<? extends StringValue> valueExpr;
+
+    /**
+     * Builds the expression from its JSON properties.
+     *
+     * @param languageExpr
+     *            the expression yielding the language code, must not be null
+     * @param valueExpr
+     *            the expression yielding the text, must not be null
+     */
+    @JsonCreator
+    public WbMonolingualExpr(@JsonProperty("language") WbExpression<? extends String> languageExpr,
+            @JsonProperty("value") WbExpression<? extends StringValue> valueExpr) {
+        Validate.notNull(languageExpr);
+        this.languageExpr = languageExpr;
+        Validate.notNull(valueExpr);
+        this.valueExpr = valueExpr;
+    }
+
+    /**
+     * Evaluates the text first, then the language, and pairs them.
+     *
+     * @param ctxt
+     *            the evaluation context, which receives a warning when a text
+     *            was found but its language could not be evaluated
+     * @return the monolingual text value
+     * @throws SkipSchemaExpressionException
+     *             if either the text or the language cannot be evaluated
+     */
+    @Override
+    public MonolingualTextValue evaluate(ExpressionContext ctxt)
+            throws SkipSchemaExpressionException {
+        // Evaluated outside the try block: a missing text is skipped silently,
+        // only a missing language for an existing text is worth a warning.
+        String text = getValueExpr().evaluate(ctxt).getString();
+        try {
+            String lang = getLanguageExpr().evaluate(ctxt);
+            return Datamodel.makeMonolingualTextValue(text, lang);
+
+        } catch (SkipSchemaExpressionException e) {
+            QAWarning warning = new QAWarning("monolingual-text-without-language", null, QAWarning.Severity.WARNING, 1);
+            warning.setProperty("example_text", text);
+            ctxt.addWarning(warning);
+            throw new SkipSchemaExpressionException();
+        }
+    }
+
+    /**
+     * @return the expression yielding the language code
+     */
+    @JsonProperty("language")
+    public WbExpression<? extends String> getLanguageExpr() {
+        return languageExpr;
+    }
+
+    /**
+     * @return the expression yielding the text
+     */
+    @JsonProperty("value")
+    public WbExpression<? extends StringValue> getValueExpr() {
+        return valueExpr;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (other == null || !WbMonolingualExpr.class.isInstance(other)) {
+            return false;
+        }
+        WbMonolingualExpr otherExpr = (WbMonolingualExpr) other;
+        return languageExpr.equals(otherExpr.getLanguageExpr()) && valueExpr.equals(otherExpr.getValueExpr());
+    }
+
+    @Override
+    public int hashCode() {
+        return languageExpr.hashCode() + valueExpr.hashCode();
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbNameDescExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbNameDescExpr.java
new file mode 100644
index 000000000..3e4a0c5dc
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbNameDescExpr.java
@@ -0,0 +1,112 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.jsoup.helper.Validate;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * An expression that represent a term (label, description or alias). The
+ * structure is slightly different from other expressions because we need to
+ * call different methods on {@link ItemUpdateBuilder}.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class WbNameDescExpr {
+
+ enum NameDescrType {
+ LABEL, DESCRIPTION, ALIAS,
+ }
+
+ private NameDescrType type;
+ private WbMonolingualExpr value;
+
+ @JsonCreator
+ public WbNameDescExpr(@JsonProperty("name_type") NameDescrType type,
+ @JsonProperty("value") WbMonolingualExpr value) {
+ Validate.notNull(type);
+ this.type = type;
+ Validate.notNull(value);
+ this.value = value;
+ }
+
+ /**
+ * Evaluates the expression and adds the result to the item update.
+ *
+ * @param item
+ * the item update where the term should be stored
+ * @param ctxt
+ * the evaluation context for the expression
+ */
+ public void contributeTo(ItemUpdateBuilder item, ExpressionContext ctxt) {
+ try {
+ MonolingualTextValue val = getValue().evaluate(ctxt);
+ switch (getType()) {
+ case LABEL:
+ item.addLabel(val);
+ break;
+ case DESCRIPTION:
+ item.addDescription(val);
+ break;
+ case ALIAS:
+ item.addAlias(val);
+ break;
+ }
+ } catch (SkipSchemaExpressionException e) {
+ return;
+ }
+ }
+
+ @JsonProperty("name_type")
+ public NameDescrType getType() {
+ return type;
+ }
+
+ @JsonProperty("value")
+ public WbMonolingualExpr getValue() {
+ return value;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null || !WbNameDescExpr.class.isInstance(other)) {
+ return false;
+ }
+ WbNameDescExpr otherExpr = (WbNameDescExpr) other;
+ return type.equals(otherExpr.getType()) && value.equals(otherExpr.getValue());
+ }
+
+ @Override
+ public int hashCode() {
+ return type.hashCode() + value.hashCode();
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbPropConstant.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbPropConstant.java
new file mode 100644
index 000000000..d40adc45f
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbPropConstant.java
@@ -0,0 +1,89 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.jsoup.helper.Validate;
+import org.openrefine.wikidata.schema.entityvalues.SuggestedPropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * A constant property, that does not change depending on the row
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WbPropConstant implements WbExpression {
+
+ private String pid;
+ private String label;
+ private String datatype;
+
+ @JsonCreator
+ public WbPropConstant(@JsonProperty("pid") String pid, @JsonProperty("label") String label,
+ @JsonProperty("datatype") String datatype) {
+ Validate.notNull(pid);
+ this.pid = pid;
+ Validate.notNull(label);
+ this.label = label;
+ this.datatype = datatype;
+ }
+
+ @Override
+ public PropertyIdValue evaluate(ExpressionContext ctxt) {
+ return new SuggestedPropertyIdValue(pid, ctxt.getBaseIRI(), label);
+ }
+
+ @JsonProperty("pid")
+ public String getPid() {
+ return pid;
+ }
+
+ @JsonProperty("label")
+ public String getLabel() {
+ return label;
+ }
+
+ @JsonProperty("datatype")
+ public String getDatatype() {
+ return datatype;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null || !WbPropConstant.class.isInstance(other)) {
+ return false;
+ }
+ WbPropConstant otherConstant = (WbPropConstant) other;
+ return pid.equals(otherConstant.getPid()) && label.equals(otherConstant.getLabel())
+ && datatype.equals(otherConstant.getDatatype());
+ }
+
+ @Override
+ public int hashCode() {
+ return pid.hashCode() + label.hashCode();
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbQuantityExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbQuantityExpr.java
new file mode 100644
index 000000000..94521cbd0
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbQuantityExpr.java
@@ -0,0 +1,104 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import java.math.BigDecimal;
+
+import org.apache.commons.lang.Validate;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
+import org.wikidata.wdtk.datamodel.interfaces.StringValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+public class WbQuantityExpr implements WbExpression { // expression evaluating to a QuantityValue
+
+ private final WbExpression extends StringValue> amountExpr; // mandatory: checked in the constructor
+ private final WbExpression extends ItemIdValue> unitExpr; // null means a quantity without unit
+
+ /**
+ * Creates an expression for a quantity, which contains two sub-expressions: one
+ * for the amount (a string with a particular format) and one for the unit,
+ * which is optional.
+ *
+ * Setting unitExpr to null will give quantities without units. Setting it to a
+ * non-null value will make the unit mandatory: if the unit expression fails to
+ * evaluate, the whole quantity expression will fail too.
+ */
+ @JsonCreator
+ public WbQuantityExpr(@JsonProperty("amount") WbExpression extends StringValue> amountExpr,
+ @JsonProperty("unit") WbExpression extends ItemIdValue> unitExpr) {
+ Validate.notNull(amountExpr);
+ this.amountExpr = amountExpr;
+ this.unitExpr = unitExpr;
+ }
+
+ @Override
+ public QuantityValue evaluate(ExpressionContext ctxt)
+ throws SkipSchemaExpressionException {
+ StringValue amount = getLanguageExpr().evaluate(ctxt);
+ // we know the amount is nonnull, nonempty here
+
+ BigDecimal parsedAmount = null;
+ BigDecimal lowerBound = null;
+ BigDecimal upperBound = null;
+ try {
+ String originalAmount = amount.getString().toUpperCase();
+ parsedAmount = new BigDecimal(originalAmount);
+
+ // the bounds stay null unless the amount was written with an exponent
+ if (originalAmount.contains("E")) {
+ // scientific (E) notation: we derive the precision from the expression
+ // (feature!), as half a unit of the last digit that was written
+ BigDecimal uncertainty = new BigDecimal("0.5").scaleByPowerOfTen(-parsedAmount.scale());
+ lowerBound = new BigDecimal(parsedAmount.subtract(uncertainty).toPlainString());
+ upperBound = new BigDecimal(parsedAmount.add(uncertainty).toPlainString());
+ }
+ // workaround for https://github.com/Wikidata/Wikidata-Toolkit/issues/341
+ parsedAmount = new BigDecimal(parsedAmount.toPlainString());
+ } catch (NumberFormatException e) { // the amount string is not a parsable decimal number
+ throw new SkipSchemaExpressionException();
+ }
+
+ if (getUnitExpr() != null) {
+ ItemIdValue unit = getUnitExpr().evaluate(ctxt); // a skipped unit skips the whole quantity
+ return Datamodel.makeQuantityValue(parsedAmount, lowerBound, upperBound, unit.getIri());
+ }
+
+ return Datamodel.makeQuantityValue(parsedAmount, lowerBound, upperBound);
+ }
+
+ @JsonProperty("amount")
+ public WbExpression extends StringValue> getLanguageExpr() { // NOTE: despite its name, returns the amount expression
+ return amountExpr;
+ }
+
+ @JsonProperty("unit")
+ public WbExpression extends ItemIdValue> getUnitExpr() { // may return null (no unit)
+ return unitExpr;
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbReferenceExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbReferenceExpr.java
new file mode 100644
index 000000000..ca23aa03f
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbReferenceExpr.java
@@ -0,0 +1,97 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang.Validate;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.Reference;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+
+/**
+ * An expression for a reference (list of reference snaks). Snak expressions that
+ * are skipped are left out; if none can be evaluated, the reference is skipped.
+ *
+ * @author Antonin Delpeuch
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+@JsonTypeInfo(use = JsonTypeInfo.Id.NONE)
+public class WbReferenceExpr implements WbExpression {
+
+ private List snakExprs; // never null: checked in the constructor
+
+ @JsonCreator
+ public WbReferenceExpr(@JsonProperty("snaks") List snakExprs) {
+ Validate.notNull(snakExprs);
+ this.snakExprs = snakExprs;
+ }
+
+ @Override
+ public Reference evaluate(ExpressionContext ctxt)
+ throws SkipSchemaExpressionException {
+ List snakGroups = new ArrayList();
+ for (WbSnakExpr expr : getSnaks()) {
+ List snakList = new ArrayList(1); // every snak goes into a group of its own
+ try {
+ snakList.add(expr.evaluate(ctxt));
+ snakGroups.add(Datamodel.makeSnakGroup(snakList));
+ } catch (SkipSchemaExpressionException e) {
+ continue; // this snak could not be evaluated: leave it out of the reference
+ }
+ }
+ if (!snakGroups.isEmpty()) {
+ return Datamodel.makeReference(snakGroups);
+ } else {
+ throw new SkipSchemaExpressionException(); // no snak at all could be evaluated
+ }
+ }
+
+ @JsonProperty("snaks")
+ public List getSnaks() {
+ return snakExprs;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null || !WbReferenceExpr.class.isInstance(other)) {
+ return false;
+ }
+ WbReferenceExpr otherExpr = (WbReferenceExpr) other;
+ return snakExprs.equals(otherExpr.getSnaks()); // equality is that of the snak expression lists
+ }
+
+ @Override
+ public int hashCode() {
+ return snakExprs.hashCode();
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbSnakExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbSnakExpr.java
new file mode 100644
index 000000000..a9b33b952
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbSnakExpr.java
@@ -0,0 +1,92 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.jsoup.helper.Validate;
+import org.openrefine.wikidata.schema.entityvalues.FullyPropertySerializingValueSnak;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+
+/**
+ * An expression for a snak (pair of property and value). Both sub-expressions are
+ * mandatory; if either of them is skipped, the snak is skipped as well.
+ *
+ * @author Antonin Delpeuch
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+@JsonTypeInfo(use = JsonTypeInfo.Id.NONE)
+public class WbSnakExpr implements WbExpression {
+
+ private WbExpression extends PropertyIdValue> prop; // never null: checked in the constructor
+ private WbExpression extends Value> value; // never null: checked in the constructor
+
+ @JsonCreator
+ public WbSnakExpr(@JsonProperty("prop") WbExpression extends PropertyIdValue> propExpr,
+ @JsonProperty("value") WbExpression extends Value> valueExpr) {
+ Validate.notNull(propExpr);
+ this.prop = propExpr;
+ Validate.notNull(valueExpr);
+ this.value = valueExpr;
+ }
+
+ @Override
+ public Snak evaluate(ExpressionContext ctxt)
+ throws SkipSchemaExpressionException {
+ PropertyIdValue propertyId = getProp().evaluate(ctxt); // the property is evaluated first
+ Value evaluatedValue = value.evaluate(ctxt); // a skip from either sub-expression propagates to the caller
+ return new FullyPropertySerializingValueSnak(propertyId, evaluatedValue);
+ }
+
+ @JsonProperty("prop")
+ public WbExpression extends PropertyIdValue> getProp() {
+ return prop;
+ }
+
+ @JsonProperty("value")
+ public WbExpression extends Value> getValue() {
+ return value;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null || !WbSnakExpr.class.isInstance(other)) {
+ return false;
+ }
+ WbSnakExpr otherExpr = (WbSnakExpr) other;
+ return prop.equals(otherExpr.getProp()) && value.equals(otherExpr.getValue());
+ }
+
+ @Override
+ public int hashCode() {
+ return prop.hashCode() + value.hashCode(); // built from the same two fields that equals compares
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementExpr.java
new file mode 100644
index 000000000..7ef7c1bd9
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementExpr.java
@@ -0,0 +1,147 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.jsoup.helper.Validate;
+import org.openrefine.wikidata.qa.QAWarning;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.Claim;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Reference;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class WbStatementExpr { // expression for one statement: a main value plus qualifiers and references
+
+ private WbExpression extends Value> mainSnakValueExpr; // never null: checked in the constructor
+ private List qualifierExprs; // never null: defaults to an empty list
+ private List referenceExprs; // never null: defaults to an empty list
+
+ @JsonCreator
+ public WbStatementExpr(@JsonProperty("value") WbExpression extends Value> mainSnakValueExpr,
+ @JsonProperty("qualifiers") List qualifierExprs,
+ @JsonProperty("references") List referenceExprs) {
+ Validate.notNull(mainSnakValueExpr);
+ this.mainSnakValueExpr = mainSnakValueExpr;
+ if (qualifierExprs == null) { // null is treated as "no qualifiers"
+ qualifierExprs = Collections.emptyList();
+ }
+ this.qualifierExprs = qualifierExprs;
+ if (referenceExprs == null) { // null is treated as "no references"
+ referenceExprs = Collections.emptyList();
+ }
+ this.referenceExprs = referenceExprs;
+ }
+
+ public static List groupSnaks(List snaks) { // wraps each snak into its own single-element group, in order
+ List snakGroups = new ArrayList();
+ for (Snak snak : snaks) {
+ List singleton = new ArrayList();
+ singleton.add(snak);
+ snakGroups.add(Datamodel.makeSnakGroup(singleton));
+ }
+ return snakGroups;
+ }
+
+ public Statement evaluate(ExpressionContext ctxt, ItemIdValue subject, PropertyIdValue propertyId)
+ throws SkipSchemaExpressionException {
+ Value mainSnakValue = getMainsnak().evaluate(ctxt); // a skipped main value skips the whole statement
+ Snak mainSnak = Datamodel.makeValueSnak(propertyId, mainSnakValue);
+
+ // evaluate qualifiers
+ List qualifiers = new ArrayList(getQualifiers().size());
+ for (WbSnakExpr qExpr : getQualifiers()) {
+ try {
+ qualifiers.add(qExpr.evaluate(ctxt));
+ } catch (SkipSchemaExpressionException e) { // the qualifier is dropped and reported as an INFO warning
+ QAWarning warning = new QAWarning("ignored-qualifiers", null, QAWarning.Severity.INFO, 1);
+ warning.setProperty("example_entity", subject);
+ warning.setProperty("example_property_entity", mainSnak.getPropertyId());
+ ctxt.addWarning(warning);
+ }
+ }
+ List groupedQualifiers = groupSnaks(qualifiers);
+ Claim claim = Datamodel.makeClaim(subject, mainSnak, groupedQualifiers);
+
+ // evaluate references
+ List references = new ArrayList();
+ for (WbReferenceExpr rExpr : getReferences()) {
+ try {
+ references.add(rExpr.evaluate(ctxt));
+ } catch (SkipSchemaExpressionException e) { // the reference is dropped and reported as an INFO warning
+ QAWarning warning = new QAWarning("ignored-references", null, QAWarning.Severity.INFO, 1);
+ warning.setProperty("example_entity", subject);
+ warning.setProperty("example_property_entity", mainSnak.getPropertyId());
+ ctxt.addWarning(warning);
+ }
+ }
+
+ StatementRank rank = StatementRank.NORMAL; // every generated statement gets the NORMAL rank
+ return Datamodel.makeStatement(claim, references, rank, ""); // NOTE(review): "" is presumably the statement id - confirm
+ }
+
+ @JsonProperty("value")
+ public WbExpression extends Value> getMainsnak() { // returns the expression for the main snak's value
+ return mainSnakValueExpr;
+ }
+
+ @JsonProperty("qualifiers")
+ public List getQualifiers() {
+ return qualifierExprs;
+ }
+
+ @JsonProperty("references")
+ public List getReferences() {
+ return referenceExprs;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null || !WbStatementExpr.class.isInstance(other)) {
+ return false;
+ }
+ WbStatementExpr otherExpr = (WbStatementExpr) other;
+ return mainSnakValueExpr.equals(otherExpr.getMainsnak()) && qualifierExprs.equals(otherExpr.getQualifiers())
+ && referenceExprs.equals(otherExpr.getReferences());
+ }
+
+ @Override
+ public int hashCode() {
+ return mainSnakValueExpr.hashCode() + qualifierExprs.hashCode() + referenceExprs.hashCode();
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementGroupExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementGroupExpr.java
new file mode 100644
index 000000000..724c8656d
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStatementGroupExpr.java
@@ -0,0 +1,98 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jsoup.helper.Validate;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class WbStatementGroupExpr { // expression for a group of statements sharing one property
+
+ private WbExpression extends PropertyIdValue> propertyExpr; // never null: checked in the constructor
+ private List statementExprs; // never null nor empty: checked in the constructor
+
+ @JsonCreator
+ public WbStatementGroupExpr(@JsonProperty("property") WbExpression extends PropertyIdValue> propertyExpr,
+ @JsonProperty("statements") List claimExprs) {
+ Validate.notNull(propertyExpr);
+ this.propertyExpr = propertyExpr;
+ Validate.notNull(claimExprs);
+ Validate.isTrue(!claimExprs.isEmpty()); // a group needs at least one statement expression
+ this.statementExprs = claimExprs;
+ }
+
+ public StatementGroup evaluate(ExpressionContext ctxt, ItemIdValue subject)
+ throws SkipSchemaExpressionException {
+ PropertyIdValue propertyId = propertyExpr.evaluate(ctxt); // a skipped property skips the whole group
+ List statements = new ArrayList(statementExprs.size());
+ for (WbStatementExpr expr : statementExprs) {
+ try {
+ statements.add(expr.evaluate(ctxt, subject, propertyId));
+ } catch (SkipSchemaExpressionException e) {
+ continue; // this statement could not be evaluated: leave it out of the group
+ }
+ }
+ if (!statements.isEmpty()) {
+ return Datamodel.makeStatementGroup(statements);
+ } else {
+ throw new SkipSchemaExpressionException(); // no statement at all could be evaluated
+ }
+ }
+
+ @JsonProperty("property")
+ public WbExpression extends PropertyIdValue> getProperty() {
+ return propertyExpr;
+ }
+
+ @JsonProperty("statements")
+ public List getStatements() {
+ return statementExprs;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null || !WbStatementGroupExpr.class.isInstance(other)) {
+ return false;
+ }
+ WbStatementGroupExpr otherExpr = (WbStatementGroupExpr) other;
+ return propertyExpr.equals(otherExpr.getProperty()) && statementExprs.equals(otherExpr.getStatements());
+ }
+
+ @Override
+ public int hashCode() {
+ return propertyExpr.hashCode() + statementExprs.hashCode(); // built from the same two fields that equals compares
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java
new file mode 100644
index 000000000..7a1ddf9e3
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringConstant.java
@@ -0,0 +1,67 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.apache.commons.lang.Validate;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.StringValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+public class WbStringConstant implements WbExpression { // a fixed, non-empty string value
+
+ private String value;
+
+ @JsonCreator
+ public WbStringConstant(@JsonProperty("value") String value) {
+ Validate.notNull(value);
+ // for now empty strings are refused, because the variable counterpart of this expression skips them
+ Validate.isTrue(!value.isEmpty());
+ this.value = value;
+ }
+
+ @Override
+ public StringValue evaluate(ExpressionContext ctxt) {
+ return Datamodel.makeStringValue(this.value); // the context plays no role for a constant
+ }
+
+ @JsonProperty("value")
+ public String getValue() {
+ return this.value;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof WbStringConstant)) {
+ return false; // null or an object of an unrelated type
+ }
+ return this.value.equals(((WbStringConstant) other).getValue());
+ }
+
+ @Override
+ public int hashCode() {
+ return this.value.hashCode();
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java
new file mode 100644
index 000000000..cd02e7883
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java
@@ -0,0 +1,70 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.StringValue;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+
+import com.google.refine.model.Cell;
+
+/**
+ * A variable that returns a simple string value; cells with no value or an empty string are skipped.
+ *
+ * @author Antonin Delpeuch
+ */
+public class WbStringVariable extends WbVariableExpr {
+
+ @JsonCreator
+ public WbStringVariable() {
+ }
+
+ /**
+ * Constructs a variable and sets the column it is bound to. Mostly used as a
+ * convenience method for testing.
+ *
+ * @param columnName
+ * the name of the column the expression should draw its value from
+ */
+ public WbStringVariable(String columnName) {
+ setColumnName(columnName);
+ }
+
+ @Override
+ public StringValue fromCell(Cell cell, ExpressionContext ctxt)
+ throws SkipSchemaExpressionException {
+ String text = cell.value == null ? "" : cell.value.toString(); // a cell without value counts as empty
+ if (text.isEmpty()) {
+ throw new SkipSchemaExpressionException();
+ }
+ return Datamodel.makeStringValue(text);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ return equalAsVariables(other, WbStringVariable.class);
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbVariableExpr.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbVariableExpr.java
new file mode 100644
index 000000000..c86fb540c
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbVariableExpr.java
@@ -0,0 +1,125 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import com.google.refine.model.Cell;
+
+/**
+ * A base class for expressions which draw their values from a particular
+ * column.
+ *
+ * @author Antonin Delpeuch
+ *
+ * @param <T>
+ * the type of Wikibase value returned by the expression.
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public abstract class WbVariableExpr implements WbExpression {
+
+ private String columnName;
+
+ /**
+ * Constructs a variable without setting the column name yet.
+ */
+ @JsonCreator
+ public WbVariableExpr() {
+ columnName = null;
+ }
+
+ /**
+ * Returns the column name used by the variable.
+ *
+ * @return the OpenRefine column name, or null if it has not been set yet
+ */
+ @JsonProperty("columnName")
+ public String getColumnName() {
+ return columnName;
+ }
+
+ /**
+ * Changes the column name used by the variable. This is useful for
+ * deserialization, as well as updates when column names change.
+ */
+ @JsonProperty("columnName")
+ public void setColumnName(String columnName) {
+ this.columnName = columnName;
+ }
+
+ /**
+ * Evaluates the expression by converting the cell found under the column name; skipped if there is none.
+ */
+ @Override
+ public T evaluate(ExpressionContext ctxt)
+ throws SkipSchemaExpressionException {
+ Cell cell = ctxt.getCellByName(columnName);
+ if (cell != null) {
+ return fromCell(cell, ctxt);
+ }
+ throw new SkipSchemaExpressionException();
+ }
+
+ /**
+ * Method that should be implemented by subclasses, converting an OpenRefine
+ * cell to a Wikibase value. Access to other values and emitting warnings is
+ * possible via the supplied ExpressionContext object.
+ *
+ * @param cell
+ * the cell to convert
+ * @param ctxt
+ * the evaluation context
+ * @return the corresponding Wikibase value
+ */
+ public abstract T fromCell(Cell cell, ExpressionContext ctxt)
+ throws SkipSchemaExpressionException;
+
+ /**
+ * Helper for equality methods of subclasses. Column names are compared
+ * null-safely, as a variable may not have been bound to a column yet.
+ *
+ * @param other
+ * the object to compare
+ * @param targetClass
+ * the target class for equality
+ * @return true if other is an instance of targetClass with the same column name
+ */
+ protected boolean equalAsVariables(Object other, Class extends WbVariableExpr>> targetClass) {
+ if (other == null || !targetClass.isInstance(other)) {
+ return false;
+ }
+ String otherColumnName = targetClass.cast(other).getColumnName();
+ return columnName == null ? otherColumnName == null : columnName.equals(otherColumnName);
+ }
+
+ @Override
+ public int hashCode() {
+ return columnName == null ? 0 : columnName.hashCode(); // variables without a column name hash to 0
+ }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WikibaseSchema.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WikibaseSchema.java
new file mode 100644
index 000000000..6c5d98c45
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WikibaseSchema.java
@@ -0,0 +1,241 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+import org.openrefine.wikidata.qa.QAWarningStore;
+import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.databind.JsonMappingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import com.google.refine.browsing.Engine;
+import com.google.refine.browsing.FilteredRows;
+import com.google.refine.browsing.RowVisitor;
+import com.google.refine.model.OverlayModel;
+import com.google.refine.model.Project;
+import com.google.refine.model.Row;
+
+/**
+ * Main class representing a skeleton of Wikibase edits with OpenRefine columns
+ * as variables.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class WikibaseSchema implements OverlayModel {
+
+ final static Logger logger = LoggerFactory.getLogger("RdfSchema");
+
+ protected List<WbItemDocumentExpr> itemDocumentExprs = new ArrayList<WbItemDocumentExpr>(); // one expression per item document
+
+ protected String baseIri = "http://www.wikidata.org/entity/";
+
+ /**
+ * Constructor.
+ */
+ public WikibaseSchema() {
+
+ }
+
+ /**
+ * Constructor for deserialization via Jackson, fed from the "itemDocuments" property.
+ */
+ @JsonCreator
+ public WikibaseSchema(@JsonProperty("itemDocuments") List<WbItemDocumentExpr> exprs) {
+ this.itemDocumentExprs = exprs;
+ }
+
+ /**
+ * @return the site IRI of the Wikibase instance referenced by this schema
+ */
+ public String getBaseIri() {
+ return baseIri;
+ }
+
+ /**
+ * @return the list of document expressions for this schema (the list held by the schema, not a copy)
+ */
+ public List<WbItemDocumentExpr> getItemDocumentExpressions() {
+ return itemDocumentExprs;
+ }
+
+ public void setItemDocumentExpressions(List<WbItemDocumentExpr> exprs) {
+ this.itemDocumentExprs = exprs; // replaces the whole list; the argument is stored as is
+ }
+
+ /**
+ * Evaluates all item documents in a particular expression context. This
+ * specifies, among others, a row where the values of the variables will be
+ * read.
+ *
+ * @param ctxt
+ * the context in which the schema should be evaluated.
+ * @return the updates of the item documents that were not skipped, in schema order
+ */
+ public List<ItemUpdate> evaluateItemDocuments(ExpressionContext ctxt) {
+ List<ItemUpdate> result = new ArrayList<>();
+ for (WbItemDocumentExpr expr : itemDocumentExprs) {
+ // an expression that asks to be skipped simply contributes no update
+ try {
+ result.add(expr.evaluate(ctxt));
+ } catch (SkipSchemaExpressionException e) {
+ continue;
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Evaluates the schema on a project, returning a list of ItemUpdates generated
+ * by the schema.
+ *
+ * Some warnings will be emitted in the warning store: those are only the ones
+ * that are generated at evaluation time (such as invalid formats for dates).
+ * Issues detected on candidate statements (such as constraint violations) are
+ * not included at this stage.
+ *
+ * @param project
+ * the project on which the schema should be evaluated
+ * @param engine
+ * the engine, which gives access to the current facets
+ * @param warningStore
+ * a store in which issues will be emitted
+ * @return the item updates, in the order in which they were generated (not merged yet)
+ */
+ public List<ItemUpdate> evaluate(Project project, Engine engine, QAWarningStore warningStore) {
+ List<ItemUpdate> result = new ArrayList<>();
+ FilteredRows filteredRows = engine.getAllFilteredRows();
+ filteredRows.accept(project, new EvaluatingRowVisitor(result, warningStore));
+ return result;
+ }
+
+ /**
+ * Evaluates the schema on a project like the three-argument variant, passing null as the warning store.
+ */
+ public List<ItemUpdate> evaluate(Project project, Engine engine) {
+ return evaluate(project, engine, null);
+ }
+
+ protected class EvaluatingRowVisitor implements RowVisitor {
+ // Row visitor that evaluates the schema on every visited row and appends the updates to the supplied list.
+ private List<ItemUpdate> result;
+ private QAWarningStore warningStore;
+
+ public EvaluatingRowVisitor(List<ItemUpdate> result, QAWarningStore warningStore) {
+ this.result = result;
+ this.warningStore = warningStore;
+ }
+
+ @Override
+ public void start(Project project) {
+ // nothing to set up before the rows are visited
+ }
+
+ @Override
+ public boolean visit(Project project, int rowIndex, Row row) {
+ ExpressionContext ctxt = new ExpressionContext(baseIri, rowIndex, row, project.columnModel, warningStore);
+ result.addAll(evaluateItemDocuments(ctxt));
+ return false; // NOTE(review): false presumably means "keep visiting" - confirm against RowVisitor
+ }
+
+ @Override
+ public void end(Project project) {
+ // nothing to clean up once the rows have been visited
+ }
+ }
+
+ static public WikibaseSchema reconstruct(JSONObject o)
+ throws JSONException {
+ return reconstruct(o.toString());
+ }
+
+ static public WikibaseSchema reconstruct(String json) throws JSONException {
+ // Jackson does the parsing; its checked exceptions are translated into
+ // the JSONException declared by this method.
+ ObjectMapper mapper = new ObjectMapper();
+ try {
+ return mapper.readValue(json, WikibaseSchema.class);
+ } catch (IOException e) {
+ // JsonParseException and JsonMappingException are IOException subclasses,
+ // so a single handler covers parse errors, mapping errors and read failures.
+ throw new JSONException(e.toString());
+ }
+ }
+
+ @Override
+ public void write(JSONWriter writer, Properties options)
+ throws JSONException {
+ // Serialized form: an object whose "itemDocuments" key holds one entry per expression.
+ writer.object();
+ writer.key("itemDocuments").array();
+ for (WbItemDocumentExpr documentExpr : itemDocumentExprs) {
+ documentExpr.write(writer, options);
+ }
+ writer.endArray();
+ writer.endObject();
+ }
+
+ static public WikibaseSchema load(Project project, JSONObject obj)
+ throws Exception {
+ return reconstruct(obj);
+ }
+
+ @Override
+ public void onBeforeSave(Project project) {
+ }
+
+ @Override
+ public void onAfterSave(Project project) {
+ }
+
+ @Override
+ public void dispose(Project project) {
+
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ return other instanceof WikibaseSchema && itemDocumentExprs.equals(((WikibaseSchema) other).getItemDocumentExpressions());
+ }
+ @Override
+ public int hashCode() {
+ return itemDocumentExprs.hashCode(); // same list as equals, so equal schemas hash alike
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/FullyPropertySerializingValueSnak.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/FullyPropertySerializingValueSnak.java
new file mode 100644
index 000000000..7f969b7dc
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/FullyPropertySerializingValueSnak.java
@@ -0,0 +1,28 @@
+package org.openrefine.wikidata.schema.entityvalues;
+
+import org.wikidata.wdtk.datamodel.implementation.ValueSnakImpl;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * A tweaked version of {@link ValueSnakImpl} that serializes
+ * the full property (not just its PID), so that we can also
+ * get the label for that property and display it in the UI
+ * without having to query the remote server.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class FullyPropertySerializingValueSnak extends ValueSnakImpl {
+
+ public FullyPropertySerializingValueSnak(PropertyIdValue property, Value value) {
+ super(property, value);
+ }
+
+ @JsonProperty("full_property")
+ public PropertyIdValue getFullPropertyId() {
+ return getPropertyId();
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/PrefetchedEntityIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/PrefetchedEntityIdValue.java
new file mode 100644
index 000000000..ee98c58ab
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/PrefetchedEntityIdValue.java
@@ -0,0 +1,60 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema.entityvalues;
+
+import java.util.List;
+
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+
+/**
+ * An entity id value that also comes with a label and possibly types.
+ *
+ * The rationale behind this class is that OpenRefine already stores labels
+ * and types for the Wikidata items it knows about (in the reconciliation data),
+ * so it is worth keeping this data to avoid re-fetching it when we need it.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public interface PrefetchedEntityIdValue extends EntityIdValue {
+
+ /**
+ * This should return the label "as we got it", with no guarantee that it is
+ * current or that its language matches that of the user. In general though,
+ * that should be the case if the user always uses OpenRefine with the same
+ * language settings.
+ *
+ * @return the preferred label of the entity
+ */
+ public String getLabel();
+
+ /**
+ * Returns a list of types for this item. Again these are the types as they were
+ * originally fetched from the reconciliation interface: they can diverge from
+ * what is currently on the item.
+ *
+ * Empty lists should be returned for entities whose types are not known.
+ */
+ public List<String> getTypes();
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconEntityIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconEntityIdValue.java
new file mode 100644
index 000000000..f55bc3c8c
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconEntityIdValue.java
@@ -0,0 +1,166 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema.entityvalues;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.wikidata.wdtk.datamodel.helpers.Equality;
+import org.wikidata.wdtk.datamodel.helpers.Hash;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+
+import com.google.refine.model.Recon;
+
+/**
+ * An EntityIdValue that holds not just the id but also the label as fetched by
+ * either the reconciliation interface or the suggester and its type, both
+ * stored as reconciliation candidates.
+ *
+ * This label will be localized depending on the language chosen by the user for
+ * OpenRefine's interface. Storing it lets us reuse it later on without having
+ * to re-fetch it.
+ *
+ * Storing the types also lets us perform some constraint checks without
+ * re-fetching the types of many items.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public abstract class ReconEntityIdValue implements PrefetchedEntityIdValue {
+
+ private Recon _recon;
+ private String _cellValue;
+
+ public ReconEntityIdValue(Recon match, String cellValue) {
+ _recon = match;
+ _cellValue = cellValue;
+ assert (Recon.Judgment.Matched.equals(_recon.judgment) || Recon.Judgment.New.equals(_recon.judgment));
+ }
+
+ @JsonIgnore
+ public boolean isMatched() {
+ return Recon.Judgment.Matched.equals(_recon.judgment) && _recon.match != null;
+ }
+
+ @JsonIgnore
+ public boolean isNew() {
+ return !isMatched();
+ }
+
+ public String getLabel() {
+ // matched: the name stored on the match; otherwise: the cell value given at construction
+ if (!isMatched()) {
+ return _cellValue;
+ }
+ return _recon.match.name;
+ }
+
+ public List<String> getTypes() {
+ if (isMatched()) {
+ return Arrays.asList(_recon.match.types);
+ } else {
+ return new ArrayList<>(); // no match, hence no types to report
+ }
+ }
+
+ @Override
+ public abstract String getEntityType();
+
+ /**
+ * Returns the integer used internally in OpenRefine to identify the new item.
+ *
+ * @return the reconciliation id of the reconciled cell
+ */
+ public long getReconInternalId() {
+ return getRecon().id;
+ }
+
+ /**
+ * Returns the reconciliation object corresponding to this entity.
+ *
+ * @return the full reconciliation metadata of the corresponding cell
+ */
+ @JsonIgnore // just to clean up a bit the json serialization
+ public Recon getRecon() {
+ return _recon;
+ }
+
+ /**
+ * Returns the id of the match, or for unmatched entities "Q" (items) or "P" (properties) followed by the internal recon id.
+ */
+ @Override
+ public String getId() {
+ if (isMatched()) {
+ return _recon.match.id;
+ } else if (ET_ITEM.equals(getEntityType())) {
+ return "Q" + getReconInternalId();
+ } else if (ET_PROPERTY.equals(getEntityType())) {
+ return "P" + getReconInternalId();
+ }
+ return null; // unmatched and neither an item nor a property
+ }
+
+ @Override
+ public String getSiteIri() {
+ if (isMatched()) {
+ return _recon.identifierSpace;
+ } else {
+ return EntityIdValue.SITE_LOCAL;
+ }
+ }
+
+ @Override
+ public String getIri() {
+ return getSiteIri() + getId();
+ }
+
+ @Override
+ public <T> T accept(ValueVisitor<T> valueVisitor) {
+ return valueVisitor.visit(this); // double dispatch: the visitor computes the result for this value
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ return Equality.equalsEntityIdValue(this, other);
+
+ }
+
+ @Override
+ public int hashCode() {
+ return Hash.hashCode(this);
+ }
+
+ @Override
+ public String toString() {
+ if (isNew()) {
+ return "new item (reconciled from " + getReconInternalId() + ")";
+ } else {
+ return getIri() + " (reconciled from " + getReconInternalId() + ")";
+ }
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconItemIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconItemIdValue.java
new file mode 100644
index 000000000..dc57d364d
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconItemIdValue.java
@@ -0,0 +1,41 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema.entityvalues;
+
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+
+import com.google.refine.model.Recon;
+
+public class ReconItemIdValue extends ReconEntityIdValue implements ItemIdValue {
+
+ public ReconItemIdValue(Recon recon, String cellValue) {
+ super(recon, cellValue);
+ }
+
+ @Override
+ public String getEntityType() {
+ return ET_ITEM;
+ }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconPropertyIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconPropertyIdValue.java
new file mode 100644
index 000000000..b6767d2ba
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/ReconPropertyIdValue.java
@@ -0,0 +1,40 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema.entityvalues;
+
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+
+import com.google.refine.model.Recon;
+
+public class ReconPropertyIdValue extends ReconEntityIdValue implements PropertyIdValue {
+
+ public ReconPropertyIdValue(Recon recon, String cellValue) {
+ super(recon, cellValue);
+ }
+
+ @Override
+ public String getEntityType() {
+ return ET_PROPERTY;
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedEntityIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedEntityIdValue.java
new file mode 100644
index 000000000..d2b97edea
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedEntityIdValue.java
@@ -0,0 +1,96 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema.entityvalues;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.wikidata.wdtk.datamodel.helpers.Hash;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
+
+/**
+ * An EntityIdValue that we have obtained from a suggest widget in the schema
+ * alignment dialog.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public abstract class SuggestedEntityIdValue implements PrefetchedEntityIdValue {
+
+ private String _id;
+ private String _siteIRI;
+ private String _label;
+
+ public SuggestedEntityIdValue(String id, String siteIRI, String label) {
+ _id = id;
+ _siteIRI = siteIRI;
+ _label = label;
+ }
+
+ @Override
+ public String getId() {
+ return _id;
+ }
+
+ @Override
+ public String getSiteIri() {
+ return _siteIRI;
+ }
+
+ @Override
+ public String getLabel() {
+ return _label;
+ }
+
+ @Override
+ public List<String> getTypes() {
+ return new ArrayList<>(); // no types are stored for suggested entities
+ }
+
+ @Override
+ public String getIri() {
+ return getSiteIri() + getId();
+ }
+
+ @Override
+ public <T> T accept(ValueVisitor<T> valueVisitor) {
+ return valueVisitor.visit(this); // double dispatch: the visitor computes the result for this value
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ // instanceof is false for null, so no separate null check is needed
+ if (!(other instanceof EntityIdValue)) {
+ return false;
+ }
+ return getIri().equals(((EntityIdValue) other).getIri());
+ }
+
+ @Override
+ public int hashCode() {
+ return Hash.hashCode(this);
+ }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedItemIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedItemIdValue.java
new file mode 100644
index 000000000..2c277f2e5
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedItemIdValue.java
@@ -0,0 +1,39 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema.entityvalues;
+
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+
+public class SuggestedItemIdValue extends SuggestedEntityIdValue implements ItemIdValue {
+
+ public SuggestedItemIdValue(String id, String siteIRI, String label) {
+ super(id, siteIRI, label);
+ }
+
+ @Override
+ public String getEntityType() {
+ return ET_ITEM;
+ }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedPropertyIdValue.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedPropertyIdValue.java
new file mode 100644
index 000000000..95cb973a2
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/entityvalues/SuggestedPropertyIdValue.java
@@ -0,0 +1,44 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema.entityvalues;
+
+import org.wikidata.wdtk.datamodel.helpers.ToString;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+
+public class SuggestedPropertyIdValue extends SuggestedEntityIdValue implements PropertyIdValue {
+
+ public SuggestedPropertyIdValue(String id, String siteIRI, String label) {
+ super(id, siteIRI, label);
+ }
+
+ @Override
+ public String getEntityType() {
+ return ET_PROPERTY;
+ }
+
+ @Override
+ public String toString() {
+ return "suggested " + ToString.toString(this) + " (\"" + getLabel() + "\")";
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/exceptions/InvalidSchemaException.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/exceptions/InvalidSchemaException.java
new file mode 100644
index 000000000..83e8f7734
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/exceptions/InvalidSchemaException.java
@@ -0,0 +1,29 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema.exceptions;
+
+public class InvalidSchemaException extends Exception {
+
+ static final long serialVersionUID = 494837587034L;
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/exceptions/SkipSchemaExpressionException.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/exceptions/SkipSchemaExpressionException.java
new file mode 100644
index 000000000..0a3521f4e
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/exceptions/SkipSchemaExpressionException.java
@@ -0,0 +1,29 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.schema.exceptions;
+
+public class SkipSchemaExpressionException extends Exception {
+
+ static final long serialVersionUID = 738592057L;
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java
new file mode 100644
index 000000000..7f2ba76a3
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdate.java
@@ -0,0 +1,333 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.updates;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.jsoup.helper.Validate;
+import org.wikidata.wdtk.datamodel.implementation.StatementGroupImpl;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * A class to plan an update of an item, after evaluating the statements but
+ * before fetching the current content of the item (this is why it does not
+ * extend StatementsUpdate). Its fields are final; {@link #merge} and {@link #normalizeLabelsAndAliases} return new instances.
+ *
+ * @author Antonin Delpeuch
+ */
+public class ItemUpdate {
+
+ private final ItemIdValue qid;
+ private final List addedStatements;
+ private final Set deletedStatements;
+ private final Set labels;
+ private final Set descriptions;
+ private final Set aliases;
+
+ /**
+ * Constructor. A null collection argument is replaced by an empty, immutable collection.
+ *
+ * @param qid
+ * the subject of the document (must not be null). It can be a reconciled
+ * item value for new items.
+ * @param addedStatements
+ * the statements to add on the item. They should be distinct. They
+ * are modelled as a list because their insertion order matters.
+ * @param deletedStatements
+ * the statements to remove from the item
+ * @param labels
+ * the labels to add on the item
+ * @param descriptions
+ * the descriptions to add on the item
+ * @param aliases
+ * the aliases to add on the item. In theory their order should
+ * matter but in practice people rarely rely on the order of aliases
+ * so this is just kept as a set for simplicity.
+ */
+ @JsonCreator
+ public ItemUpdate(@JsonProperty("subject") ItemIdValue qid,
+ @JsonProperty("addedStatements") List addedStatements,
+ @JsonProperty("deletedStatements") Set deletedStatements,
+ @JsonProperty("labels") Set labels,
+ @JsonProperty("descriptions") Set descriptions,
+ @JsonProperty("addedAliases") Set aliases) {
+ Validate.notNull(qid);
+ this.qid = qid;
+ if (addedStatements == null) {
+ addedStatements = Collections.emptyList();
+ }
+ this.addedStatements = addedStatements;
+ if (deletedStatements == null) {
+ deletedStatements = Collections.emptySet();
+ }
+ this.deletedStatements = deletedStatements;
+ if (labels == null) {
+ labels = Collections.emptySet();
+ }
+ this.labels = labels;
+ if (descriptions == null) {
+ descriptions = Collections.emptySet();
+ }
+ this.descriptions = descriptions;
+ if (aliases == null) {
+ aliases = Collections.emptySet();
+ }
+ this.aliases = aliases;
+ }
+
+ /**
+ * @return the id of the item this update applies to
+ */
+ @JsonProperty("subject")
+ public ItemIdValue getItemId() {
+ return qid;
+ }
+
+ /**
+ * Added statements are recorded as a list because their order of insertion
+ * matters.
+ *
+ * @return the list of all added statements
+ */
+ @JsonProperty("addedStatements")
+ public List getAddedStatements() {
+ return addedStatements;
+ }
+
+ /**
+ * @return the set of all deleted statements
+ */
+ @JsonProperty("deletedStatements")
+ public Set getDeletedStatements() {
+ return deletedStatements;
+ }
+
+ /**
+ * @return the set of updated labels
+ */
+ @JsonProperty("labels")
+ public Set getLabels() {
+ return labels;
+ }
+
+ /**
+ * @return the set of updated descriptions
+ */
+ @JsonProperty("descriptions")
+ public Set getDescriptions() {
+ return descriptions;
+ }
+
+ /**
+ * @return the set of added aliases
+ */
+ @JsonProperty("addedAliases")
+ public Set getAliases() {
+ return aliases;
+ }
+
+ /**
+ * @return true when this change is empty and its subject is not new
+ */
+ @JsonIgnore
+ public boolean isNull() {
+ return isEmpty() && !isNew();
+ }
+
+ /**
+ * @return true when this change leaves the content of the document untouched
+ */
+ @JsonIgnore
+ public boolean isEmpty() {
+ return (addedStatements.isEmpty() && deletedStatements.isEmpty() && labels.isEmpty() && descriptions.isEmpty()
+ && aliases.isEmpty());
+ }
+
+ /**
+ * Merges the changes of this update and of other into a new update, leaving
+ * both operands unchanged. Both updates should have the same subject.
+ *
+ * @param other the other change that should be merged
+ * @return the merged update; an added statement of other that is already present is not added a second time
+ */
+ public ItemUpdate merge(ItemUpdate other) {
+ Validate.isTrue(qid.equals(other.getItemId()));
+ List newAddedStatements = new ArrayList<>(addedStatements);
+ for (Statement statement : other.getAddedStatements()) {
+ if (!newAddedStatements.contains(statement)) {
+ newAddedStatements.add(statement);
+ }
+ }
+ Set newDeletedStatements = new HashSet<>(deletedStatements);
+ newDeletedStatements.addAll(other.getDeletedStatements());
+ Set newLabels = new HashSet<>(labels);
+ newLabels.addAll(other.getLabels());
+ Set newDescriptions = new HashSet<>(descriptions);
+ newDescriptions.addAll(other.getDescriptions());
+ Set newAliases = new HashSet<>(aliases);
+ newAliases.addAll(other.getAliases());
+ return new ItemUpdate(qid, newAddedStatements, newDeletedStatements, newLabels, newDescriptions, newAliases);
+ }
+
+ /**
+ * Group added statements in StatementGroups, one per main snak property: useful if the item is new.
+ *
+ * @return a grouped version of getAddedStatements(); groups come in the iteration order of a HashMap, i.e. unspecified
+ */
+ public List getAddedStatementGroups() {
+ Map> map = new HashMap<>();
+ for (Statement statement : getAddedStatements()) {
+ PropertyIdValue propertyId = statement.getClaim().getMainSnak().getPropertyId();
+ if (!map.containsKey(propertyId)) {
+ map.put(propertyId, new ArrayList());
+ }
+ map.get(propertyId).add(statement);
+ }
+ List result = new ArrayList<>();
+ for (Map.Entry> entry : map.entrySet()) {
+ result.add(new StatementGroupImpl(entry.getValue()));
+ }
+ return result;
+ }
+
+ /**
+ * Group a list of ItemUpdates by subject: this is useful to make one single
+ * edit per item.
+ *
+ * @param itemDocuments the updates to group; those for which {@link #isNull()} holds are skipped
+ * @return a map from item ids to merged ItemUpdate for that id
+ */
+ public static Map groupBySubject(List itemDocuments) {
+ Map map = new HashMap<>();
+ for (ItemUpdate update : itemDocuments) {
+ if (update.isNull()) {
+ continue;
+ }
+
+ ItemIdValue qid = update.getItemId();
+ if (map.containsKey(qid)) {
+ ItemUpdate oldUpdate = map.get(qid);
+ map.put(qid, oldUpdate.merge(update));
+ } else {
+ map.put(qid, update);
+ }
+ }
+ return map;
+ }
+
+ /**
+ * Is this update about a new item? This holds when the site IRI of the subject equals EntityIdValue.SITE_LOCAL.
+ */
+ public boolean isNew() {
+ return EntityIdValue.SITE_LOCAL.equals(getItemId().getSiteIri());
+ }
+
+ /**
+ * This should only be used when creating a new item. It returns a new update in
+ * which no alias comes without a label in the same language: for each language lacking a label, one of its aliases is used as the label instead.
+ */
+ public ItemUpdate normalizeLabelsAndAliases() {
+ // Ensure that we are only adding aliases with labels
+ Set labelLanguages = labels.stream().map(l -> l.getLanguageCode()).collect(Collectors.toSet());
+
+ Set filteredAliases = new HashSet<>();
+ Set newLabels = new HashSet<>(labels);
+ for (MonolingualTextValue alias : aliases) {
+ if (!labelLanguages.contains(alias.getLanguageCode())) {
+ labelLanguages.add(alias.getLanguageCode());
+ newLabels.add(alias);
+ } else {
+ filteredAliases.add(alias);
+ }
+ }
+ return new ItemUpdate(qid, addedStatements, deletedStatements, newLabels, descriptions, filteredAliases);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null || !ItemUpdate.class.isInstance(other)) {
+ return false;
+ }
+ ItemUpdate otherUpdate = (ItemUpdate) other;
+ return qid.equals(otherUpdate.getItemId()) && addedStatements.equals(otherUpdate.getAddedStatements())
+ && deletedStatements.equals(otherUpdate.getDeletedStatements())
+ && labels.equals(otherUpdate.getLabels()) && descriptions.equals(otherUpdate.getDescriptions())
+ && aliases.equals(otherUpdate.getAliases());
+ }
+
+ @Override
+ public int hashCode() {
+ return qid.hashCode() + addedStatements.hashCode() + deletedStatements.hashCode() + labels.hashCode()
+ + descriptions.hashCode() + aliases.hashCode();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ builder.append(""); // NOTE(review): only an empty string is appended here, so the result is always empty
+ return builder.toString();
+ }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdateBuilder.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdateBuilder.java
new file mode 100644
index 000000000..1556370c0
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/ItemUpdateBuilder.java
@@ -0,0 +1,208 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.updates;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.jsoup.helper.Validate;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+/**
+ * Constructs a {@link ItemUpdate} incrementally. Once {@link #build()} has been
+ * called, every add/delete method fails its {@code Validate.isTrue} check.
+ *
+ * @author Antonin Delpeuch
+ */
+public class ItemUpdateBuilder {
+
+ private ItemIdValue qid;
+ private List addedStatements;
+ private Set deletedStatements;
+ private Set labels;
+ private Set descriptions;
+ private Set aliases;
+ private boolean built;
+
+ /**
+ * Constructor.
+ *
+ * @param qid
+ * the subject of the document (must not be null). It can be a reconciled
+ * item value for new items.
+ */
+ public ItemUpdateBuilder(ItemIdValue qid) {
+ Validate.notNull(qid);
+ this.qid = qid;
+ this.addedStatements = new ArrayList<>();
+ this.deletedStatements = new HashSet();
+ this.labels = new HashSet();
+ this.descriptions = new HashSet();
+ this.aliases = new HashSet();
+ this.built = false;
+ }
+
+ /**
+ * Mark a statement for insertion. If it matches an existing statement, it will
+ * update the statement instead.
+ *
+ * @param statement the statement to add or update
+ * @return this builder, so that calls can be chained
+ */
+ public ItemUpdateBuilder addStatement(Statement statement) {
+ Validate.isTrue(!built, "ItemUpdate has already been built");
+ addedStatements.add(statement);
+ return this;
+ }
+
+ /**
+ * Mark a statement for deletion. If no such statement exists, nothing will be
+ * deleted.
+ *
+ * @param statement the statement to delete
+ * @return this builder, so that calls can be chained
+ */
+ public ItemUpdateBuilder deleteStatement(Statement statement) {
+ Validate.isTrue(!built, "ItemUpdate has already been built");
+ deletedStatements.add(statement);
+ return this;
+ }
+
+ /**
+ * Adds a set of statements, as in {@link #addStatement}.
+ *
+ * @param statements the statements to add
+ * @return this builder, so that calls can be chained
+ */
+ public ItemUpdateBuilder addStatements(Set statements) {
+ Validate.isTrue(!built, "ItemUpdate has already been built");
+ addedStatements.addAll(statements);
+ return this;
+ }
+
+ /**
+ * Deletes a set of statements, as in {@link #deleteStatement}.
+ *
+ * @param statements the statements to delete
+ * @return this builder, so that calls can be chained
+ */
+ public ItemUpdateBuilder deleteStatements(Set statements) {
+ Validate.isTrue(!built, "ItemUpdate has already been built");
+ deletedStatements.addAll(statements);
+ return this;
+ }
+
+ /**
+ * Adds a label to the item. It will override any existing label in this
+ * language.
+ *
+ * @param label the label to add
+ * @return this builder, so that calls can be chained
+ */
+ public ItemUpdateBuilder addLabel(MonolingualTextValue label) {
+ Validate.isTrue(!built, "ItemUpdate has already been built");
+ labels.add(label);
+ return this;
+ }
+
+ /**
+ * Adds a set of labels to the item. It will override any existing label in
+ * each language.
+ *
+ * @param labels the labels to add
+ * @return this builder, so that calls can be chained
+ */
+ public ItemUpdateBuilder addLabels(Set labels) {
+ Validate.isTrue(!built, "ItemUpdate has already been built");
+ this.labels.addAll(labels);
+ return this;
+ }
+
+ /**
+ * Adds a description to the item. It will override any existing description in
+ * this language.
+ *
+ * @param description the description to add
+ * @return this builder, so that calls can be chained
+ */
+ public ItemUpdateBuilder addDescription(MonolingualTextValue description) {
+ Validate.isTrue(!built, "ItemUpdate has already been built");
+ descriptions.add(description);
+ return this;
+ }
+
+ /**
+ * Adds a set of descriptions to the item. It will override any existing
+ * description in each language.
+ *
+ * @param descriptions the descriptions to add
+ * @return this builder, so that calls can be chained
+ */
+ public ItemUpdateBuilder addDescriptions(Set descriptions) {
+ Validate.isTrue(!built, "ItemUpdate has already been built");
+ this.descriptions.addAll(descriptions);
+ return this;
+ }
+
+ /**
+ * Adds an alias to the item. It will be added to any existing aliases in that
+ * language.
+ *
+ * @param alias the alias to add
+ * @return this builder, so that calls can be chained
+ */
+ public ItemUpdateBuilder addAlias(MonolingualTextValue alias) {
+ Validate.isTrue(!built, "ItemUpdate has already been built");
+ aliases.add(alias);
+ return this;
+ }
+
+ /**
+ * Adds a set of aliases to the item. They will be added to any existing
+ * aliases in each language.
+ *
+ * @param aliases the aliases to add
+ * @return this builder, so that calls can be chained
+ */
+ public ItemUpdateBuilder addAliases(Set aliases) {
+ Validate.isTrue(!built, "ItemUpdate has already been built");
+ this.aliases.addAll(aliases);
+ return this;
+ }
+
+ /**
+ * Constructs the {@link ItemUpdate} and marks this builder as built.
+ *
+ * @return the update made of everything recorded so far (the builder's collections are passed on without copying)
+ */
+ public ItemUpdate build() {
+ built = true;
+ return new ItemUpdate(qid, addedStatements, deletedStatements, labels, descriptions, aliases);
+ }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/ImpossibleSchedulingException.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/ImpossibleSchedulingException.java
new file mode 100644
index 000000000..9b1d0f504
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/ImpossibleSchedulingException.java
@@ -0,0 +1,30 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.updates.scheduler;
+
+public class ImpossibleSchedulingException extends Exception {
+ // Checked exception without message or payload; UpdateScheduler#schedule declares it for edit plans a scheduler cannot cope with.
+ private static final long serialVersionUID = 6621563898380564148L;
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/PointerExtractor.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/PointerExtractor.java
new file mode 100644
index 000000000..7fac7b09e
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/PointerExtractor.java
@@ -0,0 +1,166 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.updates.scheduler;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.StringValue;
+import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
+
+/**
+ * A class that extracts the new entity ids referred to in a statement.
+ * Its visit methods return null when a value holds no new entity; {@link #extractPointers(Value)} turns that into an empty set.
+ *
+ * @author Antonin Delpeuch
+ */
+public class PointerExtractor implements ValueVisitor> {
+
+ /**
+ * Extracts all the new entities mentioned by this statement. This does not
+ * include the subject of the statement.
+ *
+ * @param statement
+ * the statement to inspect
+ * @return the set of all new entities mentioned by the statement
+ */
+ public Set extractPointers(Statement statement) {
+ Set result = new HashSet<>();
+ result.addAll(extractPointers(statement.getClaim().getMainSnak()));
+ result.addAll(extractPointers(statement.getClaim().getQualifiers()));
+ statement.getReferences().stream().map(l -> extractPointers(l.getSnakGroups())).forEach(s -> result.addAll(s));
+ return result;
+ }
+
+ /**
+ * Extracts all the new entities mentioned by this list of snak groups.
+ *
+ * @param snakGroups the snak groups to inspect
+ * @return the set of all new entities mentioned by any of these snak groups
+ */
+ public Set extractPointers(List snakGroups) {
+ Set result = new HashSet<>();
+ snakGroups.stream().map(s -> extractPointers(s)).forEach(s -> result.addAll(s));
+ return result;
+ }
+
+ /**
+ * Extracts all the new entities mentioned by this snak group.
+ *
+ * @param snakGroup the snak group to inspect
+ * @return the set of all new entities mentioned by the snaks of this group
+ */
+ public Set extractPointers(SnakGroup snakGroup) {
+ Set result = new HashSet<>();
+ snakGroup.getSnaks().stream().map(s -> extractPointers(s)).forEach(s -> result.addAll(s));
+ return result;
+ }
+
+ /**
+ * Extracts all new entities mentioned by this snak. Currently there will
+ * be at most one: the target of the snak (as property ids cannot be new for
+ * now).
+ *
+ * @param snak the snak to inspect; both its property id and its value are examined
+ * @return the set of all new entities mentioned by the snak
+ */
+ public Set extractPointers(Snak snak) {
+ Set result = new HashSet<>();
+ result.addAll(extractPointers(snak.getPropertyId()));
+ result.addAll(extractPointers(snak.getValue()));
+ return result;
+ }
+
+ /**
+ * Extracts any new entity from the value.
+ *
+ * @param value the value to inspect, may be null
+ * @return the new entities found in the value; an empty set (never null) when there are none
+ */
+ public Set extractPointers(Value value) {
+ if (value == null) {
+ return Collections.emptySet();
+ }
+ Set pointers = value.accept(this);
+ if (pointers == null) {
+ return Collections.emptySet();
+ }
+ return pointers;
+ }
+
+ @Override
+ public Set visit(DatatypeIdValue value) {
+ return null;
+ }
+
+ @Override
+ public Set visit(EntityIdValue value) {
+ if (ReconItemIdValue.class.isInstance(value)) {
+ ReconItemIdValue recon = (ReconItemIdValue) value;
+ if (recon.isNew()) {
+ return Collections.singleton(recon);
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public Set visit(GlobeCoordinatesValue value) {
+ return null;
+ }
+
+ @Override
+ public Set visit(MonolingualTextValue value) {
+ return null;
+ }
+
+ @Override
+ public Set visit(QuantityValue value) {
+ // units cannot be new because WDTK represents them as strings already
+ return null;
+ }
+
+ @Override
+ public Set visit(StringValue value) {
+ return null;
+ }
+
+ @Override
+ public Set visit(TimeValue value) {
+ return null;
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/QuickStatementsUpdateScheduler.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/QuickStatementsUpdateScheduler.java
new file mode 100644
index 000000000..4640daa61
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/QuickStatementsUpdateScheduler.java
@@ -0,0 +1,140 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.updates.scheduler;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+public class QuickStatementsUpdateScheduler implements UpdateScheduler {
+
+ private PointerExtractor extractor = new PointerExtractor();
+
+ /**
+ * This map holds for each new entity id value a list of updates that refer to
+ * this id (and should hence be scheduled right after creation of that entity).
+ */
+ private Map pointerUpdates;
+
+ /**
+ * This contains all updates which do not refer to any new entity apart from
+ * possibly the subject, in the order that they were supplied to us.
+ */
+ private UpdateSequence pointerFreeUpdates;
+
+ /**
+ * Separates out the statements which refer to new items from the rest of the
+ * update. The resulting updates are stored in {@link #pointerUpdates} and
+ * {@link #pointerFreeUpdates}.
+ *
+ * @param update the update to split
+ * @throws ImpossibleSchedulingException
+ * if a statement refers to several new items, or if a statement on a new item refers to a different new item
+ */
+ protected void splitUpdate(ItemUpdate update)
+ throws ImpossibleSchedulingException {
+ ItemUpdateBuilder remainingUpdateBuilder = new ItemUpdateBuilder(update.getItemId())
+ .addLabels(update.getLabels()).addDescriptions(update.getDescriptions()).addAliases(update.getAliases())
+ .deleteStatements(update.getDeletedStatements());
+ Map referencingUpdates = new HashMap<>();
+
+ for (Statement statement : update.getAddedStatements()) {
+ Set pointers = extractor.extractPointers(statement);
+ if (pointers.isEmpty()) {
+ remainingUpdateBuilder.addStatement(statement);
+ } else if (pointers.size() == 1 && !update.isNew()) {
+ ItemIdValue pointer = pointers.stream().findFirst().get();
+ ItemUpdateBuilder referencingBuilder = referencingUpdates.get(pointer);
+ if (referencingBuilder == null) {
+ referencingBuilder = new ItemUpdateBuilder(update.getItemId());
+ }
+ referencingBuilder.addStatement(statement);
+ referencingUpdates.put(pointer, referencingBuilder);
+ } else if (pointers.size() == 1 && pointers.stream().findFirst().get().equals(update.getItemId())) {
+ remainingUpdateBuilder.addStatement(statement);
+ } else {
+ throw new ImpossibleSchedulingException();
+ }
+ }
+
+ // Add the update that is not referring to anything to the schedule
+ ItemUpdate pointerFree = remainingUpdateBuilder.build();
+ if (!pointerFree.isNull()) {
+ pointerFreeUpdates.add(pointerFree);
+ }
+ // Add the other updates to the map
+ for (Entry entry : referencingUpdates.entrySet()) {
+ ItemUpdate pointerUpdate = entry.getValue().build();
+ UpdateSequence pointerUpdatesForKey = pointerUpdates.get(entry.getKey());
+ if (pointerUpdatesForKey == null) {
+ pointerUpdatesForKey = new UpdateSequence();
+ }
+ pointerUpdatesForKey.add(pointerUpdate);
+ pointerUpdates.put(entry.getKey(), pointerUpdatesForKey);
+ }
+ }
+
+ @Override
+ public List schedule(List updates)
+ throws ImpossibleSchedulingException {
+ pointerUpdates = new HashMap<>();
+ pointerFreeUpdates = new UpdateSequence();
+
+ for (ItemUpdate update : updates) {
+ splitUpdate(update);
+ }
+
+ // Reconstruct: each pointer-free update is followed by the updates that point to its subject
+ List fullSchedule = new ArrayList<>();
+ Set mentionedNewEntities = new HashSet<>(pointerUpdates.keySet());
+ for (ItemUpdate update : pointerFreeUpdates.getUpdates()) {
+ fullSchedule.add(update);
+ UpdateSequence backPointers = pointerUpdates.get(update.getItemId());
+ if (backPointers != null) {
+ fullSchedule.addAll(backPointers.getUpdates());
+ }
+ mentionedNewEntities.remove(update.getItemId());
+ }
+
+ // Create any item that was referred to but untouched
+ // (this is just for the sake of correctness, it would be bad to do that
+ // as the items would remain blank in this batch).
+ for (ItemIdValue missingId : mentionedNewEntities) {
+ fullSchedule.add(new ItemUpdateBuilder(missingId).build());
+ fullSchedule.addAll(pointerUpdates.get(missingId).getUpdates());
+ }
+ return fullSchedule;
+ }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/UpdateScheduler.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/UpdateScheduler.java
new file mode 100644
index 000000000..e1e743056
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/UpdateScheduler.java
@@ -0,0 +1,53 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.updates.scheduler;
+
+import java.util.List;
+
+import org.openrefine.wikidata.updates.ItemUpdate;
+
+/**
+ * A scheduling strategy for item updates. Given a list of initial updates, the
+ * scheduler reorganizes these updates (possibly splitting them or merging them)
+ * to create a sequence that is suitable for a particular import process.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public interface UpdateScheduler {
+
+ /**
+ * Performs the scheduling. The initial updates are provided as a list so that
+ * the scheduler can attempt to respect the initial order (but no guarantee is
+ * made for that in general).
+ *
+ * @param updates
+ * the updates to schedule, in their initial order
+ * @return the reorganized updates, as the sequence to hand to the import process
+ * @throws ImpossibleSchedulingException
+ * when the scheduler cannot cope with a particular edit plan.
+ */
+ public List schedule(List updates)
+ throws ImpossibleSchedulingException;
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/UpdateSequence.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/UpdateSequence.java
new file mode 100644
index 000000000..0602bd463
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/UpdateSequence.java
@@ -0,0 +1,83 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.updates.scheduler;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+
+/**
+ * Helper class to store a list of updates where each subject appears at most
+ * once. It preserves order of insertion.
+ *
+ * @author Antonin Delpeuch
+ */
+public class UpdateSequence {
+
+    /**
+     * The list of updates stored by this container
+     */
+    private List<ItemUpdate> updates = new ArrayList<>();
+    /**
+     * An index mapping each subject to the position of its update in the list
+     */
+    private Map<ItemIdValue, Integer> index = new HashMap<>();
+
+    /**
+     * Adds a new update to the list, merging it with any existing one with the same
+     * subject (the merged update stays at the position of the existing one).
+     *
+     * @param update the update to append or merge; its item id is the lookup key
+     */
+    public void add(ItemUpdate update) {
+        ItemIdValue subject = update.getItemId();
+        Integer position = index.get(subject);
+        if (position != null) {
+            // Subject already present: merge in place so its slot is unchanged.
+            updates.set(position, updates.get(position).merge(update));
+        } else {
+            index.put(subject, updates.size());
+            updates.add(update);
+        }
+    }
+
+    /**
+     * @return the list of merged updates (the internal list, not a copy)
+     */
+    public List<ItemUpdate> getUpdates() {
+        return updates;
+    }
+
+    /**
+     * @return the set of touched subjects (the key set of the internal index)
+     */
+    public Set<ItemIdValue> getSubjects() {
+        return index.keySet();
+    }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/WikibaseAPIUpdateScheduler.java b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/WikibaseAPIUpdateScheduler.java
new file mode 100644
index 000000000..a3fea60e2
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/updates/scheduler/WikibaseAPIUpdateScheduler.java
@@ -0,0 +1,134 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.updates.scheduler;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+/**
+ * A simple scheduler for batches committed via the Wikibase API.
+ *
+ * The strategy is quite simple and makes at most two edits per touched item
+ * (which is not minimal though). Each update is split between statements making
+ * references to new items, and statements not making these references. All
+ * updates with no references to new items are done first (which creates all new
+ * items), then all other updates are done.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+public class WikibaseAPIUpdateScheduler implements UpdateScheduler {
+
+    /**
+     * The first part of updates: the ones which create new items without referring
+     * to any other new item.
+     */
+    private UpdateSequence pointerFreeUpdates;
+    /**
+     * The second part of the updates: all existing items, plus all parts of new
+     * items that refer to other new items.
+     */
+    private UpdateSequence pointerFullUpdates;
+    /**
+     * The set of all new items referred to in the whole batch.
+     */
+    private Set<ItemIdValue> allPointers;
+
+    private PointerExtractor extractor = new PointerExtractor();
+
+    @Override
+    public List<ItemUpdate> schedule(List<ItemUpdate> updates) {
+        List<ItemUpdate> result = new ArrayList<>();
+        pointerFreeUpdates = new UpdateSequence();
+        pointerFullUpdates = new UpdateSequence();
+        allPointers = new HashSet<>();
+
+        for (ItemUpdate update : updates) {
+            splitUpdate(update);
+        }
+
+        // Part 1: add all the pointer free updates
+        result.addAll(pointerFreeUpdates.getUpdates());
+
+        // Part 1': add the remaining new items that have not been touched
+        Set<ItemIdValue> unseenPointers = new HashSet<>(allPointers);
+        unseenPointers.removeAll(pointerFreeUpdates.getSubjects());
+
+        result.addAll(unseenPointers.stream().map(e -> new ItemUpdateBuilder(e).build()).collect(Collectors.toList()));
+
+        // Part 2: add all the pointer full updates
+        result.addAll(pointerFullUpdates.getUpdates());
+
+        return result;
+    }
+
+    /**
+     * Splits an update into a pointer-free part and a pointer-full part.
+     *
+     * @param update the update to split and record in the two update sequences
+     */
+    protected void splitUpdate(ItemUpdate update) {
+        ItemUpdateBuilder pointerFreeBuilder = new ItemUpdateBuilder(update.getItemId()).addLabels(update.getLabels())
+                .addDescriptions(update.getDescriptions()).addAliases(update.getAliases())
+                .deleteStatements(update.getDeletedStatements());
+        ItemUpdateBuilder pointerFullBuilder = new ItemUpdateBuilder(update.getItemId());
+
+        for (Statement statement : update.getAddedStatements()) {
+            Set<ReconItemIdValue> pointers = extractor.extractPointers(statement);
+            if (pointers.isEmpty()) {
+                pointerFreeBuilder.addStatement(statement);
+            } else {
+                pointerFullBuilder.addStatement(statement);
+            }
+            allPointers.addAll(pointers);
+        }
+
+        if (update.isNew()) {
+            // If the update is new, we might need to split it
+            // in two (if it refers to any other new entity).
+            ItemUpdate pointerFree = pointerFreeBuilder.build();
+            if (!pointerFree.isNull()) {
+                pointerFreeUpdates.add(pointerFree);
+            }
+            ItemUpdate pointerFull = pointerFullBuilder.build();
+            if (!pointerFull.isEmpty()) {
+                pointerFullUpdates.add(pointerFull);
+            }
+        } else {
+            // Otherwise, we just make sure this edit is done after
+            // all item creations.
+            pointerFullUpdates.add(update);
+        }
+    }
+
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/utils/EntityCache.java b/extensions/wikidata/src/org/openrefine/wikidata/utils/EntityCache.java
new file mode 100644
index 000000000..482a1b6cf
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/utils/EntityCache.java
@@ -0,0 +1,79 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.utils;
+
+import java.util.concurrent.TimeUnit;
+
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
+import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
+import org.wikidata.wdtk.wikibaseapi.ApiConnection;
+import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
+import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
+
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+
+public class EntityCache {
+
+ private static EntityCache _entityCache = new EntityCache();
+
+ private LoadingCache _cache = null;
+ private WikibaseDataFetcher _fetcher;
+
+ private EntityCache() {
+ ApiConnection connection = ApiConnection.getWikidataApiConnection();
+ _fetcher = new WikibaseDataFetcher(connection, Datamodel.SITE_WIKIDATA);
+
+ _cache = CacheBuilder.newBuilder().maximumSize(4096).expireAfterWrite(1, TimeUnit.HOURS)
+ .build(new CacheLoader() {
+
+ public EntityDocument load(String entityId)
+ throws Exception {
+ EntityDocument doc = _fetcher.getEntityDocument(entityId);
+ if (doc != null) {
+ return doc;
+ } else {
+ throw new MediaWikiApiErrorException("400", "Unknown entity id \"" + entityId + "\"");
+ }
+ }
+ });
+ }
+
+ public EntityDocument get(EntityIdValue id) {
+ return _cache.apply(id.getId());
+ }
+
+ public static EntityCache getEntityCache() {
+ if (_entityCache == null) {
+ _entityCache = new EntityCache();
+ }
+ return _entityCache;
+ }
+
+ public static EntityDocument getEntityDocument(EntityIdValue id) {
+ return getEntityCache().get(id);
+ }
+}
diff --git a/extensions/wikidata/src/org/openrefine/wikidata/utils/JacksonJsonizable.java b/extensions/wikidata/src/org/openrefine/wikidata/utils/JacksonJsonizable.java
new file mode 100644
index 000000000..b84c79b7d
--- /dev/null
+++ b/extensions/wikidata/src/org/openrefine/wikidata/utils/JacksonJsonizable.java
@@ -0,0 +1,79 @@
+/*******************************************************************************
+ * MIT License
+ *
+ * Copyright (c) 2018 Antonin Delpeuch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ ******************************************************************************/
+package org.openrefine.wikidata.utils;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonMappingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import com.google.refine.Jsonizable;
+
+/**
+ * This class is inefficient because it serializes the object to string and then
+ * deserializes it back. Unfortunately, this is the only simple way to bridge
+ * Jackson to org.json. This conversion should be removed when (if ?) we migrate
+ * OpenRefine to a better JSON library.
+ *
+ * @author Antonin Delpeuch
+ *
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public abstract class JacksonJsonizable implements Jsonizable {
+
+ @Override
+ public void write(JSONWriter writer, Properties options) throws JSONException {
+     String serialized; // Jackson's JSON text for this object, re-parsed by org.json below
+     try {
+         serialized = new ObjectMapper().writeValueAsString(this);
+     } catch (JsonProcessingException e) {
+         throw new JSONException(e.toString());
+     }
+     writer.value(new JSONObject(serialized));
+ }
+
+ public static