Update Wikidata-Toolkit to solve various issues (#2215)

Updates Wikidata-Toolkit to 0.11.0

This enables the following improvements:
- https://github.com/Wikidata/Wikidata-Toolkit/issues/431
- https://github.com/Wikidata/Wikidata-Toolkit/issues/456

This closes #2196, although the feature will not be enabled until 3.4 for technical reasons
(the openrefine-3.3 tag on Wikidata can only be applied by an AbuseFilter, not manually through
the API).

* Update WDTK and add timeouts to Wikidata editing, closes #2211.

* Explicitly add wdtk-util as a dependency

* Catch MediaWikiApiErrorException following the change of interface
This commit is contained in:
Antonin Delpeuch 2019-11-21 21:47:49 +01:00 committed by GitHub
parent b561824d04
commit b19e8a9a74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 102 additions and 33 deletions

View File

@ -128,12 +128,17 @@
<dependency>
<groupId>org.wikidata.wdtk</groupId>
<artifactId>wdtk-wikibaseapi</artifactId>
<version>0.10.0</version>
<version>0.11.0</version>
</dependency>
<dependency>
<groupId>org.wikidata.wdtk</groupId>
<artifactId>wdtk-datamodel</artifactId>
<version>0.10.0</version>
<version>0.11.0</version>
</dependency>
<dependency>
<groupId>org.wikidata.wdtk</groupId>
<artifactId>wdtk-util</artifactId>
<version>0.11.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>

View File

@ -28,7 +28,9 @@ import java.io.IOException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikidata.wdtk.wikibaseapi.ApiConnection;
import org.wikidata.wdtk.wikibaseapi.BasicApiConnection;
import org.wikidata.wdtk.wikibaseapi.LoginFailedException;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
@ -54,9 +56,11 @@ public class ConnectionManager {
final static Logger logger = LoggerFactory.getLogger("connection_mananger");
public static final String PREFERENCE_STORE_KEY = "wikidata_credentials";
public static final int CONNECT_TIMEOUT = 5000;
public static final int READ_TIMEOUT = 10000;
private PreferenceStore prefStore;
private ApiConnection connection;
private BasicApiConnection connection;
private static final ConnectionManager instance = new ConnectionManager();
@ -64,12 +68,26 @@ public class ConnectionManager {
return instance;
}
/**
* Creates a connection manager, which attempts to restore any
* previous connection (from the preferences).
*/
private ConnectionManager() {
// NOTE: the preference store must be fetched before restoreSavedConnection(),
// which reads stored credentials from it via getStoredCredentials().
prefStore = ProjectManager.singleton.getPreferenceStore();
// start logged-out; restoreSavedConnection() replaces this if credentials were saved
connection = null;
restoreSavedConnection();
}
/**
* Logs in to the Wikibase instance, using login/password
*
* @param username
* the username to log in with
* @param password
* the password to log in with
* @param rememberCredentials
* whether to store these credentials in the preferences (unencrypted!)
*/
public void login(String username, String password, boolean rememberCredentials) {
if (rememberCredentials) {
ArrayNode array = ParsingUtilities.mapper.createArrayNode();
@ -80,7 +98,7 @@ public class ConnectionManager {
prefStore.put(PREFERENCE_STORE_KEY, array);
}
connection = ApiConnection.getWikidataApiConnection();
connection = createNewConnection();
try {
connection.login(username, password);
} catch (LoginFailedException e) {
@ -88,10 +106,13 @@ public class ConnectionManager {
}
}
/**
* Restore any previously saved connection, from the preferences.
*/
public void restoreSavedConnection() {
ObjectNode savedCredentials = getStoredCredentials();
if (savedCredentials != null) {
connection = ApiConnection.getWikidataApiConnection();
connection = createNewConnection();
try {
connection.login(savedCredentials.get("username").asText(), savedCredentials.get("password").asText());
} catch (LoginFailedException e) {
@ -114,7 +135,7 @@ public class ConnectionManager {
try {
connection.logout();
connection = null;
} catch (IOException e) {
} catch (IOException | MediaWikiApiErrorException e) {
logger.error(e.getMessage());
}
}
@ -135,4 +156,16 @@ public class ConnectionManager {
return null;
}
}
/**
* Creates a fresh connection object to Wikidata with our
* preferred settings (connect and read timeouts applied, so that
* editing operations cannot hang indefinitely).
* @return a new {@link BasicApiConnection} configured with
*         {@code CONNECT_TIMEOUT} and {@code READ_TIMEOUT}
*/
protected BasicApiConnection createNewConnection() {
BasicApiConnection conn = BasicApiConnection.getWikidataApiConnection();
conn.setConnectTimeout(CONNECT_TIMEOUT);
conn.setReadTimeout(READ_TIMEOUT);
return conn;
}
}

View File

@ -59,6 +59,7 @@ public class EditBatchProcessor {
private NewItemLibrary library;
private List<ItemUpdate> scheduled;
private String summary;
private List<String> tags;
private List<ItemUpdate> remainingUpdates;
private List<ItemUpdate> currentBatch;
@ -82,12 +83,14 @@ public class EditBatchProcessor {
* the library to use to keep track of new item creation
* @param summary
* the summary to append to all edits
* @param tags
* the list of tags to apply to all edits
* @param batchSize
* the number of items that should be retrieved in one go from the
* API
*/
public EditBatchProcessor(WikibaseDataFetcher fetcher, WikibaseDataEditor editor, List<ItemUpdate> updates,
NewItemLibrary library, String summary, int batchSize) {
NewItemLibrary library, String summary, List<String> tags, int batchSize) {
this.fetcher = fetcher;
this.editor = editor;
editor.setEditAsBot(true); // this will not do anything if the user does not
@ -99,6 +102,7 @@ public class EditBatchProcessor {
this.library = library;
this.summary = summary;
this.tags = tags;
this.batchSize = batchSize;
// Schedule the edit batch
@ -133,6 +137,7 @@ public class EditBatchProcessor {
update = rewriter.rewrite(update);
} catch (NewItemNotCreatedYetException e) {
logger.warn("Failed to rewrite update on entity "+update.getItemId()+". Missing entity: "+e.getMissingEntity()+". Skipping update.");
batchCursor++;
return;
}
@ -148,8 +153,8 @@ public class EditBatchProcessor {
update.getAliases().stream().collect(Collectors.toList()), update.getAddedStatementGroups(),
Collections.emptyMap());
ItemDocument createdDoc = editor.createItemDocument(itemDocument, summary);
library.setQid(newCell.getReconInternalId(), createdDoc.getItemId().getId());
ItemDocument createdDoc = editor.createItemDocument(itemDocument, summary, tags);
library.setQid(newCell.getReconInternalId(), createdDoc.getEntityId().getId());
} else {
// Existing item
ItemDocument currentDocument = (ItemDocument) currentDocs.get(update.getItemId().getId());
@ -165,7 +170,8 @@ public class EditBatchProcessor {
update.getAliases().stream().collect(Collectors.toList()),
new ArrayList<MonolingualTextValue>(),
update.getAddedStatements().stream().collect(Collectors.toList()),
update.getDeletedStatements().stream().collect(Collectors.toList()), summary);
update.getDeletedStatements().stream().collect(Collectors.toList()),
summary, tags);
}
} catch (MediaWikiApiErrorException e) {
// TODO find a way to report these errors to the user in a nice way

View File

@ -28,7 +28,6 @@ import java.util.Locale;
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
import org.openrefine.wikidata.updates.scheduler.QuickStatementsUpdateScheduler;
import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
@ -36,6 +35,7 @@ import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
import org.wikidata.wdtk.datamodel.interfaces.UnsupportedValue;
import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
/**
@ -96,4 +96,10 @@ public class QSValuePrinter implements ValueVisitor<String> {
return String.format("+%04d-%02d-%02dT%02d:%02d:%02dZ/%d", value.getYear(), value.getMonth(), value.getDay(),
value.getHour(), value.getMinute(), value.getSecond(), value.getPrecision());
}
@Override
public String visit(UnsupportedValue value) {
// UnsupportedValue instances cannot be generated from within OpenRefine,
// so this branch is effectively unreachable; the placeholder is returned
// only to satisfy the ValueVisitor interface.
return "<UNSUPPORTED>";
}
}

View File

@ -26,11 +26,16 @@ package org.openrefine.wikidata.operations;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Writer;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.Validate;
import org.apache.log4j.spi.LoggerRepository;
import org.openrefine.wikidata.editing.ConnectionManager;
import org.openrefine.wikidata.editing.EditBatchProcessor;
import org.openrefine.wikidata.editing.NewItemLibrary;
@ -46,6 +51,7 @@ import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.refine.RefineServlet;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.EngineConfig;
import com.google.refine.history.Change;
@ -143,6 +149,7 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
protected Engine _engine;
protected WikibaseSchema _schema;
protected String _summary;
protected List<String> _tags;
protected final long _historyEntryID;
protected PerformEditsProcess(Project project, Engine engine, String description, String summary) {
@ -151,6 +158,15 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
this._engine = engine;
this._schema = (WikibaseSchema) project.overlayModels.get("wikibaseSchema");
this._summary = summary;
// TODO this is one of the attributes that should be configured on a per-wiki basis
// TODO enable this tag once 3.3 final is released and create 3.4 tag without AbuseFilter
String tag = "openrefine";
Pattern pattern = Pattern.compile("^(\\d+\\.\\d+).*$");
Matcher matcher = pattern.matcher(RefineServlet.VERSION);
if (matcher.matches()) {
tag += "-"+matcher.group(1);
}
this._tags = Collections.emptyList(); // TODO Arrays.asList(tag);
this._historyEntryID = HistoryEntry.allocateID();
}
@ -183,7 +199,7 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
// Prepare the edits
NewItemLibrary newItemLibrary = new NewItemLibrary();
EditBatchProcessor processor = new EditBatchProcessor(wbdf, wbde, itemDocuments, newItemLibrary, summary,
50);
_tags, 50);
// Perform edits
logger.info("Performing edits");

View File

@ -1,10 +1,8 @@
package org.openrefine.wikidata.qa.scrutinizers;
import java.util.Set;
import java.util.stream.Collectors;
import org.openrefine.wikidata.qa.QAWarning;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;

View File

@ -28,7 +28,6 @@ import java.util.List;
import org.jsoup.helper.Validate;
import org.openrefine.wikidata.schema.exceptions.SkipSchemaExpressionException;
import org.openrefine.wikidata.utils.StatementGroupJson;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;

View File

@ -32,12 +32,10 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.jsoup.helper.Validate;
import org.openrefine.wikidata.utils.StatementGroupJson;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.implementation.StatementGroupImpl;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;

View File

@ -29,7 +29,6 @@ import java.util.List;
import java.util.Set;
import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
@ -39,6 +38,7 @@ import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
import org.wikidata.wdtk.datamodel.interfaces.UnsupportedValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
@ -158,4 +158,9 @@ public class PointerExtractor implements ValueVisitor<Set<ReconItemIdValue>> {
public Set<ReconItemIdValue> visit(TimeValue value) {
return null;
}
@Override
public Set<ReconItemIdValue> visit(UnsupportedValue value) {
// no reconciled item pointers can be extracted from an unsupported value;
// null is the same "nothing found" convention used by the sibling visit methods
return null;
}
}

View File

@ -29,6 +29,7 @@ import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.wikibaseapi.ApiConnection;
import org.wikidata.wdtk.wikibaseapi.BasicApiConnection;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;
@ -44,7 +45,7 @@ public class EntityCache {
private WikibaseDataFetcher _fetcher;
private EntityCache() {
ApiConnection connection = ApiConnection.getWikidataApiConnection();
ApiConnection connection = BasicApiConnection.getWikidataApiConnection();
_fetcher = new WikibaseDataFetcher(connection, Datamodel.SITE_WIKIDATA);
_cache = CacheBuilder.newBuilder().maximumSize(4096).expireAfterWrite(1, TimeUnit.HOURS)

View File

@ -2,7 +2,6 @@ package org.openrefine.wikidata.utils;
import java.util.List;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;

View File

@ -1,7 +1,7 @@
package org.openrefine.wikidata.commands;
import static org.testng.Assert.assertEquals;
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
import java.io.IOException;

View File

@ -23,9 +23,9 @@
******************************************************************************/
package org.openrefine.wikidata.commands;
import static org.testng.Assert.assertEquals;
import static org.mockito.Mockito.when;
import static org.openrefine.wikidata.testing.TestingData.jsonFromFile;
import static org.testng.Assert.assertEquals;
import java.io.IOException;

View File

@ -23,9 +23,9 @@
******************************************************************************/
package org.openrefine.wikidata.commands;
import static org.testng.Assert.assertTrue;
import static org.mockito.Mockito.when;
import static org.openrefine.wikidata.testing.TestingData.jsonFromFile;
import static org.testng.Assert.assertTrue;
import java.io.IOException;

View File

@ -23,8 +23,8 @@
******************************************************************************/
package org.openrefine.wikidata.commands;
import static org.testng.Assert.assertEquals;
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
import java.io.IOException;

View File

@ -31,6 +31,7 @@ import static org.testng.Assert.assertEquals;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@ -58,6 +59,7 @@ public class EditBatchProcessorTest extends WikidataRefineTest {
private WikibaseDataEditor editor = null;
private NewItemLibrary library = null;
private String summary = "my fantastic edits";
private List<String> tags = null;
@BeforeMethod
public void setUp() {
@ -65,6 +67,7 @@ public class EditBatchProcessorTest extends WikidataRefineTest {
editor = mock(WikibaseDataEditor.class);
editor.disableEditing(); // just in case we got mocking wrong
library = new NewItemLibrary();
tags = Arrays.asList("my-tag");
}
@Test
@ -87,9 +90,9 @@ public class EditBatchProcessorTest extends WikidataRefineTest {
ItemDocument expectedNewItem = ItemDocumentBuilder.forItemId(TestingData.newIdA).withLabel(label).build();
ItemDocument createdNewItem = ItemDocumentBuilder.forItemId(Datamodel.makeWikidataItemIdValue("Q1234"))
.withLabel(label).withRevisionId(37828L).build();
when(editor.createItemDocument(expectedNewItem, summary)).thenReturn(createdNewItem);
when(editor.createItemDocument(expectedNewItem, summary, tags)).thenReturn(createdNewItem);
EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, 50);
EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, tags, 50);
assertEquals(2, processor.remainingEdits());
assertEquals(0, processor.progress());
processor.performEdit();
@ -134,7 +137,7 @@ public class EditBatchProcessorTest extends WikidataRefineTest {
when(fetcher.getEntityDocuments(toQids(secondBatch))).thenReturn(toMap(secondBatch));
// Run edits
EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, batchSize);
EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, tags, batchSize);
assertEquals(0, processor.progress());
for (int i = 124; i < 190; i++) {
assertEquals(processor.remainingEdits(), 190 - i);
@ -150,15 +153,15 @@ public class EditBatchProcessorTest extends WikidataRefineTest {
for (ItemDocument doc : fullBatch) {
verify(editor, times(1)).updateTermsStatements(doc, Collections.emptyList(),
Collections.singletonList(description), Collections.emptyList(), Collections.emptyList(),
Collections.emptyList(), Collections.emptyList(), summary);
Collections.emptyList(), Collections.emptyList(), summary, tags);
}
}
private Map<String, EntityDocument> toMap(List<ItemDocument> docs) {
return docs.stream().collect(Collectors.toMap(doc -> doc.getItemId().getId(), doc -> doc));
return docs.stream().collect(Collectors.toMap(doc -> doc.getEntityId().getId(), doc -> doc));
}
private List<String> toQids(List<ItemDocument> docs) {
return docs.stream().map(doc -> doc.getItemId().getId()).collect(Collectors.toList());
return docs.stream().map(doc -> doc.getEntityId().getId()).collect(Collectors.toList());
}
}

View File

@ -34,7 +34,6 @@ import org.openrefine.wikidata.testing.WikidataRefineTest;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import com.google.refine.RefineTest;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;

View File

@ -45,7 +45,6 @@ import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
import com.google.refine.RefineTest;
import com.google.refine.browsing.Engine;
import com.google.refine.model.Project;

View File

@ -7,7 +7,6 @@ import java.util.Properties;
import org.openrefine.wikidata.testing.WikidataRefineTest;
import org.testng.annotations.Test;
import com.google.refine.RefineTest;
import com.google.refine.browsing.Engine;
import com.google.refine.model.Project;
import com.google.refine.util.TestUtils;

View File

@ -23,6 +23,7 @@
******************************************************************************/
package org.openrefine.wikidata.updates.scheduler;
import static org.mockito.Mockito.mock;
import static org.testng.Assert.assertEquals;
import java.math.BigDecimal;
@ -41,6 +42,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
import org.wikidata.wdtk.datamodel.interfaces.UnsupportedValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
public class PointerExtractorTest {
@ -73,6 +75,7 @@ public class PointerExtractorTest {
assertEmpty(Datamodel.makeQuantityValue(new BigDecimal("898")));
assertEmpty(Datamodel.makeQuantityValue(new BigDecimal("7.87"), "http://www.wikidata.org/entity/Q34"));
assertEmpty(Datamodel.makeTimeValue(1898, (byte) 2, (byte) 3, TimeValue.CM_GREGORIAN_PRO));
assertEmpty(mock(UnsupportedValue.class));
}
@Test