Better testing of the editing process

parent 773be2e161, commit a002468e7d
PerformWikibaseEditsCommand.java
@@ -4,8 +4,6 @@ import javax.servlet.http.HttpServletRequest;
 
 import org.json.JSONObject;
 import org.openrefine.wikidata.operations.PerformWikibaseEditsOperation;
-import org.openrefine.wikidata.operations.PerformWikibaseEditsOperation.DuplicateDetectionStrategy;
-import org.openrefine.wikidata.operations.PerformWikibaseEditsOperation.OnDuplicateAction;
 
 import com.google.refine.commands.EngineDependentCommand;
 import com.google.refine.model.AbstractOperation;
@@ -16,12 +14,8 @@ public class PerformWikibaseEditsCommand extends EngineDependentCommand {
     @Override
     protected AbstractOperation createOperation(Project project, HttpServletRequest request, JSONObject engineConfig)
             throws Exception {
-        String strategy = request.getParameter("strategy");
-        String action = request.getParameter("action");
         String summary = request.getParameter("summary");
         return new PerformWikibaseEditsOperation(engineConfig,
-                DuplicateDetectionStrategy.valueOf(strategy),
-                OnDuplicateAction.valueOf(action),
                 summary);
     }
 
EditBatchProcessor.java (new file)
@@ -0,0 +1,211 @@
package org.openrefine.wikidata.editing;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;

/**
 * Schedules and performs a list of updates to items via the API.
 *
 * @author Antonin Delpeuch
 */
public class EditBatchProcessor {

    static final Logger logger = LoggerFactory
            .getLogger(EditBatchProcessor.class);

    private WikibaseDataFetcher fetcher;
    private WikibaseDataEditor editor;
    private NewItemLibrary library;
    private List<ItemUpdate> scheduled;
    private String summary;

    private List<ItemUpdate> remainingUpdates;
    private List<ItemUpdate> currentBatch;
    private int batchCursor;
    private int globalCursor;
    private Map<String, EntityDocument> currentDocs;
    private int batchSize;

    /**
     * Initiates the process of pushing a batch of updates to Wikibase.
     * This schedules the updates and is a prerequisite for calling
     * {@link #performEdit()}.
     *
     * @param fetcher
     *            the fetcher to use to retrieve the current state of items
     * @param editor
     *            the object to use to perform the edits
     * @param updates
     *            the list of item updates to perform
     * @param library
     *            the library to use to keep track of new item creation
     * @param summary
     *            the summary to append to all edits
     * @param batchSize
     *            the number of items that should be retrieved in one go from the API
     */
    public EditBatchProcessor(WikibaseDataFetcher fetcher,
            WikibaseDataEditor editor,
            List<ItemUpdate> updates,
            NewItemLibrary library,
            String summary,
            int batchSize) {
        this.fetcher = fetcher;
        this.editor = editor;
        // This has no effect if the user does not have a bot flag,
        // and it is generally wanted if they have one.
        editor.setEditAsBot(true);
        this.library = library;
        this.summary = summary;
        this.batchSize = batchSize;

        // Schedule the edit batch
        WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
        this.scheduled = scheduler.schedule(updates);
        this.globalCursor = 0;

        this.batchCursor = 0;
        this.remainingUpdates = new ArrayList<>(scheduled);
        this.currentBatch = Collections.emptyList();
        this.currentDocs = Collections.emptyMap();
    }

    /**
     * Performs the next edit in the batch.
     *
     * @throws InterruptedException
     */
    public void performEdit() throws InterruptedException {
        if (remainingEdits() == 0) {
            return;
        }
        if (batchCursor == currentBatch.size()) {
            prepareNewBatch();
        }
        ItemUpdate update = currentBatch.get(batchCursor);

        // Rewrite mentions of new items
        ReconEntityRewriter rewriter = new ReconEntityRewriter(library, update.getItemId());
        update = rewriter.rewrite(update);

        try {
            if (update.isNew()) {
                // New item
                ReconEntityIdValue newCell = (ReconEntityIdValue) update.getItemId();
                update.normalizeLabelsAndAliases();

                ItemDocument itemDocument = Datamodel.makeItemDocument(ItemIdValue.NULL,
                        update.getLabels().stream().collect(Collectors.toList()),
                        update.getDescriptions().stream().collect(Collectors.toList()),
                        update.getAliases().stream().collect(Collectors.toList()),
                        update.getAddedStatementGroups(),
                        Collections.emptyMap());

                ItemDocument createdDoc = editor.createItemDocument(itemDocument, summary);
                library.setQid(newCell.getReconInternalId(), createdDoc.getItemId().getId());
            } else {
                // Existing item
                ItemDocument currentDocument = (ItemDocument) currentDocs.get(update.getItemId().getId());
                /*
                TermStatementUpdate tsUpdate = new TermStatementUpdate(
                        currentDocument,
                        update.getAddedStatements().stream().collect(Collectors.toList()),
                        update.getDeletedStatements().stream().collect(Collectors.toList()),
                        update.getLabels().stream().collect(Collectors.toList()),
                        update.getDescriptions().stream().collect(Collectors.toList()),
                        update.getAliases().stream().collect(Collectors.toList()),
                        new ArrayList<MonolingualTextValue>()
                );
                ObjectMapper mapper = new ObjectMapper();
                logger.info(mapper.writeValueAsString(update));
                logger.info(update.toString());
                logger.info(tsUpdate.getJsonUpdateString()); */
                editor.updateTermsStatements(currentDocument,
                        update.getLabels().stream().collect(Collectors.toList()),
                        update.getDescriptions().stream().collect(Collectors.toList()),
                        update.getAliases().stream().collect(Collectors.toList()),
                        new ArrayList<MonolingualTextValue>(),
                        update.getAddedStatements().stream().collect(Collectors.toList()),
                        update.getDeletedStatements().stream().collect(Collectors.toList()),
                        summary);
            }
        } catch (MediaWikiApiErrorException e) {
            // TODO find a way to report these errors to the user in a nice way
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

        batchCursor++;
    }

    /**
     * @return the number of edits that remain to be done
     */
    public int remainingEdits() {
        return scheduled.size() - (globalCursor + batchCursor);
    }

    /**
     * @return the progress, measured as a percentage
     */
    public int progress() {
        return (100 * (globalCursor + batchCursor)) / scheduled.size();
    }

    protected void prepareNewBatch() throws InterruptedException {
        // Remove the previous batch from remainingUpdates
        globalCursor += currentBatch.size();
        currentBatch.clear();

        if (remainingUpdates.size() < batchSize) {
            currentBatch = remainingUpdates;
            remainingUpdates = Collections.emptyList();
        } else {
            currentBatch = remainingUpdates.subList(0, batchSize);
        }
        List<String> qidsToFetch = currentBatch.stream()
                .filter(u -> !u.isNew())
                .map(u -> u.getItemId().getId())
                .collect(Collectors.toList());

        // Get the current documents for this batch of updates
        logger.info("Requesting documents");
        currentDocs = null;
        int retries = 3;
        while (currentDocs == null && retries > 0) {
            try {
                currentDocs = fetcher.getEntityDocuments(qidsToFetch);
            } catch (MediaWikiApiErrorException e) {
                e.printStackTrace();
                Thread.sleep(5000);
            }
            retries--;
        }
        if (currentDocs == null) {
            throw new InterruptedException("Fetching current documents failed.");
        }
        batchCursor = 0;
    }
}
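For orientation, here is a minimal sketch of how a caller drives the new class: schedule everything up front, then call performEdit() in a loop while polling progress. The variable names are illustrative and a configured fetcher, editor and list of updates are assumed; the real caller is PerformWikibaseEditsOperation, whose updated loop appears below.

    // Minimal usage sketch (fetcher, editor and updates are assumed to exist).
    NewItemLibrary library = new NewItemLibrary();
    EditBatchProcessor processor = new EditBatchProcessor(
            fetcher, editor, updates, library, "my edit summary", 50);
    while (processor.remainingEdits() > 0) {
        try {
            processor.performEdit();          // one API edit per call
        } catch (InterruptedException e) {
            break;                            // fetching a batch failed repeatedly
        }
        int percent = processor.progress();   // 0-100, suitable for a progress bar
    }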
PerformWikibaseEditsOperation.java
@@ -15,6 +15,7 @@ import org.json.JSONObject;
 import org.json.JSONWriter;
 
 import org.openrefine.wikidata.editing.ConnectionManager;
+import org.openrefine.wikidata.editing.EditBatchProcessor;
 import org.openrefine.wikidata.editing.NewItemLibrary;
 import org.openrefine.wikidata.editing.ReconEntityRewriter;
 import org.openrefine.wikidata.updates.ItemUpdate;
@@ -57,26 +58,12 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
     static final Logger logger = LoggerFactory
             .getLogger(PerformWikibaseEditsOperation.class);
 
-    public enum DuplicateDetectionStrategy {
-        PROPERTY, SNAK, SNAK_QUALIFIERS
-    }
-
-    public enum OnDuplicateAction {
-        SKIP, MERGE
-    }
-
-    private DuplicateDetectionStrategy strategy;
-    private OnDuplicateAction duplicateAction;
     private String summary;
 
     public PerformWikibaseEditsOperation(
             JSONObject engineConfig,
-            DuplicateDetectionStrategy strategy,
-            OnDuplicateAction duplicateAction,
             String summary) {
         super(engineConfig);
-        this.strategy = strategy;
-        this.duplicateAction = duplicateAction;
         this.summary = summary;
 
         // getEngine(request, project);
@@ -85,16 +72,12 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
     static public AbstractOperation reconstruct(Project project, JSONObject obj)
             throws Exception {
         JSONObject engineConfig = obj.getJSONObject("engineConfig");
-        String strategy = obj.getString("duplicate_strategy");
-        String action = obj.getString("duplicate_action");
         String summary = null;
         if (obj.has("summary")) {
             summary = obj.getString("summary");
         }
         return new PerformWikibaseEditsOperation(
                 engineConfig,
-                DuplicateDetectionStrategy.valueOf(strategy),
-                OnDuplicateAction.valueOf(action),
                 summary);
     }
 
@@ -107,10 +90,6 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
         writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
         writer.key("description");
         writer.value("Perform Wikibase edits");
-        writer.key("duplicate_strategy");
-        writer.value(strategy.name());
-        writer.key("duplicate_action");
-        writer.value(duplicateAction.name());
         writer.key("summary");
         writer.value(summary);
         writer.key("engineConfig");
@@ -214,137 +193,29 @@ public class PerformWikibaseEditsOperation extends EngineDependentOperation {
 
         WikibaseDataFetcher wbdf = new WikibaseDataFetcher(connection, _schema.getBaseIri());
         WikibaseDataEditor wbde = new WikibaseDataEditor(connection, _schema.getBaseIri());
-        wbde.setEditAsBot(true);
-        //wbde.disableEditing();
-
         // Evaluate the schema
         List<ItemUpdate> itemDocuments = _schema.evaluate(_project, _engine);
 
-        // Schedule the edit batch
-        WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
-        List<ItemUpdate> updates = null;
-        updates = scheduler.schedule(itemDocuments);
+        // Prepare the edits
+        NewItemLibrary newItemLibrary = new NewItemLibrary();
+        EditBatchProcessor processor = new EditBatchProcessor(wbdf,
+                wbde, itemDocuments, newItemLibrary, _summary, 50);
 
-        /**
-         * TODO:
-         *  - support for new items
-         *  - support for duplicate strategy and action
-         */
-
         // Perform edits
-        NewItemLibrary newItemLibrary = new NewItemLibrary();
-        DataObjectFactory factory = new DataObjectFactoryImpl();
-        List<ItemUpdate> remainingItemUpdates = new ArrayList<>();
-        remainingItemUpdates.addAll(updates);
-        int totalItemUpdates = updates.size();
-        int updatesDone = 0;
-        int batchSize = 50;
-        while(updatesDone < totalItemUpdates) {
-            // Split the remaining updates in batches
-            List<ItemUpdate> batch = null;
-            if(totalItemUpdates - updatesDone < batchSize) {
-                batch = remainingItemUpdates;
-            } else {
-                batch = remainingItemUpdates.subList(0, batchSize);
+        logger.info("Performing edits");
+        while(processor.remainingEdits() > 0) {
+            try {
+                processor.performEdit();
+            } catch(InterruptedException e) {
+                _canceled = true;
             }
-            List<String> qids = new ArrayList<>(batch.size());
-            for(ItemUpdate update : batch) {
-                String qid = update.getItemId().getId();
-                if (!update.isNew()) {
-                    qids.add(qid);
-                }
-            }
-
-            // Get the current documents for this batch of updates
-            logger.info("Requesting documents");
-            Map<String, EntityDocument> currentDocs = null;
-            int retries = 3;
-            while (currentDocs == null && retries > 0) {
-                try {
-                    currentDocs = wbdf.getEntityDocuments(qids);
-                } catch (MediaWikiApiErrorException e) {
-                    e.printStackTrace();
-                    try {
-                        Thread.sleep(5000);
-                    } catch (InterruptedException e1) {
-                        _canceled = true;
-                        break;
-                    }
-                }
-                retries--;
-            }
-
+            _progress = processor.progress();
             if (_canceled) {
                 break;
             }
-            logger.info("Performing edits");
-
-            for(ItemUpdate update : batch) {
-                // Rewrite the update
-                ReconEntityRewriter rewriter = new ReconEntityRewriter(newItemLibrary, update.getItemId());
-                update = rewriter.rewrite(update);
-
-                try {
-                    // New item
-                    if (update.getItemId().getId().equals("Q0")) {
-                        ReconEntityIdValue newCell = (ReconEntityIdValue)update.getItemId();
-                        update.normalizeLabelsAndAliases();
-
-                        ItemDocument itemDocument = factory.getItemDocument(
-                                update.getItemId(),
-                                update.getLabels().stream().collect(Collectors.toList()),
-                                update.getDescriptions().stream().collect(Collectors.toList()),
-                                update.getAliases().stream().collect(Collectors.toList()),
-                                update.getAddedStatementGroups(),
-                                new HashMap<String,SiteLink>(),
-                                0L);
-
-                        ItemDocument createdDoc = wbde.createItemDocument(itemDocument, _summary);
-                        newItemLibrary.setQid(newCell.getReconInternalId(), createdDoc.getItemId().getId());
-                    } else {
-                        // Existing item
-                        ItemDocument currentDocument = (ItemDocument)currentDocs.get(update.getItemId().getId());
-                        /*
-                        TermStatementUpdate tsUpdate = new TermStatementUpdate(
-                                currentDocument,
-                                update.getAddedStatements().stream().collect(Collectors.toList()),
-                                update.getDeletedStatements().stream().collect(Collectors.toList()),
-                                update.getLabels().stream().collect(Collectors.toList()),
-                                update.getDescriptions().stream().collect(Collectors.toList()),
-                                update.getAliases().stream().collect(Collectors.toList()),
-                                new ArrayList<MonolingualTextValue>()
-                        );
-                        ObjectMapper mapper = new ObjectMapper();
-                        logger.info(mapper.writeValueAsString(update));
-                        logger.info(update.toString());
-                        logger.info(tsUpdate.getJsonUpdateString()); */
-                        wbde.updateTermsStatements(currentDocument,
-                                update.getLabels().stream().collect(Collectors.toList()),
-                                update.getDescriptions().stream().collect(Collectors.toList()),
-                                update.getAliases().stream().collect(Collectors.toList()),
-                                new ArrayList<MonolingualTextValue>(),
-                                update.getAddedStatements().stream().collect(Collectors.toList()),
-                                update.getDeletedStatements().stream().collect(Collectors.toList()),
-                                _summary);
-                    }
-                } catch (MediaWikiApiErrorException e) {
-                    // TODO Auto-generated catch block
-                    e.printStackTrace();
-                } catch (IOException e) {
-                    // TODO Auto-generated catch block
-                    e.printStackTrace();
-                }
-
-                updatesDone++;
-                _progress = (100*updatesDone) / totalItemUpdates;
-                if(_canceled) {
-                    break;
-                }
-            }
-
-            batch.clear();
         }
 
         _progress = 100;
 
         if (!_canceled) {
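The operation now delegates all per-item logic, including new-item creation, to EditBatchProcessor. One mechanism worth spelling out: when performEdit() creates an item, the generated Q-id is recorded in the NewItemLibrary, and every update is passed through ReconEntityRewriter before being sent, so a later update that refers to a not-yet-created item has its reconciled placeholder replaced by the real id. A rough sketch of that flow, where 1234L and "Q1234" are made-up values:

    // Illustrative sketch; 1234L and "Q1234" are hypothetical values.
    NewItemLibrary library = new NewItemLibrary();
    library.setQid(1234L, "Q1234");  // recorded after createItemDocument() succeeds

    // What EditBatchProcessor does before sending each update:
    ReconEntityRewriter rewriter = new ReconEntityRewriter(library, update.getItemId());
    ItemUpdate resolved = rewriter.rewrite(update);  // placeholders now point at real Q-ids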
StatementGroupScrutinizer.java (deleted file)
@@ -1,19 +0,0 @@
package org.openrefine.wikidata.qa.scrutinizers;

import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;

public abstract class StatementGroupScrutinizer extends ItemUpdateScrutinizer {

    @Override
    public void scrutinize(ItemUpdate update) {
        for(StatementGroup statementGroup : update.getAddedStatementGroups()) {
            scrutinizeAdded(statementGroup);
        }
    }

    public abstract void scrutinizeAdded(StatementGroup statementGroup);

    public abstract void scrutinizeDeleted(StatementGroup statementGroup);
}
@@ -13,7 +13,6 @@ import org.testng.Assert;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
-import static org.junit.Assert.assertEquals;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 import static org.mockito.Mockito.verify;
EditBatchProcessorTest.java (new file)
@@ -0,0 +1,152 @@
package org.openrefine.wikidata.editing;

import org.openrefine.wikidata.testing.TestingData;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.helpers.ItemDocumentBuilder;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
import org.wikidata.wdtk.datamodel.interfaces.ItemDocument;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataEditor;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;

import com.google.refine.tests.RefineTest;

import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.times;

public class EditBatchProcessorTest extends RefineTest {

    private WikibaseDataFetcher fetcher = null;
    private WikibaseDataEditor editor = null;
    private NewItemLibrary library = null;
    private String summary = "my fantastic edits";

    @BeforeMethod
    public void setUp() {
        fetcher = mock(WikibaseDataFetcher.class);
        editor = mock(WikibaseDataEditor.class);
        editor.disableEditing(); // just in case we got mocking wrong…
        library = new NewItemLibrary();
    }

    @Test
    public void testNewItem() throws InterruptedException, MediaWikiApiErrorException, IOException {
        List<ItemUpdate> batch = new ArrayList<>();
        batch.add(new ItemUpdateBuilder(TestingData.existingId)
                .addAlias(Datamodel.makeMonolingualTextValue("my new alias", "en"))
                .addStatement(TestingData.generateStatement(TestingData.existingId, TestingData.newIdA))
                .build());
        MonolingualTextValue label = Datamodel.makeMonolingualTextValue("better label", "en");
        batch.add(new ItemUpdateBuilder(TestingData.newIdA)
                .addLabel(label)
                .build());

        // Plan expected edits
        ItemDocument existingItem = ItemDocumentBuilder.forItemId(TestingData.existingId)
                .withLabel(Datamodel.makeMonolingualTextValue("pomme", "fr"))
                .withDescription(Datamodel.makeMonolingualTextValue("fruit délicieux", "fr"))
                .build();
        when(fetcher.getEntityDocuments(Collections.singletonList(TestingData.existingId.getId())))
                .thenReturn(Collections.singletonMap(TestingData.existingId.getId(), existingItem));

        ItemDocument expectedNewItem = ItemDocumentBuilder.forItemId(ItemIdValue.NULL)
                .withLabel(label).build();
        ItemDocument createdNewItem = ItemDocumentBuilder.forItemId(Datamodel.makeWikidataItemIdValue("Q1234"))
                .withLabel(label).withRevisionId(37828L).build();
        when(editor.createItemDocument(expectedNewItem, summary)).thenReturn(createdNewItem);

        EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, 50);
        assertEquals(2, processor.remainingEdits());
        assertEquals(0, processor.progress());
        processor.performEdit();
        assertEquals(1, processor.remainingEdits());
        assertEquals(50, processor.progress());
        processor.performEdit();
        assertEquals(0, processor.remainingEdits());
        assertEquals(100, processor.progress());
        processor.performEdit(); // does not do anything
        assertEquals(0, processor.remainingEdits());
        assertEquals(100, processor.progress());

        NewItemLibrary expectedLibrary = new NewItemLibrary();
        expectedLibrary.setQid(1234L, "Q1234");
        assertEquals(expectedLibrary, library);
    }

    @Test
    public void testMultipleBatches() throws MediaWikiApiErrorException, InterruptedException, IOException {
        // Prepare test data
        MonolingualTextValue description = Datamodel.makeMonolingualTextValue("village in Nepal", "en");
        List<String> ids = new ArrayList<>();
        for(int i = 124; i < 190; i++) {
            ids.add("Q"+String.valueOf(i));
        }
        List<ItemIdValue> qids = ids.stream()
                .map(e -> Datamodel.makeWikidataItemIdValue(e))
                .collect(Collectors.toList());
        List<ItemUpdate> batch = qids.stream()
                .map(qid -> new ItemUpdateBuilder(qid)
                        .addDescription(description)
                        .build())
                .collect(Collectors.toList());

        int batchSize = 50;
        List<ItemDocument> fullBatch = qids.stream()
                .map(qid -> ItemDocumentBuilder.forItemId(qid)
                        .withStatement(TestingData.generateStatement(qid, TestingData.existingId))
                        .build())
                .collect(Collectors.toList());
        List<ItemDocument> firstBatch = fullBatch.subList(0, batchSize);
        List<ItemDocument> secondBatch = fullBatch.subList(batchSize, fullBatch.size());

        when(fetcher.getEntityDocuments(toQids(firstBatch))).thenReturn(toMap(firstBatch));
        when(fetcher.getEntityDocuments(toQids(secondBatch))).thenReturn(toMap(secondBatch));

        // Run edits
        EditBatchProcessor processor = new EditBatchProcessor(fetcher, editor, batch, library, summary, batchSize);
        assertEquals(0, processor.progress());
        for(int i = 124; i < 190; i++) {
            assertEquals(processor.remainingEdits(), 190-i);
            processor.performEdit();
        }
        assertEquals(0, processor.remainingEdits());
        assertEquals(100, processor.progress());

        // Check result
        assertEquals(new NewItemLibrary(), library);
        verify(fetcher, times(1)).getEntityDocuments(toQids(firstBatch));
        verify(fetcher, times(1)).getEntityDocuments(toQids(secondBatch));
        for(ItemDocument doc : fullBatch) {
            verify(editor, times(1)).updateTermsStatements(doc, Collections.emptyList(),
                    Collections.singletonList(description), Collections.emptyList(), Collections.emptyList(),
                    Collections.emptyList(), Collections.emptyList(), summary);
        }
    }

    private Map<String, EntityDocument> toMap(List<ItemDocument> docs) {
        return docs.stream()
                .collect(Collectors.toMap(doc -> doc.getItemId().getId(), doc -> doc));
    }

    private List<String> toQids(List<ItemDocument> docs) {
        return docs.stream().map(doc -> doc.getItemId().getId()).collect(Collectors.toList());
    }
}