Add SchemaPropertyFetcher class (#3060)

* Add SchemaPropertyFetcher class

To retrieve and inspect all the properties appearing in the issues tab.

* created new list from unmodifiable lists

* created new list from unmodifiable lists

* rebased successfully

* overridden getMultipleDocuments method

* Test failure issue resolved

* added tests for SchemaPropertyExtractor class
This commit is contained in:
Ekta Mishra 2020-09-18 00:19:44 +05:30 committed by GitHub
parent 95983c4ac0
commit b92691d946
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 130 additions and 7 deletions

View File

@ -107,7 +107,7 @@ public class PreviewWikibaseSchemaCommand extends Command {
// Inspect the edits and generate warnings // Inspect the edits and generate warnings
EditInspector inspector = new EditInspector(warningStore, manifest); EditInspector inspector = new EditInspector(warningStore, manifest);
inspector.inspect(editBatch); inspector.inspect(editBatch, schema);
// Dump the first 10 edits, scheduled with the default scheduler // Dump the first 10 edits, scheduled with the default scheduler
WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler(); WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();

View File

@ -25,16 +25,22 @@ package org.openrefine.wikidata.qa;
import org.openrefine.wikidata.manifests.Manifest; import org.openrefine.wikidata.manifests.Manifest;
import org.openrefine.wikidata.qa.scrutinizers.*; import org.openrefine.wikidata.qa.scrutinizers.*;
import org.openrefine.wikidata.schema.WikibaseSchema;
import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler; import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
import org.openrefine.wikidata.utils.EntityCache; import org.openrefine.wikidata.utils.EntityCache;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import java.util.Map; import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors; import java.util.stream.Collectors;
/** /**
@ -50,6 +56,7 @@ public class EditInspector {
private QAWarningStore warningStore; private QAWarningStore warningStore;
private ConstraintFetcher fetcher; private ConstraintFetcher fetcher;
private Manifest manifest; private Manifest manifest;
private EntityCache entityCache;
public EditInspector(QAWarningStore warningStore, Manifest manifest) { public EditInspector(QAWarningStore warningStore, Manifest manifest) {
this.scrutinizers = new HashMap<>(); this.scrutinizers = new HashMap<>();
@ -58,7 +65,7 @@ public class EditInspector {
String propertyConstraintPid = manifest.getConstraintsRelatedId("property_constraint_pid"); String propertyConstraintPid = manifest.getConstraintsRelatedId("property_constraint_pid");
if (propertyConstraintPid != null) { if (propertyConstraintPid != null) {
EntityCache entityCache = EntityCache.getEntityCache(manifest.getSiteIri(), manifest.getMediaWikiApiEndpoint()); entityCache = EntityCache.getEntityCache(manifest.getSiteIri(), manifest.getMediaWikiApiEndpoint());
this.fetcher = new ConstraintFetcher(entityCache, propertyConstraintPid); this.fetcher = new ConstraintFetcher(entityCache, propertyConstraintPid);
} }
@ -112,9 +119,12 @@ public class EditInspector {
* *
* @param editBatch * @param editBatch
*/ */
public void inspect(List<ItemUpdate> editBatch) { public void inspect(List<ItemUpdate> editBatch, WikibaseSchema schema) throws ExecutionException {
// First, schedule them with some scheduler, // First, schedule them with some scheduler,
// so that all newly created entities appear in the batch // so that all newly created entities appear in the batch
SchemaPropertyExtractor fetcher = new SchemaPropertyExtractor();
Set<PropertyIdValue> properties = fetcher.getAllProperties(schema);
List<EntityDocument> propertyDocuments = entityCache.getMultipleDocuments(properties.stream().collect(Collectors.toList()));
WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler(); WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
editBatch = scheduler.schedule(editBatch); editBatch = scheduler.schedule(editBatch);

View File

@ -0,0 +1,52 @@
package org.openrefine.wikidata.qa;
import org.openrefine.wikidata.schema.WbExpression;
import org.openrefine.wikidata.schema.WbItemDocumentExpr;
import org.openrefine.wikidata.schema.WbPropConstant;
import org.openrefine.wikidata.schema.WbReferenceExpr;
import org.openrefine.wikidata.schema.WbSnakExpr;
import org.openrefine.wikidata.schema.WbStatementExpr;
import org.openrefine.wikidata.schema.WbStatementGroupExpr;
import org.openrefine.wikidata.schema.WikibaseSchema;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Collects the properties that are written as constants in a
 * {@link WikibaseSchema}.
 */
public class SchemaPropertyExtractor {

    /**
     * Walks every item document expression of the schema and gathers the
     * properties used by its statement groups, by the qualifiers of each
     * statement and by the snaks of each reference.
     *
     * Only properties expressed as a {@link WbPropConstant} are collected;
     * any other kind of property expression is skipped.
     *
     * NOTE(review): ids are built with
     * {@code Datamodel.makeWikidataPropertyIdValue}, so they are presumably
     * bound to the Wikidata site - confirm if other Wikibase instances must
     * be supported.
     *
     * @param schema the schema to scan
     * @return the set of constant properties found, empty if there are none
     */
    public Set<PropertyIdValue> getAllProperties(WikibaseSchema schema) {
        Set<PropertyIdValue> collected = new HashSet<>();
        for (WbItemDocumentExpr itemDocument : schema.getItemDocumentExpressions()) {
            for (WbStatementGroupExpr group : itemDocument.getStatementGroups()) {
                collectIfConstant(group.getProperty(), collected);
                for (WbStatementExpr statement : group.getStatements()) {
                    for (WbSnakExpr qualifier : statement.getQualifiers()) {
                        collectIfConstant(qualifier.getProp(), collected);
                    }
                    for (WbReferenceExpr reference : statement.getReferences()) {
                        for (WbSnakExpr referenceSnak : reference.getSnaks()) {
                            collectIfConstant(referenceSnak.getProp(), collected);
                        }
                    }
                }
            }
        }
        return collected;
    }

    /**
     * Adds the property denoted by the expression to the sink when the
     * expression is a constant; does nothing otherwise.
     */
    private static void collectIfConstant(WbExpression<? extends PropertyIdValue> expression,
            Set<PropertyIdValue> sink) {
        if (expression instanceof WbPropConstant) {
            String pid = ((WbPropConstant) expression).getPid();
            sink.add(Datamodel.makeWikidataPropertyIdValue(pid));
        }
    }
}

View File

@ -25,6 +25,7 @@ package org.openrefine.wikidata.commands;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
import static org.openrefine.wikidata.testing.TestingData.jsonFromFile; import static org.openrefine.wikidata.testing.TestingData.jsonFromFile;
import org.openrefine.wikidata.utils.EntityCache;
import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertEquals;
import org.openrefine.wikidata.qa.EditInspector; import org.openrefine.wikidata.qa.EditInspector;
@ -42,7 +43,7 @@ import com.google.refine.util.ParsingUtilities;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import java.io.IOException; import java.io.IOException;
@PrepareForTest(EditInspector.class) @PrepareForTest({EditInspector.class, EntityCache.class})
public class PreviewWikibaseSchemaCommandTest extends SchemaCommandTest { public class PreviewWikibaseSchemaCommandTest extends SchemaCommandTest {
@BeforeMethod @BeforeMethod
@ -52,8 +53,10 @@ public class PreviewWikibaseSchemaCommandTest extends SchemaCommandTest {
@Test @Test
public void testValidSchema() throws Exception { public void testValidSchema() throws Exception {
ConstraintFetcher fetcher = new ConstraintFetcher(new EntityCacheStub(), "P2302"); EntityCacheStub entityCacheStub = new EntityCacheStub();
ConstraintFetcher fetcher = new ConstraintFetcher(entityCacheStub, "P2302");
PowerMockito.whenNew(ConstraintFetcher.class).withAnyArguments().thenReturn(fetcher); PowerMockito.whenNew(ConstraintFetcher.class).withAnyArguments().thenReturn(fetcher);
PowerMockito.whenNew(EntityCache.class).withAnyArguments().thenReturn(entityCacheStub);
String schemaJson = jsonFromFile("schema/inception.json"); String schemaJson = jsonFromFile("schema/inception.json");
String manifestJson = jsonFromFile("manifest/wikidata-manifest-v1.0.json"); String manifestJson = jsonFromFile("manifest/wikidata-manifest-v1.0.json");

View File

@ -0,0 +1,51 @@
package org.openrefine.wikidata.qa;
import org.openrefine.wikidata.commands.CommandTest;
import org.openrefine.wikidata.schema.WikibaseSchema;
import org.openrefine.wikidata.testing.TestingData;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
/**
 * Tests for {@link SchemaPropertyExtractor#getAllProperties(WikibaseSchema)}.
 */
public class SchemaPropertyExtractorTest {

    public WikibaseSchema schema;
    String serialized;

    /**
     * Loads the serialized schema fixture used by the tests.
     */
    @BeforeTest
    public void initialize() throws IOException {
        serialized = TestingData.jsonFromFile("schema/inception.json");
    }

    /**
     * Builds the set of property ids corresponding to the given pids.
     *
     * @param pids the property identifiers, e.g. "P813"
     * @return a set holding one property id per supplied pid
     */
    public Set<PropertyIdValue> makePropertySet(String... pids) {
        Set<PropertyIdValue> expected = new HashSet<>();
        for (String pid : pids) {
            expected.add(Datamodel.makeWikidataPropertyIdValue(pid));
        }
        return expected;
    }

    /**
     * The fixture schema is expected to yield exactly P813, P571 and P854.
     */
    @Test
    public void testGetAllProperties() throws IOException {
        schema = WikibaseSchema.reconstruct(serialized);
        Set<PropertyIdValue> extracted = new SchemaPropertyExtractor().getAllProperties(schema);
        Assert.assertEquals(extracted, makePropertySet("P813", "P571", "P854"));
    }

    /**
     * A freshly constructed schema is expected to yield no properties.
     */
    @Test
    public void testNoProperties() {
        schema = new WikibaseSchema();
        Set<PropertyIdValue> extracted = new SchemaPropertyExtractor().getAllProperties(schema);
        Assert.assertEquals(extracted, new HashSet<>());
    }
}

View File

@ -1,6 +1,8 @@
package org.openrefine.wikidata.utils; package org.openrefine.wikidata.utils;
import java.io.InputStream; import java.io.InputStream;
import java.util.List;
import java.util.stream.Collectors;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.helpers.DatamodelMapper; import org.wikidata.wdtk.datamodel.helpers.DatamodelMapper;
@ -38,4 +40,9 @@ public class EntityCacheStub extends EntityCache {
} }
return null; return null;
} }
/**
 * Resolves each requested id one by one through {@code get(id)} and
 * returns the results in the same order as the input list.
 *
 * NOTE(review): entries are whatever {@code get} returns for an id, so
 * the list may contain nulls if {@code get} can return null - confirm.
 *
 * @param entityIds the ids of the documents to look up
 * @return one entry per requested id, in input order
 */
@Override
public List<EntityDocument> getMultipleDocuments(List<EntityIdValue> entityIds) {
    return entityIds.stream()
            .map(entityId -> this.get(entityId))
            .collect(Collectors.toList());
}
} }