From b92691d946a6aebdd41b91403063262e029f3033 Mon Sep 17 00:00:00 2001 From: Ekta Mishra Date: Fri, 18 Sep 2020 00:19:44 +0530 Subject: [PATCH] Add SchemaPropertyFetcher class (#3060) * Add SchemaPropertyFetcher class To retrieve and inspect all the properties appearing in the issues tab. * created new list from unmodifiable lists * created new list from unmodifiable lists * rebased successfully * overridden getMultipleDocuments method * Test failure issue resolved * added tests for SchemaPropertyExtractor class --- .../PreviewWikibaseSchemaCommand.java | 4 +- .../openrefine/wikidata/qa/EditInspector.java | 16 ++++-- .../wikidata/qa/SchemaPropertyExtractor.java | 52 +++++++++++++++++++ .../PreviewWikibaseSchemaCommandTest.java | 7 ++- .../qa/SchemaPropertyExtractorTest.java | 51 ++++++++++++++++++ .../wikidata/utils/EntityCacheStub.java | 7 +++ 6 files changed, 130 insertions(+), 7 deletions(-) create mode 100644 extensions/wikidata/src/org/openrefine/wikidata/qa/SchemaPropertyExtractor.java create mode 100644 extensions/wikidata/tests/src/org/openrefine/wikidata/qa/SchemaPropertyExtractorTest.java diff --git a/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java b/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java index ef806551b..3647b80c9 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommand.java @@ -107,8 +107,8 @@ public class PreviewWikibaseSchemaCommand extends Command { // Inspect the edits and generate warnings EditInspector inspector = new EditInspector(warningStore, manifest); - inspector.inspect(editBatch); - + inspector.inspect(editBatch, schema); + // Dump the first 10 edits, scheduled with the default scheduler WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler(); List nonNullEdits = 
scheduler.schedule(editBatch).stream() diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java index 94a66f511..66488e0a7 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java @@ -25,16 +25,22 @@ package org.openrefine.wikidata.qa; import org.openrefine.wikidata.manifests.Manifest; import org.openrefine.wikidata.qa.scrutinizers.*; +import org.openrefine.wikidata.schema.WikibaseSchema; import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler; import org.openrefine.wikidata.utils.EntityCache; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import org.wikidata.wdtk.datamodel.interfaces.EntityDocument; import java.util.HashMap; import java.util.List; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; + import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; /** @@ -50,6 +56,7 @@ public class EditInspector { private QAWarningStore warningStore; private ConstraintFetcher fetcher; private Manifest manifest; + private EntityCache entityCache; public EditInspector(QAWarningStore warningStore, Manifest manifest) { this.scrutinizers = new HashMap<>(); @@ -58,7 +65,7 @@ public class EditInspector { String propertyConstraintPid = manifest.getConstraintsRelatedId("property_constraint_pid"); if (propertyConstraintPid != null) { - EntityCache entityCache = EntityCache.getEntityCache(manifest.getSiteIri(), manifest.getMediaWikiApiEndpoint()); + entityCache = EntityCache.getEntityCache(manifest.getSiteIri(), manifest.getMediaWikiApiEndpoint()); this.fetcher = new ConstraintFetcher(entityCache, 
propertyConstraintPid); } @@ -112,9 +119,12 @@ public class EditInspector { * * @param editBatch */ - public void inspect(List editBatch) { + public void inspect(List editBatch, WikibaseSchema schema) throws ExecutionException { // First, schedule them with some scheduler, // so that all newly created entities appear in the batch + SchemaPropertyExtractor fetcher = new SchemaPropertyExtractor(); + Set properties = fetcher.getAllProperties(schema); + List propertyDocuments = entityCache.getMultipleDocuments(properties.stream().collect(Collectors.toList())); WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler(); editBatch = scheduler.schedule(editBatch); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/SchemaPropertyExtractor.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/SchemaPropertyExtractor.java new file mode 100644 index 000000000..eacaa3436 --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/SchemaPropertyExtractor.java @@ -0,0 +1,52 @@ +package org.openrefine.wikidata.qa; + +import org.openrefine.wikidata.schema.WbExpression; +import org.openrefine.wikidata.schema.WbItemDocumentExpr; +import org.openrefine.wikidata.schema.WbPropConstant; +import org.openrefine.wikidata.schema.WbReferenceExpr; +import org.openrefine.wikidata.schema.WbSnakExpr; +import org.openrefine.wikidata.schema.WbStatementExpr; +import org.openrefine.wikidata.schema.WbStatementGroupExpr; +import org.openrefine.wikidata.schema.WikibaseSchema; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class SchemaPropertyExtractor { + + public Set getAllProperties(WikibaseSchema schema) { + Set properties = new HashSet<>(); + List itemDocumentExprs = schema.getItemDocumentExpressions(); + for (WbItemDocumentExpr itemDocumentExpr : itemDocumentExprs) { + List 
statementGroups = itemDocumentExpr.getStatementGroups(); + for(WbStatementGroupExpr statementGroup : statementGroups) { + WbExpression statementGroupProperty = statementGroup.getProperty(); + if (statementGroupProperty instanceof WbPropConstant) { + properties.add(Datamodel.makeWikidataPropertyIdValue(((WbPropConstant) statementGroupProperty).getPid())); + } + List statementExprs = statementGroup.getStatements(); + for(WbStatementExpr statementExpr : statementExprs) { + List snakExprs = new ArrayList<>(statementExpr.getQualifiers()); + List referenceExprs = statementExpr.getReferences(); + for (WbReferenceExpr referenceExpr : referenceExprs) { + snakExprs.addAll(referenceExpr.getSnaks()); + } + + for (WbSnakExpr snakExpr : snakExprs) { + WbExpression qualifierProperty = snakExpr.getProp(); + if (qualifierProperty instanceof WbPropConstant) { + properties.add(Datamodel.makeWikidataPropertyIdValue(((WbPropConstant) qualifierProperty).getPid())); + } + } + } + } + } + + return properties; + } +} + diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommandTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommandTest.java index fe0ad5d87..cef251fe3 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommandTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/commands/PreviewWikibaseSchemaCommandTest.java @@ -25,6 +25,7 @@ package org.openrefine.wikidata.commands; import static org.mockito.Mockito.when; import static org.openrefine.wikidata.testing.TestingData.jsonFromFile; +import org.openrefine.wikidata.utils.EntityCache; import static org.testng.Assert.assertEquals; import org.openrefine.wikidata.qa.EditInspector; @@ -42,7 +43,7 @@ import com.google.refine.util.ParsingUtilities; import javax.servlet.ServletException; import java.io.IOException; -@PrepareForTest(EditInspector.class) 
+@PrepareForTest({EditInspector.class, EntityCache.class}) public class PreviewWikibaseSchemaCommandTest extends SchemaCommandTest { @BeforeMethod @@ -52,8 +53,10 @@ public class PreviewWikibaseSchemaCommandTest extends SchemaCommandTest { @Test public void testValidSchema() throws Exception { - ConstraintFetcher fetcher = new ConstraintFetcher(new EntityCacheStub(), "P2302"); + EntityCacheStub entityCacheStub = new EntityCacheStub(); + ConstraintFetcher fetcher = new ConstraintFetcher(entityCacheStub, "P2302"); PowerMockito.whenNew(ConstraintFetcher.class).withAnyArguments().thenReturn(fetcher); + PowerMockito.whenNew(EntityCache.class).withAnyArguments().thenReturn(entityCacheStub); String schemaJson = jsonFromFile("schema/inception.json"); String manifestJson = jsonFromFile("manifest/wikidata-manifest-v1.0.json"); diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/SchemaPropertyExtractorTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/SchemaPropertyExtractorTest.java new file mode 100644 index 000000000..a0bd4cbf8 --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/SchemaPropertyExtractorTest.java @@ -0,0 +1,51 @@ +package org.openrefine.wikidata.qa; + +import org.openrefine.wikidata.commands.CommandTest; +import org.openrefine.wikidata.schema.WikibaseSchema; +import org.openrefine.wikidata.testing.TestingData; +import org.testng.Assert; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +public class SchemaPropertyExtractorTest { + + public WikibaseSchema schema; + String serialized; + + @BeforeTest + public void initialize() throws IOException { + serialized = TestingData.jsonFromFile("schema/inception.json"); + } + + public Set makePropertySet(String... 
pids) { + Set propertyIdValues = new HashSet<>(); + for (String pid : pids) { + PropertyIdValue propertyIdValue = Datamodel.makeWikidataPropertyIdValue(pid); + propertyIdValues.add(propertyIdValue); + } + return propertyIdValues; + } + + @Test + public void testGetAllProperties() throws IOException { + schema = WikibaseSchema.reconstruct(serialized); + SchemaPropertyExtractor extractor = new SchemaPropertyExtractor(); + Set propertyIdValues = extractor.getAllProperties(schema); + Assert.assertEquals(propertyIdValues, makePropertySet("P813", "P571", "P854")); + } + + @Test + public void testNoProperties() { + schema = new WikibaseSchema(); + SchemaPropertyExtractor extractor = new SchemaPropertyExtractor(); + Set propertyIdValues = extractor.getAllProperties(schema); + Assert.assertEquals(propertyIdValues, new HashSet<>()); + } + +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/utils/EntityCacheStub.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/utils/EntityCacheStub.java index ab6c4b36c..5d54423b0 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/utils/EntityCacheStub.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/utils/EntityCacheStub.java @@ -1,6 +1,8 @@ package org.openrefine.wikidata.utils; import java.io.InputStream; +import java.util.List; +import java.util.stream.Collectors; import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.DatamodelMapper; @@ -38,4 +40,9 @@ public class EntityCacheStub extends EntityCache { } return null; } + + @Override + public List getMultipleDocuments(List entityIds) { + return entityIds.stream().map(id -> get(id)).collect(Collectors.toList()); + } }