Added support for Multi-value Constraint in Wikidata extension (#2629)

This commit is contained in:
Ekta Mishra 2020-05-31 18:54:51 +05:30 committed by GitHub
parent 0690f45762
commit 29a757dc57
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 136 additions and 28 deletions

View File

@ -126,6 +126,10 @@
"warnings-messages/no-edit-generated/body": "There might be something wrong with your schema.",
"warnings-messages/no-issue-detected/title": "No issue was detected in your edits.",
"warnings-messages/no-issue-detected/body": "Note that OpenRefine cannot detect all the types of problems Wikidata edits can have.",
"warnings-messages/multi-valued-property-is-required-for-new-item/title": "{property_entity} should have more than one statement on new items.",
"warnings-messages/multi-valued-property-is-required-for-new-item/body": "This property is expected to have more than one statement on each item but it has a single statement, for instance on {example_entity}.",
"warnings-messages/multi-valued-property-is-required-for-existing-item/title": "{property_entity} should have more than one statement on existing items.",
"warnings-messages/multi-valued-property-is-required-for-existing-item/body": "This property is expected to have more than one statement on each item but it has a single statement, for instance on {example_entity}. If the item already has statements with this property in Wikidata, then this warning can be ignored.",
"warnings-messages/ignored-qualifiers/title": "Some qualifiers were ignored.",
"warnings-messages/ignored-qualifiers/body": "Qualifier values could not be parsed, so they will not be added to the corresponding statements.",
"warnings-messages/ignored-references/title": "Some references were ignored.",

View File

@ -23,12 +23,12 @@
******************************************************************************/
package org.openrefine.wikidata.qa;
import java.util.Set;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import java.util.Set;
/**
* An object that fetches constraints about properties.
*
@ -115,6 +115,11 @@ public interface ConstraintFetcher {
*/
boolean hasDistinctValues(PropertyIdValue pid);
/**
* Is this property expected to have more than one value per item?
*
* @param pid
*            the property to check
* @return true if the property is expected to have more than one value per
*         item, false otherwise
*/
boolean hasMultiValue(PropertyIdValue pid);
/**
* Can statements using this property have uncertainty bounds?
*/

View File

@ -23,17 +23,17 @@
******************************************************************************/
package org.openrefine.wikidata.qa;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.openrefine.wikidata.qa.scrutinizers.*;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
import org.openrefine.wikidata.utils.EntityCache;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
* Runs a collection of edit scrutinizers on an edit batch.
*
@ -69,6 +69,7 @@ public class EditInspector {
register(new CalendarScrutinizer());
register(new CommonDescriptionScrutinizer());
register(new EnglishDescriptionScrutinizer());
register(new MultiValueScrutinizer());
}
/**

View File

@ -23,26 +23,16 @@
******************************************************************************/
package org.openrefine.wikidata.qa;
import org.openrefine.wikidata.utils.EntityCache;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.openrefine.wikidata.utils.EntityCache;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
/**
* This class provides an abstraction over the way constraint definitions are
* stored in Wikidata.
@ -87,6 +77,8 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
public static String SINGLE_BEST_VALUE_CONSTRAINT_QID = "Q52060874";
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
// Constraint type id that hasMultiValue passes to getSingleConstraint.
public static String MULTI_VALUE_CONSTRAINT_QID = "Q21510857";
public static String NO_BOUNDS_CONSTRAINT_QID = "Q51723761";
public static String INTEGER_VALUED_CONSTRAINT_QID = "Q52848401";
@ -209,6 +201,11 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
}
/**
* Reports whether a constraint of type MULTI_VALUE_CONSTRAINT_QID is found
* for the given property.
*
* @param pid
*            the property to look up
* @return true when the constraint lookup yields a non-null result
*/
@Override
public boolean hasMultiValue(PropertyIdValue pid) {
// Only the presence of the constraint matters here, not its contents.
return getSingleConstraint(pid, MULTI_VALUE_CONSTRAINT_QID) != null;
}
@Override
public boolean isSymmetric(PropertyIdValue pid) {
return getSingleConstraint(pid, SYMMETRIC_CONSTRAINT_QID) != null;

View File

@ -0,0 +1,50 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import java.util.HashMap;
import java.util.Map;
/**
 * Scrutinizer that looks at the statements added by an item update and raises
 * an issue for every property that the constraint fetcher reports as
 * multi-valued but for which the update adds exactly one statement.
 *
 * Updates on new items are reported with {@link #new_type} at WARNING
 * severity; updates on existing items are reported with {@link #existing_type}
 * at INFO severity.
 */
public class MultiValueScrutinizer extends EditScrutinizer {

    /** Issue type raised when the scrutinized update is for a new item. */
    public static final String new_type = "multi-valued-property-is-required-for-new-item";
    /** Issue type raised when the scrutinized update is for an existing item. */
    public static final String existing_type = "multi-valued-property-is-required-for-existing-item";

    /**
     * Counts the added statements per multi-valued property and reports each
     * such property that received a single statement.
     *
     * @param update
     *            the item update to inspect
     */
    @Override
    public void scrutinize(ItemUpdate update) {
        Map<PropertyIdValue, Integer> propertyCount = new HashMap<>();
        for (Statement statement : update.getAddedStatements()) {
            PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
            // Only properties carrying the constraint are tracked. The containsKey
            // test comes first so the fetcher is not queried again for a property
            // that is already known to be multi-valued.
            if (propertyCount.containsKey(pid) || _fetcher.hasMultiValue(pid)) {
                propertyCount.merge(pid, 1, Integer::sum);
            }
        }

        // The two cases differ only in issue type and severity, so decide both
        // once and share the reporting loop.
        boolean isNew = update.isNew();
        String issueType = isNew ? new_type : existing_type;
        QAWarning.Severity severity = isNew ? QAWarning.Severity.WARNING : QAWarning.Severity.INFO;

        for (Map.Entry<PropertyIdValue, Integer> entry : propertyCount.entrySet()) {
            if (entry.getValue() == 1) {
                PropertyIdValue pid = entry.getKey();
                QAWarning issue = new QAWarning(issueType, pid.getId(), severity, 1);
                issue.setProperty("property_entity", pid);
                issue.setProperty("example_entity", update.getItemId());
                addIssue(issue);
            }
        }
    }
}

View File

@ -23,16 +23,16 @@
******************************************************************************/
package org.openrefine.wikidata.qa;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
import java.util.stream.Collectors;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
import java.util.stream.Collectors;
public class MockConstraintFetcher implements ConstraintFetcher {
public static PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350");
@ -116,6 +116,11 @@ public class MockConstraintFetcher implements ConstraintFetcher {
return true;
}
@Override
public boolean hasMultiValue(PropertyIdValue pid) {
// Mock behaviour: answers true for every property, regardless of pid.
return true;
}
@Override
public boolean isSymmetric(PropertyIdValue pid) {
return pid.equals(symmetricPid);

View File

@ -0,0 +1,46 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.testing.TestingData;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
/**
 * Exercises {@link MultiValueScrutinizer}: one case where no warning is
 * expected and one case for each of the two issue types it can raise.
 */
public class MultiValueScrutinizerTest extends ScrutinizerTest {

    @Override
    public EditScrutinizer getScrutinizer() {
        return new MultiValueScrutinizer();
    }

    /**
     * Builds an update on {@code subject} that adds the single statement
     * generated by the testing data for the pair {@code (subject, target)}.
     */
    private static ItemUpdate singleStatementUpdate(ItemIdValue subject, ItemIdValue target) {
        return new ItemUpdateBuilder(subject)
                .addStatement(TestingData.generateStatement(subject, target))
                .build();
    }

    /** An update adding two generated statements must not raise any warning. */
    @Test
    public void testNoIssue() {
        ItemIdValue subject = TestingData.existingId;
        ItemIdValue target = TestingData.matchedId;
        ItemUpdate twoStatements = new ItemUpdateBuilder(subject)
                .addStatement(TestingData.generateStatement(subject, target))
                .addStatement(TestingData.generateStatement(subject, target))
                .build();
        scrutinize(twoStatements);
        assertNoWarningRaised();
    }

    /** Single-statement updates on new items raise the new-item issue type. */
    @Test
    public void testNewItemTrigger() {
        ItemIdValue first = TestingData.newIdA;
        ItemIdValue second = TestingData.newIdB;
        scrutinize(singleStatementUpdate(first, second), singleStatementUpdate(second, second));
        assertWarningsRaised(MultiValueScrutinizer.new_type);
    }

    /** Single-statement updates on existing items raise the existing-item issue type. */
    @Test
    public void testExistingItemTrigger() {
        ItemIdValue first = TestingData.existingId;
        ItemIdValue second = TestingData.matchedId;
        scrutinize(singleStatementUpdate(first, second), singleStatementUpdate(second, second));
        assertWarningsRaised(MultiValueScrutinizer.existing_type);
    }
}