Added support for Multi-value Constraint in Wikidata extension (#2629)
This commit is contained in:
parent
0690f45762
commit
29a757dc57
@ -126,6 +126,10 @@
|
||||
"warnings-messages/no-edit-generated/body": "There might be something wrong with your schema.",
|
||||
"warnings-messages/no-issue-detected/title": "No issue was detected in your edits.",
|
||||
"warnings-messages/no-issue-detected/body": "Note that OpenRefine cannot detect all the types of problems Wikidata edits can have.",
|
||||
"warnings-messages/multi-valued-property-is-required-for-new-item/title": "{property_entity} should have more than one statement on new items.",
|
||||
"warnings-messages/multi-valued-property-is-required-for-new-item/body": "This property is expected to have more than one statement on each item but it has only a single statement, for instance on {example_entity}.",
|
||||
"warnings-messages/multi-valued-property-is-required-for-existing-item/title": "{property_entity} should have more than one statement on existing items.",
|
||||
"warnings-messages/multi-valued-property-is-required-for-existing-item/body": "This property is expected to have more than one statement on each item but it has only a single statement, for instance on {example_entity}. If the item already has statements with this property in Wikidata, then this warning can be ignored.",
|
||||
"warnings-messages/ignored-qualifiers/title": "Some qualifiers were ignored.",
|
||||
"warnings-messages/ignored-qualifiers/body": "Qualifier values could not be parsed, so they will not be added to the corresponding statements.",
|
||||
"warnings-messages/ignored-references/title": "Some references were ignored.",
|
||||
|
@ -23,12 +23,12 @@
|
||||
******************************************************************************/
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* An object that fetches constraints about properties.
|
||||
*
|
||||
@ -115,6 +115,11 @@ public interface ConstraintFetcher {
|
||||
*/
|
||||
boolean hasDistinctValues(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Is this property expected to have more than one value per item?
|
||||
*/
|
||||
boolean hasMultiValue(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Can statements using this property have uncertainty bounds?
|
||||
*/
|
||||
|
@ -23,17 +23,17 @@
|
||||
******************************************************************************/
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.openrefine.wikidata.qa.scrutinizers.*;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
|
||||
import org.openrefine.wikidata.utils.EntityCache;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Runs a collection of edit scrutinizers on an edit batch.
|
||||
*
|
||||
@ -69,6 +69,7 @@ public class EditInspector {
|
||||
register(new CalendarScrutinizer());
|
||||
register(new CommonDescriptionScrutinizer());
|
||||
register(new EnglishDescriptionScrutinizer());
|
||||
register(new MultiValueScrutinizer());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -23,26 +23,16 @@
|
||||
******************************************************************************/
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import org.openrefine.wikidata.utils.EntityCache;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.openrefine.wikidata.utils.EntityCache;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
/**
|
||||
* This class provides an abstraction over the way constraint definitions are
|
||||
* stored in Wikidata.
|
||||
@ -87,6 +77,8 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
|
||||
public static String SINGLE_BEST_VALUE_CONSTRAINT_QID = "Q52060874";
|
||||
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
|
||||
|
||||
public static String MULTI_VALUE_CONSTRAINT_QID = "Q21510857";
|
||||
|
||||
public static String NO_BOUNDS_CONSTRAINT_QID = "Q51723761";
|
||||
public static String INTEGER_VALUED_CONSTRAINT_QID = "Q52848401";
|
||||
|
||||
@ -209,6 +201,11 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
|
||||
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasMultiValue(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, MULTI_VALUE_CONSTRAINT_QID) != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSymmetric(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, SYMMETRIC_CONSTRAINT_QID) != null;
|
||||
|
@ -0,0 +1,50 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class MultiValueScrutinizer extends EditScrutinizer {
|
||||
|
||||
public static final String new_type = "multi-valued-property-is-required-for-new-item";
|
||||
public static final String existing_type = "multi-valued-property-is-required-for-existing-item";
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update) {
|
||||
Map<PropertyIdValue, Integer> propertyCount = new HashMap<>();
|
||||
|
||||
for (Statement statement : update.getAddedStatements()) {
|
||||
PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
|
||||
if (propertyCount.containsKey(pid)) {
|
||||
propertyCount.put(pid, propertyCount.get(pid) + 1);
|
||||
} else if (_fetcher.hasMultiValue(pid)) {
|
||||
propertyCount.put(pid, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (update.isNew()) {
|
||||
for (PropertyIdValue pid : propertyCount.keySet()) {
|
||||
if (propertyCount.get(pid) == 1) {
|
||||
QAWarning issue = new QAWarning(new_type, pid.getId(), QAWarning.Severity.WARNING, 1);
|
||||
issue.setProperty("property_entity", pid);
|
||||
issue.setProperty("example_entity", update.getItemId());
|
||||
addIssue(issue);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (PropertyIdValue pid : propertyCount.keySet()) {
|
||||
if (propertyCount.get(pid) == 1) {
|
||||
QAWarning issue = new QAWarning(existing_type, pid.getId(), QAWarning.Severity.INFO, 1);
|
||||
issue.setProperty("property_entity", pid);
|
||||
issue.setProperty("example_entity", update.getItemId());
|
||||
addIssue(issue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -23,16 +23,16 @@
|
||||
******************************************************************************/
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class MockConstraintFetcher implements ConstraintFetcher {
|
||||
|
||||
public static PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350");
|
||||
@ -116,6 +116,11 @@ public class MockConstraintFetcher implements ConstraintFetcher {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasMultiValue(PropertyIdValue pid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSymmetric(PropertyIdValue pid) {
|
||||
return pid.equals(symmetricPid);
|
||||
|
@ -0,0 +1,46 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingData;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
|
||||
public class MultiValueScrutinizerTest extends ScrutinizerTest {
|
||||
|
||||
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new MultiValueScrutinizer();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoIssue() {
|
||||
ItemIdValue idA = TestingData.existingId;
|
||||
ItemIdValue idB = TestingData.matchedId;
|
||||
ItemUpdate update = new ItemUpdateBuilder(idA).addStatement(TestingData.generateStatement(idA, idB))
|
||||
.addStatement(TestingData.generateStatement(idA, idB)).build();
|
||||
scrutinize(update);
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNewItemTrigger() {
|
||||
ItemIdValue idA = TestingData.newIdA;
|
||||
ItemIdValue idB = TestingData.newIdB;
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(TestingData.generateStatement(idA, idB)).build();
|
||||
ItemUpdate updateB = new ItemUpdateBuilder(idB).addStatement(TestingData.generateStatement(idB, idB)).build();
|
||||
scrutinize(updateA, updateB);
|
||||
assertWarningsRaised(MultiValueScrutinizer.new_type);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExistingItemTrigger() {
|
||||
ItemIdValue idA = TestingData.existingId;
|
||||
ItemIdValue idB = TestingData.matchedId;
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(TestingData.generateStatement(idA, idB)).build();
|
||||
ItemUpdate updateB = new ItemUpdateBuilder(idB).addStatement(TestingData.generateStatement(idB, idB)).build();
|
||||
scrutinize(updateA, updateB);
|
||||
assertWarningsRaised(MultiValueScrutinizer.existing_type);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user