Added support for Multi-value Constraint in Wikidata extension (#2629)
This commit is contained in:
parent
0690f45762
commit
29a757dc57
@ -126,6 +126,10 @@
|
|||||||
"warnings-messages/no-edit-generated/body": "There might be something wrong with your schema.",
|
"warnings-messages/no-edit-generated/body": "There might be something wrong with your schema.",
|
||||||
"warnings-messages/no-issue-detected/title": "No issue was detected in your edits.",
|
"warnings-messages/no-issue-detected/title": "No issue was detected in your edits.",
|
||||||
"warnings-messages/no-issue-detected/body": "Note that OpenRefine cannot detect all the types of problems Wikidata edits can have.",
|
"warnings-messages/no-issue-detected/body": "Note that OpenRefine cannot detect all the types of problems Wikidata edits can have.",
|
||||||
|
"warnings-messages/multi-valued-property-is-required-for-new-item/title": "{property_entity} should have more than one statement on new items.",
|
||||||
|
"warnings-messages/multi-valued-property-is-required-for-new-item/body": "This property is expected to have more than one statement on each item but it has only a single statement, for instance on {example_entity}.",
|
||||||
|
"warnings-messages/multi-valued-property-is-required-for-existing-item/title": "{property_entity} should have more than one statement on existing items.",
|
||||||
|
"warnings-messages/multi-valued-property-is-required-for-existing-item/body": "This property is expected to have more than one statement on each item but it has only a single statement, for instance on {example_entity}. If the item already has statements with this property in Wikidata, then this warning can be ignored.",
|
||||||
"warnings-messages/ignored-qualifiers/title": "Some qualifiers were ignored.",
|
"warnings-messages/ignored-qualifiers/title": "Some qualifiers were ignored.",
|
||||||
"warnings-messages/ignored-qualifiers/body": "Qualifier values could not be parsed, so they will not be added to the corresponding statements.",
|
"warnings-messages/ignored-qualifiers/body": "Qualifier values could not be parsed, so they will not be added to the corresponding statements.",
|
||||||
"warnings-messages/ignored-references/title": "Some references were ignored.",
|
"warnings-messages/ignored-references/title": "Some references were ignored.",
|
||||||
|
@ -23,12 +23,12 @@
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
package org.openrefine.wikidata.qa;
|
package org.openrefine.wikidata.qa;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An object that fetches constraints about properties.
|
* An object that fetches constraints about properties.
|
||||||
*
|
*
|
||||||
@ -115,6 +115,11 @@ public interface ConstraintFetcher {
|
|||||||
*/
|
*/
|
||||||
boolean hasDistinctValues(PropertyIdValue pid);
|
boolean hasDistinctValues(PropertyIdValue pid);
|
||||||
|
|
||||||
|
/**
 * Is this property expected to have more than one value per item?
 *
 * @param pid
 *            the property whose constraints are consulted
 * @return true when the property is expected to have more than one value
 *         per item, false otherwise
 */
|
||||||
|
boolean hasMultiValue(PropertyIdValue pid);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Can statements using this property have uncertainty bounds?
|
* Can statements using this property have uncertainty bounds?
|
||||||
*/
|
*/
|
||||||
|
@ -23,17 +23,17 @@
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
package org.openrefine.wikidata.qa;
|
package org.openrefine.wikidata.qa;
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.*;
|
import org.openrefine.wikidata.qa.scrutinizers.*;
|
||||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
|
import org.openrefine.wikidata.updates.scheduler.WikibaseAPIUpdateScheduler;
|
||||||
import org.openrefine.wikidata.utils.EntityCache;
|
import org.openrefine.wikidata.utils.EntityCache;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Runs a collection of edit scrutinizers on an edit batch.
|
* Runs a collection of edit scrutinizers on an edit batch.
|
||||||
*
|
*
|
||||||
@ -69,6 +69,7 @@ public class EditInspector {
|
|||||||
register(new CalendarScrutinizer());
|
register(new CalendarScrutinizer());
|
||||||
register(new CommonDescriptionScrutinizer());
|
register(new CommonDescriptionScrutinizer());
|
||||||
register(new EnglishDescriptionScrutinizer());
|
register(new EnglishDescriptionScrutinizer());
|
||||||
|
register(new MultiValueScrutinizer());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -23,26 +23,16 @@
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
package org.openrefine.wikidata.qa;
|
package org.openrefine.wikidata.qa;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.utils.EntityCache;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.*;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.openrefine.wikidata.utils.EntityCache;
|
|
||||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class provides an abstraction over the way constraint definitions are
|
* This class provides an abstraction over the way constraint definitions are
|
||||||
* stored in Wikidata.
|
* stored in Wikidata.
|
||||||
@ -86,7 +76,9 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
|
|||||||
public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
|
public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
|
||||||
public static String SINGLE_BEST_VALUE_CONSTRAINT_QID = "Q52060874";
|
public static String SINGLE_BEST_VALUE_CONSTRAINT_QID = "Q52060874";
|
||||||
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
|
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
|
||||||
|
|
||||||
|
public static String MULTI_VALUE_CONSTRAINT_QID = "Q21510857";
|
||||||
|
|
||||||
public static String NO_BOUNDS_CONSTRAINT_QID = "Q51723761";
|
public static String NO_BOUNDS_CONSTRAINT_QID = "Q51723761";
|
||||||
public static String INTEGER_VALUED_CONSTRAINT_QID = "Q52848401";
|
public static String INTEGER_VALUED_CONSTRAINT_QID = "Q52848401";
|
||||||
|
|
||||||
@ -208,7 +200,12 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
|
|||||||
public boolean hasDistinctValues(PropertyIdValue pid) {
|
public boolean hasDistinctValues(PropertyIdValue pid) {
|
||||||
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
|
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasMultiValue(PropertyIdValue pid) {
|
||||||
|
return getSingleConstraint(pid, MULTI_VALUE_CONSTRAINT_QID) != null;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isSymmetric(PropertyIdValue pid) {
|
public boolean isSymmetric(PropertyIdValue pid) {
|
||||||
return getSingleConstraint(pid, SYMMETRIC_CONSTRAINT_QID) != null;
|
return getSingleConstraint(pid, SYMMETRIC_CONSTRAINT_QID) != null;
|
||||||
|
@ -0,0 +1,50 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.QAWarning;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class MultiValueScrutinizer extends EditScrutinizer {
|
||||||
|
|
||||||
|
public static final String new_type = "multi-valued-property-is-required-for-new-item";
|
||||||
|
public static final String existing_type = "multi-valued-property-is-required-for-existing-item";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void scrutinize(ItemUpdate update) {
|
||||||
|
Map<PropertyIdValue, Integer> propertyCount = new HashMap<>();
|
||||||
|
|
||||||
|
for (Statement statement : update.getAddedStatements()) {
|
||||||
|
PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
|
||||||
|
if (propertyCount.containsKey(pid)) {
|
||||||
|
propertyCount.put(pid, propertyCount.get(pid) + 1);
|
||||||
|
} else if (_fetcher.hasMultiValue(pid)) {
|
||||||
|
propertyCount.put(pid, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (update.isNew()) {
|
||||||
|
for (PropertyIdValue pid : propertyCount.keySet()) {
|
||||||
|
if (propertyCount.get(pid) == 1) {
|
||||||
|
QAWarning issue = new QAWarning(new_type, pid.getId(), QAWarning.Severity.WARNING, 1);
|
||||||
|
issue.setProperty("property_entity", pid);
|
||||||
|
issue.setProperty("example_entity", update.getItemId());
|
||||||
|
addIssue(issue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (PropertyIdValue pid : propertyCount.keySet()) {
|
||||||
|
if (propertyCount.get(pid) == 1) {
|
||||||
|
QAWarning issue = new QAWarning(existing_type, pid.getId(), QAWarning.Severity.INFO, 1);
|
||||||
|
issue.setProperty("property_entity", pid);
|
||||||
|
issue.setProperty("example_entity", update.getItemId());
|
||||||
|
addIssue(issue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
@ -23,16 +23,16 @@
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
package org.openrefine.wikidata.qa;
|
package org.openrefine.wikidata.qa;
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public class MockConstraintFetcher implements ConstraintFetcher {
|
public class MockConstraintFetcher implements ConstraintFetcher {
|
||||||
|
|
||||||
public static PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350");
|
public static PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350");
|
||||||
@ -116,6 +116,11 @@ public class MockConstraintFetcher implements ConstraintFetcher {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Mock answer: every property is reported as expecting multiple values,
 * whatever pid is passed in.
 */
@Override
|
||||||
|
public boolean hasMultiValue(PropertyIdValue pid) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isSymmetric(PropertyIdValue pid) {
|
public boolean isSymmetric(PropertyIdValue pid) {
|
||||||
return pid.equals(symmetricPid);
|
return pid.equals(symmetricPid);
|
||||||
|
@ -0,0 +1,46 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingData;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
|
||||||
|
public class MultiValueScrutinizerTest extends ScrutinizerTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new MultiValueScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoIssue() {
|
||||||
|
ItemIdValue idA = TestingData.existingId;
|
||||||
|
ItemIdValue idB = TestingData.matchedId;
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(idA).addStatement(TestingData.generateStatement(idA, idB))
|
||||||
|
.addStatement(TestingData.generateStatement(idA, idB)).build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNewItemTrigger() {
|
||||||
|
ItemIdValue idA = TestingData.newIdA;
|
||||||
|
ItemIdValue idB = TestingData.newIdB;
|
||||||
|
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(TestingData.generateStatement(idA, idB)).build();
|
||||||
|
ItemUpdate updateB = new ItemUpdateBuilder(idB).addStatement(TestingData.generateStatement(idB, idB)).build();
|
||||||
|
scrutinize(updateA, updateB);
|
||||||
|
assertWarningsRaised(MultiValueScrutinizer.new_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExistingItemTrigger() {
|
||||||
|
ItemIdValue idA = TestingData.existingId;
|
||||||
|
ItemIdValue idB = TestingData.matchedId;
|
||||||
|
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(TestingData.generateStatement(idA, idB)).build();
|
||||||
|
ItemUpdate updateB = new ItemUpdateBuilder(idB).addStatement(TestingData.generateStatement(idB, idB)).build();
|
||||||
|
scrutinize(updateA, updateB);
|
||||||
|
assertWarningsRaised(MultiValueScrutinizer.existing_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user