New implementation for Conflicts-With Scrutinizer (#2693)

part of #2354
This commit is contained in:
Ekta Mishra 2020-06-19 13:48:21 +05:30 committed by GitHub
parent f88c0e3657
commit 45fab05bf6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 106 additions and 76 deletions

View File

@ -27,10 +27,10 @@ import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue; import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
import org.wikidata.wdtk.datamodel.interfaces.Value; import org.wikidata.wdtk.datamodel.interfaces.Value;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.stream.Stream;
/** /**
* An object that fetches constraints about properties. * An object that fetches constraints about properties.
@ -53,7 +53,7 @@ public interface ConstraintFetcher {
/** /**
* Retrieves the property that is the inverse of a given property * Retrieves the property that is the inverse of a given property
* *
* @param pid: * @param pid
* the property to retrieve the inverse for * the property to retrieve the inverse for
* @return the pid of the inverse property * @return the pid of the inverse property
*/ */
@ -143,6 +143,17 @@ public interface ConstraintFetcher {
*/ */
boolean usableOnItems(PropertyIdValue pid); boolean usableOnItems(PropertyIdValue pid);
/**
 * Gets the constraint statements of a particular type defined on a property.
 * <p>
 * The result is handed back as a stream, not a list: callers that need to
 * traverse it more than once should collect it first.
 *
 * @param pid
 *            the property to retrieve the constraints for
 * @param qid
 *            the identifier of the constraint type to look for
 * @return the stream of matching constraint statements
 */
Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid);
/** /**
* Retrieves the lower bound of the range * Retrieves the lower bound of the range
* required in difference-within-range constraint * required in difference-within-range constraint
@ -165,7 +176,7 @@ public interface ConstraintFetcher {
* Retrieves the lower value property for calculating the difference * Retrieves the lower value property for calculating the difference
* required in difference-within-range constraint * required in difference-within-range constraint
* *
* @param pid: * @param pid
* the property to calculate difference with * the property to calculate difference with
* @return the pid of the lower bound property * @return the pid of the lower bound property
*/ */
@ -177,13 +188,4 @@ public interface ConstraintFetcher {
*/ */
boolean hasDiffWithinRange(PropertyIdValue pid); boolean hasDiffWithinRange(PropertyIdValue pid);
/**
 * Returns the map of all the conflicting properties and their item values.
 *
 * @param pid
 *            the property having the conflicts-with constraint
 * @return a map from each conflicting property to the list of item values
 *         attached to it in the constraint
 */
Map<PropertyIdValue, List<Value>> getParamConflictsWith(PropertyIdValue pid);
} }

View File

@ -30,9 +30,7 @@ import org.wikidata.wdtk.datamodel.interfaces.*;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.HashMap;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
@ -97,10 +95,6 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
public static String ALLOWED_ITEM_TYPE_QID = "Q29934200"; public static String ALLOWED_ITEM_TYPE_QID = "Q29934200";
public static String ALLOWED_ENTITY_TYPES_PID = "P2305"; public static String ALLOWED_ENTITY_TYPES_PID = "P2305";
public static String CONFLICTS_WITH_CONSTRAINT_QID = "Q21502838";
public static String CONFLICTS_WITH_PROPERTY_PID = "P2306";
public static String ITEM_OF_PROPERTY_CONSTRAINT_PID = "P2305";
// The following constraints still need to be implemented: // The following constraints still need to be implemented:
public static String TYPE_CONSTRAINT_QID = "Q21503250"; public static String TYPE_CONSTRAINT_QID = "Q21503250";
@ -279,9 +273,9 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
* Returns a single constraint for a particular type and a property, or null if * Returns a single constraint for a particular type and a property, or null if
* there is no such constraint * there is no such constraint
* *
* @param pid: * @param pid
* the property to retrieve the constraints for * the property to retrieve the constraints for
* @param qid: * @param qid
* the type of the constraints * the type of the constraints
* @return the list of qualifiers for the constraint, or null if it does not * @return the list of qualifiers for the constraint, or null if it does not
* exist * exist
@ -297,13 +291,14 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
/** /**
* Gets the list of constraints of a particular type for a property * Gets the list of constraints of a particular type for a property
* *
* @param pid: * @param pid
* the property to retrieve the constraints for * the property to retrieve the constraints for
* @param qid: * @param qid
* the type of the constraints * the type of the constraints
* @return the stream of matching constraint statements * @return the stream of matching constraint statements
*/ */
protected Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) { @Override
public Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
Stream<Statement> allConstraints = getConstraintStatements(pid).stream() Stream<Statement> allConstraints = getConstraintStatements(pid).stream()
.filter(s -> s.getValue() != null && ((EntityIdValue) s.getValue()).getId().equals(qid)) .filter(s -> s.getValue() != null && ((EntityIdValue) s.getValue()).getId().equals(qid))
.filter(s -> !StatementRank.DEPRECATED.equals(s.getRank())); .filter(s -> !StatementRank.DEPRECATED.equals(s.getRank()));
@ -314,7 +309,7 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
* Gets all the constraint statements for a given property * Gets all the constraint statements for a given property
* *
* @param pid * @param pid
* : the id of the property to retrieve the constraints for * the id of the property to retrieve the constraints for
* @return the list of constraint statements * @return the list of constraint statements
*/ */
protected List<Statement> getConstraintStatements(PropertyIdValue pid) { protected List<Statement> getConstraintStatements(PropertyIdValue pid) {
@ -332,9 +327,9 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
/** /**
* Returns the values of a given property in qualifiers * Returns the values of a given property in qualifiers
* *
* @param groups: * @param groups
* the qualifiers * the qualifiers
* @param pid: * @param pid
* the property to filter on * the property to filter on
* @return * @return
*/ */
@ -373,7 +368,7 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
* Retrieves the lower value property for calculating the difference * Retrieves the lower value property for calculating the difference
* required in difference-within-range constraint * required in difference-within-range constraint
* *
* @param pid: * @param pid
* the property to calculate difference with * the property to calculate difference with
* @return the pid of the lower bound property * @return the pid of the lower bound property
*/ */
@ -428,36 +423,4 @@ public class WikidataConstraintFetcher implements ConstraintFetcher {
return null; return null;
} }
/**
 * Collects, for every conflicts-with constraint declared on a property, the
 * property it conflicts with and the item values listed alongside it.
 * <p>
 * A constraint statement that names no conflicting property is left out of
 * the result. When several statements name the same conflicting property,
 * the entry of the last one processed is the one that is kept.
 *
 * @param pid
 *            the property having the conflicts-with constraint
 * @return a map from each conflicting property to the item values given in
 *         the qualifiers of its constraint statement (possibly empty)
 */
@Override
public Map<PropertyIdValue, List<Value>> getParamConflictsWith(PropertyIdValue pid) {
    List<Statement> constraints = getConstraintsByType(pid, CONFLICTS_WITH_CONSTRAINT_QID)
            .collect(Collectors.toList());
    Map<PropertyIdValue, List<Value>> conflicts = new HashMap<>();
    for (Statement constraint : constraints) {
        PropertyIdValue target = null;
        List<Value> listedItems = new ArrayList<>();
        for (SnakGroup qualifierGroup : constraint.getClaim().getQualifiers()) {
            for (Snak qualifier : qualifierGroup.getSnaks()) {
                String qualifierPid = qualifierGroup.getProperty().getId();
                // qualifier naming the property this one conflicts with
                if (qualifierPid.equals(CONFLICTS_WITH_PROPERTY_PID)) {
                    target = (PropertyIdValue) qualifier.getValue();
                }
                // qualifier listing a value of that property
                if (qualifierPid.equals(ITEM_OF_PROPERTY_CONSTRAINT_PID)) {
                    listedItems.add(qualifier.getValue());
                }
            }
        }
        if (target == null) {
            continue;
        }
        conflicts.put(target, listedItems);
    }
    return conflicts;
}
} }

View File

@ -3,18 +3,47 @@ package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.qa.QAWarning; import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.updates.ItemUpdate; import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.Value; import org.wikidata.wdtk.datamodel.interfaces.Value;
import java.util.Set; import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
public class ConflictsWithScrutinizer extends EditScrutinizer { public class ConflictsWithScrutinizer extends EditScrutinizer {
public static final String type = "having-conflicts-with-statements"; public static final String type = "having-conflicts-with-statements";
// Identifier of the constraint type handled by this scrutinizer.
public static final String CONFLICTS_WITH_CONSTRAINT_QID = "Q21502838";
// Qualifier property whose value is the property that must not co-occur.
public static final String CONFLICTS_WITH_PROPERTY_PID = "P2306";
// Qualifier property whose values are collected into the constraint's item list.
public static final String ITEM_OF_PROPERTY_CONSTRAINT_PID = "P2305";
class ConflictsWithConstraint {
final PropertyIdValue conflictingPid;
final List<Value> itemList;
ConflictsWithConstraint(Statement statement) {
List<SnakGroup> specs = statement.getClaim().getQualifiers();
PropertyIdValue pid = null;
this.itemList = new ArrayList<>();
for(SnakGroup group : specs) {
for (Snak snak : group.getSnaks()) {
if (group.getProperty().getId().equals(CONFLICTS_WITH_PROPERTY_PID)){
pid = (PropertyIdValue) snak.getValue();
}
if (group.getProperty().getId().equals(ITEM_OF_PROPERTY_CONSTRAINT_PID)){
this.itemList.add(snak.getValue());
}
}
}
this.conflictingPid = pid;
}
}
@Override @Override
public void scrutinize(ItemUpdate update) { public void scrutinize(ItemUpdate update) {
@ -36,9 +65,12 @@ public class ConflictsWithScrutinizer extends EditScrutinizer {
} }
for(PropertyIdValue propertyId : propertyIdValueValueMap.keySet()){ for(PropertyIdValue propertyId : propertyIdValueValueMap.keySet()){
Map<PropertyIdValue, List<Value>> conflictingPropertyMap = _fetcher.getParamConflictsWith(propertyId); List<Statement> statementList = _fetcher.getConstraintsByType(propertyId, CONFLICTS_WITH_CONSTRAINT_QID).collect(Collectors.toList());
for (PropertyIdValue conflictingPid : conflictingPropertyMap.keySet()) { for (Statement statement : statementList) {
if (propertyIdValueValueMap.containsKey(conflictingPid) && raiseWarning(propertyIdValueValueMap, conflictingPid, conflictingPropertyMap)) { ConflictsWithConstraint constraint = new ConflictsWithConstraint(statement);
PropertyIdValue conflictingPid = constraint.conflictingPid;
List<Value> itemList = constraint.itemList;
if (propertyIdValueValueMap.containsKey(conflictingPid) && raiseWarning(propertyIdValueValueMap, conflictingPid, itemList)) {
QAWarning issue = new QAWarning(type, propertyId.getId()+conflictingPid.getId(), QAWarning.Severity.WARNING, 1); QAWarning issue = new QAWarning(type, propertyId.getId()+conflictingPid.getId(), QAWarning.Severity.WARNING, 1);
issue.setProperty("property_entity", propertyId); issue.setProperty("property_entity", propertyId);
issue.setProperty("added_property_entity", conflictingPid); issue.setProperty("added_property_entity", conflictingPid);
@ -46,15 +78,16 @@ public class ConflictsWithScrutinizer extends EditScrutinizer {
addIssue(issue); addIssue(issue);
} }
} }
} }
} }
private boolean raiseWarning(Map<PropertyIdValue, Set<Value>> propertyIdValueValueMap, PropertyIdValue conflictingPid, Map<PropertyIdValue, List<Value>> conflictingPropertyMap) { private boolean raiseWarning(Map<PropertyIdValue, Set<Value>> propertyIdValueValueMap, PropertyIdValue conflictingPid, List<Value> itemList) {
if (conflictingPropertyMap.get(conflictingPid).isEmpty()){ if (itemList.isEmpty()){
return true; return true;
} }
for (Value value : conflictingPropertyMap.get(conflictingPid)) { for (Value value : itemList) {
if (propertyIdValueValueMap.get(conflictingPid).contains(value)){ if (propertyIdValueValueMap.get(conflictingPid).contains(value)){
return true; return true;
} }

View File

@ -24,19 +24,25 @@
package org.openrefine.wikidata.qa; package org.openrefine.wikidata.qa;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.Claim;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue; import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
import org.wikidata.wdtk.datamodel.interfaces.Reference;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
import org.wikidata.wdtk.datamodel.interfaces.Value; import org.wikidata.wdtk.datamodel.interfaces.Value;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Set;
import java.util.Map;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
public class MockConstraintFetcher implements ConstraintFetcher { public class MockConstraintFetcher implements ConstraintFetcher {
@ -211,10 +217,36 @@ public class MockConstraintFetcher implements ConstraintFetcher {
return true; return true;
} }
public Map<PropertyIdValue, List<Value>> getParamConflictsWith(PropertyIdValue pid) { @Override
Map<PropertyIdValue, List<Value>> propertyIdValueListMap = new HashMap<>(); public Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
List<Value> items = Arrays.asList(conflictingStatementValue, null); EntityIdValue entityIdValue = Datamodel.makeWikidataItemIdValue("Q21502838");
propertyIdValueListMap.put(conflictingStatementPid, items); PropertyIdValue propertyIdValue = Datamodel.makeWikidataPropertyIdValue("P2302");
return propertyIdValueListMap; Snak snak = Datamodel.makeValueSnak(propertyIdValue,entityIdValue);
PropertyIdValue property = Datamodel.makeWikidataPropertyIdValue("P2306");
Value propertyValue = Datamodel.makeWikidataPropertyIdValue("P31");
Snak snak1 = Datamodel.makeValueSnak(property, propertyValue);
List<Snak> group1 = Collections.singletonList(snak1);
PropertyIdValue item = Datamodel.makeWikidataPropertyIdValue("P2305");
Value itemValue = Datamodel.makeWikidataItemIdValue("Q5");
Snak snak2 = Datamodel.makeValueSnak(item, itemValue);
List<Snak> group2 = Collections.singletonList(snak2);
SnakGroup snakGroup1 = Datamodel.makeSnakGroup(group1);
SnakGroup snakGroup2 = Datamodel.makeSnakGroup(group2);
List<SnakGroup> listSnakGroup = Arrays.asList(snakGroup1, snakGroup2);
Claim claim = Datamodel.makeClaim(entityIdValue, snak, listSnakGroup);
Reference reference = Datamodel.makeReference(listSnakGroup);
List<Reference> referenceList = Collections.singletonList(reference);
Statement statement = Datamodel.makeStatement(claim, referenceList, StatementRank.NORMAL, "P2302$77BD7FE4-C051-4776-855C-543F0CE697D0");
List<Statement> statements = Collections.singletonList(statement);
return statements.stream()
.filter(s -> s.getValue() != null && ((EntityIdValue) s.getValue()).getId().equals(qid))
.filter(s -> !StatementRank.DEPRECATED.equals(s.getRank()));
} }
} }