Test the scrutinizers
This commit is contained in:
parent
fd7462f749
commit
973a28cc90
@ -1,60 +1,17 @@
|
||||
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.openrefine.wikidata.utils.EntityCache;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
/**
|
||||
* This class provides an abstraction over the way constraint
|
||||
* definitions are stored in Wikidata.
|
||||
* An object that fetches constraints about properties.
|
||||
*
|
||||
* @author antonin
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class ConstraintFetcher {
|
||||
public static String WIKIDATA_CONSTRAINT_PID = "P2302";
|
||||
|
||||
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
|
||||
public static String FORMAT_REGEX_PID = "P1793";
|
||||
|
||||
public static String INVERSE_CONSTRAINT_QID = "Q21510855";
|
||||
public static String INVERSE_PROPERTY_PID = "P2306";
|
||||
|
||||
public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958";
|
||||
|
||||
public static String USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID = "Q21510863";
|
||||
|
||||
public static String USED_ONLY_AS_REFERENCE_CONSTRAINT_QID = "Q21528959";
|
||||
|
||||
public static String ALLOWED_QUALIFIERS_CONSTRAINT_QID = "Q21510851";
|
||||
public static String ALLOWED_QUALIFIERS_CONSTRAINT_PID = "P2306";
|
||||
|
||||
public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856";
|
||||
public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306";
|
||||
|
||||
public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
|
||||
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
|
||||
|
||||
// The following constraints still need to be implemented:
|
||||
|
||||
public static String TYPE_CONSTRAINT_QID = "Q21503250";
|
||||
|
||||
public interface ConstraintFetcher {
|
||||
|
||||
/**
|
||||
* Retrieves the regular expression for formatting a property, or null if
|
||||
@ -62,152 +19,48 @@ public class ConstraintFetcher {
|
||||
* @param pid
|
||||
* @return the expression of a regular expression which should be compatible with java.util.regex
|
||||
*/
|
||||
public String getFormatRegex(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
|
||||
if (specs != null) {
|
||||
List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
|
||||
if (! regexes.isEmpty()) {
|
||||
return ((StringValue)regexes.get(0)).getString();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
String getFormatRegex(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Retrieves the property that is the inverse of a given property
|
||||
* @param pid: the property to retrieve the inverse for
|
||||
* @return the pid of the inverse property
|
||||
*/
|
||||
public PropertyIdValue getInversePid(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID);
|
||||
|
||||
if(specs != null) {
|
||||
List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID);
|
||||
if (! inverses.isEmpty()) {
|
||||
return (PropertyIdValue)inverses.get(0);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
PropertyIdValue getInversePid(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Is this property for values only?
|
||||
*/
|
||||
public boolean isForValuesOnly(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null;
|
||||
}
|
||||
boolean isForValuesOnly(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Is this property for qualifiers only?
|
||||
*/
|
||||
public boolean isForQualifiersOnly(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null;
|
||||
}
|
||||
boolean isForQualifiersOnly(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Is this property for references only?
|
||||
*/
|
||||
public boolean isForReferencesOnly(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null;
|
||||
}
|
||||
boolean isForReferencesOnly(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Get the list of allowed qualifiers (as property ids) for this property (null if any)
|
||||
*/
|
||||
public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_QUALIFIERS_CONSTRAINT_QID);
|
||||
|
||||
if (specs != null) {
|
||||
List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID);
|
||||
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Get the list of mandatory qualifiers (as property ids) for this property (null if any)
|
||||
*/
|
||||
public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, MANDATORY_QUALIFIERS_CONSTRAINT_QID);
|
||||
|
||||
if (specs != null) {
|
||||
List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID);
|
||||
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Is this property expected to have at most one value per item?
|
||||
*/
|
||||
public boolean hasSingleValue(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, SINGLE_VALUE_CONSTRAINT_QID) != null;
|
||||
}
|
||||
boolean hasSingleValue(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Is this property expected to have distinct values?
|
||||
*/
|
||||
public boolean hasDistinctValues(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
|
||||
}
|
||||
boolean hasDistinctValues(PropertyIdValue pid);
|
||||
|
||||
/**
|
||||
* Returns a single constraint for a particular type and a property, or null
|
||||
* if there is no such constraint
|
||||
* @param pid: the property to retrieve the constraints for
|
||||
* @param qid: the type of the constraints
|
||||
* @return the list of qualifiers for the constraint, or null if it does not exist
|
||||
*/
|
||||
protected List<SnakGroup> getSingleConstraint(PropertyIdValue pid, String qid) {
|
||||
Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null);
|
||||
if (statement != null) {
|
||||
return statement.getClaim().getQualifiers();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the list of constraints of a particular type for a property
|
||||
* @param pid: the property to retrieve the constraints for
|
||||
* @param qid: the type of the constraints
|
||||
* @return the stream of matching constraint statements
|
||||
*/
|
||||
protected Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
|
||||
Stream<Statement> allConstraints = getConstraintStatements(pid)
|
||||
.stream()
|
||||
.filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid));
|
||||
return allConstraints;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all the constraint statements for a given property
|
||||
* @param pid : the id of the property to retrieve the constraints for
|
||||
* @return the list of constraint statements
|
||||
*/
|
||||
protected List<Statement> getConstraintStatements(PropertyIdValue pid) {
|
||||
PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
|
||||
StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
|
||||
if (group != null) {
|
||||
return group.getStatements();
|
||||
} else {
|
||||
return new ArrayList<Statement>();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the values of a given property in qualifiers
|
||||
* @param groups: the qualifiers
|
||||
* @param pid: the property to filter on
|
||||
* @return
|
||||
*/
|
||||
protected List<Value> findValues(List<SnakGroup> groups, String pid) {
|
||||
List<Value> results = new ArrayList<>();
|
||||
for(SnakGroup group : groups) {
|
||||
if (group.getProperty().getId().equals(pid)) {
|
||||
for (Snak snak : group.getSnaks())
|
||||
results.add(snak.getValue());
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ import java.util.stream.Collectors;
|
||||
|
||||
import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.FormatScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.NoEditsMadeScrutinizer;
|
||||
@ -32,14 +32,16 @@ import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
public class EditInspector {
|
||||
private Map<String, EditScrutinizer> scrutinizers;
|
||||
private QAWarningStore warningStore;
|
||||
private ConstraintFetcher fetcher;
|
||||
|
||||
public EditInspector(QAWarningStore warningStore) {
|
||||
this.scrutinizers = new HashMap<>();
|
||||
this.fetcher = new WikidataConstraintFetcher();
|
||||
this.warningStore = warningStore;
|
||||
|
||||
// Register all known scrutinizers here
|
||||
register(new NewItemScrutinizer());
|
||||
register(new FormatConstraintScrutinizer());
|
||||
register(new FormatScrutinizer());
|
||||
register(new InverseConstraintScrutinizer());
|
||||
register(new SelfReferentialScrutinizer());
|
||||
register(new UnsourcedScrutinizer());
|
||||
@ -59,6 +61,7 @@ public class EditInspector {
|
||||
String key = scrutinizer.getClass().getName();
|
||||
scrutinizers.put(key, scrutinizer);
|
||||
scrutinizer.setStore(warningStore);
|
||||
scrutinizer.setFetcher(fetcher);
|
||||
}
|
||||
|
||||
|
||||
@ -69,19 +72,15 @@ public class EditInspector {
|
||||
public void inspect(List<ItemUpdate> editBatch) {
|
||||
// First, schedule them with some scheduler,
|
||||
// so that all newly created entities appear in the batch
|
||||
UpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
|
||||
try {
|
||||
editBatch = scheduler.schedule(editBatch);
|
||||
Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(editBatch);
|
||||
List<ItemUpdate> mergedUpdates = updates.values().stream().collect(Collectors.toList());
|
||||
for(EditScrutinizer scrutinizer : scrutinizers.values()) {
|
||||
scrutinizer.scrutinize(mergedUpdates);
|
||||
}
|
||||
} catch(ImpossibleSchedulingException e) {
|
||||
warningStore.addWarning(new QAWarning(
|
||||
"scheduling-failed", null, QAWarning.Severity.CRITICAL, 1));
|
||||
WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
|
||||
editBatch = scheduler.schedule(editBatch);
|
||||
Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(editBatch);
|
||||
List<ItemUpdate> mergedUpdates = updates.values().stream().collect(Collectors.toList());
|
||||
for(EditScrutinizer scrutinizer : scrutinizers.values()) {
|
||||
scrutinizer.scrutinize(mergedUpdates);
|
||||
}
|
||||
|
||||
|
||||
if (warningStore.getNbWarnings() == 0) {
|
||||
warningStore.addWarning(new QAWarning(
|
||||
"no-issue-detected", null, QAWarning.Severity.INFO, 0));
|
||||
|
@ -1,20 +1,25 @@
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.jsoup.helper.Validate;
|
||||
import org.openrefine.wikidata.utils.JacksonJsonizable;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
/**
|
||||
* A class to represent a QA warning emited by the Wikidata schema
|
||||
* A class to represent a QA warning emitted by the Wikidata schema
|
||||
* This could probably be reused at a broader scale, for instance for
|
||||
* Data Package validation.
|
||||
*
|
||||
* @author antonin
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning> {
|
||||
@ -27,42 +32,30 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
|
||||
}
|
||||
|
||||
/// The type of QA warning emitted
|
||||
private String type;
|
||||
private final String type;
|
||||
// The key for aggregation of other QA warnings together - this specializes the id
|
||||
private String bucketId;
|
||||
private final String bucketId;
|
||||
// The severity of the issue
|
||||
private Severity severity;
|
||||
private final Severity severity;
|
||||
// The number of times this issue was found
|
||||
private int count;
|
||||
private final int count;
|
||||
// Other details about the warning, that can be displayed to the user
|
||||
private Map<String,Object> properties;
|
||||
private final Map<String,Object> properties;
|
||||
|
||||
public QAWarning(String type, String bucketId, Severity severity, int count) {
|
||||
Validate.notNull(type);
|
||||
this.type = type;
|
||||
this.bucketId = bucketId;
|
||||
Validate.notNull(severity);
|
||||
this.severity = severity;
|
||||
this.count = count;
|
||||
this.properties = new HashMap<String,Object>();
|
||||
}
|
||||
|
||||
@JsonCreator
|
||||
public QAWarning(
|
||||
@JsonProperty("type") String type,
|
||||
@JsonProperty("bucket_id") String bucketId,
|
||||
@JsonProperty("severity") Severity severity,
|
||||
@JsonProperty("count") int count,
|
||||
@JsonProperty("properties") Map<String,Object> properties) {
|
||||
this.type = type;
|
||||
this.bucketId = bucketId;
|
||||
this.severity = severity;
|
||||
this.count = count;
|
||||
this.properties = properties;
|
||||
this.properties = new HashMap<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the full key for aggregation of QA warnings
|
||||
* @return
|
||||
* @return the full key for aggregation of QA warnings
|
||||
*/
|
||||
@JsonIgnore
|
||||
public String getAggregationId() {
|
||||
if (this.bucketId != null) {
|
||||
return this.type + "_" + this.bucketId;
|
||||
@ -75,12 +68,22 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
|
||||
* Aggregates another QA warning of the same aggregation id.
|
||||
* @param other
|
||||
*/
|
||||
public void aggregate(QAWarning other) {
|
||||
assert other.getAggregationId() == getAggregationId();
|
||||
this.count += other.getCount();
|
||||
if(this.severity.compareTo(other.getSeverity()) < 0) {
|
||||
this.severity = other.getSeverity();
|
||||
public QAWarning aggregate(QAWarning other) {
|
||||
assert other.getAggregationId().equals(getAggregationId());
|
||||
int newCount = count+other.getCount();
|
||||
Severity newSeverity = severity;
|
||||
if (other.getSeverity().compareTo(severity) > 0) {
|
||||
newSeverity = other.getSeverity();
|
||||
}
|
||||
QAWarning merged = new QAWarning(getType(), getBucketId(), newSeverity,
|
||||
newCount);
|
||||
for(Entry<String,Object> entry : properties.entrySet()) {
|
||||
merged.setProperty(entry.getKey(),entry.getValue());
|
||||
}
|
||||
for(Entry<String,Object> entry : other.getProperties().entrySet()) {
|
||||
merged.setProperty(entry.getKey(),entry.getValue());
|
||||
}
|
||||
return merged;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -114,6 +117,7 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
|
||||
}
|
||||
|
||||
@JsonProperty("properties")
|
||||
@JsonInclude(JsonInclude.Include.NON_EMPTY)
|
||||
public Map<String,Object> getProperties() {
|
||||
return properties;
|
||||
}
|
||||
@ -125,4 +129,17 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
|
||||
public int compareTo(QAWarning other) {
|
||||
return - severity.compareTo(other.getSeverity());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == null || !QAWarning.class.isInstance(other)) {
|
||||
return false;
|
||||
}
|
||||
QAWarning otherWarning = (QAWarning)other;
|
||||
return type.equals(otherWarning.getType()) &&
|
||||
bucketId.equals(otherWarning.getBucketId()) &&
|
||||
severity.equals(otherWarning.getSeverity()) &&
|
||||
count == otherWarning.getCount() &&
|
||||
properties.equals(otherWarning.getProperties());
|
||||
}
|
||||
}
|
||||
|
@ -6,16 +6,21 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
/**
|
||||
* A store for QA warnings which aggregates them by type.
|
||||
* @author antonin
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*/
|
||||
public class QAWarningStore {
|
||||
|
||||
@JsonIgnore
|
||||
private Map<String, QAWarning> map;
|
||||
@JsonIgnore
|
||||
private QAWarning.Severity maxSeverity;
|
||||
@JsonIgnore
|
||||
private int totalWarnings;
|
||||
|
||||
public QAWarningStore() {
|
||||
@ -36,7 +41,7 @@ public class QAWarningStore {
|
||||
totalWarnings += warning.getCount();
|
||||
if (map.containsKey(aggregationKey)) {
|
||||
QAWarning existing = map.get(aggregationKey);
|
||||
existing.aggregate(warning);
|
||||
map.put(aggregationKey, existing.aggregate(warning));
|
||||
} else {
|
||||
map.put(aggregationKey, warning);
|
||||
}
|
||||
|
@ -0,0 +1,190 @@
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.openrefine.wikidata.utils.EntityCache;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
/**
|
||||
* This class provides an abstraction over the way constraint
|
||||
* definitions are stored in Wikidata.
|
||||
*
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
public class WikidataConstraintFetcher implements ConstraintFetcher {
|
||||
public static String WIKIDATA_CONSTRAINT_PID = "P2302";
|
||||
|
||||
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
|
||||
public static String FORMAT_REGEX_PID = "P1793";
|
||||
|
||||
public static String INVERSE_CONSTRAINT_QID = "Q21510855";
|
||||
public static String INVERSE_PROPERTY_PID = "P2306";
|
||||
|
||||
public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958";
|
||||
|
||||
public static String USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID = "Q21510863";
|
||||
|
||||
public static String USED_ONLY_AS_REFERENCE_CONSTRAINT_QID = "Q21528959";
|
||||
|
||||
public static String ALLOWED_QUALIFIERS_CONSTRAINT_QID = "Q21510851";
|
||||
public static String ALLOWED_QUALIFIERS_CONSTRAINT_PID = "P2306";
|
||||
|
||||
public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856";
|
||||
public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306";
|
||||
|
||||
public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
|
||||
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
|
||||
|
||||
// The following constraints still need to be implemented:
|
||||
|
||||
public static String TYPE_CONSTRAINT_QID = "Q21503250";
|
||||
|
||||
|
||||
@Override
|
||||
public String getFormatRegex(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
|
||||
if (specs != null) {
|
||||
List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
|
||||
if (! regexes.isEmpty()) {
|
||||
return ((StringValue)regexes.get(0)).getString();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PropertyIdValue getInversePid(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID);
|
||||
|
||||
if(specs != null) {
|
||||
List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID);
|
||||
if (! inverses.isEmpty()) {
|
||||
return (PropertyIdValue)inverses.get(0);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isForValuesOnly(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isForQualifiersOnly(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isForReferencesOnly(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_QUALIFIERS_CONSTRAINT_QID);
|
||||
|
||||
if (specs != null) {
|
||||
List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID);
|
||||
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, MANDATORY_QUALIFIERS_CONSTRAINT_QID);
|
||||
|
||||
if (specs != null) {
|
||||
List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID);
|
||||
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasSingleValue(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, SINGLE_VALUE_CONSTRAINT_QID) != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasDistinctValues(PropertyIdValue pid) {
|
||||
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a single constraint for a particular type and a property, or null
|
||||
* if there is no such constraint
|
||||
* @param pid: the property to retrieve the constraints for
|
||||
* @param qid: the type of the constraints
|
||||
* @return the list of qualifiers for the constraint, or null if it does not exist
|
||||
*/
|
||||
protected List<SnakGroup> getSingleConstraint(PropertyIdValue pid, String qid) {
|
||||
Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null);
|
||||
if (statement != null) {
|
||||
return statement.getClaim().getQualifiers();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the list of constraints of a particular type for a property
|
||||
* @param pid: the property to retrieve the constraints for
|
||||
* @param qid: the type of the constraints
|
||||
* @return the stream of matching constraint statements
|
||||
*/
|
||||
protected Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
|
||||
Stream<Statement> allConstraints = getConstraintStatements(pid)
|
||||
.stream()
|
||||
.filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid));
|
||||
return allConstraints;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all the constraint statements for a given property
|
||||
* @param pid : the id of the property to retrieve the constraints for
|
||||
* @return the list of constraint statements
|
||||
*/
|
||||
protected List<Statement> getConstraintStatements(PropertyIdValue pid) {
|
||||
PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
|
||||
StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
|
||||
if (group != null) {
|
||||
return group.getStatements();
|
||||
} else {
|
||||
return new ArrayList<Statement>();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the values of a given property in qualifiers
|
||||
* @param groups: the qualifiers
|
||||
* @param pid: the property to filter on
|
||||
* @return
|
||||
*/
|
||||
protected List<Value> findValues(List<SnakGroup> groups, String pid) {
|
||||
List<Value> results = new ArrayList<>();
|
||||
for(SnakGroup group : groups) {
|
||||
if (group.getProperty().getId().equals(pid)) {
|
||||
for (Snak snak : group.getSnaks())
|
||||
results.add(snak.getValue());
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
@ -15,11 +15,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
* A scrutinizer that checks for properties using the same value
|
||||
* on different items.
|
||||
*
|
||||
* @author antonin
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class DistinctValuesScrutinizer extends StatementScrutinizer {
|
||||
|
||||
public final static String type = "identical-values-for-distinct-valued-property";
|
||||
|
||||
private Map<PropertyIdValue, Map<Value, EntityIdValue>> _seenValues;
|
||||
|
||||
public DistinctValuesScrutinizer() {
|
||||
@ -39,7 +41,7 @@ public class DistinctValuesScrutinizer extends StatementScrutinizer {
|
||||
if (seen.containsKey(mainSnakValue)) {
|
||||
EntityIdValue otherId = seen.get(mainSnakValue);
|
||||
QAWarning issue = new QAWarning(
|
||||
"identical-values-for-distinct-valued-property",
|
||||
type,
|
||||
pid.getId(),
|
||||
QAWarning.Severity.IMPORTANT,
|
||||
1);
|
||||
|
@ -2,6 +2,7 @@ package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.openrefine.wikidata.qa.WikidataConstraintFetcher;
|
||||
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.qa.QAWarning.Severity;
|
||||
@ -9,9 +10,9 @@ import org.openrefine.wikidata.qa.QAWarningStore;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
|
||||
/**
|
||||
* Interface for any class that
|
||||
* @author antonin
|
||||
* Inspects an edit batch and emits warnings.
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*/
|
||||
public abstract class EditScrutinizer {
|
||||
|
||||
@ -19,13 +20,18 @@ public abstract class EditScrutinizer {
|
||||
protected ConstraintFetcher _fetcher;
|
||||
|
||||
public EditScrutinizer() {
|
||||
_fetcher = new ConstraintFetcher();
|
||||
_fetcher = null;
|
||||
_store = null;
|
||||
}
|
||||
|
||||
public void setStore(QAWarningStore store) {
|
||||
_store = store;
|
||||
}
|
||||
|
||||
public void setFetcher(ConstraintFetcher fetcher) {
|
||||
_fetcher = fetcher;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the candidate edits and emits warnings in the store
|
||||
* @param edit: the list of ItemUpdates to scrutinize
|
||||
|
@ -15,14 +15,16 @@ import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||
* A scrutinizer that detects incorrect formats in text values
|
||||
* (mostly identifiers).
|
||||
*
|
||||
* @author antonin
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
||||
public class FormatScrutinizer extends SnakScrutinizer {
|
||||
|
||||
public static final String type = "add-statements-with-invalid-format";
|
||||
|
||||
private Map<PropertyIdValue, Pattern> _patterns;
|
||||
|
||||
public FormatConstraintScrutinizer() {
|
||||
public FormatScrutinizer() {
|
||||
_patterns = new HashMap<>();
|
||||
}
|
||||
|
||||
@ -59,7 +61,7 @@ public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
||||
if (!pattern.matcher(value).matches()) {
|
||||
if (added) {
|
||||
QAWarning issue = new QAWarning(
|
||||
"add-statements-with-invalid-format",
|
||||
type,
|
||||
pid.getId(),
|
||||
QAWarning.Severity.IMPORTANT,
|
||||
1);
|
@ -17,11 +17,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
* A scrutinizer that checks for missing inverse statements in
|
||||
* edit batches.
|
||||
*
|
||||
* @author antonin
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class InverseConstraintScrutinizer extends StatementScrutinizer {
|
||||
|
||||
public static final String type = "missing-inverse-statements";
|
||||
|
||||
private Map<PropertyIdValue, PropertyIdValue> _inverse;
|
||||
private Map<PropertyIdValue, Map<EntityIdValue, Set<EntityIdValue> >> _statements;
|
||||
|
||||
@ -83,7 +85,7 @@ public class InverseConstraintScrutinizer extends StatementScrutinizer {
|
||||
PropertyIdValue missingProperty = propertyPair.getValue();
|
||||
Set<EntityIdValue> reciprocalLinks = _statements.get(missingProperty).get(idValue);
|
||||
if (reciprocalLinks == null || !reciprocalLinks.contains(itemLinks.getKey())) {
|
||||
QAWarning issue = new QAWarning("missing-inverse-statements",
|
||||
QAWarning issue = new QAWarning(type,
|
||||
ourProperty.getId(),
|
||||
QAWarning.Severity.IMPORTANT,
|
||||
1);
|
||||
|
@ -4,7 +4,7 @@ import java.util.List;
|
||||
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
|
||||
public abstract class ItemEditScrutinizer extends EditScrutinizer {
|
||||
public abstract class ItemUpdateScrutinizer extends EditScrutinizer {
|
||||
|
||||
@Override
|
||||
public void scrutinize(List<ItemUpdate> edit) {
|
@ -5,19 +5,26 @@ import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
|
||||
/**
|
||||
* A scrutinizer that inspects new items
|
||||
* @author antonin
|
||||
* A scrutinizer that inspects new items.
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*/
|
||||
public class NewItemScrutinizer extends ItemEditScrutinizer {
|
||||
public class NewItemScrutinizer extends ItemUpdateScrutinizer {
|
||||
|
||||
public static final String noLabelType = "new-item-without-labels-or-aliases";
|
||||
public static final String noDescType = "new-item-without-descriptions";
|
||||
public static final String deletedStatementsType = "new-item-with-deleted-statements";
|
||||
public static final String noTypeType = "new-item-without-P31-or-P279";
|
||||
public static final String newItemType = "new-item-created";
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update) {
|
||||
if (update.isNew()) {
|
||||
info("new-item-created");
|
||||
info(newItemType);
|
||||
|
||||
if (update.getLabels().isEmpty() && update.getAliases().isEmpty()) {
|
||||
QAWarning issue = new QAWarning(
|
||||
"new-item-without-labels-or-aliases",
|
||||
noLabelType,
|
||||
null,
|
||||
QAWarning.Severity.CRITICAL,
|
||||
1);
|
||||
@ -27,7 +34,7 @@ public class NewItemScrutinizer extends ItemEditScrutinizer {
|
||||
|
||||
if (update.getDescriptions().isEmpty()) {
|
||||
QAWarning issue = new QAWarning(
|
||||
"new-item-without-descriptions",
|
||||
noDescType,
|
||||
null,
|
||||
QAWarning.Severity.WARNING,
|
||||
1);
|
||||
@ -35,9 +42,9 @@ public class NewItemScrutinizer extends ItemEditScrutinizer {
|
||||
addIssue(issue);
|
||||
}
|
||||
|
||||
if (! update.getDeletedStatements().isEmpty()) {
|
||||
if (!update.getDeletedStatements().isEmpty()) {
|
||||
QAWarning issue = new QAWarning(
|
||||
"new-item-with-deleted-statements",
|
||||
deletedStatementsType,
|
||||
null,
|
||||
QAWarning.Severity.WARNING,
|
||||
1);
|
||||
@ -56,7 +63,7 @@ public class NewItemScrutinizer extends ItemEditScrutinizer {
|
||||
}
|
||||
if (!typeFound) {
|
||||
QAWarning issue = new QAWarning(
|
||||
"new-item-without-P31-or-P279",
|
||||
noTypeType,
|
||||
null,
|
||||
QAWarning.Severity.WARNING,
|
||||
1);
|
||||
|
@ -7,10 +7,12 @@ import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
|
||||
public class NoEditsMadeScrutinizer extends EditScrutinizer {
|
||||
|
||||
public static final String type = "no-edit-generated";
|
||||
|
||||
@Override
|
||||
public void scrutinize(List<ItemUpdate> edit) {
|
||||
if(edit.stream().allMatch(e -> e.isNull())) {
|
||||
info("no-edit-generated");
|
||||
info(type);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -14,11 +14,14 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
/**
|
||||
* A scrutinizer that checks the compatibility of the qualifiers
|
||||
* and the property of a statement, and looks for mandatory qualifiers.
|
||||
* @author antonin
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*/
|
||||
public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
|
||||
|
||||
public static final String missingMandatoryQualifiersType = "missing-mandatory-qualifiers";
|
||||
public static final String disallowedQualifiersType = "disallowed-qualifiers";
|
||||
|
||||
private Map<PropertyIdValue, Set<PropertyIdValue>> _allowedQualifiers;
|
||||
private Map<PropertyIdValue, Set<PropertyIdValue>> _mandatoryQualifiers;
|
||||
|
||||
@ -65,7 +68,7 @@ public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
|
||||
|
||||
for (PropertyIdValue missing : missingQualifiers) {
|
||||
QAWarning issue = new QAWarning(
|
||||
"missing-mandatory-qualifiers",
|
||||
missingMandatoryQualifiersType,
|
||||
statementProperty.getId()+"-"+missing.getId(),
|
||||
QAWarning.Severity.WARNING,
|
||||
1);
|
||||
@ -76,7 +79,7 @@ public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
|
||||
}
|
||||
for (PropertyIdValue disallowed : disallowedQualifiers) {
|
||||
QAWarning issue = new QAWarning(
|
||||
"disallowed-qualifiers",
|
||||
disallowedQualifiersType,
|
||||
statementProperty.getId()+"-"+disallowed.getId(),
|
||||
QAWarning.Severity.WARNING,
|
||||
1);
|
||||
|
@ -13,14 +13,14 @@ import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
*/
|
||||
public class SelfReferentialScrutinizer extends SnakScrutinizer {
|
||||
|
||||
public static final String type = "self-referential-statements";
|
||||
|
||||
@Override
|
||||
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
|
||||
if (entityId.equals(snak.getValue())) {
|
||||
QAWarning issue = new QAWarning(
|
||||
"self-referential-statements",
|
||||
null,
|
||||
QAWarning.Severity.WARNING,
|
||||
1);
|
||||
type, null,
|
||||
QAWarning.Severity.WARNING, 1);
|
||||
issue.setProperty("example_entity", entityId);
|
||||
addIssue(issue);
|
||||
}
|
||||
|
@ -11,10 +11,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
/**
|
||||
* For now this scrutinizer only checks for uniqueness at
|
||||
* the item level (it ignores qualifiers and references).
|
||||
* @author antonin
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class SingleValueScrutinizer extends ItemEditScrutinizer {
|
||||
public class SingleValueScrutinizer extends ItemUpdateScrutinizer {
|
||||
|
||||
public static final String type = "single-valued-property-added-more-than-once";
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update) {
|
||||
@ -25,10 +28,8 @@ public class SingleValueScrutinizer extends ItemEditScrutinizer {
|
||||
if (seenSingleProperties.contains(pid)) {
|
||||
|
||||
QAWarning issue = new QAWarning(
|
||||
"single-valued-property-added-more-than-once",
|
||||
pid.getId(),
|
||||
QAWarning.Severity.WARNING,
|
||||
1);
|
||||
type, pid.getId(),
|
||||
QAWarning.Severity.WARNING, 1);
|
||||
issue.setProperty("property_entity", pid);
|
||||
issue.setProperty("example_entity", update.getItemId());
|
||||
addIssue(issue);
|
||||
|
@ -11,7 +11,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
* A scrutinizer that inspects snaks individually, no matter whether they
|
||||
* appear as main snaks, qualifiers or references.
|
||||
*
|
||||
* @author antonin
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public abstract class SnakScrutinizer extends StatementScrutinizer {
|
||||
|
@ -3,7 +3,7 @@ package org.openrefine.wikidata.qa.scrutinizers;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
|
||||
public abstract class StatementGroupScrutinizer extends ItemEditScrutinizer {
|
||||
public abstract class StatementGroupScrutinizer extends ItemUpdateScrutinizer {
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update) {
|
||||
|
@ -4,7 +4,7 @@ import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
|
||||
public abstract class StatementScrutinizer extends ItemEditScrutinizer {
|
||||
public abstract class StatementScrutinizer extends ItemUpdateScrutinizer {
|
||||
|
||||
@Override
|
||||
public void scrutinize(ItemUpdate update) {
|
||||
|
@ -11,10 +11,12 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
*/
|
||||
public class UnsourcedScrutinizer extends StatementScrutinizer {
|
||||
|
||||
public static final String type = "unsourced-statements";
|
||||
|
||||
@Override
|
||||
public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
|
||||
if(statement.getReferences().isEmpty() && added) {
|
||||
warning("unsourced-statements");
|
||||
warning(type);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12,21 +12,27 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
/**
|
||||
* Scrutinizes strings for trailing / leading whitespace, and others
|
||||
* @author antonin
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class WhitespaceScrutinizer extends ValueScrutinizer {
|
||||
|
||||
private Map<String,Pattern> _issuesMap;
|
||||
|
||||
public static final String leadingWhitespaceType = "leading-whitespace";
|
||||
public static final String trailingWhitespaceType = "trailing-whitespace";
|
||||
public static final String duplicateWhitespaceType = "duplicate-whitespace";
|
||||
public static final String nonPrintableCharsType = "non-printable-characters";
|
||||
|
||||
public WhitespaceScrutinizer() {
|
||||
_issuesMap = new HashMap<>();
|
||||
_issuesMap.put("leading-whitespace", Pattern.compile("^\\s"));
|
||||
_issuesMap.put("trailing-whitespace", Pattern.compile("\\s$"));
|
||||
_issuesMap.put("duplicate-whitespace", Pattern.compile("\\s\\s"));
|
||||
_issuesMap.put(leadingWhitespaceType, Pattern.compile("^\\s"));
|
||||
_issuesMap.put(trailingWhitespaceType, Pattern.compile("\\s$"));
|
||||
_issuesMap.put(duplicateWhitespaceType, Pattern.compile("\\s\\s"));
|
||||
|
||||
// https://stackoverflow.com/questions/14565934/regular-expression-to-remove-all-non-printable-characters
|
||||
_issuesMap.put("non-printable-characters", Pattern.compile("[\\x00\\x08\\x0B\\x0C\\x0E-\\x1F]"));
|
||||
_issuesMap.put(nonPrintableCharsType, Pattern.compile("[\\x00\\x03\\x08\\x0B\\x0C\\x0E-\\x1F]"));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -0,0 +1,75 @@
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
|
||||
|
||||
/**
 * A {@link ConstraintFetcher} that answers every query from hard-coded values,
 * so that scrutinizers can be tested against a small, known set of constraints.
 */
public class MockConstraintFetcher implements ConstraintFetcher {
|
||||
|
||||
// Property for which getInversePid reports an inverse, followed by that inverse.
public static PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350");
|
||||
public static PropertyIdValue inversePid = Datamodel.makeWikidataPropertyIdValue("P57");
|
||||
// Qualifier properties: both are returned by allowedQualifiers, only the second by mandatoryQualifiers.
public static PropertyIdValue allowedQualifierPid = Datamodel.makeWikidataPropertyIdValue("P34");
|
||||
public static PropertyIdValue mandatoryQualifierPid = Datamodel.makeWikidataPropertyIdValue("P97");
|
||||
|
||||
// The only properties reported as restricted to values, qualifiers and references, respectively.
public static PropertyIdValue mainSnakPid = Datamodel.makeWikidataPropertyIdValue("P1234");
|
||||
public static PropertyIdValue qualifierPid = Datamodel.makeWikidataPropertyIdValue("P987");
|
||||
public static PropertyIdValue referencePid = Datamodel.makeWikidataPropertyIdValue("P384");
|
||||
|
||||
/**
 * Returns the same regular expression for every property: one digit from 1 to 9
 * followed by at least one more digit.
 *
 * @param pid ignored
 */
@Override
|
||||
public String getFormatRegex(PropertyIdValue pid) {
|
||||
return "[1-9]\\d+";
|
||||
}
|
||||
|
||||
/**
|
||||
* This constraint is purposely left inconsistent (the inverse
|
||||
* constraint holds only on one side).
|
||||
*/
|
||||
@Override
|
||||
public PropertyIdValue getInversePid(PropertyIdValue pid) {
|
||||
if (pidWithInverse.equals(pid)) {
|
||||
return inversePid;
|
||||
}
|
||||
// Every other property, including inversePid itself, has no inverse.
return null;
|
||||
}
|
||||
|
||||
/**
 * @return true only for {@link #mainSnakPid}
 */
@Override
|
||||
public boolean isForValuesOnly(PropertyIdValue pid) {
|
||||
return mainSnakPid.equals(pid);
|
||||
}
|
||||
|
||||
/**
 * @return true only for {@link #qualifierPid}
 */
@Override
|
||||
public boolean isForQualifiersOnly(PropertyIdValue pid) {
|
||||
return qualifierPid.equals(pid);
|
||||
}
|
||||
|
||||
/**
 * @return true only for {@link #referencePid}
 */
@Override
|
||||
public boolean isForReferencesOnly(PropertyIdValue pid) {
|
||||
return referencePid.equals(pid);
|
||||
}
|
||||
|
||||
/**
 * Returns the same two allowed qualifiers whatever the property is.
 *
 * @param pid ignored
 * @return a set containing {@link #allowedQualifierPid} and {@link #mandatoryQualifierPid}
 */
@Override
|
||||
public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
|
||||
return Arrays.asList(allowedQualifierPid, mandatoryQualifierPid).stream().collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
/**
 * Returns the same single mandatory qualifier whatever the property is.
 *
 * @param pid ignored
 * @return a singleton set containing {@link #mandatoryQualifierPid}
 */
@Override
|
||||
public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
|
||||
return Collections.singleton(mandatoryQualifierPid);
|
||||
}
|
||||
|
||||
/**
 * Reports every property as single-valued.
 */
@Override
|
||||
public boolean hasSingleValue(PropertyIdValue pid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Reports every property as requiring distinct values.
 */
@Override
|
||||
public boolean hasDistinctValues(PropertyIdValue pid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,44 @@
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import org.openrefine.wikidata.testing.JacksonSerializationTest;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
 * Tests for {@link QAWarningStore}: serialization, warning counts and maximum severity.
 */
public class QAWarningStoreTest {
|
||||
|
||||
// JSON expected from serializing the store built in setUp().
public static String exampleJson = "{\"max_severity\":\"CRITICAL\",\"nb_warnings\":5,"
|
||||
+"\"warnings\":[{\"type\":\"new-item-without-label\",\"bucketId\":null,"
|
||||
+"\"severity\":\"CRITICAL\",\"count\":3},{\"type\":\"add-statements-with-invalid-format\","
|
||||
+"\"bucketId\":\"P2427\",\"severity\":\"IMPORTANT\",\"count\":2}]}";
|
||||
|
||||
private QAWarningStore store;
|
||||
private QAWarning otherWarning;
|
||||
|
||||
/**
 * Fills a fresh store with the same example warning twice (count 1 each)
 * and one CRITICAL warning of another type with count 3.
 */
@BeforeMethod
|
||||
public void setUp() {
|
||||
store = new QAWarningStore();
|
||||
store.addWarning(QAWarningTest.exampleWarning);
|
||||
store.addWarning(QAWarningTest.exampleWarning);
|
||||
otherWarning = new QAWarning("new-item-without-label", null, QAWarning.Severity.CRITICAL, 3);
|
||||
store.addWarning(otherWarning);
|
||||
}
|
||||
|
||||
/**
 * The store must serialize to {@link #exampleJson}.
 */
@Test
|
||||
public void testSerialize() {
|
||||
JacksonSerializationTest.testSerialize(store, exampleJson);
|
||||
}
|
||||
|
||||
/**
 * Expects the counts to add up to 5 while only two warnings are kept,
 * i.e. the duplicated example warning is stored once.
 */
@Test
|
||||
public void testCount() {
|
||||
assertEquals(5, store.getNbWarnings());
|
||||
assertEquals(2, store.getWarnings().size());
|
||||
}
|
||||
|
||||
/**
 * Expects CRITICAL for the populated store and INFO for an empty one.
 */
@Test
|
||||
public void testMaxSeverity() {
|
||||
assertEquals(QAWarning.Severity.CRITICAL, store.getMaxSeverity());
|
||||
assertEquals(QAWarning.Severity.INFO, (new QAWarningStore()).getMaxSeverity());
|
||||
}
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import org.openrefine.wikidata.testing.JacksonSerializationTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
 * Tests for {@link QAWarning}: serialization, aggregation and ordering.
 */
public class QAWarningTest {
|
||||
|
||||
// Shared fixture: an IMPORTANT warning in bucket "P2427" with count 1.
public static QAWarning exampleWarning = new QAWarning("add-statements-with-invalid-format",
|
||||
"P2427",
|
||||
QAWarning.Severity.IMPORTANT,
|
||||
1);
|
||||
// JSON expected from serializing exampleWarning.
public static String exampleJson =
|
||||
"{\"severity\":\"IMPORTANT\","+
|
||||
"\"count\":1,\"bucketId\":\"P2427\",\"type\":\"add-statements-with-invalid-format\"}";
|
||||
|
||||
/**
 * The example warning must serialize to {@link #exampleJson}.
 */
@Test
|
||||
public void testSerialize() {
|
||||
JacksonSerializationTest.testSerialize(exampleWarning, exampleJson);
|
||||
}
|
||||
|
||||
/**
 * Aggregates an INFO warning with the IMPORTANT example warning of the same
 * type and bucket. Expects the counts to be summed, the type and aggregation id
 * to be unchanged, the severity to be that of the example warning, and the
 * property set on the first warning to survive.
 */
@Test
|
||||
public void testAggregate() {
|
||||
QAWarning firstWarning = new QAWarning("add-statements-with-invalid-format",
|
||||
"P2427",
|
||||
QAWarning.Severity.INFO,
|
||||
1);
|
||||
firstWarning.setProperty("foo", "bar");
|
||||
assertEquals(exampleWarning.getAggregationId(), firstWarning.getAggregationId());
|
||||
QAWarning merged = firstWarning.aggregate(exampleWarning);
|
||||
assertEquals(2, merged.getCount());
|
||||
assertEquals(exampleWarning.getAggregationId(), merged.getAggregationId());
|
||||
assertEquals(exampleWarning.getType(), merged.getType());
|
||||
assertEquals(exampleWarning.getSeverity(), merged.getSeverity());
|
||||
assertEquals("bar", merged.getProperties().get("foo"));
|
||||
}
|
||||
|
||||
/**
 * Expects compareTo to return exactly 1, -1 or 0: a WARNING-severity warning
 * compares as 1 against the IMPORTANT example warning, and -1 the other way round.
 */
@Test
|
||||
public void testCompare() {
|
||||
QAWarning otherWarning = new QAWarning("no-reference",
|
||||
"no-reference",
|
||||
QAWarning.Severity.WARNING,
|
||||
1);
|
||||
assertEquals(1, otherWarning.compareTo(exampleWarning));
|
||||
assertEquals(-1, exampleWarning.compareTo(otherWarning));
|
||||
assertEquals(0, exampleWarning.compareTo(exampleWarning));
|
||||
}
|
||||
|
||||
}
|
@ -7,7 +7,7 @@ import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class ConstraintFetcherTests {
|
||||
public class WikidataConstraintFetcherTests {
|
||||
|
||||
private ConstraintFetcher fetcher;
|
||||
|
||||
@ -21,8 +21,8 @@ public class ConstraintFetcherTests {
|
||||
private PropertyIdValue referenceURL;
|
||||
private PropertyIdValue reasonForDeprecation;
|
||||
|
||||
public ConstraintFetcherTests() {
|
||||
fetcher = new ConstraintFetcher();
|
||||
public WikidataConstraintFetcherTests() {
|
||||
fetcher = new WikidataConstraintFetcher();
|
||||
headOfGovernment = Datamodel.makeWikidataPropertyIdValue("P6");
|
||||
startTime = Datamodel.makeWikidataPropertyIdValue("P580");
|
||||
endTime = Datamodel.makeWikidataPropertyIdValue("P582");
|
@ -0,0 +1,29 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
|
||||
/**
 * Tests for {@link DistinctValuesScrutinizer}.
 */
public class DistinctValuesScrutinizerTest extends StatementScrutinizerTest {
|
||||
|
||||
/**
 * @return the scrutinizer under test
 */
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new DistinctValuesScrutinizer();
|
||||
}
|
||||
|
||||
/**
 * Two updates on different items each add a statement generated with idB as
 * the last argument; the scrutinizer is expected to raise its warning.
 */
@Test
|
||||
public void testTrigger() {
|
||||
ItemIdValue idA = TestingDataGenerator.existingId;
|
||||
ItemIdValue idB = TestingDataGenerator.matchedId;
|
||||
// NOTE(review): generateStatement(x, y) presumably builds a statement on x with value y -- confirm in TestingDataGenerator.
ItemUpdate updateA = new ItemUpdateBuilder(idA)
|
||||
.addStatement(TestingDataGenerator.generateStatement(idA, idB))
|
||||
.build();
|
||||
ItemUpdate updateB = new ItemUpdateBuilder(idB)
|
||||
.addStatement(TestingDataGenerator.generateStatement(idB, idB))
|
||||
.build();
|
||||
scrutinize(updateA, updateB);
|
||||
assertWarningsRaised(DistinctValuesScrutinizer.type);
|
||||
}
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
|
||||
/**
 * Tests for {@link FormatScrutinizer}, run on string values.
 */
public class FormatScrutinizerTest extends ValueScrutinizerTest {
|
||||
|
||||
/**
 * @return the scrutinizer under test
 */
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new FormatScrutinizer();
|
||||
}
|
||||
|
||||
/**
 * A string containing no digits must raise the format warning.
 */
@Test
|
||||
public void testTrigger() {
|
||||
scrutinize(Datamodel.makeStringValue("not a number"));
|
||||
assertWarningsRaised(FormatScrutinizer.type);
|
||||
}
|
||||
|
||||
/**
 * A string made only of digits must not raise any warning.
 */
@Test
|
||||
public void testNoIssue() {
|
||||
scrutinize(Datamodel.makeStringValue("1234"));
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
/**
 * A string that merely starts with digits must still raise the warning,
 * so the format is expected to cover the whole value.
 */
@Test
|
||||
public void testIncompleteMatch() {
|
||||
scrutinize(Datamodel.makeStringValue("42 is a number"));
|
||||
assertWarningsRaised(FormatScrutinizer.type);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.qa.MockConstraintFetcher;
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
|
||||
/**
 * Tests for {@link InverseConstraintScrutinizer}, using the one-sided inverse
 * constraint declared by {@link MockConstraintFetcher}.
 */
public class InverseConstaintScrutinizerTest extends StatementScrutinizerTest {
|
||||
|
||||
private ItemIdValue idA = TestingDataGenerator.existingId;
|
||||
private ItemIdValue idB = TestingDataGenerator.newIdB;
|
||||
private PropertyIdValue pidWithInverse = MockConstraintFetcher.pidWithInverse;
|
||||
private PropertyIdValue inversePid = MockConstraintFetcher.inversePid;
|
||||
|
||||
/**
 * @return the scrutinizer under test
 */
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new InverseConstraintScrutinizer();
|
||||
}
|
||||
|
||||
/**
 * A single statement using the property that has an inverse, with no
 * other statement in the update, must raise the warning.
 */
@Test
|
||||
public void testTrigger() {
|
||||
ItemUpdate update = new ItemUpdateBuilder(idA)
|
||||
.addStatement(TestingDataGenerator.generateStatement(idA, pidWithInverse, idB))
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(InverseConstraintScrutinizer.type);
|
||||
}
|
||||
|
||||
/**
 * A statement using the inverse property itself must not raise anything:
 * the mock fetcher declares no inverse for it.
 */
@Test
|
||||
public void testNoSymmetricClosure() {
|
||||
ItemUpdate update = new ItemUpdateBuilder(idA)
|
||||
.addStatement(TestingDataGenerator.generateStatement(idA, inversePid, idB))
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,70 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
|
||||
|
||||
|
||||
/**
 * Tests for {@link NewItemScrutinizer}.
 */
public class NewItemScrutinizerTest extends ScrutinizerTest {
|
||||
|
||||
// A P31 statement on newIdA with existingId as value, without qualifiers or references.
private Claim claim = Datamodel.makeClaim(TestingDataGenerator.newIdA,
|
||||
Datamodel.makeValueSnak(Datamodel.makeWikidataPropertyIdValue("P31"), TestingDataGenerator.existingId),
|
||||
Collections.emptyList());
|
||||
private Statement p31Statement = Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
||||
|
||||
/**
 * @return the scrutinizer under test
 */
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new NewItemScrutinizer();
|
||||
}
|
||||
|
||||
/**
 * An update on newIdA with no terms and no statements must raise the
 * missing-description, missing-label, missing-type and new-item warnings.
 */
@Test
|
||||
public void testTrigger() {
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.newIdA).build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(
|
||||
NewItemScrutinizer.noDescType,
|
||||
NewItemScrutinizer.noLabelType,
|
||||
NewItemScrutinizer.noTypeType,
|
||||
NewItemScrutinizer.newItemType);
|
||||
}
|
||||
|
||||
/**
 * The same empty update on existingId must not raise anything.
 */
@Test
|
||||
public void testEmptyItem() {
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.existingId).build();
|
||||
scrutinize(update);
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
/**
 * An update on newIdA with a label, a description and a P31 statement
 * must only raise the new-item warning.
 */
@Test
|
||||
public void testGoodNewItem() {
|
||||
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.newIdA)
|
||||
.addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"))
|
||||
.addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en"))
|
||||
.addStatement(p31Statement)
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(NewItemScrutinizer.newItemType);
|
||||
}
|
||||
|
||||
/**
 * Same as the good new item, plus a deleted statement: the deleted-statements
 * warning must be raised in addition to the new-item one.
 */
@Test
|
||||
public void testDeletedStatements() {
|
||||
ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.newIdA)
|
||||
.addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"))
|
||||
.addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en"))
|
||||
.addStatement(p31Statement)
|
||||
.deleteStatement(TestingDataGenerator.generateStatement(TestingDataGenerator.newIdA,
|
||||
TestingDataGenerator.matchedId))
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(NewItemScrutinizer.newItemType, NewItemScrutinizer.deletedStatementsType);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
 * Tests for {@link NoEditsMadeScrutinizer}.
 */
public class NoEditsMadeScrutinizerTest extends ScrutinizerTest {
|
||||
|
||||
/**
 * @return the scrutinizer under test
 */
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new NoEditsMadeScrutinizer();
|
||||
}
|
||||
|
||||
/**
 * Scrutinizing an empty list of updates must raise the warning.
 */
@Test
|
||||
public void testTrigger() {
|
||||
scrutinize();
|
||||
assertWarningsRaised(NoEditsMadeScrutinizer.type);
|
||||
}
|
||||
|
||||
/**
 * An otherwise empty update on newIdA must not raise the warning.
 */
@Test
|
||||
public void testNonNull() {
|
||||
// NOTE(review): presumably an update on a new item is never considered null -- confirm in ItemUpdate.isNull().
scrutinize(new ItemUpdateBuilder(TestingDataGenerator.newIdA).build());
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
/**
 * An empty update on existingId must raise the warning.
 */
@Test
|
||||
public void testNull() {
|
||||
scrutinize(new ItemUpdateBuilder(TestingDataGenerator.existingId).build());
|
||||
assertWarningsRaised(NoEditsMadeScrutinizer.type);
|
||||
}
|
||||
}
|
@ -0,0 +1,59 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.openrefine.wikidata.qa.MockConstraintFetcher;
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
|
||||
|
||||
/**
 * Tests for {@link QualifierCompatibilityScrutinizer}, using the allowed and
 * mandatory qualifiers declared by {@link MockConstraintFetcher}.
 */
public class QualifierCompatibilityScrutinizerTest extends StatementScrutinizerTest {
|
||||
// qualifierPid is not part of the set returned by MockConstraintFetcher.allowedQualifiers.
private Snak disallowedQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.qualifierPid);
|
||||
private Snak mandatoryQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.mandatoryQualifierPid);
|
||||
private Snak allowedQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.allowedQualifierPid);
|
||||
|
||||
/**
 * @return the scrutinizer under test
 */
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new QualifierCompatibilityScrutinizer();
|
||||
}
|
||||
|
||||
/**
 * A disallowed qualifier next to the mandatory one must only raise the
 * disallowed-qualifiers warning.
 */
@Test
|
||||
public void testDisallowedQualifier() {
|
||||
|
||||
scrutinize(makeStatement(disallowedQualifier,mandatoryQualifier));
|
||||
assertWarningsRaised(QualifierCompatibilityScrutinizer.disallowedQualifiersType);
|
||||
}
|
||||
|
||||
/**
 * A statement without any qualifier must raise the missing-mandatory-qualifiers warning.
 */
@Test
|
||||
public void testMissingQualifier() {
|
||||
scrutinize(makeStatement());
|
||||
assertWarningsRaised(QualifierCompatibilityScrutinizer.missingMandatoryQualifiersType);
|
||||
}
|
||||
|
||||
/**
 * A statement with an allowed qualifier and the mandatory one must not raise anything.
 */
@Test
|
||||
public void testGoodEdit() {
|
||||
scrutinize(makeStatement(allowedQualifier,mandatoryQualifier));
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
/**
 * Builds a statement on existingId whose main snak is a no-value snak for
 * mainSnakPid, carrying the given qualifiers and no references.
 *
 * @param qualifiers the qualifier snaks to attach (possibly none)
 * @return the statement, with normal rank and an empty statement id
 */
private Statement makeStatement(Snak... qualifiers) {
|
||||
Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId,
|
||||
Datamodel.makeNoValueSnak(MockConstraintFetcher.mainSnakPid), makeQualifiers(qualifiers));
|
||||
return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
||||
}
/**
 * Wraps each snak in its own single-element snak group, keeping the order.
 *
 * @param qualifiers the snaks to wrap
 * @return one snak group per input snak
 */
|
||||
private List<SnakGroup> makeQualifiers(Snak[] qualifiers) {
|
||||
List<Snak> snaks = Arrays.asList(qualifiers);
|
||||
return snaks.stream()
|
||||
.map((Snak q) -> Datamodel.makeSnakGroup(Collections.<Snak>singletonList(q)))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,55 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.openrefine.wikidata.qa.MockConstraintFetcher;
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
|
||||
|
||||
/**
 * Tests for {@link RestrictedPositionScrutinizer}, using the position
 * restrictions declared by {@link MockConstraintFetcher}.
 */
public class RestrictedPositionScrutinizerTest extends SnakScrutinizerTest {
|
||||
|
||||
private ItemIdValue qid = TestingDataGenerator.existingId;
|
||||
|
||||
/**
 * @return the scrutinizer under test
 */
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new RestrictedPositionScrutinizer();
|
||||
}
|
||||
|
||||
/**
 * A statement generated with the qualifier-only property must raise the
 * corresponding main-snak warning.
 */
@Test
|
||||
public void testTriggerMainSnak() {
|
||||
// NOTE(review): generateStatement(item, pid, value) presumably uses pid as the main snak property -- confirm in TestingDataGenerator.
scrutinize(TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.qualifierPid, qid));
|
||||
assertWarningsRaised("property-restricted-to-qualifier-found-in-mainsnak");
|
||||
}
|
||||
|
||||
/**
 * A statement generated with the values-only property must not raise anything.
 */
@Test
|
||||
public void testNoProblem() {
|
||||
scrutinize(TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.mainSnakPid, qid));
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
/**
 * A property for which the mock fetcher declares no restriction must not raise anything.
 */
@Test
|
||||
public void testNotRestricted() {
|
||||
scrutinize(TestingDataGenerator.generateStatement(qid, Datamodel.makeWikidataPropertyIdValue("P3748"), qid));
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
/**
 * The values-only property used inside a reference must raise the
 * corresponding reference warning.
 */
@Test
|
||||
public void testTriggerReference() {
|
||||
Snak snak = Datamodel.makeValueSnak(MockConstraintFetcher.mainSnakPid, qid);
|
||||
List<SnakGroup> snakGroups = Collections.singletonList(Datamodel.makeSnakGroup(Collections.singletonList(snak)));
|
||||
// Reuse the claim of a generated statement and attach a single reference holding the snak above.
Statement statement = Datamodel.makeStatement(
|
||||
TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.mainSnakPid, qid).getClaim(),
|
||||
Collections.singletonList(Datamodel.makeReference(snakGroups)),
|
||||
StatementRank.NORMAL, "");
|
||||
scrutinize(statement);
|
||||
assertWarningsRaised("property-restricted-to-mainsnak-found-in-reference");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||
import org.openrefine.wikidata.qa.MockConstraintFetcher;
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.openrefine.wikidata.qa.QAWarningStore;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
|
||||
/**
 * Base class for scrutinizer tests. Before each test method it wires the
 * scrutinizer returned by {@link #getScrutinizer()} to a fresh warning store
 * and a {@link MockConstraintFetcher}, and it offers helpers to run the
 * scrutinizer and check which warning types ended up in the store.
 */
public abstract class ScrutinizerTest {
|
||||
/**
 * @return a new instance of the scrutinizer to test
 */
public abstract EditScrutinizer getScrutinizer();
|
||||
|
||||
private EditScrutinizer scrutinizer;
|
||||
private QAWarningStore store;
|
||||
private ConstraintFetcher fetcher;
|
||||
|
||||
/**
 * Creates a fresh store, mock fetcher and scrutinizer for each test method.
 */
@BeforeMethod
|
||||
public void setUp() {
|
||||
store = new QAWarningStore();
|
||||
fetcher = new MockConstraintFetcher();
|
||||
scrutinizer = getScrutinizer();
|
||||
scrutinizer.setStore(store);
|
||||
scrutinizer.setFetcher(fetcher);
|
||||
}
|
||||
|
||||
/**
 * Runs the scrutinizer on the given updates, passed as one list.
 *
 * @param updates the updates to scrutinize (possibly none)
 */
public void scrutinize(ItemUpdate... updates) {
|
||||
scrutinizer.scrutinize(Arrays.asList(updates));
|
||||
}
|
||||
|
||||
/**
 * Asserts that the set of warning types in the store is exactly the given
 * set of types; order and repetitions are ignored.
 *
 * @param types the expected warning types
 */
public void assertWarningsRaised(String... types) {
|
||||
assertEquals(Arrays.asList(types).stream().collect(Collectors.toSet()), getWarningTypes());
|
||||
}
|
||||
|
||||
/**
 * Asserts that the warnings held by the store contain the given warning.
 *
 * @param warning the warning expected in the store
 */
public void assertWarningRaised(QAWarning warning) {
|
||||
assertTrue(store.getWarnings().contains(warning));
|
||||
}
|
||||
|
||||
/**
 * Asserts that the store holds no warning at all.
 */
public void assertNoWarningRaised() {
|
||||
assertWarningsRaised();
|
||||
}
|
||||
|
||||
/**
 * @return the distinct types of the warnings currently in the store
 */
public Set<String> getWarningTypes() {
|
||||
return store.getWarnings().stream().map(w -> w.getType()).collect(Collectors.toSet());
|
||||
}
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
|
||||
/**
 * Tests for {@link SelfReferentialScrutinizer}.
 */
public class SelfReferentialScrutinizerTest extends StatementScrutinizerTest {
|
||||
|
||||
/**
 * @return the scrutinizer under test
 */
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new SelfReferentialScrutinizer();
|
||||
}
|
||||
|
||||
/**
 * A statement generated with the same id in both positions must raise the warning.
 */
@Test
|
||||
public void testTrigger() {
|
||||
ItemIdValue id = TestingDataGenerator.matchedId;
|
||||
scrutinize(TestingDataGenerator.generateStatement(id, id));
|
||||
assertWarningsRaised(SelfReferentialScrutinizer.type);
|
||||
}
|
||||
|
||||
/**
 * A statement generated with two different ids must not raise anything.
 */
@Test
|
||||
public void testNoProblem() {
|
||||
ItemIdValue id = TestingDataGenerator.matchedId;
|
||||
scrutinize(TestingDataGenerator.generateStatement(id, TestingDataGenerator.existingId));
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
|
||||
/**
 * Tests for {@link SingleValueScrutinizer}. The mock constraint fetcher used
 * by {@link ScrutinizerTest} reports every property as single-valued.
 */
public class SingleValueScrutinizerTest extends ScrutinizerTest {
|
||||
|
||||
/**
 * @return the scrutinizer under test
 */
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new SingleValueScrutinizer();
|
||||
}
|
||||
|
||||
/**
 * Adding the same generated statement twice to one item must raise the warning.
 */
@Test
|
||||
public void testTrigger() {
|
||||
ItemIdValue idA = TestingDataGenerator.existingId;
|
||||
ItemIdValue idB = TestingDataGenerator.matchedId;
|
||||
ItemUpdate update = new ItemUpdateBuilder(idA)
|
||||
.addStatement(TestingDataGenerator.generateStatement(idA, idB))
|
||||
.addStatement(TestingDataGenerator.generateStatement(idA, idB))
|
||||
.build();
|
||||
scrutinize(update);
|
||||
assertWarningsRaised(SingleValueScrutinizer.type);
|
||||
}
|
||||
|
||||
/**
 * One generated statement on each of two different items must not raise
 * anything: the check is expected to apply per item.
 */
@Test
|
||||
public void testNoIssue() {
|
||||
ItemIdValue idA = TestingDataGenerator.existingId;
|
||||
ItemIdValue idB = TestingDataGenerator.matchedId;
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA)
|
||||
.addStatement(TestingDataGenerator.generateStatement(idA, idB))
|
||||
.build();
|
||||
ItemUpdate updateB = new ItemUpdateBuilder(idB)
|
||||
.addStatement(TestingDataGenerator.generateStatement(idB, idB))
|
||||
.build();
|
||||
scrutinize(updateA, updateB);
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
}
|
@ -0,0 +1,43 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
|
||||
|
||||
public abstract class SnakScrutinizerTest extends StatementScrutinizerTest {
|
||||
|
||||
public static Snak defaultMainSnak = Datamodel.makeNoValueSnak(Datamodel.makeWikidataPropertyIdValue("P3928"));
|
||||
|
||||
public void scrutinize(Snak snak) {
|
||||
Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId, snak,
|
||||
Collections.emptyList());
|
||||
Statement statement = Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
||||
scrutinize(statement);
|
||||
}
|
||||
|
||||
public void scrutinizeAsQualifier(Snak snak) {
|
||||
Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId, defaultMainSnak,
|
||||
toSnakGroups(snak));
|
||||
Statement statement = Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
||||
scrutinize(statement);
|
||||
}
|
||||
|
||||
public void scrutinizeAsReference(Snak snak) {
|
||||
Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId, defaultMainSnak,
|
||||
Collections.emptyList());
|
||||
Statement statement = Datamodel.makeStatement(claim,
|
||||
Collections.singletonList(Datamodel.makeReference(toSnakGroups(snak))), StatementRank.NORMAL, "");
|
||||
scrutinize(statement);
|
||||
}
|
||||
|
||||
private List<SnakGroup> toSnakGroups(Snak snak) {
|
||||
return Collections.singletonList(Datamodel.makeSnakGroup(Collections.singletonList(snak)));
|
||||
}
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
|
||||
public abstract class StatementScrutinizerTest extends ScrutinizerTest {
|
||||
|
||||
public void scrutinize(Statement statement) {
|
||||
ItemUpdate update = new ItemUpdateBuilder((ItemIdValue)statement.getClaim().getSubject())
|
||||
.addStatement(statement).build();
|
||||
scrutinize(update);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class UnsourcedScrutinizerTest extends StatementScrutinizerTest {
|
||||
|
||||
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new UnsourcedScrutinizer();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTrigger() {
|
||||
scrutinize(TestingDataGenerator.generateStatement(TestingDataGenerator.existingId,
|
||||
TestingDataGenerator.matchedId));
|
||||
assertWarningsRaised(UnsourcedScrutinizer.type);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,21 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
public abstract class ValueScrutinizerTest extends SnakScrutinizerTest {
|
||||
|
||||
public static final PropertyIdValue defaultPid = Datamodel.makeWikidataPropertyIdValue("P328");
|
||||
|
||||
public void scrutinize(Value value) {
|
||||
scrutinize(Datamodel.makeValueSnak(defaultPid, value));
|
||||
}
|
||||
|
||||
public void scrutinizeLabel(MonolingualTextValue text) {
|
||||
scrutinize(new ItemUpdateBuilder(TestingDataGenerator.existingId).addLabel(text).build());
|
||||
}
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
|
||||
public class WhitespaceScrutinizerTest extends ValueScrutinizerTest {
|
||||
|
||||
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new WhitespaceScrutinizer();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLeadingWhitespace() {
|
||||
scrutinize(Datamodel.makeStringValue(" a"));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTrailingWhitespace() {
|
||||
scrutinize(Datamodel.makeStringValue("a\t"));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.trailingWhitespaceType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDuplicateWhitespace() {
|
||||
scrutinize(Datamodel.makeStringValue("a\t b"));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonPrintableChars() {
|
||||
scrutinize(Datamodel.makeStringValue("c\u0003"));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.nonPrintableCharsType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoIssue() {
|
||||
scrutinize(Datamodel.makeStringValue("a b"));
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleIssues() {
|
||||
scrutinize(Datamodel.makeStringValue(" a\t b "));
|
||||
assertWarningsRaised(
|
||||
WhitespaceScrutinizer.duplicateWhitespaceType,
|
||||
WhitespaceScrutinizer.leadingWhitespaceType,
|
||||
WhitespaceScrutinizer.trailingWhitespaceType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMonolingualTextValue() {
|
||||
scrutinizeLabel(Datamodel.makeMonolingualTextValue(" a", "fr"));
|
||||
assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
|
||||
}
|
||||
}
|
@ -68,10 +68,19 @@ public class TestingDataGenerator {
|
||||
return new WbMonolingualExpr(new WbLanguageConstant(langCode, langLabel), new WbStringConstant(text));
|
||||
}
|
||||
|
||||
public static Statement generateStatement(ItemIdValue from, ItemIdValue to) {
|
||||
public static Statement generateStatement(ItemIdValue from, PropertyIdValue pid, ItemIdValue to) {
|
||||
Claim claim = Datamodel.makeClaim(from, Datamodel.makeValueSnak(pid, to), Collections.emptyList());
|
||||
return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
||||
}
|
||||
|
||||
public static Statement generateStatement(ItemIdValue from, ItemIdValue to) {
|
||||
return generateStatement(from, pid, to);
|
||||
}
|
||||
|
||||
public static ItemIdValue newIdA = makeNewItemIdValue(1234L, "new item A");
|
||||
public static ItemIdValue newIdB = makeNewItemIdValue(4567L, "new item B");
|
||||
public static ItemIdValue matchedId = makeMatchedItemIdValue("Q89","eist");
|
||||
public static ItemIdValue existingId = Datamodel.makeWikidataItemIdValue("Q43");
|
||||
|
||||
|
||||
}
|
||||
|
@ -12,7 +12,6 @@ import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Reference;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
@ -23,28 +22,23 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
public class PointerExtractorTest {
|
||||
|
||||
private ItemIdValue existingId = Datamodel.makeWikidataItemIdValue("Q43");
|
||||
private ItemIdValue matchedId = TestingDataGenerator.makeMatchedItemIdValue("Q89","eist");
|
||||
private ItemIdValue newIdA = TestingDataGenerator.makeNewItemIdValue(1234L, "new item A");
|
||||
private ItemIdValue newIdB = TestingDataGenerator.makeNewItemIdValue(4567L, "new item B");
|
||||
|
||||
private PropertyIdValue pid = Datamodel.makeWikidataPropertyIdValue("P89");
|
||||
private Snak snakWithNew = Datamodel.makeValueSnak(pid, newIdA);
|
||||
private Snak snakWithoutNew = Datamodel.makeValueSnak(pid, matchedId);
|
||||
private Snak snakWithNew = Datamodel.makeValueSnak(pid, TestingDataGenerator.newIdA);
|
||||
private Snak snakWithoutNew = Datamodel.makeValueSnak(pid, TestingDataGenerator.matchedId);
|
||||
private SnakGroup snakGroupWithNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithNew));
|
||||
private SnakGroup snakGroupWithoutNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithoutNew));
|
||||
private Claim claimWithNew = Datamodel.makeClaim(existingId, snakWithNew, Collections.emptyList());
|
||||
private Claim claimNewSubject = Datamodel.makeClaim(newIdB, snakWithoutNew, Collections.emptyList());
|
||||
private Claim claimNewQualifier = Datamodel.makeClaim(matchedId, snakWithoutNew,
|
||||
private Claim claimWithNew = Datamodel.makeClaim(TestingDataGenerator.existingId, snakWithNew, Collections.emptyList());
|
||||
private Claim claimNewSubject = Datamodel.makeClaim(TestingDataGenerator.newIdB, snakWithoutNew, Collections.emptyList());
|
||||
private Claim claimNewQualifier = Datamodel.makeClaim(TestingDataGenerator.matchedId, snakWithoutNew,
|
||||
Collections.singletonList(snakGroupWithNew));
|
||||
|
||||
private static PointerExtractor e = new PointerExtractor();
|
||||
|
||||
@Test
|
||||
public void testExtractEntityId() {
|
||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(newIdA));
|
||||
assertEmpty(e.extractPointers(existingId));
|
||||
assertEmpty(e.extractPointers(matchedId));
|
||||
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(TestingDataGenerator.newIdA));
|
||||
assertEmpty(e.extractPointers(TestingDataGenerator.existingId));
|
||||
assertEmpty(e.extractPointers(TestingDataGenerator.matchedId));
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -62,26 +56,26 @@ public class PointerExtractorTest {
|
||||
@Test
|
||||
public void testSnak() {
|
||||
assertEmpty(e.extractPointers(snakWithoutNew));
|
||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(snakWithNew));
|
||||
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(snakWithNew));
|
||||
assertEmpty(e.extractPointers(Datamodel.makeNoValueSnak(pid)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSnakGroup() {
|
||||
assertEmpty(e.extractPointers(snakGroupWithoutNew));
|
||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(snakGroupWithNew));
|
||||
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(snakGroupWithNew));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStatement() {
|
||||
assertEmpty(e.extractPointers(Datamodel.makeStatement(claimNewSubject,
|
||||
Collections.emptyList(), StatementRank.NORMAL, "")));
|
||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimWithNew,
|
||||
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimWithNew,
|
||||
Collections.emptyList(), StatementRank.NORMAL, "")));
|
||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimNewQualifier,
|
||||
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimNewQualifier,
|
||||
Collections.emptyList(), StatementRank.NORMAL, "")));
|
||||
Reference reference = Datamodel.makeReference(Collections.singletonList(snakGroupWithNew));
|
||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimNewSubject,
|
||||
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimNewSubject,
|
||||
Collections.singletonList(reference), StatementRank.NORMAL, "")));
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user