Test the scrutinizers
This commit is contained in:
parent
fd7462f749
commit
973a28cc90
@ -1,60 +1,17 @@
|
|||||||
|
|
||||||
package org.openrefine.wikidata.qa;
|
package org.openrefine.wikidata.qa;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.function.Predicate;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import org.openrefine.wikidata.utils.EntityCache;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class provides an abstraction over the way constraint
|
* An object that fetches constraints about properties.
|
||||||
* definitions are stored in Wikidata.
|
|
||||||
*
|
*
|
||||||
* @author antonin
|
* @author Antonin Delpeuch
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class ConstraintFetcher {
|
public interface ConstraintFetcher {
|
||||||
public static String WIKIDATA_CONSTRAINT_PID = "P2302";
|
|
||||||
|
|
||||||
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
|
|
||||||
public static String FORMAT_REGEX_PID = "P1793";
|
|
||||||
|
|
||||||
public static String INVERSE_CONSTRAINT_QID = "Q21510855";
|
|
||||||
public static String INVERSE_PROPERTY_PID = "P2306";
|
|
||||||
|
|
||||||
public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958";
|
|
||||||
|
|
||||||
public static String USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID = "Q21510863";
|
|
||||||
|
|
||||||
public static String USED_ONLY_AS_REFERENCE_CONSTRAINT_QID = "Q21528959";
|
|
||||||
|
|
||||||
public static String ALLOWED_QUALIFIERS_CONSTRAINT_QID = "Q21510851";
|
|
||||||
public static String ALLOWED_QUALIFIERS_CONSTRAINT_PID = "P2306";
|
|
||||||
|
|
||||||
public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856";
|
|
||||||
public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306";
|
|
||||||
|
|
||||||
public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
|
|
||||||
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
|
|
||||||
|
|
||||||
// The following constraints still need to be implemented:
|
|
||||||
|
|
||||||
public static String TYPE_CONSTRAINT_QID = "Q21503250";
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the regular expression for formatting a property, or null if
|
* Retrieves the regular expression for formatting a property, or null if
|
||||||
@ -62,152 +19,48 @@ public class ConstraintFetcher {
|
|||||||
* @param pid
|
* @param pid
|
||||||
* @return the regular expression, which should be compatible with java.util.regex
|
* @return the regular expression, which should be compatible with java.util.regex
|
||||||
*/
|
*/
|
||||||
public String getFormatRegex(PropertyIdValue pid) {
|
String getFormatRegex(PropertyIdValue pid);
|
||||||
List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
|
|
||||||
if (specs != null) {
|
|
||||||
List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
|
|
||||||
if (! regexes.isEmpty()) {
|
|
||||||
return ((StringValue)regexes.get(0)).getString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the property that is the inverse of a given property
|
* Retrieves the property that is the inverse of a given property
|
||||||
* @param pid: the property to retrieve the inverse for
|
* @param pid: the property to retrieve the inverse for
|
||||||
* @return the pid of the inverse property
|
* @return the pid of the inverse property
|
||||||
*/
|
*/
|
||||||
public PropertyIdValue getInversePid(PropertyIdValue pid) {
|
PropertyIdValue getInversePid(PropertyIdValue pid);
|
||||||
List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID);
|
|
||||||
|
|
||||||
if(specs != null) {
|
|
||||||
List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID);
|
|
||||||
if (! inverses.isEmpty()) {
|
|
||||||
return (PropertyIdValue)inverses.get(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this property for values only?
|
* Is this property for values only?
|
||||||
*/
|
*/
|
||||||
public boolean isForValuesOnly(PropertyIdValue pid) {
|
boolean isForValuesOnly(PropertyIdValue pid);
|
||||||
return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this property for qualifiers only?
|
* Is this property for qualifiers only?
|
||||||
*/
|
*/
|
||||||
public boolean isForQualifiersOnly(PropertyIdValue pid) {
|
boolean isForQualifiersOnly(PropertyIdValue pid);
|
||||||
return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this property for references only?
|
* Is this property for references only?
|
||||||
*/
|
*/
|
||||||
public boolean isForReferencesOnly(PropertyIdValue pid) {
|
boolean isForReferencesOnly(PropertyIdValue pid);
|
||||||
return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
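* Get the set of allowed qualifiers (as property ids) for this property (null if the property has no such constraint, i.e. any qualifier is allowed)
|
* Get the set of allowed qualifiers (as property ids) for this property (null if the property has no such constraint, i.e. any qualifier is allowed)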
|
||||||
*/
|
*/
|
||||||
public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
|
Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid);
|
||||||
List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_QUALIFIERS_CONSTRAINT_QID);
|
|
||||||
|
|
||||||
if (specs != null) {
|
|
||||||
List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID);
|
|
||||||
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the set of mandatory qualifiers (as property ids) for this property (null if the property has no such constraint)
|
* Get the set of mandatory qualifiers (as property ids) for this property (null if the property has no such constraint)
|
||||||
*/
|
*/
|
||||||
public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
|
Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid);
|
||||||
List<SnakGroup> specs = getSingleConstraint(pid, MANDATORY_QUALIFIERS_CONSTRAINT_QID);
|
|
||||||
|
|
||||||
if (specs != null) {
|
|
||||||
List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID);
|
|
||||||
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this property expected to have at most one value per item?
|
* Is this property expected to have at most one value per item?
|
||||||
*/
|
*/
|
||||||
public boolean hasSingleValue(PropertyIdValue pid) {
|
boolean hasSingleValue(PropertyIdValue pid);
|
||||||
return getSingleConstraint(pid, SINGLE_VALUE_CONSTRAINT_QID) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this property expected to have distinct values?
|
* Is this property expected to have distinct values?
|
||||||
*/
|
*/
|
||||||
public boolean hasDistinctValues(PropertyIdValue pid) {
|
boolean hasDistinctValues(PropertyIdValue pid);
|
||||||
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a single constraint for a particular type and a property, or null
|
|
||||||
* if there is no such constraint
|
|
||||||
* @param pid: the property to retrieve the constraints for
|
|
||||||
* @param qid: the type of the constraints
|
|
||||||
* @return the list of qualifiers for the constraint, or null if it does not exist
|
|
||||||
*/
|
|
||||||
protected List<SnakGroup> getSingleConstraint(PropertyIdValue pid, String qid) {
|
|
||||||
Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null);
|
|
||||||
if (statement != null) {
|
|
||||||
return statement.getClaim().getQualifiers();
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the list of constraints of a particular type for a property
|
|
||||||
* @param pid: the property to retrieve the constraints for
|
|
||||||
* @param qid: the type of the constraints
|
|
||||||
* @return the stream of matching constraint statements
|
|
||||||
*/
|
|
||||||
protected Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
|
|
||||||
Stream<Statement> allConstraints = getConstraintStatements(pid)
|
|
||||||
.stream()
|
|
||||||
.filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid));
|
|
||||||
return allConstraints;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets all the constraint statements for a given property
|
|
||||||
* @param pid : the id of the property to retrieve the constraints for
|
|
||||||
* @return the list of constraint statements
|
|
||||||
*/
|
|
||||||
protected List<Statement> getConstraintStatements(PropertyIdValue pid) {
|
|
||||||
PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
|
|
||||||
StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
|
|
||||||
if (group != null) {
|
|
||||||
return group.getStatements();
|
|
||||||
} else {
|
|
||||||
return new ArrayList<Statement>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the values of a given property in qualifiers
|
|
||||||
* @param groups: the qualifiers
|
|
||||||
* @param pid: the property to filter on
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
protected List<Value> findValues(List<SnakGroup> groups, String pid) {
|
|
||||||
List<Value> results = new ArrayList<>();
|
|
||||||
for(SnakGroup group : groups) {
|
|
||||||
if (group.getProperty().getId().equals(pid)) {
|
|
||||||
for (Snak snak : group.getSnaks())
|
|
||||||
results.add(snak.getValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@ import java.util.stream.Collectors;
|
|||||||
|
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.FormatScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.NoEditsMadeScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.NoEditsMadeScrutinizer;
|
||||||
@ -32,14 +32,16 @@ import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
|||||||
public class EditInspector {
|
public class EditInspector {
|
||||||
private Map<String, EditScrutinizer> scrutinizers;
|
private Map<String, EditScrutinizer> scrutinizers;
|
||||||
private QAWarningStore warningStore;
|
private QAWarningStore warningStore;
|
||||||
|
private ConstraintFetcher fetcher;
|
||||||
|
|
||||||
public EditInspector(QAWarningStore warningStore) {
|
public EditInspector(QAWarningStore warningStore) {
|
||||||
this.scrutinizers = new HashMap<>();
|
this.scrutinizers = new HashMap<>();
|
||||||
|
this.fetcher = new WikidataConstraintFetcher();
|
||||||
this.warningStore = warningStore;
|
this.warningStore = warningStore;
|
||||||
|
|
||||||
// Register all known scrutinizers here
|
// Register all known scrutinizers here
|
||||||
register(new NewItemScrutinizer());
|
register(new NewItemScrutinizer());
|
||||||
register(new FormatConstraintScrutinizer());
|
register(new FormatScrutinizer());
|
||||||
register(new InverseConstraintScrutinizer());
|
register(new InverseConstraintScrutinizer());
|
||||||
register(new SelfReferentialScrutinizer());
|
register(new SelfReferentialScrutinizer());
|
||||||
register(new UnsourcedScrutinizer());
|
register(new UnsourcedScrutinizer());
|
||||||
@ -59,6 +61,7 @@ public class EditInspector {
|
|||||||
String key = scrutinizer.getClass().getName();
|
String key = scrutinizer.getClass().getName();
|
||||||
scrutinizers.put(key, scrutinizer);
|
scrutinizers.put(key, scrutinizer);
|
||||||
scrutinizer.setStore(warningStore);
|
scrutinizer.setStore(warningStore);
|
||||||
|
scrutinizer.setFetcher(fetcher);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -69,18 +72,14 @@ public class EditInspector {
|
|||||||
public void inspect(List<ItemUpdate> editBatch) {
|
public void inspect(List<ItemUpdate> editBatch) {
|
||||||
// First, schedule them with some scheduler,
|
// First, schedule them with some scheduler,
|
||||||
// so that all newly created entities appear in the batch
|
// so that all newly created entities appear in the batch
|
||||||
UpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
|
WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
|
||||||
try {
|
|
||||||
editBatch = scheduler.schedule(editBatch);
|
editBatch = scheduler.schedule(editBatch);
|
||||||
Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(editBatch);
|
Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(editBatch);
|
||||||
List<ItemUpdate> mergedUpdates = updates.values().stream().collect(Collectors.toList());
|
List<ItemUpdate> mergedUpdates = updates.values().stream().collect(Collectors.toList());
|
||||||
for(EditScrutinizer scrutinizer : scrutinizers.values()) {
|
for(EditScrutinizer scrutinizer : scrutinizers.values()) {
|
||||||
scrutinizer.scrutinize(mergedUpdates);
|
scrutinizer.scrutinize(mergedUpdates);
|
||||||
}
|
}
|
||||||
} catch(ImpossibleSchedulingException e) {
|
|
||||||
warningStore.addWarning(new QAWarning(
|
|
||||||
"scheduling-failed", null, QAWarning.Severity.CRITICAL, 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (warningStore.getNbWarnings() == 0) {
|
if (warningStore.getNbWarnings() == 0) {
|
||||||
warningStore.addWarning(new QAWarning(
|
warningStore.addWarning(new QAWarning(
|
||||||
|
@ -1,20 +1,25 @@
|
|||||||
package org.openrefine.wikidata.qa;
|
package org.openrefine.wikidata.qa;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.jsoup.helper.Validate;
|
||||||
import org.openrefine.wikidata.utils.JacksonJsonizable;
|
import org.openrefine.wikidata.utils.JacksonJsonizable;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A class to represent a QA warning emited by the Wikidata schema
|
* A class to represent a QA warning emitted by the Wikidata schema
|
||||||
* This could probably be reused at a broader scale, for instance for
|
* This could probably be reused at a broader scale, for instance for
|
||||||
* Data Package validation.
|
* Data Package validation.
|
||||||
*
|
*
|
||||||
* @author antonin
|
* @author Antonin Delpeuch
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning> {
|
public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning> {
|
||||||
@ -27,42 +32,30 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// The type of QA warning emitted
|
/// The type of QA warning emitted
|
||||||
private String type;
|
private final String type;
|
||||||
// The key for aggregation of other QA warnings together - this specializes the id
|
// The key for aggregation of other QA warnings together - this specializes the id
|
||||||
private String bucketId;
|
private final String bucketId;
|
||||||
// The severity of the issue
|
// The severity of the issue
|
||||||
private Severity severity;
|
private final Severity severity;
|
||||||
// The number of times this issue was found
|
// The number of times this issue was found
|
||||||
private int count;
|
private final int count;
|
||||||
// Other details about the warning, that can be displayed to the user
|
// Other details about the warning, that can be displayed to the user
|
||||||
private Map<String,Object> properties;
|
private final Map<String,Object> properties;
|
||||||
|
|
||||||
public QAWarning(String type, String bucketId, Severity severity, int count) {
|
public QAWarning(String type, String bucketId, Severity severity, int count) {
|
||||||
|
Validate.notNull(type);
|
||||||
this.type = type;
|
this.type = type;
|
||||||
this.bucketId = bucketId;
|
this.bucketId = bucketId;
|
||||||
|
Validate.notNull(severity);
|
||||||
this.severity = severity;
|
this.severity = severity;
|
||||||
this.count = count;
|
this.count = count;
|
||||||
this.properties = new HashMap<String,Object>();
|
this.properties = new HashMap<>();
|
||||||
}
|
|
||||||
|
|
||||||
@JsonCreator
|
|
||||||
public QAWarning(
|
|
||||||
@JsonProperty("type") String type,
|
|
||||||
@JsonProperty("bucket_id") String bucketId,
|
|
||||||
@JsonProperty("severity") Severity severity,
|
|
||||||
@JsonProperty("count") int count,
|
|
||||||
@JsonProperty("properties") Map<String,Object> properties) {
|
|
||||||
this.type = type;
|
|
||||||
this.bucketId = bucketId;
|
|
||||||
this.severity = severity;
|
|
||||||
this.count = count;
|
|
||||||
this.properties = properties;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the full key for aggregation of QA warnings
|
* @return the full key for aggregation of QA warnings
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
|
@JsonIgnore
|
||||||
public String getAggregationId() {
|
public String getAggregationId() {
|
||||||
if (this.bucketId != null) {
|
if (this.bucketId != null) {
|
||||||
return this.type + "_" + this.bucketId;
|
return this.type + "_" + this.bucketId;
|
||||||
@ -75,12 +68,22 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
|
|||||||
* Aggregates another QA warning of the same aggregation id.
|
* Aggregates another QA warning of the same aggregation id.
|
||||||
* @param other
|
* @param other
|
||||||
*/
|
*/
|
||||||
public void aggregate(QAWarning other) {
|
public QAWarning aggregate(QAWarning other) {
|
||||||
assert other.getAggregationId() == getAggregationId();
|
assert other.getAggregationId().equals(getAggregationId());
|
||||||
this.count += other.getCount();
|
int newCount = count+other.getCount();
|
||||||
if(this.severity.compareTo(other.getSeverity()) < 0) {
|
Severity newSeverity = severity;
|
||||||
this.severity = other.getSeverity();
|
if (other.getSeverity().compareTo(severity) > 0) {
|
||||||
|
newSeverity = other.getSeverity();
|
||||||
}
|
}
|
||||||
|
QAWarning merged = new QAWarning(getType(), getBucketId(), newSeverity,
|
||||||
|
newCount);
|
||||||
|
for(Entry<String,Object> entry : properties.entrySet()) {
|
||||||
|
merged.setProperty(entry.getKey(),entry.getValue());
|
||||||
|
}
|
||||||
|
for(Entry<String,Object> entry : other.getProperties().entrySet()) {
|
||||||
|
merged.setProperty(entry.getKey(),entry.getValue());
|
||||||
|
}
|
||||||
|
return merged;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -114,6 +117,7 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
|
|||||||
}
|
}
|
||||||
|
|
||||||
@JsonProperty("properties")
|
@JsonProperty("properties")
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_EMPTY)
|
||||||
public Map<String,Object> getProperties() {
|
public Map<String,Object> getProperties() {
|
||||||
return properties;
|
return properties;
|
||||||
}
|
}
|
||||||
@ -125,4 +129,17 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
|
|||||||
public int compareTo(QAWarning other) {
|
public int compareTo(QAWarning other) {
|
||||||
return - severity.compareTo(other.getSeverity());
|
return - severity.compareTo(other.getSeverity());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (other == null || !QAWarning.class.isInstance(other)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
QAWarning otherWarning = (QAWarning)other;
|
||||||
|
return type.equals(otherWarning.getType()) &&
|
||||||
|
bucketId.equals(otherWarning.getBucketId()) &&
|
||||||
|
severity.equals(otherWarning.getSeverity()) &&
|
||||||
|
count == otherWarning.getCount() &&
|
||||||
|
properties.equals(otherWarning.getProperties());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,16 +6,21 @@ import java.util.HashMap;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A store for QA warnings which aggregates them by type.
|
* A store for QA warnings which aggregates them by type.
|
||||||
* @author antonin
|
*
|
||||||
|
* @author Antonin Delpeuch
|
||||||
*/
|
*/
|
||||||
public class QAWarningStore {
|
public class QAWarningStore {
|
||||||
|
|
||||||
|
@JsonIgnore
|
||||||
private Map<String, QAWarning> map;
|
private Map<String, QAWarning> map;
|
||||||
|
@JsonIgnore
|
||||||
private QAWarning.Severity maxSeverity;
|
private QAWarning.Severity maxSeverity;
|
||||||
|
@JsonIgnore
|
||||||
private int totalWarnings;
|
private int totalWarnings;
|
||||||
|
|
||||||
public QAWarningStore() {
|
public QAWarningStore() {
|
||||||
@ -36,7 +41,7 @@ public class QAWarningStore {
|
|||||||
totalWarnings += warning.getCount();
|
totalWarnings += warning.getCount();
|
||||||
if (map.containsKey(aggregationKey)) {
|
if (map.containsKey(aggregationKey)) {
|
||||||
QAWarning existing = map.get(aggregationKey);
|
QAWarning existing = map.get(aggregationKey);
|
||||||
existing.aggregate(warning);
|
map.put(aggregationKey, existing.aggregate(warning));
|
||||||
} else {
|
} else {
|
||||||
map.put(aggregationKey, warning);
|
map.put(aggregationKey, warning);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,190 @@
|
|||||||
|
package org.openrefine.wikidata.qa;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.utils.EntityCache;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class provides an abstraction over the way constraint
|
||||||
|
* definitions are stored in Wikidata.
|
||||||
|
*
|
||||||
|
* @author antonin
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class WikidataConstraintFetcher implements ConstraintFetcher {
|
||||||
|
public static String WIKIDATA_CONSTRAINT_PID = "P2302";
|
||||||
|
|
||||||
|
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
|
||||||
|
public static String FORMAT_REGEX_PID = "P1793";
|
||||||
|
|
||||||
|
public static String INVERSE_CONSTRAINT_QID = "Q21510855";
|
||||||
|
public static String INVERSE_PROPERTY_PID = "P2306";
|
||||||
|
|
||||||
|
public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958";
|
||||||
|
|
||||||
|
public static String USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID = "Q21510863";
|
||||||
|
|
||||||
|
public static String USED_ONLY_AS_REFERENCE_CONSTRAINT_QID = "Q21528959";
|
||||||
|
|
||||||
|
public static String ALLOWED_QUALIFIERS_CONSTRAINT_QID = "Q21510851";
|
||||||
|
public static String ALLOWED_QUALIFIERS_CONSTRAINT_PID = "P2306";
|
||||||
|
|
||||||
|
public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856";
|
||||||
|
public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306";
|
||||||
|
|
||||||
|
public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
|
||||||
|
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
|
||||||
|
|
||||||
|
// The following constraints still need to be implemented:
|
||||||
|
|
||||||
|
public static String TYPE_CONSTRAINT_QID = "Q21503250";
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getFormatRegex(PropertyIdValue pid) {
|
||||||
|
List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
|
||||||
|
if (specs != null) {
|
||||||
|
List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
|
||||||
|
if (! regexes.isEmpty()) {
|
||||||
|
return ((StringValue)regexes.get(0)).getString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public PropertyIdValue getInversePid(PropertyIdValue pid) {
|
||||||
|
List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID);
|
||||||
|
|
||||||
|
if(specs != null) {
|
||||||
|
List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID);
|
||||||
|
if (! inverses.isEmpty()) {
|
||||||
|
return (PropertyIdValue)inverses.get(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isForValuesOnly(PropertyIdValue pid) {
|
||||||
|
return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isForQualifiersOnly(PropertyIdValue pid) {
|
||||||
|
return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isForReferencesOnly(PropertyIdValue pid) {
|
||||||
|
return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
|
||||||
|
List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_QUALIFIERS_CONSTRAINT_QID);
|
||||||
|
|
||||||
|
if (specs != null) {
|
||||||
|
List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID);
|
||||||
|
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
|
||||||
|
List<SnakGroup> specs = getSingleConstraint(pid, MANDATORY_QUALIFIERS_CONSTRAINT_QID);
|
||||||
|
|
||||||
|
if (specs != null) {
|
||||||
|
List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID);
|
||||||
|
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasSingleValue(PropertyIdValue pid) {
|
||||||
|
return getSingleConstraint(pid, SINGLE_VALUE_CONSTRAINT_QID) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasDistinctValues(PropertyIdValue pid) {
|
||||||
|
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a single constraint for a particular type and a property, or null
|
||||||
|
* if there is no such constraint
|
||||||
|
* @param pid: the property to retrieve the constraints for
|
||||||
|
* @param qid: the type of the constraints
|
||||||
|
* @return the list of qualifiers for the constraint, or null if it does not exist
|
||||||
|
*/
|
||||||
|
protected List<SnakGroup> getSingleConstraint(PropertyIdValue pid, String qid) {
|
||||||
|
Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null);
|
||||||
|
if (statement != null) {
|
||||||
|
return statement.getClaim().getQualifiers();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the list of constraints of a particular type for a property
|
||||||
|
* @param pid: the property to retrieve the constraints for
|
||||||
|
* @param qid: the type of the constraints
|
||||||
|
* @return the stream of matching constraint statements
|
||||||
|
*/
|
||||||
|
protected Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
|
||||||
|
Stream<Statement> allConstraints = getConstraintStatements(pid)
|
||||||
|
.stream()
|
||||||
|
.filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid));
|
||||||
|
return allConstraints;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets all the constraint statements for a given property
|
||||||
|
* @param pid : the id of the property to retrieve the constraints for
|
||||||
|
* @return the list of constraint statements
|
||||||
|
*/
|
||||||
|
protected List<Statement> getConstraintStatements(PropertyIdValue pid) {
|
||||||
|
PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
|
||||||
|
StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
|
||||||
|
if (group != null) {
|
||||||
|
return group.getStatements();
|
||||||
|
} else {
|
||||||
|
return new ArrayList<Statement>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the values of a given property in qualifiers
|
||||||
|
* @param groups: the qualifiers
|
||||||
|
* @param pid: the property to filter on
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected List<Value> findValues(List<SnakGroup> groups, String pid) {
|
||||||
|
List<Value> results = new ArrayList<>();
|
||||||
|
for(SnakGroup group : groups) {
|
||||||
|
if (group.getProperty().getId().equals(pid)) {
|
||||||
|
for (Snak snak : group.getSnaks())
|
||||||
|
results.add(snak.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
@ -15,11 +15,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
|
|||||||
* A scrutinizer that checks for properties using the same value
|
* A scrutinizer that checks for properties using the same value
|
||||||
* on different items.
|
* on different items.
|
||||||
*
|
*
|
||||||
* @author antonin
|
* @author Antonin Delpeuch
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class DistinctValuesScrutinizer extends StatementScrutinizer {
|
public class DistinctValuesScrutinizer extends StatementScrutinizer {
|
||||||
|
|
||||||
|
public final static String type = "identical-values-for-distinct-valued-property";
|
||||||
|
|
||||||
private Map<PropertyIdValue, Map<Value, EntityIdValue>> _seenValues;
|
private Map<PropertyIdValue, Map<Value, EntityIdValue>> _seenValues;
|
||||||
|
|
||||||
public DistinctValuesScrutinizer() {
|
public DistinctValuesScrutinizer() {
|
||||||
@ -39,7 +41,7 @@ public class DistinctValuesScrutinizer extends StatementScrutinizer {
|
|||||||
if (seen.containsKey(mainSnakValue)) {
|
if (seen.containsKey(mainSnakValue)) {
|
||||||
EntityIdValue otherId = seen.get(mainSnakValue);
|
EntityIdValue otherId = seen.get(mainSnakValue);
|
||||||
QAWarning issue = new QAWarning(
|
QAWarning issue = new QAWarning(
|
||||||
"identical-values-for-distinct-valued-property",
|
type,
|
||||||
pid.getId(),
|
pid.getId(),
|
||||||
QAWarning.Severity.IMPORTANT,
|
QAWarning.Severity.IMPORTANT,
|
||||||
1);
|
1);
|
||||||
|
@ -2,6 +2,7 @@ package org.openrefine.wikidata.qa.scrutinizers;
|
|||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.WikidataConstraintFetcher;
|
||||||
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||||
import org.openrefine.wikidata.qa.QAWarning;
|
import org.openrefine.wikidata.qa.QAWarning;
|
||||||
import org.openrefine.wikidata.qa.QAWarning.Severity;
|
import org.openrefine.wikidata.qa.QAWarning.Severity;
|
||||||
@ -9,9 +10,9 @@ import org.openrefine.wikidata.qa.QAWarningStore;
|
|||||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interface for any class that
|
* Inspects an edit batch and emits warnings.
|
||||||
* @author antonin
|
|
||||||
*
|
*
|
||||||
|
* @author Antonin Delpeuch
|
||||||
*/
|
*/
|
||||||
public abstract class EditScrutinizer {
|
public abstract class EditScrutinizer {
|
||||||
|
|
||||||
@ -19,13 +20,18 @@ public abstract class EditScrutinizer {
|
|||||||
protected ConstraintFetcher _fetcher;
|
protected ConstraintFetcher _fetcher;
|
||||||
|
|
||||||
public EditScrutinizer() {
|
public EditScrutinizer() {
|
||||||
_fetcher = new ConstraintFetcher();
|
_fetcher = null;
|
||||||
|
_store = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setStore(QAWarningStore store) {
|
public void setStore(QAWarningStore store) {
|
||||||
_store = store;
|
_store = store;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setFetcher(ConstraintFetcher fetcher) {
|
||||||
|
_fetcher = fetcher;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads the candidate edits and emits warnings in the store
|
* Reads the candidate edits and emits warnings in the store
|
||||||
* @param edit: the list of ItemUpdates to scrutinize
|
* @param edit: the list of ItemUpdates to scrutinize
|
||||||
|
@ -15,14 +15,16 @@ import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
|||||||
* A scrutinizer that detects incorrect formats in text values
|
* A scrutinizer that detects incorrect formats in text values
|
||||||
* (mostly identifiers).
|
* (mostly identifiers).
|
||||||
*
|
*
|
||||||
* @author antonin
|
* @author Antonin Delpeuch
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
public class FormatScrutinizer extends SnakScrutinizer {
|
||||||
|
|
||||||
|
public static final String type = "add-statements-with-invalid-format";
|
||||||
|
|
||||||
private Map<PropertyIdValue, Pattern> _patterns;
|
private Map<PropertyIdValue, Pattern> _patterns;
|
||||||
|
|
||||||
public FormatConstraintScrutinizer() {
|
public FormatScrutinizer() {
|
||||||
_patterns = new HashMap<>();
|
_patterns = new HashMap<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,7 +61,7 @@ public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
|||||||
if (!pattern.matcher(value).matches()) {
|
if (!pattern.matcher(value).matches()) {
|
||||||
if (added) {
|
if (added) {
|
||||||
QAWarning issue = new QAWarning(
|
QAWarning issue = new QAWarning(
|
||||||
"add-statements-with-invalid-format",
|
type,
|
||||||
pid.getId(),
|
pid.getId(),
|
||||||
QAWarning.Severity.IMPORTANT,
|
QAWarning.Severity.IMPORTANT,
|
||||||
1);
|
1);
|
@ -17,11 +17,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
|
|||||||
* A scrutinizer that checks for missing inverse statements in
|
* A scrutinizer that checks for missing inverse statements in
|
||||||
* edit batches.
|
* edit batches.
|
||||||
*
|
*
|
||||||
* @author antonin
|
* @author Antonin Delpeuch
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class InverseConstraintScrutinizer extends StatementScrutinizer {
|
public class InverseConstraintScrutinizer extends StatementScrutinizer {
|
||||||
|
|
||||||
|
public static final String type = "missing-inverse-statements";
|
||||||
|
|
||||||
private Map<PropertyIdValue, PropertyIdValue> _inverse;
|
private Map<PropertyIdValue, PropertyIdValue> _inverse;
|
||||||
private Map<PropertyIdValue, Map<EntityIdValue, Set<EntityIdValue> >> _statements;
|
private Map<PropertyIdValue, Map<EntityIdValue, Set<EntityIdValue> >> _statements;
|
||||||
|
|
||||||
@ -83,7 +85,7 @@ public class InverseConstraintScrutinizer extends StatementScrutinizer {
|
|||||||
PropertyIdValue missingProperty = propertyPair.getValue();
|
PropertyIdValue missingProperty = propertyPair.getValue();
|
||||||
Set<EntityIdValue> reciprocalLinks = _statements.get(missingProperty).get(idValue);
|
Set<EntityIdValue> reciprocalLinks = _statements.get(missingProperty).get(idValue);
|
||||||
if (reciprocalLinks == null || !reciprocalLinks.contains(itemLinks.getKey())) {
|
if (reciprocalLinks == null || !reciprocalLinks.contains(itemLinks.getKey())) {
|
||||||
QAWarning issue = new QAWarning("missing-inverse-statements",
|
QAWarning issue = new QAWarning(type,
|
||||||
ourProperty.getId(),
|
ourProperty.getId(),
|
||||||
QAWarning.Severity.IMPORTANT,
|
QAWarning.Severity.IMPORTANT,
|
||||||
1);
|
1);
|
||||||
|
@ -4,7 +4,7 @@ import java.util.List;
|
|||||||
|
|
||||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
|
||||||
public abstract class ItemEditScrutinizer extends EditScrutinizer {
|
public abstract class ItemUpdateScrutinizer extends EditScrutinizer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(List<ItemUpdate> edit) {
|
public void scrutinize(List<ItemUpdate> edit) {
|
@ -5,19 +5,26 @@ import org.openrefine.wikidata.updates.ItemUpdate;
|
|||||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A scrutinizer that inspects new items
|
* A scrutinizer that inspects new items.
|
||||||
* @author antonin
|
*
|
||||||
|
* @author Antonin Delpeuch
|
||||||
*/
|
*/
|
||||||
public class NewItemScrutinizer extends ItemEditScrutinizer {
|
public class NewItemScrutinizer extends ItemUpdateScrutinizer {
|
||||||
|
|
||||||
|
public static final String noLabelType = "new-item-without-labels-or-aliases";
|
||||||
|
public static final String noDescType = "new-item-without-descriptions";
|
||||||
|
public static final String deletedStatementsType = "new-item-with-deleted-statements";
|
||||||
|
public static final String noTypeType = "new-item-without-P31-or-P279";
|
||||||
|
public static final String newItemType = "new-item-created";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(ItemUpdate update) {
|
public void scrutinize(ItemUpdate update) {
|
||||||
if (update.isNew()) {
|
if (update.isNew()) {
|
||||||
info("new-item-created");
|
info(newItemType);
|
||||||
|
|
||||||
if (update.getLabels().isEmpty() && update.getAliases().isEmpty()) {
|
if (update.getLabels().isEmpty() && update.getAliases().isEmpty()) {
|
||||||
QAWarning issue = new QAWarning(
|
QAWarning issue = new QAWarning(
|
||||||
"new-item-without-labels-or-aliases",
|
noLabelType,
|
||||||
null,
|
null,
|
||||||
QAWarning.Severity.CRITICAL,
|
QAWarning.Severity.CRITICAL,
|
||||||
1);
|
1);
|
||||||
@ -27,7 +34,7 @@ public class NewItemScrutinizer extends ItemEditScrutinizer {
|
|||||||
|
|
||||||
if (update.getDescriptions().isEmpty()) {
|
if (update.getDescriptions().isEmpty()) {
|
||||||
QAWarning issue = new QAWarning(
|
QAWarning issue = new QAWarning(
|
||||||
"new-item-without-descriptions",
|
noDescType,
|
||||||
null,
|
null,
|
||||||
QAWarning.Severity.WARNING,
|
QAWarning.Severity.WARNING,
|
||||||
1);
|
1);
|
||||||
@ -35,9 +42,9 @@ public class NewItemScrutinizer extends ItemEditScrutinizer {
|
|||||||
addIssue(issue);
|
addIssue(issue);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (! update.getDeletedStatements().isEmpty()) {
|
if (!update.getDeletedStatements().isEmpty()) {
|
||||||
QAWarning issue = new QAWarning(
|
QAWarning issue = new QAWarning(
|
||||||
"new-item-with-deleted-statements",
|
deletedStatementsType,
|
||||||
null,
|
null,
|
||||||
QAWarning.Severity.WARNING,
|
QAWarning.Severity.WARNING,
|
||||||
1);
|
1);
|
||||||
@ -56,7 +63,7 @@ public class NewItemScrutinizer extends ItemEditScrutinizer {
|
|||||||
}
|
}
|
||||||
if (!typeFound) {
|
if (!typeFound) {
|
||||||
QAWarning issue = new QAWarning(
|
QAWarning issue = new QAWarning(
|
||||||
"new-item-without-P31-or-P279",
|
noTypeType,
|
||||||
null,
|
null,
|
||||||
QAWarning.Severity.WARNING,
|
QAWarning.Severity.WARNING,
|
||||||
1);
|
1);
|
||||||
|
@ -7,10 +7,12 @@ import org.openrefine.wikidata.updates.ItemUpdate;
|
|||||||
|
|
||||||
public class NoEditsMadeScrutinizer extends EditScrutinizer {
|
public class NoEditsMadeScrutinizer extends EditScrutinizer {
|
||||||
|
|
||||||
|
public static final String type = "no-edit-generated";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(List<ItemUpdate> edit) {
|
public void scrutinize(List<ItemUpdate> edit) {
|
||||||
if(edit.stream().allMatch(e -> e.isNull())) {
|
if(edit.stream().allMatch(e -> e.isNull())) {
|
||||||
info("no-edit-generated");
|
info(type);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -14,11 +14,14 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
|||||||
/**
|
/**
|
||||||
* A scrutinizer that checks the compatibility of the qualifiers
|
* A scrutinizer that checks the compatibility of the qualifiers
|
||||||
* and the property of a statement, and looks for mandatory qualifiers.
|
* and the property of a statement, and looks for mandatory qualifiers.
|
||||||
* @author antonin
|
|
||||||
*
|
*
|
||||||
|
* @author Antonin Delpeuch
|
||||||
*/
|
*/
|
||||||
public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
|
public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
|
||||||
|
|
||||||
|
public static final String missingMandatoryQualifiersType = "missing-mandatory-qualifiers";
|
||||||
|
public static final String disallowedQualifiersType = "disallowed-qualifiers";
|
||||||
|
|
||||||
private Map<PropertyIdValue, Set<PropertyIdValue>> _allowedQualifiers;
|
private Map<PropertyIdValue, Set<PropertyIdValue>> _allowedQualifiers;
|
||||||
private Map<PropertyIdValue, Set<PropertyIdValue>> _mandatoryQualifiers;
|
private Map<PropertyIdValue, Set<PropertyIdValue>> _mandatoryQualifiers;
|
||||||
|
|
||||||
@ -65,7 +68,7 @@ public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
|
|||||||
|
|
||||||
for (PropertyIdValue missing : missingQualifiers) {
|
for (PropertyIdValue missing : missingQualifiers) {
|
||||||
QAWarning issue = new QAWarning(
|
QAWarning issue = new QAWarning(
|
||||||
"missing-mandatory-qualifiers",
|
missingMandatoryQualifiersType,
|
||||||
statementProperty.getId()+"-"+missing.getId(),
|
statementProperty.getId()+"-"+missing.getId(),
|
||||||
QAWarning.Severity.WARNING,
|
QAWarning.Severity.WARNING,
|
||||||
1);
|
1);
|
||||||
@ -76,7 +79,7 @@ public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
|
|||||||
}
|
}
|
||||||
for (PropertyIdValue disallowed : disallowedQualifiers) {
|
for (PropertyIdValue disallowed : disallowedQualifiers) {
|
||||||
QAWarning issue = new QAWarning(
|
QAWarning issue = new QAWarning(
|
||||||
"disallowed-qualifiers",
|
disallowedQualifiersType,
|
||||||
statementProperty.getId()+"-"+disallowed.getId(),
|
statementProperty.getId()+"-"+disallowed.getId(),
|
||||||
QAWarning.Severity.WARNING,
|
QAWarning.Severity.WARNING,
|
||||||
1);
|
1);
|
||||||
|
@ -13,14 +13,14 @@ import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
|||||||
*/
|
*/
|
||||||
public class SelfReferentialScrutinizer extends SnakScrutinizer {
|
public class SelfReferentialScrutinizer extends SnakScrutinizer {
|
||||||
|
|
||||||
|
public static final String type = "self-referential-statements";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
|
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
|
||||||
if (entityId.equals(snak.getValue())) {
|
if (entityId.equals(snak.getValue())) {
|
||||||
QAWarning issue = new QAWarning(
|
QAWarning issue = new QAWarning(
|
||||||
"self-referential-statements",
|
type, null,
|
||||||
null,
|
QAWarning.Severity.WARNING, 1);
|
||||||
QAWarning.Severity.WARNING,
|
|
||||||
1);
|
|
||||||
issue.setProperty("example_entity", entityId);
|
issue.setProperty("example_entity", entityId);
|
||||||
addIssue(issue);
|
addIssue(issue);
|
||||||
}
|
}
|
||||||
|
@ -11,10 +11,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
|||||||
/**
|
/**
|
||||||
* For now this scrutinizer only checks for uniqueness at
|
* For now this scrutinizer only checks for uniqueness at
|
||||||
* the item level (it ignores qualifiers and references).
|
* the item level (it ignores qualifiers and references).
|
||||||
* @author antonin
|
*
|
||||||
|
* @author Antonin Delpeuch
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class SingleValueScrutinizer extends ItemEditScrutinizer {
|
public class SingleValueScrutinizer extends ItemUpdateScrutinizer {
|
||||||
|
|
||||||
|
public static final String type = "single-valued-property-added-more-than-once";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(ItemUpdate update) {
|
public void scrutinize(ItemUpdate update) {
|
||||||
@ -25,10 +28,8 @@ public class SingleValueScrutinizer extends ItemEditScrutinizer {
|
|||||||
if (seenSingleProperties.contains(pid)) {
|
if (seenSingleProperties.contains(pid)) {
|
||||||
|
|
||||||
QAWarning issue = new QAWarning(
|
QAWarning issue = new QAWarning(
|
||||||
"single-valued-property-added-more-than-once",
|
type, pid.getId(),
|
||||||
pid.getId(),
|
QAWarning.Severity.WARNING, 1);
|
||||||
QAWarning.Severity.WARNING,
|
|
||||||
1);
|
|
||||||
issue.setProperty("property_entity", pid);
|
issue.setProperty("property_entity", pid);
|
||||||
issue.setProperty("example_entity", update.getItemId());
|
issue.setProperty("example_entity", update.getItemId());
|
||||||
addIssue(issue);
|
addIssue(issue);
|
||||||
|
@ -11,7 +11,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
|||||||
* A scrutinizer that inspects snaks individually, no matter whether they
|
* A scrutinizer that inspects snaks individually, no matter whether they
|
||||||
* appear as main snaks, qualifiers or references.
|
* appear as main snaks, qualifiers or references.
|
||||||
*
|
*
|
||||||
* @author antonin
|
* @author Antonin Delpeuch
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public abstract class SnakScrutinizer extends StatementScrutinizer {
|
public abstract class SnakScrutinizer extends StatementScrutinizer {
|
||||||
|
@ -3,7 +3,7 @@ package org.openrefine.wikidata.qa.scrutinizers;
|
|||||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||||
|
|
||||||
public abstract class StatementGroupScrutinizer extends ItemEditScrutinizer {
|
public abstract class StatementGroupScrutinizer extends ItemUpdateScrutinizer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(ItemUpdate update) {
|
public void scrutinize(ItemUpdate update) {
|
||||||
|
@ -4,7 +4,7 @@ import org.openrefine.wikidata.updates.ItemUpdate;
|
|||||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
|
||||||
public abstract class StatementScrutinizer extends ItemEditScrutinizer {
|
public abstract class StatementScrutinizer extends ItemUpdateScrutinizer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(ItemUpdate update) {
|
public void scrutinize(ItemUpdate update) {
|
||||||
|
@ -11,10 +11,12 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
|||||||
*/
|
*/
|
||||||
public class UnsourcedScrutinizer extends StatementScrutinizer {
|
public class UnsourcedScrutinizer extends StatementScrutinizer {
|
||||||
|
|
||||||
|
public static final String type = "unsourced-statements";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
|
public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
|
||||||
if(statement.getReferences().isEmpty() && added) {
|
if(statement.getReferences().isEmpty() && added) {
|
||||||
warning("unsourced-statements");
|
warning(type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,21 +12,27 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Scrutinizes strings for trailing / leading whitespace, and others
|
* Scrutinizes strings for trailing / leading whitespace, and others
|
||||||
* @author antonin
|
*
|
||||||
|
* @author Antonin Delpeuch
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class WhitespaceScrutinizer extends ValueScrutinizer {
|
public class WhitespaceScrutinizer extends ValueScrutinizer {
|
||||||
|
|
||||||
private Map<String,Pattern> _issuesMap;
|
private Map<String,Pattern> _issuesMap;
|
||||||
|
|
||||||
|
public static final String leadingWhitespaceType = "leading-whitespace";
|
||||||
|
public static final String trailingWhitespaceType = "trailing-whitespace";
|
||||||
|
public static final String duplicateWhitespaceType = "duplicate-whitespace";
|
||||||
|
public static final String nonPrintableCharsType = "non-printable-characters";
|
||||||
|
|
||||||
public WhitespaceScrutinizer() {
|
public WhitespaceScrutinizer() {
|
||||||
_issuesMap = new HashMap<>();
|
_issuesMap = new HashMap<>();
|
||||||
_issuesMap.put("leading-whitespace", Pattern.compile("^\\s"));
|
_issuesMap.put(leadingWhitespaceType, Pattern.compile("^\\s"));
|
||||||
_issuesMap.put("trailing-whitespace", Pattern.compile("\\s$"));
|
_issuesMap.put(trailingWhitespaceType, Pattern.compile("\\s$"));
|
||||||
_issuesMap.put("duplicate-whitespace", Pattern.compile("\\s\\s"));
|
_issuesMap.put(duplicateWhitespaceType, Pattern.compile("\\s\\s"));
|
||||||
|
|
||||||
// https://stackoverflow.com/questions/14565934/regular-expression-to-remove-all-non-printable-characters
|
// https://stackoverflow.com/questions/14565934/regular-expression-to-remove-all-non-printable-characters
|
||||||
_issuesMap.put("non-printable-characters", Pattern.compile("[\\x00\\x08\\x0B\\x0C\\x0E-\\x1F]"));
|
_issuesMap.put(nonPrintableCharsType, Pattern.compile("[\\x00\\x03\\x08\\x0B\\x0C\\x0E-\\x1F]"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -0,0 +1,75 @@
|
|||||||
|
package org.openrefine.wikidata.qa;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
|
|
||||||
|
|
||||||
|
public class MockConstraintFetcher implements ConstraintFetcher {
|
||||||
|
|
||||||
|
public static PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350");
|
||||||
|
public static PropertyIdValue inversePid = Datamodel.makeWikidataPropertyIdValue("P57");
|
||||||
|
public static PropertyIdValue allowedQualifierPid = Datamodel.makeWikidataPropertyIdValue("P34");
|
||||||
|
public static PropertyIdValue mandatoryQualifierPid = Datamodel.makeWikidataPropertyIdValue("P97");
|
||||||
|
|
||||||
|
public static PropertyIdValue mainSnakPid = Datamodel.makeWikidataPropertyIdValue("P1234");
|
||||||
|
public static PropertyIdValue qualifierPid = Datamodel.makeWikidataPropertyIdValue("P987");
|
||||||
|
public static PropertyIdValue referencePid = Datamodel.makeWikidataPropertyIdValue("P384");
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getFormatRegex(PropertyIdValue pid) {
|
||||||
|
return "[1-9]\\d+";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This constraint is purposely left inconsistent (the inverse
|
||||||
|
* constraint holds only on one side).
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public PropertyIdValue getInversePid(PropertyIdValue pid) {
|
||||||
|
if (pidWithInverse.equals(pid)) {
|
||||||
|
return inversePid;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isForValuesOnly(PropertyIdValue pid) {
|
||||||
|
return mainSnakPid.equals(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isForQualifiersOnly(PropertyIdValue pid) {
|
||||||
|
return qualifierPid.equals(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isForReferencesOnly(PropertyIdValue pid) {
|
||||||
|
return referencePid.equals(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
|
||||||
|
return Arrays.asList(allowedQualifierPid, mandatoryQualifierPid).stream().collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
|
||||||
|
return Collections.singleton(mandatoryQualifierPid);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasSingleValue(PropertyIdValue pid) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasDistinctValues(PropertyIdValue pid) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,44 @@
|
|||||||
|
package org.openrefine.wikidata.qa;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.JacksonSerializationTest;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
public class QAWarningStoreTest {
|
||||||
|
|
||||||
|
public static String exampleJson = "{\"max_severity\":\"CRITICAL\",\"nb_warnings\":5,"
|
||||||
|
+"\"warnings\":[{\"type\":\"new-item-without-label\",\"bucketId\":null,"
|
||||||
|
+"\"severity\":\"CRITICAL\",\"count\":3},{\"type\":\"add-statements-with-invalid-format\","
|
||||||
|
+"\"bucketId\":\"P2427\",\"severity\":\"IMPORTANT\",\"count\":2}]}";
|
||||||
|
|
||||||
|
private QAWarningStore store;
|
||||||
|
private QAWarning otherWarning;
|
||||||
|
|
||||||
|
@BeforeMethod
|
||||||
|
public void setUp() {
|
||||||
|
store = new QAWarningStore();
|
||||||
|
store.addWarning(QAWarningTest.exampleWarning);
|
||||||
|
store.addWarning(QAWarningTest.exampleWarning);
|
||||||
|
otherWarning = new QAWarning("new-item-without-label", null, QAWarning.Severity.CRITICAL, 3);
|
||||||
|
store.addWarning(otherWarning);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSerialize() {
|
||||||
|
JacksonSerializationTest.testSerialize(store, exampleJson);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCount() {
|
||||||
|
assertEquals(5, store.getNbWarnings());
|
||||||
|
assertEquals(2, store.getWarnings().size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMaxSeverity() {
|
||||||
|
assertEquals(QAWarning.Severity.CRITICAL, store.getMaxSeverity());
|
||||||
|
assertEquals(QAWarning.Severity.INFO, (new QAWarningStore()).getMaxSeverity());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,50 @@
|
|||||||
|
package org.openrefine.wikidata.qa;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.JacksonSerializationTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
public class QAWarningTest {
|
||||||
|
|
||||||
|
public static QAWarning exampleWarning = new QAWarning("add-statements-with-invalid-format",
|
||||||
|
"P2427",
|
||||||
|
QAWarning.Severity.IMPORTANT,
|
||||||
|
1);
|
||||||
|
public static String exampleJson =
|
||||||
|
"{\"severity\":\"IMPORTANT\","+
|
||||||
|
"\"count\":1,\"bucketId\":\"P2427\",\"type\":\"add-statements-with-invalid-format\"}";
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSerialize() {
|
||||||
|
JacksonSerializationTest.testSerialize(exampleWarning, exampleJson);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAggregate() {
|
||||||
|
QAWarning firstWarning = new QAWarning("add-statements-with-invalid-format",
|
||||||
|
"P2427",
|
||||||
|
QAWarning.Severity.INFO,
|
||||||
|
1);
|
||||||
|
firstWarning.setProperty("foo", "bar");
|
||||||
|
assertEquals(exampleWarning.getAggregationId(), firstWarning.getAggregationId());
|
||||||
|
QAWarning merged = firstWarning.aggregate(exampleWarning);
|
||||||
|
assertEquals(2, merged.getCount());
|
||||||
|
assertEquals(exampleWarning.getAggregationId(), merged.getAggregationId());
|
||||||
|
assertEquals(exampleWarning.getType(), merged.getType());
|
||||||
|
assertEquals(exampleWarning.getSeverity(), merged.getSeverity());
|
||||||
|
assertEquals("bar", merged.getProperties().get("foo"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCompare() {
|
||||||
|
QAWarning otherWarning = new QAWarning("no-reference",
|
||||||
|
"no-reference",
|
||||||
|
QAWarning.Severity.WARNING,
|
||||||
|
1);
|
||||||
|
assertEquals(1, otherWarning.compareTo(exampleWarning));
|
||||||
|
assertEquals(-1, exampleWarning.compareTo(otherWarning));
|
||||||
|
assertEquals(0, exampleWarning.compareTo(exampleWarning));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -7,7 +7,7 @@ import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
|||||||
|
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class ConstraintFetcherTests {
|
public class WikidataConstraintFetcherTests {
|
||||||
|
|
||||||
private ConstraintFetcher fetcher;
|
private ConstraintFetcher fetcher;
|
||||||
|
|
||||||
@ -21,8 +21,8 @@ public class ConstraintFetcherTests {
|
|||||||
private PropertyIdValue referenceURL;
|
private PropertyIdValue referenceURL;
|
||||||
private PropertyIdValue reasonForDeprecation;
|
private PropertyIdValue reasonForDeprecation;
|
||||||
|
|
||||||
public ConstraintFetcherTests() {
|
public WikidataConstraintFetcherTests() {
|
||||||
fetcher = new ConstraintFetcher();
|
fetcher = new WikidataConstraintFetcher();
|
||||||
headOfGovernment = Datamodel.makeWikidataPropertyIdValue("P6");
|
headOfGovernment = Datamodel.makeWikidataPropertyIdValue("P6");
|
||||||
startTime = Datamodel.makeWikidataPropertyIdValue("P580");
|
startTime = Datamodel.makeWikidataPropertyIdValue("P580");
|
||||||
endTime = Datamodel.makeWikidataPropertyIdValue("P582");
|
endTime = Datamodel.makeWikidataPropertyIdValue("P582");
|
@ -0,0 +1,29 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
|
||||||
|
public class DistinctValuesScrutinizerTest extends StatementScrutinizerTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new DistinctValuesScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrigger() {
|
||||||
|
ItemIdValue idA = TestingDataGenerator.existingId;
|
||||||
|
ItemIdValue idB = TestingDataGenerator.matchedId;
|
||||||
|
ItemUpdate updateA = new ItemUpdateBuilder(idA)
|
||||||
|
.addStatement(TestingDataGenerator.generateStatement(idA, idB))
|
||||||
|
.build();
|
||||||
|
ItemUpdate updateB = new ItemUpdateBuilder(idB)
|
||||||
|
.addStatement(TestingDataGenerator.generateStatement(idB, idB))
|
||||||
|
.build();
|
||||||
|
scrutinize(updateA, updateB);
|
||||||
|
assertWarningsRaised(DistinctValuesScrutinizer.type);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,31 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
|
||||||
|
public class FormatScrutinizerTest extends ValueScrutinizerTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new FormatScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrigger() {
|
||||||
|
scrutinize(Datamodel.makeStringValue("not a number"));
|
||||||
|
assertWarningsRaised(FormatScrutinizer.type);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoIssue() {
|
||||||
|
scrutinize(Datamodel.makeStringValue("1234"));
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIncompleteMatch() {
|
||||||
|
scrutinize(Datamodel.makeStringValue("42 is a number"));
|
||||||
|
assertWarningsRaised(FormatScrutinizer.type);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,41 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.MockConstraintFetcher;
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
|
|
||||||
|
public class InverseConstaintScrutinizerTest extends StatementScrutinizerTest {
|
||||||
|
|
||||||
|
private ItemIdValue idA = TestingDataGenerator.existingId;
|
||||||
|
private ItemIdValue idB = TestingDataGenerator.newIdB;
|
||||||
|
private PropertyIdValue pidWithInverse = MockConstraintFetcher.pidWithInverse;
|
||||||
|
private PropertyIdValue inversePid = MockConstraintFetcher.inversePid;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new InverseConstraintScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrigger() {
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(idA)
|
||||||
|
.addStatement(TestingDataGenerator.generateStatement(idA, pidWithInverse, idB))
|
||||||
|
.build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertWarningsRaised(InverseConstraintScrutinizer.type);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoSymmetricClosure() {
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(idA)
|
||||||
|
.addStatement(TestingDataGenerator.generateStatement(idA, inversePid, idB))
|
||||||
|
.build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,70 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
|
||||||
|
|
||||||
|
|
||||||
|
public class NewItemScrutinizerTest extends ScrutinizerTest {
|
||||||
|
|
||||||
|
private Claim claim = Datamodel.makeClaim(TestingDataGenerator.newIdA,
|
||||||
|
Datamodel.makeValueSnak(Datamodel.makeWikidataPropertyIdValue("P31"), TestingDataGenerator.existingId),
|
||||||
|
Collections.emptyList());
|
||||||
|
private Statement p31Statement = Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new NewItemScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrigger() {
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.newIdA).build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertWarningsRaised(
|
||||||
|
NewItemScrutinizer.noDescType,
|
||||||
|
NewItemScrutinizer.noLabelType,
|
||||||
|
NewItemScrutinizer.noTypeType,
|
||||||
|
NewItemScrutinizer.newItemType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEmptyItem() {
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.existingId).build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGoodNewItem() {
|
||||||
|
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.newIdA)
|
||||||
|
.addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"))
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en"))
|
||||||
|
.addStatement(p31Statement)
|
||||||
|
.build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertWarningsRaised(NewItemScrutinizer.newItemType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDeletedStatements() {
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.newIdA)
|
||||||
|
.addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"))
|
||||||
|
.addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en"))
|
||||||
|
.addStatement(p31Statement)
|
||||||
|
.deleteStatement(TestingDataGenerator.generateStatement(TestingDataGenerator.newIdA,
|
||||||
|
TestingDataGenerator.matchedId))
|
||||||
|
.build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertWarningsRaised(NewItemScrutinizer.newItemType, NewItemScrutinizer.deletedStatementsType);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,31 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
public class NoEditsMadeScrutinizerTest extends ScrutinizerTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new NoEditsMadeScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrigger() {
|
||||||
|
scrutinize();
|
||||||
|
assertWarningsRaised(NoEditsMadeScrutinizer.type);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNonNull() {
|
||||||
|
scrutinize(new ItemUpdateBuilder(TestingDataGenerator.newIdA).build());
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNull() {
|
||||||
|
scrutinize(new ItemUpdateBuilder(TestingDataGenerator.existingId).build());
|
||||||
|
assertWarningsRaised(NoEditsMadeScrutinizer.type);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,59 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.MockConstraintFetcher;
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
|
||||||
|
|
||||||
|
public class QualifierCompatibilityScrutinizerTest extends StatementScrutinizerTest {
|
||||||
|
private Snak disallowedQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.qualifierPid);
|
||||||
|
private Snak mandatoryQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.mandatoryQualifierPid);
|
||||||
|
private Snak allowedQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.allowedQualifierPid);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new QualifierCompatibilityScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDisallowedQualifier() {
|
||||||
|
|
||||||
|
scrutinize(makeStatement(disallowedQualifier,mandatoryQualifier));
|
||||||
|
assertWarningsRaised(QualifierCompatibilityScrutinizer.disallowedQualifiersType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMissingQualifier() {
|
||||||
|
scrutinize(makeStatement());
|
||||||
|
assertWarningsRaised(QualifierCompatibilityScrutinizer.missingMandatoryQualifiersType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGoodEdit() {
|
||||||
|
scrutinize(makeStatement(allowedQualifier,mandatoryQualifier));
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Statement makeStatement(Snak... qualifiers) {
|
||||||
|
Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId,
|
||||||
|
Datamodel.makeNoValueSnak(MockConstraintFetcher.mainSnakPid), makeQualifiers(qualifiers));
|
||||||
|
return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
||||||
|
}
|
||||||
|
private List<SnakGroup> makeQualifiers(Snak[] qualifiers) {
|
||||||
|
List<Snak> snaks = Arrays.asList(qualifiers);
|
||||||
|
return snaks.stream()
|
||||||
|
.map((Snak q) -> Datamodel.makeSnakGroup(Collections.<Snak>singletonList(q)))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,55 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.MockConstraintFetcher;
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
|
||||||
|
|
||||||
|
public class RestrictedPositionScrutinizerTest extends SnakScrutinizerTest {
|
||||||
|
|
||||||
|
private ItemIdValue qid = TestingDataGenerator.existingId;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new RestrictedPositionScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTriggerMainSnak() {
|
||||||
|
scrutinize(TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.qualifierPid, qid));
|
||||||
|
assertWarningsRaised("property-restricted-to-qualifier-found-in-mainsnak");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoProblem() {
|
||||||
|
scrutinize(TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.mainSnakPid, qid));
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNotRestricted() {
|
||||||
|
scrutinize(TestingDataGenerator.generateStatement(qid, Datamodel.makeWikidataPropertyIdValue("P3748"), qid));
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTriggerReference() {
|
||||||
|
Snak snak = Datamodel.makeValueSnak(MockConstraintFetcher.mainSnakPid, qid);
|
||||||
|
List<SnakGroup> snakGroups = Collections.singletonList(Datamodel.makeSnakGroup(Collections.singletonList(snak)));
|
||||||
|
Statement statement = Datamodel.makeStatement(
|
||||||
|
TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.mainSnakPid, qid).getClaim(),
|
||||||
|
Collections.singletonList(Datamodel.makeReference(snakGroups)),
|
||||||
|
StatementRank.NORMAL, "");
|
||||||
|
scrutinize(statement);
|
||||||
|
assertWarningsRaised("property-restricted-to-mainsnak-found-in-reference");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,52 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||||
|
import org.openrefine.wikidata.qa.MockConstraintFetcher;
|
||||||
|
import org.openrefine.wikidata.qa.QAWarning;
|
||||||
|
import org.openrefine.wikidata.qa.QAWarningStore;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
|
||||||
|
public abstract class ScrutinizerTest {
|
||||||
|
public abstract EditScrutinizer getScrutinizer();
|
||||||
|
|
||||||
|
private EditScrutinizer scrutinizer;
|
||||||
|
private QAWarningStore store;
|
||||||
|
private ConstraintFetcher fetcher;
|
||||||
|
|
||||||
|
@BeforeMethod
|
||||||
|
public void setUp() {
|
||||||
|
store = new QAWarningStore();
|
||||||
|
fetcher = new MockConstraintFetcher();
|
||||||
|
scrutinizer = getScrutinizer();
|
||||||
|
scrutinizer.setStore(store);
|
||||||
|
scrutinizer.setFetcher(fetcher);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void scrutinize(ItemUpdate... updates) {
|
||||||
|
scrutinizer.scrutinize(Arrays.asList(updates));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void assertWarningsRaised(String... types) {
|
||||||
|
assertEquals(Arrays.asList(types).stream().collect(Collectors.toSet()), getWarningTypes());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void assertWarningRaised(QAWarning warning) {
|
||||||
|
assertTrue(store.getWarnings().contains(warning));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void assertNoWarningRaised() {
|
||||||
|
assertWarningsRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<String> getWarningTypes() {
|
||||||
|
return store.getWarnings().stream().map(w -> w.getType()).collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,27 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
|
||||||
|
public class SelfReferentialScrutinizerTest extends StatementScrutinizerTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new SelfReferentialScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrigger() {
|
||||||
|
ItemIdValue id = TestingDataGenerator.matchedId;
|
||||||
|
scrutinize(TestingDataGenerator.generateStatement(id, id));
|
||||||
|
assertWarningsRaised(SelfReferentialScrutinizer.type);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoProblem() {
|
||||||
|
ItemIdValue id = TestingDataGenerator.matchedId;
|
||||||
|
scrutinize(TestingDataGenerator.generateStatement(id, TestingDataGenerator.existingId));
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,41 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
|
||||||
|
public class SingleValueScrutinizerTest extends ScrutinizerTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new SingleValueScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrigger() {
|
||||||
|
ItemIdValue idA = TestingDataGenerator.existingId;
|
||||||
|
ItemIdValue idB = TestingDataGenerator.matchedId;
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder(idA)
|
||||||
|
.addStatement(TestingDataGenerator.generateStatement(idA, idB))
|
||||||
|
.addStatement(TestingDataGenerator.generateStatement(idA, idB))
|
||||||
|
.build();
|
||||||
|
scrutinize(update);
|
||||||
|
assertWarningsRaised(SingleValueScrutinizer.type);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoIssue() {
|
||||||
|
ItemIdValue idA = TestingDataGenerator.existingId;
|
||||||
|
ItemIdValue idB = TestingDataGenerator.matchedId;
|
||||||
|
ItemUpdate updateA = new ItemUpdateBuilder(idA)
|
||||||
|
.addStatement(TestingDataGenerator.generateStatement(idA, idB))
|
||||||
|
.build();
|
||||||
|
ItemUpdate updateB = new ItemUpdateBuilder(idB)
|
||||||
|
.addStatement(TestingDataGenerator.generateStatement(idB, idB))
|
||||||
|
.build();
|
||||||
|
scrutinize(updateA, updateB);
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,43 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
|
||||||
|
|
||||||
|
public abstract class SnakScrutinizerTest extends StatementScrutinizerTest {
|
||||||
|
|
||||||
|
public static Snak defaultMainSnak = Datamodel.makeNoValueSnak(Datamodel.makeWikidataPropertyIdValue("P3928"));
|
||||||
|
|
||||||
|
public void scrutinize(Snak snak) {
|
||||||
|
Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId, snak,
|
||||||
|
Collections.emptyList());
|
||||||
|
Statement statement = Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
||||||
|
scrutinize(statement);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void scrutinizeAsQualifier(Snak snak) {
|
||||||
|
Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId, defaultMainSnak,
|
||||||
|
toSnakGroups(snak));
|
||||||
|
Statement statement = Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
||||||
|
scrutinize(statement);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void scrutinizeAsReference(Snak snak) {
|
||||||
|
Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId, defaultMainSnak,
|
||||||
|
Collections.emptyList());
|
||||||
|
Statement statement = Datamodel.makeStatement(claim,
|
||||||
|
Collections.singletonList(Datamodel.makeReference(toSnakGroups(snak))), StatementRank.NORMAL, "");
|
||||||
|
scrutinize(statement);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<SnakGroup> toSnakGroups(Snak snak) {
|
||||||
|
return Collections.singletonList(Datamodel.makeSnakGroup(Collections.singletonList(snak)));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,16 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
|
||||||
|
public abstract class StatementScrutinizerTest extends ScrutinizerTest {
|
||||||
|
|
||||||
|
public void scrutinize(Statement statement) {
|
||||||
|
ItemUpdate update = new ItemUpdateBuilder((ItemIdValue)statement.getClaim().getSubject())
|
||||||
|
.addStatement(statement).build();
|
||||||
|
scrutinize(update);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,20 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
public class UnsourcedScrutinizerTest extends StatementScrutinizerTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new UnsourcedScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrigger() {
|
||||||
|
scrutinize(TestingDataGenerator.generateStatement(TestingDataGenerator.existingId,
|
||||||
|
TestingDataGenerator.matchedId));
|
||||||
|
assertWarningsRaised(UnsourcedScrutinizer.type);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,21 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||||
|
|
||||||
|
public abstract class ValueScrutinizerTest extends SnakScrutinizerTest {
|
||||||
|
|
||||||
|
public static final PropertyIdValue defaultPid = Datamodel.makeWikidataPropertyIdValue("P328");
|
||||||
|
|
||||||
|
public void scrutinize(Value value) {
|
||||||
|
scrutinize(Datamodel.makeValueSnak(defaultPid, value));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void scrutinizeLabel(MonolingualTextValue text) {
|
||||||
|
scrutinize(new ItemUpdateBuilder(TestingDataGenerator.existingId).addLabel(text).build());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,57 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
|
||||||
|
public class WhitespaceScrutinizerTest extends ValueScrutinizerTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EditScrutinizer getScrutinizer() {
|
||||||
|
return new WhitespaceScrutinizer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLeadingWhitespace() {
|
||||||
|
scrutinize(Datamodel.makeStringValue(" a"));
|
||||||
|
assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrailingWhitespace() {
|
||||||
|
scrutinize(Datamodel.makeStringValue("a\t"));
|
||||||
|
assertWarningsRaised(WhitespaceScrutinizer.trailingWhitespaceType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDuplicateWhitespace() {
|
||||||
|
scrutinize(Datamodel.makeStringValue("a\t b"));
|
||||||
|
assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNonPrintableChars() {
|
||||||
|
scrutinize(Datamodel.makeStringValue("c\u0003"));
|
||||||
|
assertWarningsRaised(WhitespaceScrutinizer.nonPrintableCharsType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoIssue() {
|
||||||
|
scrutinize(Datamodel.makeStringValue("a b"));
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleIssues() {
|
||||||
|
scrutinize(Datamodel.makeStringValue(" a\t b "));
|
||||||
|
assertWarningsRaised(
|
||||||
|
WhitespaceScrutinizer.duplicateWhitespaceType,
|
||||||
|
WhitespaceScrutinizer.leadingWhitespaceType,
|
||||||
|
WhitespaceScrutinizer.trailingWhitespaceType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMonolingualTextValue() {
|
||||||
|
scrutinizeLabel(Datamodel.makeMonolingualTextValue(" a", "fr"));
|
||||||
|
assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
|
||||||
|
}
|
||||||
|
}
|
@ -68,10 +68,19 @@ public class TestingDataGenerator {
|
|||||||
return new WbMonolingualExpr(new WbLanguageConstant(langCode, langLabel), new WbStringConstant(text));
|
return new WbMonolingualExpr(new WbLanguageConstant(langCode, langLabel), new WbStringConstant(text));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Statement generateStatement(ItemIdValue from, ItemIdValue to) {
|
public static Statement generateStatement(ItemIdValue from, PropertyIdValue pid, ItemIdValue to) {
|
||||||
Claim claim = Datamodel.makeClaim(from, Datamodel.makeValueSnak(pid, to), Collections.emptyList());
|
Claim claim = Datamodel.makeClaim(from, Datamodel.makeValueSnak(pid, to), Collections.emptyList());
|
||||||
return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Statement generateStatement(ItemIdValue from, ItemIdValue to) {
|
||||||
|
return generateStatement(from, pid, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ItemIdValue newIdA = makeNewItemIdValue(1234L, "new item A");
|
||||||
|
public static ItemIdValue newIdB = makeNewItemIdValue(4567L, "new item B");
|
||||||
|
public static ItemIdValue matchedId = makeMatchedItemIdValue("Q89","eist");
|
||||||
|
public static ItemIdValue existingId = Datamodel.makeWikidataItemIdValue("Q43");
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -12,7 +12,6 @@ import org.testng.annotations.Test;
|
|||||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
import org.wikidata.wdtk.datamodel.interfaces.Claim;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
|
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Reference;
|
import org.wikidata.wdtk.datamodel.interfaces.Reference;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
@ -23,28 +22,23 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
|
|||||||
|
|
||||||
public class PointerExtractorTest {
|
public class PointerExtractorTest {
|
||||||
|
|
||||||
private ItemIdValue existingId = Datamodel.makeWikidataItemIdValue("Q43");
|
|
||||||
private ItemIdValue matchedId = TestingDataGenerator.makeMatchedItemIdValue("Q89","eist");
|
|
||||||
private ItemIdValue newIdA = TestingDataGenerator.makeNewItemIdValue(1234L, "new item A");
|
|
||||||
private ItemIdValue newIdB = TestingDataGenerator.makeNewItemIdValue(4567L, "new item B");
|
|
||||||
|
|
||||||
private PropertyIdValue pid = Datamodel.makeWikidataPropertyIdValue("P89");
|
private PropertyIdValue pid = Datamodel.makeWikidataPropertyIdValue("P89");
|
||||||
private Snak snakWithNew = Datamodel.makeValueSnak(pid, newIdA);
|
private Snak snakWithNew = Datamodel.makeValueSnak(pid, TestingDataGenerator.newIdA);
|
||||||
private Snak snakWithoutNew = Datamodel.makeValueSnak(pid, matchedId);
|
private Snak snakWithoutNew = Datamodel.makeValueSnak(pid, TestingDataGenerator.matchedId);
|
||||||
private SnakGroup snakGroupWithNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithNew));
|
private SnakGroup snakGroupWithNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithNew));
|
||||||
private SnakGroup snakGroupWithoutNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithoutNew));
|
private SnakGroup snakGroupWithoutNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithoutNew));
|
||||||
private Claim claimWithNew = Datamodel.makeClaim(existingId, snakWithNew, Collections.emptyList());
|
private Claim claimWithNew = Datamodel.makeClaim(TestingDataGenerator.existingId, snakWithNew, Collections.emptyList());
|
||||||
private Claim claimNewSubject = Datamodel.makeClaim(newIdB, snakWithoutNew, Collections.emptyList());
|
private Claim claimNewSubject = Datamodel.makeClaim(TestingDataGenerator.newIdB, snakWithoutNew, Collections.emptyList());
|
||||||
private Claim claimNewQualifier = Datamodel.makeClaim(matchedId, snakWithoutNew,
|
private Claim claimNewQualifier = Datamodel.makeClaim(TestingDataGenerator.matchedId, snakWithoutNew,
|
||||||
Collections.singletonList(snakGroupWithNew));
|
Collections.singletonList(snakGroupWithNew));
|
||||||
|
|
||||||
private static PointerExtractor e = new PointerExtractor();
|
private static PointerExtractor e = new PointerExtractor();
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExtractEntityId() {
|
public void testExtractEntityId() {
|
||||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(newIdA));
|
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(TestingDataGenerator.newIdA));
|
||||||
assertEmpty(e.extractPointers(existingId));
|
assertEmpty(e.extractPointers(TestingDataGenerator.existingId));
|
||||||
assertEmpty(e.extractPointers(matchedId));
|
assertEmpty(e.extractPointers(TestingDataGenerator.matchedId));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -62,26 +56,26 @@ public class PointerExtractorTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testSnak() {
|
public void testSnak() {
|
||||||
assertEmpty(e.extractPointers(snakWithoutNew));
|
assertEmpty(e.extractPointers(snakWithoutNew));
|
||||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(snakWithNew));
|
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(snakWithNew));
|
||||||
assertEmpty(e.extractPointers(Datamodel.makeNoValueSnak(pid)));
|
assertEmpty(e.extractPointers(Datamodel.makeNoValueSnak(pid)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSnakGroup() {
|
public void testSnakGroup() {
|
||||||
assertEmpty(e.extractPointers(snakGroupWithoutNew));
|
assertEmpty(e.extractPointers(snakGroupWithoutNew));
|
||||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(snakGroupWithNew));
|
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(snakGroupWithNew));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testStatement() {
|
public void testStatement() {
|
||||||
assertEmpty(e.extractPointers(Datamodel.makeStatement(claimNewSubject,
|
assertEmpty(e.extractPointers(Datamodel.makeStatement(claimNewSubject,
|
||||||
Collections.emptyList(), StatementRank.NORMAL, "")));
|
Collections.emptyList(), StatementRank.NORMAL, "")));
|
||||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimWithNew,
|
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimWithNew,
|
||||||
Collections.emptyList(), StatementRank.NORMAL, "")));
|
Collections.emptyList(), StatementRank.NORMAL, "")));
|
||||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimNewQualifier,
|
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimNewQualifier,
|
||||||
Collections.emptyList(), StatementRank.NORMAL, "")));
|
Collections.emptyList(), StatementRank.NORMAL, "")));
|
||||||
Reference reference = Datamodel.makeReference(Collections.singletonList(snakGroupWithNew));
|
Reference reference = Datamodel.makeReference(Collections.singletonList(snakGroupWithNew));
|
||||||
assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimNewSubject,
|
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimNewSubject,
|
||||||
Collections.singletonList(reference), StatementRank.NORMAL, "")));
|
Collections.singletonList(reference), StatementRank.NORMAL, "")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user