Test the scrutinizers

This commit is contained in:
Antonin Delpeuch 2018-03-02 14:41:41 +00:00
parent fd7462f749
commit 973a28cc90
41 changed files with 1146 additions and 284 deletions

View File

@ -1,60 +1,17 @@
package org.openrefine.wikidata.qa;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.openrefine.wikidata.utils.EntityCache;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
/**
* This class provides an abstraction over the way constraint
* definitions are stored in Wikidata.
* An object that fetches constraints about properties.
*
* @author antonin
* @author Antonin Delpeuch
*
*/
public class ConstraintFetcher {
public static String WIKIDATA_CONSTRAINT_PID = "P2302";
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
public static String FORMAT_REGEX_PID = "P1793";
public static String INVERSE_CONSTRAINT_QID = "Q21510855";
public static String INVERSE_PROPERTY_PID = "P2306";
public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958";
public static String USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID = "Q21510863";
public static String USED_ONLY_AS_REFERENCE_CONSTRAINT_QID = "Q21528959";
public static String ALLOWED_QUALIFIERS_CONSTRAINT_QID = "Q21510851";
public static String ALLOWED_QUALIFIERS_CONSTRAINT_PID = "P2306";
public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856";
public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306";
public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
// The following constraints still need to be implemented:
public static String TYPE_CONSTRAINT_QID = "Q21503250";
public interface ConstraintFetcher {
/**
* Retrieves the regular expression for formatting a property, or null if
@ -62,152 +19,48 @@ public class ConstraintFetcher {
* @param pid
* @return the expression of a regular expression which should be compatible with java.util.regex
*/
public String getFormatRegex(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
if (specs != null) {
List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
if (! regexes.isEmpty()) {
return ((StringValue)regexes.get(0)).getString();
}
}
return null;
}
String getFormatRegex(PropertyIdValue pid);
/**
* Retrieves the property that is the inverse of a given property
* @param pid: the property to retrieve the inverse for
* @return the pid of the inverse property
*/
public PropertyIdValue getInversePid(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID);
if(specs != null) {
List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID);
if (! inverses.isEmpty()) {
return (PropertyIdValue)inverses.get(0);
}
}
return null;
}
PropertyIdValue getInversePid(PropertyIdValue pid);
/**
* Is this property for values only?
*/
public boolean isForValuesOnly(PropertyIdValue pid) {
return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null;
}
boolean isForValuesOnly(PropertyIdValue pid);
/**
* Is this property for qualifiers only?
*/
public boolean isForQualifiersOnly(PropertyIdValue pid) {
return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null;
}
boolean isForQualifiersOnly(PropertyIdValue pid);
/**
* Is this property for references only?
*/
public boolean isForReferencesOnly(PropertyIdValue pid) {
return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null;
}
boolean isForReferencesOnly(PropertyIdValue pid);
/**
* Get the list of allowed qualifiers (as property ids) for this property (null if any)
*/
public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_QUALIFIERS_CONSTRAINT_QID);
if (specs != null) {
List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID);
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
}
return null;
}
Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid);
/**
* Get the list of mandatory qualifiers (as property ids) for this property (null if any)
*/
public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, MANDATORY_QUALIFIERS_CONSTRAINT_QID);
if (specs != null) {
List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID);
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
}
return null;
}
Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid);
/**
* Is this property expected to have at most one value per item?
*/
public boolean hasSingleValue(PropertyIdValue pid) {
return getSingleConstraint(pid, SINGLE_VALUE_CONSTRAINT_QID) != null;
}
boolean hasSingleValue(PropertyIdValue pid);
/**
* Is this property expected to have distinct values?
*/
public boolean hasDistinctValues(PropertyIdValue pid) {
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
}
boolean hasDistinctValues(PropertyIdValue pid);
/**
* Returns a single constraint for a particular type and a property, or null
* if there is no such constraint
* @param pid: the property to retrieve the constraints for
* @param qid: the type of the constraints
* @return the list of qualifiers for the constraint, or null if it does not exist
*/
protected List<SnakGroup> getSingleConstraint(PropertyIdValue pid, String qid) {
Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null);
if (statement != null) {
return statement.getClaim().getQualifiers();
}
return null;
}
/**
* Gets the list of constraints of a particular type for a property
* @param pid: the property to retrieve the constraints for
* @param qid: the type of the constraints
* @return the stream of matching constraint statements
*/
protected Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
Stream<Statement> allConstraints = getConstraintStatements(pid)
.stream()
.filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid));
return allConstraints;
}
/**
* Gets all the constraint statements for a given property
* @param pid : the id of the property to retrieve the constraints for
* @return the list of constraint statements
*/
protected List<Statement> getConstraintStatements(PropertyIdValue pid) {
PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
if (group != null) {
return group.getStatements();
} else {
return new ArrayList<Statement>();
}
}
/**
* Returns the values of a given property in qualifiers
* @param groups: the qualifiers
* @param pid: the property to filter on
* @return
*/
protected List<Value> findValues(List<SnakGroup> groups, String pid) {
List<Value> results = new ArrayList<>();
for(SnakGroup group : groups) {
if (group.getProperty().getId().equals(pid)) {
for (Snak snak : group.getSnaks())
results.add(snak.getValue());
}
}
return results;
}
}

View File

@ -7,7 +7,7 @@ import java.util.stream.Collectors;
import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.FormatScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.NoEditsMadeScrutinizer;
@ -32,14 +32,16 @@ import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
public class EditInspector {
private Map<String, EditScrutinizer> scrutinizers;
private QAWarningStore warningStore;
private ConstraintFetcher fetcher;
public EditInspector(QAWarningStore warningStore) {
this.scrutinizers = new HashMap<>();
this.fetcher = new WikidataConstraintFetcher();
this.warningStore = warningStore;
// Register all known scrutinizers here
register(new NewItemScrutinizer());
register(new FormatConstraintScrutinizer());
register(new FormatScrutinizer());
register(new InverseConstraintScrutinizer());
register(new SelfReferentialScrutinizer());
register(new UnsourcedScrutinizer());
@ -59,6 +61,7 @@ public class EditInspector {
String key = scrutinizer.getClass().getName();
scrutinizers.put(key, scrutinizer);
scrutinizer.setStore(warningStore);
scrutinizer.setFetcher(fetcher);
}
@ -69,19 +72,15 @@ public class EditInspector {
public void inspect(List<ItemUpdate> editBatch) {
// First, schedule them with some scheduler,
// so that all newly created entities appear in the batch
UpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
try {
editBatch = scheduler.schedule(editBatch);
Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(editBatch);
List<ItemUpdate> mergedUpdates = updates.values().stream().collect(Collectors.toList());
for(EditScrutinizer scrutinizer : scrutinizers.values()) {
scrutinizer.scrutinize(mergedUpdates);
}
} catch(ImpossibleSchedulingException e) {
warningStore.addWarning(new QAWarning(
"scheduling-failed", null, QAWarning.Severity.CRITICAL, 1));
WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler();
editBatch = scheduler.schedule(editBatch);
Map<EntityIdValue, ItemUpdate> updates = ItemUpdate.groupBySubject(editBatch);
List<ItemUpdate> mergedUpdates = updates.values().stream().collect(Collectors.toList());
for(EditScrutinizer scrutinizer : scrutinizers.values()) {
scrutinizer.scrutinize(mergedUpdates);
}
if (warningStore.getNbWarnings() == 0) {
warningStore.addWarning(new QAWarning(
"no-issue-detected", null, QAWarning.Severity.INFO, 0));

View File

@ -1,20 +1,25 @@
package org.openrefine.wikidata.qa;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import org.jsoup.helper.Validate;
import org.openrefine.wikidata.utils.JacksonJsonizable;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* A class to represent a QA warning emited by the Wikidata schema
* A class to represent a QA warning emitted by the Wikidata schema
* This could probably be reused at a broader scale, for instance for
* Data Package validation.
*
* @author antonin
* @author Antonin Delpeuch
*
*/
public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning> {
@ -27,42 +32,30 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
}
/// The type of QA warning emitted
private String type;
private final String type;
// The key for aggregation of other QA warnings together - this specializes the id
private String bucketId;
private final String bucketId;
// The severity of the issue
private Severity severity;
private final Severity severity;
// The number of times this issue was found
private int count;
private final int count;
// Other details about the warning, that can be displayed to the user
private Map<String,Object> properties;
private final Map<String,Object> properties;
public QAWarning(String type, String bucketId, Severity severity, int count) {
Validate.notNull(type);
this.type = type;
this.bucketId = bucketId;
Validate.notNull(severity);
this.severity = severity;
this.count = count;
this.properties = new HashMap<String,Object>();
}
@JsonCreator
public QAWarning(
@JsonProperty("type") String type,
@JsonProperty("bucket_id") String bucketId,
@JsonProperty("severity") Severity severity,
@JsonProperty("count") int count,
@JsonProperty("properties") Map<String,Object> properties) {
this.type = type;
this.bucketId = bucketId;
this.severity = severity;
this.count = count;
this.properties = properties;
this.properties = new HashMap<>();
}
/**
* Returns the full key for aggregation of QA warnings
* @return
* @return the full key for aggregation of QA warnings
*/
@JsonIgnore
public String getAggregationId() {
if (this.bucketId != null) {
return this.type + "_" + this.bucketId;
@ -75,12 +68,22 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
* Aggregates another QA warning of the same aggregation id.
* @param other
*/
public void aggregate(QAWarning other) {
assert other.getAggregationId() == getAggregationId();
this.count += other.getCount();
if(this.severity.compareTo(other.getSeverity()) < 0) {
this.severity = other.getSeverity();
public QAWarning aggregate(QAWarning other) {
assert other.getAggregationId().equals(getAggregationId());
int newCount = count+other.getCount();
Severity newSeverity = severity;
if (other.getSeverity().compareTo(severity) > 0) {
newSeverity = other.getSeverity();
}
QAWarning merged = new QAWarning(getType(), getBucketId(), newSeverity,
newCount);
for(Entry<String,Object> entry : properties.entrySet()) {
merged.setProperty(entry.getKey(),entry.getValue());
}
for(Entry<String,Object> entry : other.getProperties().entrySet()) {
merged.setProperty(entry.getKey(),entry.getValue());
}
return merged;
}
/**
@ -114,6 +117,7 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
}
/**
 * Returns the additional details attached to this warning.
 * The returned map is the one held by this warning, not a copy.
 * It is serialized under the JSON key "properties" and, per the
 * NON_EMPTY inclusion rule below, left out when it has no entries.
 *
 * @return the map of detail names to detail values
 */
@JsonProperty("properties")
@JsonInclude(JsonInclude.Include.NON_EMPTY)
public Map<String,Object> getProperties() {
return properties;
}
@ -125,4 +129,17 @@ public class QAWarning extends JacksonJsonizable implements Comparable<QAWarning
public int compareTo(QAWarning other) {
// The severity comparison is negated on purpose: a warning whose
// severity compares greater is ordered before a less severe one.
return - severity.compareTo(other.getSeverity());
}
/**
 * Compares this warning with another object for equality.
 * Two warnings are equal when their type, bucket id, severity,
 * count and properties are all equal.
 *
 * @param other the object to compare with
 * @return true if other is a QAWarning with the same field values
 */
@Override
public boolean equals(Object other) {
    // instanceof is false for null, so this also covers the null case
    if (!(other instanceof QAWarning)) {
        return false;
    }
    QAWarning otherWarning = (QAWarning)other;
    // The bucket id is optional (getAggregationId also checks it for null),
    // so it has to be compared null-safely.
    boolean sameBucket = bucketId == null
            ? otherWarning.getBucketId() == null
            : bucketId.equals(otherWarning.getBucketId());
    return type.equals(otherWarning.getType()) &&
            sameBucket &&
            severity.equals(otherWarning.getSeverity()) &&
            count == otherWarning.getCount() &&
            properties.equals(otherWarning.getProperties());
}

/**
 * Computes a hash code from the same fields as {@link #equals(Object)},
 * so that equal warnings have equal hash codes.
 *
 * @return the hash code of this warning
 */
@Override
public int hashCode() {
    int result = type.hashCode();
    result = 31 * result + (bucketId == null ? 0 : bucketId.hashCode());
    result = 31 * result + severity.hashCode();
    result = 31 * result + count;
    result = 31 * result + properties.hashCode();
    return result;
}
}

View File

@ -6,16 +6,21 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* A store for QA warnings which aggregates them by type.
* @author antonin
*
* @author Antonin Delpeuch
*/
public class QAWarningStore {
@JsonIgnore
private Map<String, QAWarning> map;
@JsonIgnore
private QAWarning.Severity maxSeverity;
@JsonIgnore
private int totalWarnings;
public QAWarningStore() {
@ -36,7 +41,7 @@ public class QAWarningStore {
totalWarnings += warning.getCount();
if (map.containsKey(aggregationKey)) {
QAWarning existing = map.get(aggregationKey);
existing.aggregate(warning);
map.put(aggregationKey, existing.aggregate(warning));
} else {
map.put(aggregationKey, warning);
}

View File

@ -0,0 +1,190 @@
package org.openrefine.wikidata.qa;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.openrefine.wikidata.utils.EntityCache;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
/**
* This class provides an abstraction over the way constraint
* definitions are stored in Wikidata.
*
* @author antonin
*
*/
public class WikidataConstraintFetcher implements ConstraintFetcher {
public static String WIKIDATA_CONSTRAINT_PID = "P2302";
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
public static String FORMAT_REGEX_PID = "P1793";
public static String INVERSE_CONSTRAINT_QID = "Q21510855";
public static String INVERSE_PROPERTY_PID = "P2306";
public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958";
public static String USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID = "Q21510863";
public static String USED_ONLY_AS_REFERENCE_CONSTRAINT_QID = "Q21528959";
public static String ALLOWED_QUALIFIERS_CONSTRAINT_QID = "Q21510851";
public static String ALLOWED_QUALIFIERS_CONSTRAINT_PID = "P2306";
public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856";
public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306";
public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
// The following constraints still need to be implemented:
public static String TYPE_CONSTRAINT_QID = "Q21503250";
@Override
public String getFormatRegex(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
if (specs != null) {
List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
if (! regexes.isEmpty()) {
return ((StringValue)regexes.get(0)).getString();
}
}
return null;
}
@Override
public PropertyIdValue getInversePid(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID);
if(specs != null) {
List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID);
if (! inverses.isEmpty()) {
return (PropertyIdValue)inverses.get(0);
}
}
return null;
}
@Override
public boolean isForValuesOnly(PropertyIdValue pid) {
return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null;
}
@Override
public boolean isForQualifiersOnly(PropertyIdValue pid) {
return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null;
}
@Override
public boolean isForReferencesOnly(PropertyIdValue pid) {
return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null;
}
@Override
public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_QUALIFIERS_CONSTRAINT_QID);
if (specs != null) {
List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID);
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
}
return null;
}
@Override
public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, MANDATORY_QUALIFIERS_CONSTRAINT_QID);
if (specs != null) {
List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID);
return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
}
return null;
}
@Override
public boolean hasSingleValue(PropertyIdValue pid) {
return getSingleConstraint(pid, SINGLE_VALUE_CONSTRAINT_QID) != null;
}
@Override
public boolean hasDistinctValues(PropertyIdValue pid) {
return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
}
/**
* Returns a single constraint for a particular type and a property, or null
* if there is no such constraint
* @param pid: the property to retrieve the constraints for
* @param qid: the type of the constraints
* @return the list of qualifiers for the constraint, or null if it does not exist
*/
protected List<SnakGroup> getSingleConstraint(PropertyIdValue pid, String qid) {
Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null);
if (statement != null) {
return statement.getClaim().getQualifiers();
}
return null;
}
/**
* Gets the list of constraints of a particular type for a property
* @param pid: the property to retrieve the constraints for
* @param qid: the type of the constraints
* @return the stream of matching constraint statements
*/
protected Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
Stream<Statement> allConstraints = getConstraintStatements(pid)
.stream()
.filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid));
return allConstraints;
}
/**
* Gets all the constraint statements for a given property
* @param pid : the id of the property to retrieve the constraints for
* @return the list of constraint statements
*/
protected List<Statement> getConstraintStatements(PropertyIdValue pid) {
PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
if (group != null) {
return group.getStatements();
} else {
return new ArrayList<Statement>();
}
}
/**
* Returns the values of a given property in qualifiers
* @param groups: the qualifiers
* @param pid: the property to filter on
* @return
*/
protected List<Value> findValues(List<SnakGroup> groups, String pid) {
List<Value> results = new ArrayList<>();
for(SnakGroup group : groups) {
if (group.getProperty().getId().equals(pid)) {
for (Snak snak : group.getSnaks())
results.add(snak.getValue());
}
}
return results;
}
}

View File

@ -15,11 +15,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
* A scrutinizer that checks for properties using the same value
* on different items.
*
* @author antonin
* @author Antonin Delpeuch
*
*/
public class DistinctValuesScrutinizer extends StatementScrutinizer {
public final static String type = "identical-values-for-distinct-valued-property";
private Map<PropertyIdValue, Map<Value, EntityIdValue>> _seenValues;
public DistinctValuesScrutinizer() {
@ -39,7 +41,7 @@ public class DistinctValuesScrutinizer extends StatementScrutinizer {
if (seen.containsKey(mainSnakValue)) {
EntityIdValue otherId = seen.get(mainSnakValue);
QAWarning issue = new QAWarning(
"identical-values-for-distinct-valued-property",
type,
pid.getId(),
QAWarning.Severity.IMPORTANT,
1);

View File

@ -2,6 +2,7 @@ package org.openrefine.wikidata.qa.scrutinizers;
import java.util.List;
import org.openrefine.wikidata.qa.WikidataConstraintFetcher;
import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.qa.QAWarning.Severity;
@ -9,9 +10,9 @@ import org.openrefine.wikidata.qa.QAWarningStore;
import org.openrefine.wikidata.updates.ItemUpdate;
/**
* Interface for any class that
* @author antonin
* Inspects an edit batch and emits warnings.
*
* @author Antonin Delpeuch
*/
public abstract class EditScrutinizer {
@ -19,13 +20,18 @@ public abstract class EditScrutinizer {
protected ConstraintFetcher _fetcher;
public EditScrutinizer() {
_fetcher = new ConstraintFetcher();
_fetcher = null;
_store = null;
}
/**
 * Sets the store this scrutinizer uses for its warnings.
 * @param store the warning store to assign to this scrutinizer
 */
public void setStore(QAWarningStore store) {
_store = store;
}
/**
 * Sets the constraint fetcher held by this scrutinizer.
 * @param fetcher the constraint fetcher to assign to this scrutinizer
 */
public void setFetcher(ConstraintFetcher fetcher) {
_fetcher = fetcher;
}
/**
* Reads the candidate edits and emits warnings in the store
* @param edit: the list of ItemUpdates to scrutinize

View File

@ -15,14 +15,16 @@ import org.wikidata.wdtk.datamodel.interfaces.StringValue;
* A scrutinizer that detects incorrect formats in text values
* (mostly identifiers).
*
* @author antonin
* @author Antonin Delpeuch
*
*/
public class FormatConstraintScrutinizer extends SnakScrutinizer {
public class FormatScrutinizer extends SnakScrutinizer {
public static final String type = "add-statements-with-invalid-format";
private Map<PropertyIdValue, Pattern> _patterns;
public FormatConstraintScrutinizer() {
public FormatScrutinizer() {
_patterns = new HashMap<>();
}
@ -59,7 +61,7 @@ public class FormatConstraintScrutinizer extends SnakScrutinizer {
if (!pattern.matcher(value).matches()) {
if (added) {
QAWarning issue = new QAWarning(
"add-statements-with-invalid-format",
type,
pid.getId(),
QAWarning.Severity.IMPORTANT,
1);

View File

@ -17,11 +17,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
* A scrutinizer that checks for missing inverse statements in
* edit batches.
*
* @author antonin
* @author Antonin Delpeuch
*
*/
public class InverseConstraintScrutinizer extends StatementScrutinizer {
public static final String type = "missing-inverse-statements";
private Map<PropertyIdValue, PropertyIdValue> _inverse;
private Map<PropertyIdValue, Map<EntityIdValue, Set<EntityIdValue> >> _statements;
@ -83,7 +85,7 @@ public class InverseConstraintScrutinizer extends StatementScrutinizer {
PropertyIdValue missingProperty = propertyPair.getValue();
Set<EntityIdValue> reciprocalLinks = _statements.get(missingProperty).get(idValue);
if (reciprocalLinks == null || !reciprocalLinks.contains(itemLinks.getKey())) {
QAWarning issue = new QAWarning("missing-inverse-statements",
QAWarning issue = new QAWarning(type,
ourProperty.getId(),
QAWarning.Severity.IMPORTANT,
1);

View File

@ -4,7 +4,7 @@ import java.util.List;
import org.openrefine.wikidata.updates.ItemUpdate;
public abstract class ItemEditScrutinizer extends EditScrutinizer {
public abstract class ItemUpdateScrutinizer extends EditScrutinizer {
@Override
public void scrutinize(List<ItemUpdate> edit) {

View File

@ -5,19 +5,26 @@ import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
/**
* A scrutinizer that inspects new items
* @author antonin
* A scrutinizer that inspects new items.
*
* @author Antonin Delpeuch
*/
public class NewItemScrutinizer extends ItemEditScrutinizer {
public class NewItemScrutinizer extends ItemUpdateScrutinizer {
public static final String noLabelType = "new-item-without-labels-or-aliases";
public static final String noDescType = "new-item-without-descriptions";
public static final String deletedStatementsType = "new-item-with-deleted-statements";
public static final String noTypeType = "new-item-without-P31-or-P279";
public static final String newItemType = "new-item-created";
@Override
public void scrutinize(ItemUpdate update) {
if (update.isNew()) {
info("new-item-created");
info(newItemType);
if (update.getLabels().isEmpty() && update.getAliases().isEmpty()) {
QAWarning issue = new QAWarning(
"new-item-without-labels-or-aliases",
noLabelType,
null,
QAWarning.Severity.CRITICAL,
1);
@ -27,7 +34,7 @@ public class NewItemScrutinizer extends ItemEditScrutinizer {
if (update.getDescriptions().isEmpty()) {
QAWarning issue = new QAWarning(
"new-item-without-descriptions",
noDescType,
null,
QAWarning.Severity.WARNING,
1);
@ -35,9 +42,9 @@ public class NewItemScrutinizer extends ItemEditScrutinizer {
addIssue(issue);
}
if (! update.getDeletedStatements().isEmpty()) {
if (!update.getDeletedStatements().isEmpty()) {
QAWarning issue = new QAWarning(
"new-item-with-deleted-statements",
deletedStatementsType,
null,
QAWarning.Severity.WARNING,
1);
@ -56,7 +63,7 @@ public class NewItemScrutinizer extends ItemEditScrutinizer {
}
if (!typeFound) {
QAWarning issue = new QAWarning(
"new-item-without-P31-or-P279",
noTypeType,
null,
QAWarning.Severity.WARNING,
1);

View File

@ -7,10 +7,12 @@ import org.openrefine.wikidata.updates.ItemUpdate;
public class NoEditsMadeScrutinizer extends EditScrutinizer {
public static final String type = "no-edit-generated";
@Override
public void scrutinize(List<ItemUpdate> edit) {
if(edit.stream().allMatch(e -> e.isNull())) {
info("no-edit-generated");
info(type);
}
}

View File

@ -14,11 +14,14 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
/**
* A scrutinizer that checks the compatibility of the qualifiers
* and the property of a statement, and looks for mandatory qualifiers.
* @author antonin
*
* @author Antonin Delpeuch
*/
public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
public static final String missingMandatoryQualifiersType = "missing-mandatory-qualifiers";
public static final String disallowedQualifiersType = "disallowed-qualifiers";
private Map<PropertyIdValue, Set<PropertyIdValue>> _allowedQualifiers;
private Map<PropertyIdValue, Set<PropertyIdValue>> _mandatoryQualifiers;
@ -65,7 +68,7 @@ public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
for (PropertyIdValue missing : missingQualifiers) {
QAWarning issue = new QAWarning(
"missing-mandatory-qualifiers",
missingMandatoryQualifiersType,
statementProperty.getId()+"-"+missing.getId(),
QAWarning.Severity.WARNING,
1);
@ -76,7 +79,7 @@ public class QualifierCompatibilityScrutinizer extends StatementScrutinizer {
}
for (PropertyIdValue disallowed : disallowedQualifiers) {
QAWarning issue = new QAWarning(
"disallowed-qualifiers",
disallowedQualifiersType,
statementProperty.getId()+"-"+disallowed.getId(),
QAWarning.Severity.WARNING,
1);

View File

@ -13,14 +13,14 @@ import org.wikidata.wdtk.datamodel.interfaces.Snak;
*/
public class SelfReferentialScrutinizer extends SnakScrutinizer {
public static final String type = "self-referential-statements";
@Override
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
if (entityId.equals(snak.getValue())) {
QAWarning issue = new QAWarning(
"self-referential-statements",
null,
QAWarning.Severity.WARNING,
1);
type, null,
QAWarning.Severity.WARNING, 1);
issue.setProperty("example_entity", entityId);
addIssue(issue);
}

View File

@ -11,10 +11,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
/**
* For now this scrutinizer only checks for uniqueness at
* the item level (it ignores qualifiers and references).
* @author antonin
*
* @author Antonin Delpeuch
*
*/
public class SingleValueScrutinizer extends ItemEditScrutinizer {
public class SingleValueScrutinizer extends ItemUpdateScrutinizer {
public static final String type = "single-valued-property-added-more-than-once";
@Override
public void scrutinize(ItemUpdate update) {
@ -25,10 +28,8 @@ public class SingleValueScrutinizer extends ItemEditScrutinizer {
if (seenSingleProperties.contains(pid)) {
QAWarning issue = new QAWarning(
"single-valued-property-added-more-than-once",
pid.getId(),
QAWarning.Severity.WARNING,
1);
type, pid.getId(),
QAWarning.Severity.WARNING, 1);
issue.setProperty("property_entity", pid);
issue.setProperty("example_entity", update.getItemId());
addIssue(issue);

View File

@ -11,7 +11,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
* A scrutinizer that inspects snaks individually, no matter whether they
* appear as main snaks, qualifiers or references.
*
* @author antonin
* @author Antonin Delpeuch
*
*/
public abstract class SnakScrutinizer extends StatementScrutinizer {

View File

@ -3,7 +3,7 @@ package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
public abstract class StatementGroupScrutinizer extends ItemEditScrutinizer {
public abstract class StatementGroupScrutinizer extends ItemUpdateScrutinizer {
@Override
public void scrutinize(ItemUpdate update) {

View File

@ -4,7 +4,7 @@ import org.openrefine.wikidata.updates.ItemUpdate;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
public abstract class StatementScrutinizer extends ItemEditScrutinizer {
public abstract class StatementScrutinizer extends ItemUpdateScrutinizer {
@Override
public void scrutinize(ItemUpdate update) {

View File

@ -11,10 +11,12 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement;
*/
public class UnsourcedScrutinizer extends StatementScrutinizer {
public static final String type = "unsourced-statements";
@Override
public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) {
if(statement.getReferences().isEmpty() && added) {
warning("unsourced-statements");
warning(type);
}
}

View File

@ -12,21 +12,27 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
/**
* Scrutinizes strings for trailing / leading whitespace, and others
* @author antonin
*
* @author Antonin Delpeuch
*
*/
public class WhitespaceScrutinizer extends ValueScrutinizer {
private Map<String,Pattern> _issuesMap;
public static final String leadingWhitespaceType = "leading-whitespace";
public static final String trailingWhitespaceType = "trailing-whitespace";
public static final String duplicateWhitespaceType = "duplicate-whitespace";
public static final String nonPrintableCharsType = "non-printable-characters";
public WhitespaceScrutinizer() {
_issuesMap = new HashMap<>();
_issuesMap.put("leading-whitespace", Pattern.compile("^\\s"));
_issuesMap.put("trailing-whitespace", Pattern.compile("\\s$"));
_issuesMap.put("duplicate-whitespace", Pattern.compile("\\s\\s"));
_issuesMap.put(leadingWhitespaceType, Pattern.compile("^\\s"));
_issuesMap.put(trailingWhitespaceType, Pattern.compile("\\s$"));
_issuesMap.put(duplicateWhitespaceType, Pattern.compile("\\s\\s"));
// https://stackoverflow.com/questions/14565934/regular-expression-to-remove-all-non-printable-characters
_issuesMap.put("non-printable-characters", Pattern.compile("[\\x00\\x08\\x0B\\x0C\\x0E-\\x1F]"));
_issuesMap.put(nonPrintableCharsType, Pattern.compile("[\\x00\\x03\\x08\\x0B\\x0C\\x0E-\\x1F]"));
}
@Override

View File

@ -0,0 +1,75 @@
package org.openrefine.wikidata.qa;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
import java.util.stream.Collectors;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
/**
 * A {@link ConstraintFetcher} that answers from hard-coded values instead of
 * querying any external source, for use in scrutinizer tests.
 *
 * The property ids exposed as constants are the ones the pid-dependent
 * methods react to; the remaining methods ignore their argument and return
 * a fixed answer.
 */
public class MockConstraintFetcher implements ConstraintFetcher {

    // These are shared by several test classes: they are final so that no
    // test can reassign them and leak state into another test.
    public static final PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350");
    public static final PropertyIdValue inversePid = Datamodel.makeWikidataPropertyIdValue("P57");
    public static final PropertyIdValue allowedQualifierPid = Datamodel.makeWikidataPropertyIdValue("P34");
    public static final PropertyIdValue mandatoryQualifierPid = Datamodel.makeWikidataPropertyIdValue("P97");
    public static final PropertyIdValue mainSnakPid = Datamodel.makeWikidataPropertyIdValue("P1234");
    public static final PropertyIdValue qualifierPid = Datamodel.makeWikidataPropertyIdValue("P987");
    public static final PropertyIdValue referencePid = Datamodel.makeWikidataPropertyIdValue("P384");

    /**
     * Returns the same regular expression for every property: a non-zero
     * digit followed by at least one more digit.
     */
    @Override
    public String getFormatRegex(PropertyIdValue pid) {
        return "[1-9]\\d+";
    }

    /**
     * This constraint is purposely left inconsistent (the inverse
     * constraint holds only on one side): {@link #pidWithInverse} maps to
     * {@link #inversePid}, but {@link #inversePid} has no inverse.
     *
     * @return the inverse property, or null for any pid other than
     *         {@link #pidWithInverse}
     */
    @Override
    public PropertyIdValue getInversePid(PropertyIdValue pid) {
        if (pidWithInverse.equals(pid)) {
            return inversePid;
        }
        return null;
    }

    /**
     * @return true only for {@link #mainSnakPid}
     */
    @Override
    public boolean isForValuesOnly(PropertyIdValue pid) {
        return mainSnakPid.equals(pid);
    }

    /**
     * @return true only for {@link #qualifierPid}
     */
    @Override
    public boolean isForQualifiersOnly(PropertyIdValue pid) {
        return qualifierPid.equals(pid);
    }

    /**
     * @return true only for {@link #referencePid}
     */
    @Override
    public boolean isForReferencesOnly(PropertyIdValue pid) {
        return referencePid.equals(pid);
    }

    /**
     * @return the allowed and the mandatory qualifier, whatever the property
     */
    @Override
    public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
        return Arrays.asList(allowedQualifierPid, mandatoryQualifierPid).stream().collect(Collectors.toSet());
    }

    /**
     * @return a singleton containing {@link #mandatoryQualifierPid}, whatever
     *         the property
     */
    @Override
    public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
        return Collections.singleton(mandatoryQualifierPid);
    }

    /**
     * @return true for every property
     */
    @Override
    public boolean hasSingleValue(PropertyIdValue pid) {
        return true;
    }

    /**
     * @return true for every property
     */
    @Override
    public boolean hasDistinctValues(PropertyIdValue pid) {
        return true;
    }
}

View File

@ -0,0 +1,44 @@
package org.openrefine.wikidata.qa;
import static org.junit.Assert.assertEquals;
import org.openrefine.wikidata.testing.JacksonSerializationTest;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
/**
 * Tests for {@link QAWarningStore}: serialization, warning counts and
 * maximum severity.
 */
public class QAWarningStoreTest {

    public static String exampleJson = "{\"max_severity\":\"CRITICAL\",\"nb_warnings\":5,"
            + "\"warnings\":[{\"type\":\"new-item-without-label\",\"bucketId\":null,"
            + "\"severity\":\"CRITICAL\",\"count\":3},{\"type\":\"add-statements-with-invalid-format\","
            + "\"bucketId\":\"P2427\",\"severity\":\"IMPORTANT\",\"count\":2}]}";

    private QAWarningStore store;
    private QAWarning otherWarning;

    /**
     * Fills a fresh store with the shared example warning (added twice)
     * and one critical warning whose count is 3.
     */
    @BeforeMethod
    public void setUp() {
        store = new QAWarningStore();
        for (int i = 0; i < 2; i++) {
            store.addWarning(QAWarningTest.exampleWarning);
        }
        otherWarning = new QAWarning("new-item-without-label", null, QAWarning.Severity.CRITICAL, 3);
        store.addWarning(otherWarning);
    }

    @Test
    public void testSerialize() {
        JacksonSerializationTest.testSerialize(store, exampleJson);
    }

    /**
     * Three additions are expected to yield two stored warnings and a
     * total count of five.
     */
    @Test
    public void testCount() {
        int totalCount = store.getNbWarnings();
        int distinctWarnings = store.getWarnings().size();
        assertEquals(5, totalCount);
        assertEquals(2, distinctWarnings);
    }

    /**
     * The populated store reports CRITICAL; an empty store reports INFO.
     */
    @Test
    public void testMaxSeverity() {
        assertEquals(QAWarning.Severity.CRITICAL, store.getMaxSeverity());
        QAWarningStore emptyStore = new QAWarningStore();
        assertEquals(QAWarning.Severity.INFO, emptyStore.getMaxSeverity());
    }
}

View File

@ -0,0 +1,50 @@
package org.openrefine.wikidata.qa;
import static org.junit.Assert.assertEquals;
import org.openrefine.wikidata.testing.JacksonSerializationTest;
import org.testng.annotations.Test;
/**
 * Tests for {@link QAWarning}: serialization, aggregation and ordering.
 */
public class QAWarningTest {

    public static QAWarning exampleWarning = new QAWarning("add-statements-with-invalid-format",
            "P2427",
            QAWarning.Severity.IMPORTANT,
            1);
    public static String exampleJson =
            "{\"severity\":\"IMPORTANT\","+
            "\"count\":1,\"bucketId\":\"P2427\",\"type\":\"add-statements-with-invalid-format\"}";

    @Test
    public void testSerialize() {
        JacksonSerializationTest.testSerialize(exampleWarning, exampleJson);
    }

    /**
     * Aggregates an INFO warning with the IMPORTANT example warning that has
     * the same aggregation id: counts are expected to add up, the severity
     * of the example warning to win, and the properties set on the first
     * warning to be kept.
     */
    @Test
    public void testAggregate() {
        QAWarning firstWarning = new QAWarning("add-statements-with-invalid-format",
                "P2427",
                QAWarning.Severity.INFO,
                1);
        firstWarning.setProperty("foo", "bar");
        assertEquals(exampleWarning.getAggregationId(), firstWarning.getAggregationId());
        QAWarning merged = firstWarning.aggregate(exampleWarning);
        assertEquals(2, merged.getCount());
        assertEquals(exampleWarning.getAggregationId(), merged.getAggregationId());
        assertEquals(exampleWarning.getType(), merged.getType());
        assertEquals(exampleWarning.getSeverity(), merged.getSeverity());
        assertEquals("bar", merged.getProperties().get("foo"));
    }

    /**
     * Checks the relative order of a WARNING and an IMPORTANT warning.
     */
    @Test
    public void testCompare() {
        QAWarning otherWarning = new QAWarning("no-reference",
                "no-reference",
                QAWarning.Severity.WARNING,
                1);
        // compareTo only guarantees the sign of its result, not its
        // magnitude: normalize with signum so that the test does not depend
        // on how the comparison happens to be implemented.
        assertEquals(1, Integer.signum(otherWarning.compareTo(exampleWarning)));
        assertEquals(-1, Integer.signum(exampleWarning.compareTo(otherWarning)));
        assertEquals(0, exampleWarning.compareTo(exampleWarning));
    }
}

View File

@ -7,7 +7,7 @@ import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import java.util.regex.Pattern;
public class ConstraintFetcherTests {
public class WikidataConstraintFetcherTests {
private ConstraintFetcher fetcher;
@ -21,8 +21,8 @@ public class ConstraintFetcherTests {
private PropertyIdValue referenceURL;
private PropertyIdValue reasonForDeprecation;
public ConstraintFetcherTests() {
fetcher = new ConstraintFetcher();
public WikidataConstraintFetcherTests() {
fetcher = new WikidataConstraintFetcher();
headOfGovernment = Datamodel.makeWikidataPropertyIdValue("P6");
startTime = Datamodel.makeWikidataPropertyIdValue("P580");
endTime = Datamodel.makeWikidataPropertyIdValue("P582");

View File

@ -0,0 +1,29 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
/**
 * Tests for {@link DistinctValuesScrutinizer}.
 */
public class DistinctValuesScrutinizerTest extends StatementScrutinizerTest {

    @Override
    public EditScrutinizer getScrutinizer() {
        return new DistinctValuesScrutinizer();
    }

    /**
     * Two updates on different subjects each add a statement pointing to
     * the same item: the warning is expected.
     */
    @Test
    public void testTrigger() {
        ItemIdValue firstSubject = TestingDataGenerator.existingId;
        ItemIdValue sharedValue = TestingDataGenerator.matchedId;

        scrutinize(
                singleStatementUpdate(firstSubject, sharedValue),
                singleStatementUpdate(sharedValue, sharedValue));

        assertWarningsRaised(DistinctValuesScrutinizer.type);
    }

    /**
     * Builds an update on the given subject adding one generated statement
     * whose value is the given item.
     */
    private static ItemUpdate singleStatementUpdate(ItemIdValue subject, ItemIdValue value) {
        return new ItemUpdateBuilder(subject)
                .addStatement(TestingDataGenerator.generateStatement(subject, value))
                .build();
    }
}

View File

@ -0,0 +1,31 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
/**
 * Tests for {@link FormatScrutinizer}, run against string values.
 */
public class FormatScrutinizerTest extends ValueScrutinizerTest {

    @Override
    public EditScrutinizer getScrutinizer() {
        return new FormatScrutinizer();
    }

    /** A value with no digits at all is reported. */
    @Test
    public void testTrigger() {
        scrutinizeString("not a number");
        assertWarningsRaised(FormatScrutinizer.type);
    }

    /** A purely numeric value is accepted. */
    @Test
    public void testNoIssue() {
        scrutinizeString("1234");
        assertNoWarningRaised();
    }

    /** A value that merely starts with a number is still reported. */
    @Test
    public void testIncompleteMatch() {
        scrutinizeString("42 is a number");
        assertWarningsRaised(FormatScrutinizer.type);
    }

    /** Wraps the text in a string value and scrutinizes it. */
    private void scrutinizeString(String text) {
        scrutinize(Datamodel.makeStringValue(text));
    }
}

View File

@ -0,0 +1,41 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.qa.MockConstraintFetcher;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
/**
 * Tests for {@link InverseConstraintScrutinizer}, relying on the one-sided
 * inverse relation declared by {@link MockConstraintFetcher}.
 */
public class InverseConstaintScrutinizerTest extends StatementScrutinizerTest {

    private ItemIdValue idA = TestingDataGenerator.existingId;
    private ItemIdValue idB = TestingDataGenerator.newIdB;
    private PropertyIdValue pidWithInverse = MockConstraintFetcher.pidWithInverse;
    private PropertyIdValue inversePid = MockConstraintFetcher.inversePid;

    @Override
    public EditScrutinizer getScrutinizer() {
        return new InverseConstraintScrutinizer();
    }

    /**
     * A statement using the property that has an inverse, added without
     * its counterpart, is reported.
     */
    @Test
    public void testTrigger() {
        scrutinize(updateLinkingAToB(pidWithInverse));
        assertWarningsRaised(InverseConstraintScrutinizer.type);
    }

    /**
     * The inverse property itself has no inverse in the mock fetcher, so
     * using it alone raises nothing.
     */
    @Test
    public void testNoSymmetricClosure() {
        scrutinize(updateLinkingAToB(inversePid));
        assertNoWarningRaised();
    }

    /**
     * Builds an update on idA with one statement from idA to idB using the
     * given property.
     */
    private ItemUpdate updateLinkingAToB(PropertyIdValue property) {
        return new ItemUpdateBuilder(idA)
                .addStatement(TestingDataGenerator.generateStatement(idA, property, idB))
                .build();
    }
}

View File

@ -0,0 +1,70 @@
package org.openrefine.wikidata.qa.scrutinizers;
import java.util.Collections;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.Claim;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
/**
 * Tests for {@link NewItemScrutinizer}.
 */
public class NewItemScrutinizerTest extends ScrutinizerTest {

    // A P31 claim on the new item A whose value is the existing item.
    private Claim claim = Datamodel.makeClaim(
            TestingDataGenerator.newIdA,
            Datamodel.makeValueSnak(
                    Datamodel.makeWikidataPropertyIdValue("P31"),
                    TestingDataGenerator.existingId),
            Collections.emptyList());
    private Statement p31Statement = Datamodel.makeStatement(
            claim, Collections.emptyList(), StatementRank.NORMAL, "");

    @Override
    public EditScrutinizer getScrutinizer() {
        return new NewItemScrutinizer();
    }

    /**
     * A bare new item raises every new-item warning.
     */
    @Test
    public void testTrigger() {
        ItemUpdate bareNewItem = new ItemUpdateBuilder(TestingDataGenerator.newIdA).build();

        scrutinize(bareNewItem);

        assertWarningsRaised(
                NewItemScrutinizer.noDescType,
                NewItemScrutinizer.noLabelType,
                NewItemScrutinizer.noTypeType,
                NewItemScrutinizer.newItemType);
    }

    /**
     * An update on an existing item raises nothing.
     */
    @Test
    public void testEmptyItem() {
        ItemUpdate existingItemUpdate = new ItemUpdateBuilder(TestingDataGenerator.existingId).build();

        scrutinize(existingItemUpdate);

        assertNoWarningRaised();
    }

    /**
     * A new item with label, description and P31 statement only raises the
     * generic new-item warning.
     */
    @Test
    public void testGoodNewItem() {
        ItemUpdate completeNewItem = new ItemUpdateBuilder(TestingDataGenerator.newIdA)
                .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"))
                .addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en"))
                .addStatement(p31Statement)
                .build();

        scrutinize(completeNewItem);

        assertWarningsRaised(NewItemScrutinizer.newItemType);
    }

    /**
     * Deleting a statement on a new item is reported in addition to the
     * generic new-item warning.
     */
    @Test
    public void testDeletedStatements() {
        Statement removed = TestingDataGenerator.generateStatement(
                TestingDataGenerator.newIdA, TestingDataGenerator.matchedId);
        ItemUpdate newItemWithDeletion = new ItemUpdateBuilder(TestingDataGenerator.newIdA)
                .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr"))
                .addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en"))
                .addStatement(p31Statement)
                .deleteStatement(removed)
                .build();

        scrutinize(newItemWithDeletion);

        assertWarningsRaised(NewItemScrutinizer.newItemType, NewItemScrutinizer.deletedStatementsType);
    }
}

View File

@ -0,0 +1,31 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;
/**
 * Tests for {@link NoEditsMadeScrutinizer}.
 */
public class NoEditsMadeScrutinizerTest extends ScrutinizerTest {
@Override
public EditScrutinizer getScrutinizer() {
return new NoEditsMadeScrutinizer();
}
/**
 * Scrutinizing no update at all raises the warning.
 */
@Test
public void testTrigger() {
scrutinize();
assertWarningsRaised(NoEditsMadeScrutinizer.type);
}
/**
 * An update built for a new item, with nothing added to it, raises no warning.
 */
@Test
public void testNonNull() {
scrutinize(new ItemUpdateBuilder(TestingDataGenerator.newIdA).build());
assertNoWarningRaised();
}
/**
 * An update built for an existing item, with nothing added to it, raises the warning.
 */
@Test
public void testNull() {
scrutinize(new ItemUpdateBuilder(TestingDataGenerator.existingId).build());
assertWarningsRaised(NoEditsMadeScrutinizer.type);
}
}

View File

@ -0,0 +1,59 @@
package org.openrefine.wikidata.qa.scrutinizers;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.openrefine.wikidata.qa.MockConstraintFetcher;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.Claim;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
/**
 * Tests for {@link QualifierCompatibilityScrutinizer}, using the allowed
 * and mandatory qualifiers declared by {@link MockConstraintFetcher}.
 */
public class QualifierCompatibilityScrutinizerTest extends StatementScrutinizerTest {

    private Snak disallowedQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.qualifierPid);
    private Snak mandatoryQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.mandatoryQualifierPid);
    private Snak allowedQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.allowedQualifierPid);

    @Override
    public EditScrutinizer getScrutinizer() {
        return new QualifierCompatibilityScrutinizer();
    }

    /** A qualifier outside the allowed set is reported. */
    @Test
    public void testDisallowedQualifier() {
        Statement statement = makeStatement(disallowedQualifier, mandatoryQualifier);
        scrutinize(statement);
        assertWarningsRaised(QualifierCompatibilityScrutinizer.disallowedQualifiersType);
    }

    /** A statement without any qualifier misses the mandatory one. */
    @Test
    public void testMissingQualifier() {
        Statement statement = makeStatement();
        scrutinize(statement);
        assertWarningsRaised(QualifierCompatibilityScrutinizer.missingMandatoryQualifiersType);
    }

    /** Allowed plus mandatory qualifiers raise nothing. */
    @Test
    public void testGoodEdit() {
        Statement statement = makeStatement(allowedQualifier, mandatoryQualifier);
        scrutinize(statement);
        assertNoWarningRaised();
    }

    /**
     * Builds a normal-rank, unreferenced statement on the existing item
     * whose main snak uses the mock main-snak property and which carries
     * the given qualifiers.
     */
    private Statement makeStatement(Snak... qualifiers) {
        List<SnakGroup> qualifierGroups = makeQualifiers(qualifiers);
        Snak mainSnak = Datamodel.makeNoValueSnak(MockConstraintFetcher.mainSnakPid);
        Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId, mainSnak, qualifierGroups);
        return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
    }

    /**
     * Puts each qualifier in its own snak group, preserving order.
     */
    private List<SnakGroup> makeQualifiers(Snak[] qualifiers) {
        return Arrays.stream(qualifiers)
                .map(this::singletonGroup)
                .collect(Collectors.toList());
    }

    /** Wraps one snak in a group containing only that snak. */
    private SnakGroup singletonGroup(Snak qualifier) {
        return Datamodel.makeSnakGroup(Collections.<Snak>singletonList(qualifier));
    }
}

View File

@ -0,0 +1,55 @@
package org.openrefine.wikidata.qa.scrutinizers;
import java.util.Collections;
import java.util.List;
import org.openrefine.wikidata.qa.MockConstraintFetcher;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
public class RestrictedPositionScrutinizerTest extends SnakScrutinizerTest {
private ItemIdValue qid = TestingDataGenerator.existingId;
@Override
public EditScrutinizer getScrutinizer() {
return new RestrictedPositionScrutinizer();
}
@Test
public void testTriggerMainSnak() {
scrutinize(TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.qualifierPid, qid));
assertWarningsRaised("property-restricted-to-qualifier-found-in-mainsnak");
}
@Test
public void testNoProblem() {
scrutinize(TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.mainSnakPid, qid));
assertNoWarningRaised();
}
@Test
public void testNotRestricted() {
scrutinize(TestingDataGenerator.generateStatement(qid, Datamodel.makeWikidataPropertyIdValue("P3748"), qid));
assertNoWarningRaised();
}
@Test
public void testTriggerReference() {
Snak snak = Datamodel.makeValueSnak(MockConstraintFetcher.mainSnakPid, qid);
List<SnakGroup> snakGroups = Collections.singletonList(Datamodel.makeSnakGroup(Collections.singletonList(snak)));
Statement statement = Datamodel.makeStatement(
TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.mainSnakPid, qid).getClaim(),
Collections.singletonList(Datamodel.makeReference(snakGroups)),
StatementRank.NORMAL, "");
scrutinize(statement);
assertWarningsRaised("property-restricted-to-mainsnak-found-in-reference");
}
}

View File

@ -0,0 +1,52 @@
package org.openrefine.wikidata.qa.scrutinizers;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.Arrays;
import java.util.Set;
import java.util.stream.Collectors;
import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.openrefine.wikidata.qa.MockConstraintFetcher;
import org.openrefine.wikidata.qa.QAWarning;
import org.openrefine.wikidata.qa.QAWarningStore;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.testng.annotations.BeforeMethod;
/**
 * Base class for scrutinizer tests. Before each test method it creates the
 * scrutinizer under test and connects it to a fresh {@link QAWarningStore}
 * and a {@link MockConstraintFetcher}, then offers helpers to run the
 * scrutinizer and check which warnings ended up in the store.
 */
public abstract class ScrutinizerTest {

    /**
     * @return a new instance of the scrutinizer under test
     */
    public abstract EditScrutinizer getScrutinizer();

    private EditScrutinizer scrutinizer;
    private QAWarningStore store;
    private ConstraintFetcher fetcher;

    @BeforeMethod
    public void setUp() {
        store = new QAWarningStore();
        fetcher = new MockConstraintFetcher();
        scrutinizer = getScrutinizer();
        scrutinizer.setStore(store);
        scrutinizer.setFetcher(fetcher);
    }

    /**
     * Runs the scrutinizer on the given updates, in order.
     */
    public void scrutinize(ItemUpdate... updates) {
        scrutinizer.scrutinize(Arrays.asList(updates));
    }

    /**
     * Asserts that the set of warning types in the store is exactly the
     * given set of types.
     */
    public void assertWarningsRaised(String... types) {
        Set<String> expectedTypes = Arrays.stream(types).collect(Collectors.toSet());
        assertEquals(expectedTypes, getWarningTypes());
    }

    /**
     * Asserts that the store contains the given warning.
     */
    public void assertWarningRaised(QAWarning warning) {
        boolean found = store.getWarnings().contains(warning);
        assertTrue(found);
    }

    /**
     * Asserts that the store holds no warning at all.
     */
    public void assertNoWarningRaised() {
        assertWarningsRaised();
    }

    /**
     * @return the types of all warnings currently in the store
     */
    public Set<String> getWarningTypes() {
        return store.getWarnings().stream()
                .map(QAWarning::getType)
                .collect(Collectors.toSet());
    }
}

View File

@ -0,0 +1,27 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
/**
 * Tests for {@link SelfReferentialScrutinizer}.
 */
public class SelfReferentialScrutinizerTest extends StatementScrutinizerTest {

    @Override
    public EditScrutinizer getScrutinizer() {
        return new SelfReferentialScrutinizer();
    }

    /** A statement whose value is its own subject is reported. */
    @Test
    public void testTrigger() {
        ItemIdValue subjectAndValue = TestingDataGenerator.matchedId;
        scrutinize(TestingDataGenerator.generateStatement(subjectAndValue, subjectAndValue));
        assertWarningsRaised(SelfReferentialScrutinizer.type);
    }

    /** A statement pointing to a different item is fine. */
    @Test
    public void testNoProblem() {
        ItemIdValue subject = TestingDataGenerator.matchedId;
        ItemIdValue otherItem = TestingDataGenerator.existingId;
        scrutinize(TestingDataGenerator.generateStatement(subject, otherItem));
        assertNoWarningRaised();
    }
}

View File

@ -0,0 +1,41 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
/**
 * Tests for {@link SingleValueScrutinizer}.
 */
public class SingleValueScrutinizerTest extends ScrutinizerTest {

    @Override
    public EditScrutinizer getScrutinizer() {
        return new SingleValueScrutinizer();
    }

    /**
     * Adding two generated statements to the same item in one update is
     * reported.
     */
    @Test
    public void testTrigger() {
        ItemIdValue subject = TestingDataGenerator.existingId;
        ItemIdValue value = TestingDataGenerator.matchedId;
        ItemUpdate doubleAddition = new ItemUpdateBuilder(subject)
                .addStatement(TestingDataGenerator.generateStatement(subject, value))
                .addStatement(TestingDataGenerator.generateStatement(subject, value))
                .build();

        scrutinize(doubleAddition);

        assertWarningsRaised(SingleValueScrutinizer.type);
    }

    /**
     * One generated statement per item, spread over two items, is fine.
     */
    @Test
    public void testNoIssue() {
        ItemIdValue firstSubject = TestingDataGenerator.existingId;
        ItemIdValue secondSubject = TestingDataGenerator.matchedId;

        scrutinize(
                singleStatementUpdate(firstSubject, secondSubject),
                singleStatementUpdate(secondSubject, secondSubject));

        assertNoWarningRaised();
    }

    /**
     * Builds an update on the given subject adding one generated statement
     * whose value is the given item.
     */
    private static ItemUpdate singleStatementUpdate(ItemIdValue subject, ItemIdValue value) {
        return new ItemUpdateBuilder(subject)
                .addStatement(TestingDataGenerator.generateStatement(subject, value))
                .build();
    }
}

View File

@ -0,0 +1,43 @@
package org.openrefine.wikidata.qa.scrutinizers;
import java.util.Collections;
import java.util.List;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.Claim;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
/**
 * Base class for tests of scrutinizers that inspect snaks. It wraps a snak
 * into a statement on the existing test item, placing it as main snak,
 * qualifier or reference, and scrutinizes that statement.
 */
public abstract class SnakScrutinizerTest extends StatementScrutinizerTest {

    public static Snak defaultMainSnak = Datamodel.makeNoValueSnak(Datamodel.makeWikidataPropertyIdValue("P3928"));

    /**
     * Scrutinizes a statement whose main snak is the given snak, without
     * qualifiers or references.
     */
    public void scrutinize(Snak snak) {
        scrutinize(withoutReferences(claimOnExistingItem(snak, Collections.emptyList())));
    }

    /**
     * Scrutinizes a statement with the default main snak and the given
     * snak as its only qualifier.
     */
    public void scrutinizeAsQualifier(Snak snak) {
        scrutinize(withoutReferences(claimOnExistingItem(defaultMainSnak, toSnakGroups(snak))));
    }

    /**
     * Scrutinizes a statement with the default main snak, no qualifier,
     * and a single reference made of the given snak.
     */
    public void scrutinizeAsReference(Snak snak) {
        Claim claim = claimOnExistingItem(defaultMainSnak, Collections.emptyList());
        Statement sourced = Datamodel.makeStatement(
                claim,
                Collections.singletonList(Datamodel.makeReference(toSnakGroups(snak))),
                StatementRank.NORMAL,
                "");
        scrutinize(sourced);
    }

    /** Builds a claim on the existing test item. */
    private static Claim claimOnExistingItem(Snak mainSnak, List<SnakGroup> qualifiers) {
        return Datamodel.makeClaim(TestingDataGenerator.existingId, mainSnak, qualifiers);
    }

    /** Wraps a claim in a normal-rank statement with no reference and an empty id. */
    private static Statement withoutReferences(Claim claim) {
        return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
    }

    /** Returns a one-element list holding a group that contains only the given snak. */
    private List<SnakGroup> toSnakGroups(Snak snak) {
        SnakGroup onlyGroup = Datamodel.makeSnakGroup(Collections.singletonList(snak));
        return Collections.singletonList(onlyGroup);
    }
}

View File

@ -0,0 +1,16 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
/**
 * Base class for tests of scrutinizers that inspect statements.
 */
public abstract class StatementScrutinizerTest extends ScrutinizerTest {

    /**
     * Scrutinizes an update that adds the given statement to its own
     * subject. The subject is cast to an item id, so the statement must be
     * about an item.
     */
    public void scrutinize(Statement statement) {
        ItemIdValue subject = (ItemIdValue) statement.getClaim().getSubject();
        ItemUpdate update = new ItemUpdateBuilder(subject)
                .addStatement(statement)
                .build();
        scrutinize(update);
    }
}

View File

@ -0,0 +1,20 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.testng.annotations.Test;
/**
 * Tests for {@link UnsourcedScrutinizer}.
 */
public class UnsourcedScrutinizerTest extends StatementScrutinizerTest {
@Override
public EditScrutinizer getScrutinizer() {
return new UnsourcedScrutinizer();
}
/**
 * A statement produced by the testing data generator (which attaches
 * no references) is reported as unsourced.
 */
@Test
public void testTrigger() {
scrutinize(TestingDataGenerator.generateStatement(TestingDataGenerator.existingId,
TestingDataGenerator.matchedId));
assertWarningsRaised(UnsourcedScrutinizer.type);
}
}

View File

@ -0,0 +1,21 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
/**
 * Base class for tests of scrutinizers that inspect values.
 */
public abstract class ValueScrutinizerTest extends SnakScrutinizerTest {
// Property used for the snak that wraps a value under test.
public static final PropertyIdValue defaultPid = Datamodel.makeWikidataPropertyIdValue("P328");
/**
 * Scrutinizes the given value, wrapped in a value snak on {@link #defaultPid}.
 */
public void scrutinize(Value value) {
scrutinize(Datamodel.makeValueSnak(defaultPid, value));
}
/**
 * Scrutinizes an update that adds the given text as a label on the existing test item.
 */
public void scrutinizeLabel(MonolingualTextValue text) {
scrutinize(new ItemUpdateBuilder(TestingDataGenerator.existingId).addLabel(text).build());
}
}

View File

@ -0,0 +1,57 @@
package org.openrefine.wikidata.qa.scrutinizers;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
/**
 * Tests for {@link WhitespaceScrutinizer}, on string values and on labels.
 */
public class WhitespaceScrutinizerTest extends ValueScrutinizerTest {

    @Override
    public EditScrutinizer getScrutinizer() {
        return new WhitespaceScrutinizer();
    }

    @Test
    public void testLeadingWhitespace() {
        scrutinizeString(" a");
        assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
    }

    @Test
    public void testTrailingWhitespace() {
        scrutinizeString("a\t");
        assertWarningsRaised(WhitespaceScrutinizer.trailingWhitespaceType);
    }

    @Test
    public void testDuplicateWhitespace() {
        scrutinizeString("a\t b");
        assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType);
    }

    @Test
    public void testNonPrintableChars() {
        scrutinizeString("c\u0003");
        assertWarningsRaised(WhitespaceScrutinizer.nonPrintableCharsType);
    }

    /** A single inner space is not an issue. */
    @Test
    public void testNoIssue() {
        scrutinizeString("a b");
        assertNoWarningRaised();
    }

    /** One value can raise several distinct warnings at once. */
    @Test
    public void testMultipleIssues() {
        scrutinizeString(" a\t b ");
        assertWarningsRaised(
                WhitespaceScrutinizer.duplicateWhitespaceType,
                WhitespaceScrutinizer.leadingWhitespaceType,
                WhitespaceScrutinizer.trailingWhitespaceType);
    }

    /** Labels are checked as well as statement values. */
    @Test
    public void testMonolingualTextValue() {
        scrutinizeLabel(Datamodel.makeMonolingualTextValue(" a", "fr"));
        assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
    }

    /** Wraps the text in a string value and scrutinizes it. */
    private void scrutinizeString(String text) {
        scrutinize(Datamodel.makeStringValue(text));
    }
}

View File

@ -68,10 +68,19 @@ public class TestingDataGenerator {
return new WbMonolingualExpr(new WbLanguageConstant(langCode, langLabel), new WbStringConstant(text));
}
public static Statement generateStatement(ItemIdValue from, ItemIdValue to) {
public static Statement generateStatement(ItemIdValue from, PropertyIdValue pid, ItemIdValue to) {
Claim claim = Datamodel.makeClaim(from, Datamodel.makeValueSnak(pid, to), Collections.emptyList());
return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
}
public static Statement generateStatement(ItemIdValue from, ItemIdValue to) {
return generateStatement(from, pid, to);
}
public static ItemIdValue newIdA = makeNewItemIdValue(1234L, "new item A");
public static ItemIdValue newIdB = makeNewItemIdValue(4567L, "new item B");
public static ItemIdValue matchedId = makeMatchedItemIdValue("Q89","eist");
public static ItemIdValue existingId = Datamodel.makeWikidataItemIdValue("Q43");
}

View File

@ -12,7 +12,6 @@ import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.Claim;
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Reference;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
@ -23,28 +22,23 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
public class PointerExtractorTest {
private ItemIdValue existingId = Datamodel.makeWikidataItemIdValue("Q43");
private ItemIdValue matchedId = TestingDataGenerator.makeMatchedItemIdValue("Q89","eist");
private ItemIdValue newIdA = TestingDataGenerator.makeNewItemIdValue(1234L, "new item A");
private ItemIdValue newIdB = TestingDataGenerator.makeNewItemIdValue(4567L, "new item B");
private PropertyIdValue pid = Datamodel.makeWikidataPropertyIdValue("P89");
private Snak snakWithNew = Datamodel.makeValueSnak(pid, newIdA);
private Snak snakWithoutNew = Datamodel.makeValueSnak(pid, matchedId);
private Snak snakWithNew = Datamodel.makeValueSnak(pid, TestingDataGenerator.newIdA);
private Snak snakWithoutNew = Datamodel.makeValueSnak(pid, TestingDataGenerator.matchedId);
private SnakGroup snakGroupWithNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithNew));
private SnakGroup snakGroupWithoutNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithoutNew));
private Claim claimWithNew = Datamodel.makeClaim(existingId, snakWithNew, Collections.emptyList());
private Claim claimNewSubject = Datamodel.makeClaim(newIdB, snakWithoutNew, Collections.emptyList());
private Claim claimNewQualifier = Datamodel.makeClaim(matchedId, snakWithoutNew,
private Claim claimWithNew = Datamodel.makeClaim(TestingDataGenerator.existingId, snakWithNew, Collections.emptyList());
private Claim claimNewSubject = Datamodel.makeClaim(TestingDataGenerator.newIdB, snakWithoutNew, Collections.emptyList());
private Claim claimNewQualifier = Datamodel.makeClaim(TestingDataGenerator.matchedId, snakWithoutNew,
Collections.singletonList(snakGroupWithNew));
private static PointerExtractor e = new PointerExtractor();
@Test
public void testExtractEntityId() {
assertEquals(Collections.singleton(newIdA), e.extractPointers(newIdA));
assertEmpty(e.extractPointers(existingId));
assertEmpty(e.extractPointers(matchedId));
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(TestingDataGenerator.newIdA));
assertEmpty(e.extractPointers(TestingDataGenerator.existingId));
assertEmpty(e.extractPointers(TestingDataGenerator.matchedId));
}
@Test
@ -62,26 +56,26 @@ public class PointerExtractorTest {
@Test
public void testSnak() {
assertEmpty(e.extractPointers(snakWithoutNew));
assertEquals(Collections.singleton(newIdA), e.extractPointers(snakWithNew));
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(snakWithNew));
assertEmpty(e.extractPointers(Datamodel.makeNoValueSnak(pid)));
}
@Test
public void testSnakGroup() {
assertEmpty(e.extractPointers(snakGroupWithoutNew));
assertEquals(Collections.singleton(newIdA), e.extractPointers(snakGroupWithNew));
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(snakGroupWithNew));
}
@Test
public void testStatement() {
assertEmpty(e.extractPointers(Datamodel.makeStatement(claimNewSubject,
Collections.emptyList(), StatementRank.NORMAL, "")));
assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimWithNew,
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimWithNew,
Collections.emptyList(), StatementRank.NORMAL, "")));
assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimNewQualifier,
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimNewQualifier,
Collections.emptyList(), StatementRank.NORMAL, "")));
Reference reference = Datamodel.makeReference(Collections.singletonList(snakGroupWithNew));
assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimNewSubject,
assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimNewSubject,
Collections.singletonList(reference), StatementRank.NORMAL, "")));
}