diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java
index 44ea32ac0..2efecd88c 100644
--- a/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java
+++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/ConstraintFetcher.java
@@ -1,213 +1,66 @@
+
 package org.openrefine.wikidata.qa;
-import java.util.ArrayList;
-import java.util.List;
 import java.util.Set;
-import java.util.function.Predicate;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-import org.openrefine.wikidata.utils.EntityCache;
-import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
-import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
-import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
-import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
 import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
-import org.wikidata.wdtk.datamodel.interfaces.Snak;
-import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
-import org.wikidata.wdtk.datamodel.interfaces.Statement;
-import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
-import org.wikidata.wdtk.datamodel.interfaces.StringValue;
-import org.wikidata.wdtk.datamodel.interfaces.Value;
 /**
- * This class provides an abstraction over the way constraint
- * definitions are stored in Wikidata.
+ * An object that fetches constraints about properties.
  *
- * @author antonin
+ * @author Antonin Delpeuch
  *
  */
-public class ConstraintFetcher {
-    public static String WIKIDATA_CONSTRAINT_PID = "P2302";
-
-    public static String FORMAT_CONSTRAINT_QID = "Q21502404";
-    public static String FORMAT_REGEX_PID = "P1793";
-
-    public static String INVERSE_CONSTRAINT_QID = "Q21510855";
-    public static String INVERSE_PROPERTY_PID = "P2306";
-
-    public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958";
-
-    public static String USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID = "Q21510863";
-
-    public static String USED_ONLY_AS_REFERENCE_CONSTRAINT_QID = "Q21528959";
-
-    public static String ALLOWED_QUALIFIERS_CONSTRAINT_QID = "Q21510851";
-    public static String ALLOWED_QUALIFIERS_CONSTRAINT_PID = "P2306";
-
-    public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856";
-    public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306";
-
-    public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
-    public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
-
-    // The following constraints still need to be implemented:
-
-    public static String TYPE_CONSTRAINT_QID = "Q21503250";
-
-
+public interface ConstraintFetcher {
+
     /**
      * Retrieves the regular expression for formatting a property, or null if
      * there is no such constraint
      * @param pid
      * @return the expression of a regular expression which should be compatible with java.util.regex
      */
-    public String getFormatRegex(PropertyIdValue pid) {
-        List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
-        if (specs != null) {
-            List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
-            if (!
regexes.isEmpty()) { - return ((StringValue)regexes.get(0)).getString(); - } - } - return null; - } - + String getFormatRegex(PropertyIdValue pid); + /** * Retrieves the property that is the inverse of a given property * @param pid: the property to retrieve the inverse for * @return the pid of the inverse property */ - public PropertyIdValue getInversePid(PropertyIdValue pid) { - List specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID); - - if(specs != null) { - List inverses = findValues(specs, INVERSE_PROPERTY_PID); - if (! inverses.isEmpty()) { - return (PropertyIdValue)inverses.get(0); - } - } - return null; - } - + PropertyIdValue getInversePid(PropertyIdValue pid); + /** * Is this property for values only? */ - public boolean isForValuesOnly(PropertyIdValue pid) { - return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null; - } - + boolean isForValuesOnly(PropertyIdValue pid); + /** * Is this property for qualifiers only? */ - public boolean isForQualifiersOnly(PropertyIdValue pid) { - return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null; - } - + boolean isForQualifiersOnly(PropertyIdValue pid); + /** * Is this property for references only? */ - public boolean isForReferencesOnly(PropertyIdValue pid) { - return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null; - } - + boolean isForReferencesOnly(PropertyIdValue pid); + /** * Get the list of allowed qualifiers (as property ids) for this property (null if any) */ - public Set allowedQualifiers(PropertyIdValue pid) { - List specs = getSingleConstraint(pid, ALLOWED_QUALIFIERS_CONSTRAINT_QID); - - if (specs != null) { - List properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID); - return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet()); - } - return null; - } - + Set allowedQualifiers(PropertyIdValue pid); + /** * Get the list of mandatory qualifiers (as property ids) for this property (null if any) */ - public Set mandatoryQualifiers(PropertyIdValue pid) { - List specs = getSingleConstraint(pid, MANDATORY_QUALIFIERS_CONSTRAINT_QID); - - if (specs != null) { - List properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID); - return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet()); - } - return null; - } - + Set mandatoryQualifiers(PropertyIdValue pid); + /** * Is this property expected to have at most one value per item? */ - public boolean hasSingleValue(PropertyIdValue pid) { - return getSingleConstraint(pid, SINGLE_VALUE_CONSTRAINT_QID) != null; - } - + boolean hasSingleValue(PropertyIdValue pid); + /** * Is this property expected to have distinct values? 
*/ - public boolean hasDistinctValues(PropertyIdValue pid) { - return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null; - } - - /** - * Returns a single constraint for a particular type and a property, or null - * if there is no such constraint - * @param pid: the property to retrieve the constraints for - * @param qid: the type of the constraints - * @return the list of qualifiers for the constraint, or null if it does not exist - */ - protected List getSingleConstraint(PropertyIdValue pid, String qid) { - Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null); - if (statement != null) { - return statement.getClaim().getQualifiers(); - } - return null; - } - - /** - * Gets the list of constraints of a particular type for a property - * @param pid: the property to retrieve the constraints for - * @param qid: the type of the constraints - * @return the stream of matching constraint statements - */ - protected Stream getConstraintsByType(PropertyIdValue pid, String qid) { - Stream allConstraints = getConstraintStatements(pid) - .stream() - .filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid)); - return allConstraints; - } - - /** - * Gets all the constraint statements for a given property - * @param pid : the id of the property to retrieve the constraints for - * @return the list of constraint statements - */ - protected List getConstraintStatements(PropertyIdValue pid) { - PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid); - StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID); - if (group != null) { - return group.getStatements(); - } else { - return new ArrayList(); - } - } - - /** - * Returns the values of a given property in qualifiers - * @param groups: the qualifiers - * @param pid: the property to filter on - * @return - */ - protected List findValues(List groups, String pid) { - List results = new ArrayList<>(); - for(SnakGroup group : groups) { - if (group.getProperty().getId().equals(pid)) { - for (Snak snak : group.getSnaks()) - results.add(snak.getValue()); - } - } - return results; - } + boolean hasDistinctValues(PropertyIdValue pid); + } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java index 530b1da4a..b90bb3101 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/EditInspector.java @@ -7,7 +7,7 @@ import java.util.stream.Collectors; import org.openrefine.wikidata.qa.scrutinizers.DistinctValuesScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer; -import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer; +import org.openrefine.wikidata.qa.scrutinizers.FormatScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer; import org.openrefine.wikidata.qa.scrutinizers.NoEditsMadeScrutinizer; @@ -32,14 +32,16 @@ import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; public class EditInspector { private Map scrutinizers; private QAWarningStore warningStore; + private ConstraintFetcher fetcher; public EditInspector(QAWarningStore warningStore) { this.scrutinizers = new HashMap<>(); + this.fetcher = new WikidataConstraintFetcher(); this.warningStore = warningStore; // Register all known scrutinizers here register(new NewItemScrutinizer()); - register(new 
FormatConstraintScrutinizer()); + register(new FormatScrutinizer()); register(new InverseConstraintScrutinizer()); register(new SelfReferentialScrutinizer()); register(new UnsourcedScrutinizer()); @@ -59,28 +61,25 @@ public class EditInspector { String key = scrutinizer.getClass().getName(); scrutinizers.put(key, scrutinizer); scrutinizer.setStore(warningStore); + scrutinizer.setFetcher(fetcher); } /** * Inspect a batch of edits with the registered scrutinizers - * @param editBatch + * @param editBatch */ public void inspect(List editBatch) { // First, schedule them with some scheduler, // so that all newly created entities appear in the batch - UpdateScheduler scheduler = new WikibaseAPIUpdateScheduler(); - try { - editBatch = scheduler.schedule(editBatch); - Map updates = ItemUpdate.groupBySubject(editBatch); - List mergedUpdates = updates.values().stream().collect(Collectors.toList()); - for(EditScrutinizer scrutinizer : scrutinizers.values()) { - scrutinizer.scrutinize(mergedUpdates); - } - } catch(ImpossibleSchedulingException e) { - warningStore.addWarning(new QAWarning( - "scheduling-failed", null, QAWarning.Severity.CRITICAL, 1)); + WikibaseAPIUpdateScheduler scheduler = new WikibaseAPIUpdateScheduler(); + editBatch = scheduler.schedule(editBatch); + Map updates = ItemUpdate.groupBySubject(editBatch); + List mergedUpdates = updates.values().stream().collect(Collectors.toList()); + for(EditScrutinizer scrutinizer : scrutinizers.values()) { + scrutinizer.scrutinize(mergedUpdates); } + if (warningStore.getNbWarnings() == 0) { warningStore.addWarning(new QAWarning( diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarning.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarning.java index aec8aca5a..1115fddb0 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarning.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/QAWarning.java @@ -1,20 +1,25 @@ package org.openrefine.wikidata.qa; +import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.Map.Entry; import java.util.Properties; +import org.jsoup.helper.Validate; import org.openrefine.wikidata.utils.JacksonJsonizable; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; /** - * A class to represent a QA warning emited by the Wikidata schema + * A class to represent a QA warning emitted by the Wikidata schema * This could probably be reused at a broader scale, for instance for * Data Package validation. 
* - * @author antonin + * @author Antonin Delpeuch * */ public class QAWarning extends JacksonJsonizable implements Comparable { @@ -27,42 +32,30 @@ public class QAWarning extends JacksonJsonizable implements Comparable properties; + private final Map properties; public QAWarning(String type, String bucketId, Severity severity, int count) { + Validate.notNull(type); this.type = type; this.bucketId = bucketId; + Validate.notNull(severity); this.severity = severity; this.count = count; - this.properties = new HashMap(); - } - - @JsonCreator - public QAWarning( - @JsonProperty("type") String type, - @JsonProperty("bucket_id") String bucketId, - @JsonProperty("severity") Severity severity, - @JsonProperty("count") int count, - @JsonProperty("properties") Map properties) { - this.type = type; - this.bucketId = bucketId; - this.severity = severity; - this.count = count; - this.properties = properties; + this.properties = new HashMap<>(); } /** - * Returns the full key for aggregation of QA warnings - * @return + * @return the full key for aggregation of QA warnings */ + @JsonIgnore public String getAggregationId() { if (this.bucketId != null) { return this.type + "_" + this.bucketId; @@ -75,12 +68,22 @@ public class QAWarning extends JacksonJsonizable implements Comparable 0) { + newSeverity = other.getSeverity(); } + QAWarning merged = new QAWarning(getType(), getBucketId(), newSeverity, + newCount); + for(Entry entry : properties.entrySet()) { + merged.setProperty(entry.getKey(),entry.getValue()); + } + for(Entry entry : other.getProperties().entrySet()) { + merged.setProperty(entry.getKey(),entry.getValue()); + } + return merged; } /** @@ -114,6 +117,7 @@ public class QAWarning extends JacksonJsonizable implements Comparable getProperties() { return properties; } @@ -125,4 +129,17 @@ public class QAWarning extends JacksonJsonizable implements Comparable map; + @JsonIgnore private QAWarning.Severity maxSeverity; + @JsonIgnore private int totalWarnings; public QAWarningStore() { @@ -36,7 +41,7 @@ public class QAWarningStore { totalWarnings += warning.getCount(); if (map.containsKey(aggregationKey)) { QAWarning existing = map.get(aggregationKey); - existing.aggregate(warning); + map.put(aggregationKey, existing.aggregate(warning)); } else { map.put(aggregationKey, warning); } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java new file mode 100644 index 000000000..9323de866 --- /dev/null +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/WikidataConstraintFetcher.java @@ -0,0 +1,190 @@ +package org.openrefine.wikidata.qa; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.openrefine.wikidata.utils.EntityCache; +import org.wikidata.wdtk.datamodel.interfaces.EntityDocument; +import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; +import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument; +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; +import org.wikidata.wdtk.datamodel.interfaces.Snak; +import org.wikidata.wdtk.datamodel.interfaces.SnakGroup; +import org.wikidata.wdtk.datamodel.interfaces.Statement; +import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; +import org.wikidata.wdtk.datamodel.interfaces.StringValue; +import 
org.wikidata.wdtk.datamodel.interfaces.Value;
+
+/**
+ * This class provides an abstraction over the way constraint
+ * definitions are stored in Wikidata.
+ *
+ * @author antonin
+ *
+ */
+public class WikidataConstraintFetcher implements ConstraintFetcher {
+    public static String WIKIDATA_CONSTRAINT_PID = "P2302";
+
+    public static String FORMAT_CONSTRAINT_QID = "Q21502404";
+    public static String FORMAT_REGEX_PID = "P1793";
+
+    public static String INVERSE_CONSTRAINT_QID = "Q21510855";
+    public static String INVERSE_PROPERTY_PID = "P2306";
+
+    public static String USED_ONLY_AS_VALUES_CONSTRAINT_QID = "Q21528958";
+
+    public static String USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID = "Q21510863";
+
+    public static String USED_ONLY_AS_REFERENCE_CONSTRAINT_QID = "Q21528959";
+
+    public static String ALLOWED_QUALIFIERS_CONSTRAINT_QID = "Q21510851";
+    public static String ALLOWED_QUALIFIERS_CONSTRAINT_PID = "P2306";
+
+    public static String MANDATORY_QUALIFIERS_CONSTRAINT_QID = "Q21510856";
+    public static String MANDATORY_QUALIFIERS_CONSTRAINT_PID = "P2306";
+
+    public static String SINGLE_VALUE_CONSTRAINT_QID = "Q19474404";
+    public static String DISTINCT_VALUES_CONSTRAINT_QID = "Q21502410";
+
+    // The following constraints still need to be implemented:
+
+    public static String TYPE_CONSTRAINT_QID = "Q21503250";
+
+
+    @Override
+    public String getFormatRegex(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
+        if (specs != null) {
+            List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
+            if (! regexes.isEmpty()) {
+                return ((StringValue)regexes.get(0)).getString();
+            }
+        }
+        return null;
+    }
+
+    @Override
+    public PropertyIdValue getInversePid(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID);
+
+        if(specs != null) {
+            List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID);
+            if (! inverses.isEmpty()) {
+                return (PropertyIdValue)inverses.get(0);
+            }
+        }
+        return null;
+    }
+
+    @Override
+    public boolean isForValuesOnly(PropertyIdValue pid) {
+        return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null;
+    }
+
+    @Override
+    public boolean isForQualifiersOnly(PropertyIdValue pid) {
+        return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null;
+    }
+
+    @Override
+    public boolean isForReferencesOnly(PropertyIdValue pid) {
+        return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null;
+    }
+
+    @Override
+    public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_QUALIFIERS_CONSTRAINT_QID);
+
+        if (specs != null) {
+            List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID);
+            return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
+        }
+        return null;
+    }
+
+    @Override
+    public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
+        List<SnakGroup> specs = getSingleConstraint(pid, MANDATORY_QUALIFIERS_CONSTRAINT_QID);
+
+        if (specs != null) {
+            List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID);
+            return properties.stream().map(e -> (PropertyIdValue) e).collect(Collectors.toSet());
+        }
+        return null;
+    }
+
+    @Override
+    public boolean hasSingleValue(PropertyIdValue pid) {
+        return getSingleConstraint(pid, SINGLE_VALUE_CONSTRAINT_QID) != null;
+    }
+
+    @Override
+    public boolean hasDistinctValues(PropertyIdValue pid) {
+        return getSingleConstraint(pid, DISTINCT_VALUES_CONSTRAINT_QID) != null;
+    }
+
+    /**
+     * Returns a single constraint for a particular type and a property, or null
+     * if there is no such constraint
+     * @param pid: the property to retrieve the constraints for
+     * @param qid: the type of the constraints
+     * @return the list of qualifiers for the constraint, or null if it does not exist
+     */
+    protected List<SnakGroup> getSingleConstraint(PropertyIdValue pid, String qid) {
+        Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null);
+        if (statement != null) {
+            return statement.getClaim().getQualifiers();
+        }
+        return null;
+    }
+
+    /**
+     * Gets the list of constraints of a particular type for a property
+     * @param pid: the property to retrieve the constraints for
+     * @param qid: the type of the constraints
+     * @return the stream of matching constraint statements
+     */
+    protected Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
+        Stream<Statement> allConstraints = getConstraintStatements(pid)
+                .stream()
+                .filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid));
+        return allConstraints;
+    }
+
+    /**
+     * Gets all the constraint statements for a given property
+     * @param pid : the id of the property to retrieve the constraints for
+     * @return the list of constraint statements
+     */
+    protected List<Statement> getConstraintStatements(PropertyIdValue pid) {
+        PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
+        StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
+        if (group != null) {
+            return group.getStatements();
+        } else {
+            return new ArrayList<Statement>();
+        }
+    }
+
+    /**
+     * Returns the values of a given property in qualifiers
+     * @param groups: the qualifiers
+     * @param pid: the property to filter on
+     * @return
+     */
+    protected List<Value> findValues(List<SnakGroup> groups, String pid) {
+        List<Value> results = new ArrayList<>();
+        for(SnakGroup group : groups) {
+            if (group.getProperty().getId().equals(pid)) {
+                for (Snak snak : group.getSnaks())
+                    results.add(snak.getValue());
+            }
+        }
+        return
results; + } +} diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java index 336ccacb7..e3520e092 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizer.java @@ -15,11 +15,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Value; * A scrutinizer that checks for properties using the same value * on different items. * - * @author antonin + * @author Antonin Delpeuch * */ public class DistinctValuesScrutinizer extends StatementScrutinizer { + public final static String type = "identical-values-for-distinct-valued-property"; + private Map> _seenValues; public DistinctValuesScrutinizer() { @@ -39,7 +41,7 @@ public class DistinctValuesScrutinizer extends StatementScrutinizer { if (seen.containsKey(mainSnakValue)) { EntityIdValue otherId = seen.get(mainSnakValue); QAWarning issue = new QAWarning( - "identical-values-for-distinct-valued-property", + type, pid.getId(), QAWarning.Severity.IMPORTANT, 1); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EditScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EditScrutinizer.java index f3335f04f..d0ee56524 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EditScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/EditScrutinizer.java @@ -2,6 +2,7 @@ package org.openrefine.wikidata.qa.scrutinizers; import java.util.List; +import org.openrefine.wikidata.qa.WikidataConstraintFetcher; import org.openrefine.wikidata.qa.ConstraintFetcher; import org.openrefine.wikidata.qa.QAWarning; import org.openrefine.wikidata.qa.QAWarning.Severity; @@ -9,9 +10,9 @@ import org.openrefine.wikidata.qa.QAWarningStore; import org.openrefine.wikidata.updates.ItemUpdate; /** - * Interface for any class that - * @author antonin - * + * Inspects an edit batch and emits warnings. + * + * @author Antonin Delpeuch */ public abstract class EditScrutinizer { @@ -19,13 +20,18 @@ public abstract class EditScrutinizer { protected ConstraintFetcher _fetcher; public EditScrutinizer() { - _fetcher = new ConstraintFetcher(); + _fetcher = null; + _store = null; } public void setStore(QAWarningStore store) { _store = store; } + public void setFetcher(ConstraintFetcher fetcher) { + _fetcher = fetcher; + } + /** * Reads the candidate edits and emits warnings in the store * @param edit: the list of ItemUpdates to scrutinize diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatConstraintScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizer.java similarity index 91% rename from extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatConstraintScrutinizer.java rename to extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizer.java index 376fb716b..826baaa4b 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatConstraintScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizer.java @@ -15,14 +15,16 @@ import org.wikidata.wdtk.datamodel.interfaces.StringValue; * A scrutinizer that detects incorrect formats in text values * (mostly identifiers). 
* - * @author antonin + * @author Antonin Delpeuch * */ -public class FormatConstraintScrutinizer extends SnakScrutinizer { +public class FormatScrutinizer extends SnakScrutinizer { + + public static final String type = "add-statements-with-invalid-format"; private Map _patterns; - public FormatConstraintScrutinizer() { + public FormatScrutinizer() { _patterns = new HashMap<>(); } @@ -59,7 +61,7 @@ public class FormatConstraintScrutinizer extends SnakScrutinizer { if (!pattern.matcher(value).matches()) { if (added) { QAWarning issue = new QAWarning( - "add-statements-with-invalid-format", + type, pid.getId(), QAWarning.Severity.IMPORTANT, 1); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java index 08a71d53f..61f2d8eb6 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstraintScrutinizer.java @@ -17,10 +17,12 @@ import org.wikidata.wdtk.datamodel.interfaces.Value; * A scrutinizer that checks for missing inverse statements in * edit batches. * - * @author antonin + * @author Antonin Delpeuch * */ public class InverseConstraintScrutinizer extends StatementScrutinizer { + + public static final String type = "missing-inverse-statements"; private Map _inverse; private Map >> _statements; @@ -83,7 +85,7 @@ public class InverseConstraintScrutinizer extends StatementScrutinizer { PropertyIdValue missingProperty = propertyPair.getValue(); Set reciprocalLinks = _statements.get(missingProperty).get(idValue); if (reciprocalLinks == null || !reciprocalLinks.contains(itemLinks.getKey())) { - QAWarning issue = new QAWarning("missing-inverse-statements", + QAWarning issue = new QAWarning(type, ourProperty.getId(), QAWarning.Severity.IMPORTANT, 1); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/ItemEditScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/ItemUpdateScrutinizer.java similarity index 91% rename from extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/ItemEditScrutinizer.java rename to extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/ItemUpdateScrutinizer.java index 8053ab6bc..9c645a50c 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/ItemEditScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/ItemUpdateScrutinizer.java @@ -4,7 +4,7 @@ import java.util.List; import org.openrefine.wikidata.updates.ItemUpdate; -public abstract class ItemEditScrutinizer extends EditScrutinizer { +public abstract class ItemUpdateScrutinizer extends EditScrutinizer { @Override public void scrutinize(List edit) { diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java index ee49fa74e..e3eda44af 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizer.java @@ -5,19 +5,26 @@ import org.openrefine.wikidata.updates.ItemUpdate; import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; /** - * A scrutinizer that inspects new items - * @author antonin + * A scrutinizer that inspects new items. 
+ * + * @author Antonin Delpeuch */ -public class NewItemScrutinizer extends ItemEditScrutinizer { +public class NewItemScrutinizer extends ItemUpdateScrutinizer { + + public static final String noLabelType = "new-item-without-labels-or-aliases"; + public static final String noDescType = "new-item-without-descriptions"; + public static final String deletedStatementsType = "new-item-with-deleted-statements"; + public static final String noTypeType = "new-item-without-P31-or-P279"; + public static final String newItemType = "new-item-created"; @Override public void scrutinize(ItemUpdate update) { if (update.isNew()) { - info("new-item-created"); + info(newItemType); if (update.getLabels().isEmpty() && update.getAliases().isEmpty()) { QAWarning issue = new QAWarning( - "new-item-without-labels-or-aliases", + noLabelType, null, QAWarning.Severity.CRITICAL, 1); @@ -27,7 +34,7 @@ public class NewItemScrutinizer extends ItemEditScrutinizer { if (update.getDescriptions().isEmpty()) { QAWarning issue = new QAWarning( - "new-item-without-descriptions", + noDescType, null, QAWarning.Severity.WARNING, 1); @@ -35,9 +42,9 @@ public class NewItemScrutinizer extends ItemEditScrutinizer { addIssue(issue); } - if (! update.getDeletedStatements().isEmpty()) { + if (!update.getDeletedStatements().isEmpty()) { QAWarning issue = new QAWarning( - "new-item-with-deleted-statements", + deletedStatementsType, null, QAWarning.Severity.WARNING, 1); @@ -56,7 +63,7 @@ public class NewItemScrutinizer extends ItemEditScrutinizer { } if (!typeFound) { QAWarning issue = new QAWarning( - "new-item-without-P31-or-P279", + noTypeType, null, QAWarning.Severity.WARNING, 1); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NoEditsMadeScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NoEditsMadeScrutinizer.java index ad801c208..84bd2a96b 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NoEditsMadeScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/NoEditsMadeScrutinizer.java @@ -6,11 +6,13 @@ import org.openrefine.wikidata.updates.ItemUpdate; public class NoEditsMadeScrutinizer extends EditScrutinizer { + + public static final String type = "no-edit-generated"; @Override public void scrutinize(List edit) { if(edit.stream().allMatch(e -> e.isNull())) { - info("no-edit-generated"); + info(type); } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QualifierCompatibilityScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QualifierCompatibilityScrutinizer.java index ccce60733..e2f75074b 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QualifierCompatibilityScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/QualifierCompatibilityScrutinizer.java @@ -14,11 +14,14 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement; /** * A scrutinizer that checks the compatibility of the qualifiers * and the property of a statement, and looks for mandatory qualifiers. 
- * @author antonin - * + * + * @author Antonin Delpeuch */ public class QualifierCompatibilityScrutinizer extends StatementScrutinizer { + public static final String missingMandatoryQualifiersType = "missing-mandatory-qualifiers"; + public static final String disallowedQualifiersType = "disallowed-qualifiers"; + private Map> _allowedQualifiers; private Map> _mandatoryQualifiers; @@ -65,7 +68,7 @@ public class QualifierCompatibilityScrutinizer extends StatementScrutinizer { for (PropertyIdValue missing : missingQualifiers) { QAWarning issue = new QAWarning( - "missing-mandatory-qualifiers", + missingMandatoryQualifiersType, statementProperty.getId()+"-"+missing.getId(), QAWarning.Severity.WARNING, 1); @@ -76,7 +79,7 @@ public class QualifierCompatibilityScrutinizer extends StatementScrutinizer { } for (PropertyIdValue disallowed : disallowedQualifiers) { QAWarning issue = new QAWarning( - "disallowed-qualifiers", + disallowedQualifiersType, statementProperty.getId()+"-"+disallowed.getId(), QAWarning.Severity.WARNING, 1); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java index 3053c1bd7..108780634 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizer.java @@ -12,15 +12,15 @@ import org.wikidata.wdtk.datamodel.interfaces.Snak; * */ public class SelfReferentialScrutinizer extends SnakScrutinizer { + + public static final String type = "self-referential-statements"; @Override public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) { if (entityId.equals(snak.getValue())) { QAWarning issue = new QAWarning( - "self-referential-statements", - null, - QAWarning.Severity.WARNING, - 1); + type, null, + QAWarning.Severity.WARNING, 1); issue.setProperty("example_entity", entityId); addIssue(issue); } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SingleValueScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SingleValueScrutinizer.java index e09fa5c6e..c7cf399b2 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SingleValueScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SingleValueScrutinizer.java @@ -11,10 +11,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement; /** * For now this scrutinizer only checks for uniqueness at * the item level (it ignores qualifiers and references). 
- * @author antonin + * + * @author Antonin Delpeuch * */ -public class SingleValueScrutinizer extends ItemEditScrutinizer { +public class SingleValueScrutinizer extends ItemUpdateScrutinizer { + + public static final String type = "single-valued-property-added-more-than-once"; @Override public void scrutinize(ItemUpdate update) { @@ -25,10 +28,8 @@ public class SingleValueScrutinizer extends ItemEditScrutinizer { if (seenSingleProperties.contains(pid)) { QAWarning issue = new QAWarning( - "single-valued-property-added-more-than-once", - pid.getId(), - QAWarning.Severity.WARNING, - 1); + type, pid.getId(), + QAWarning.Severity.WARNING, 1); issue.setProperty("property_entity", pid); issue.setProperty("example_entity", update.getItemId()); addIssue(issue); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java index 7b56f0f4a..18d0c290c 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizer.java @@ -11,7 +11,7 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement; * A scrutinizer that inspects snaks individually, no matter whether they * appear as main snaks, qualifiers or references. * - * @author antonin + * @author Antonin Delpeuch * */ public abstract class SnakScrutinizer extends StatementScrutinizer { diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementGroupScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementGroupScrutinizer.java index 88df82086..0e553041c 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementGroupScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementGroupScrutinizer.java @@ -3,7 +3,7 @@ package org.openrefine.wikidata.qa.scrutinizers; import org.openrefine.wikidata.updates.ItemUpdate; import org.wikidata.wdtk.datamodel.interfaces.StatementGroup; -public abstract class StatementGroupScrutinizer extends ItemEditScrutinizer { +public abstract class StatementGroupScrutinizer extends ItemUpdateScrutinizer { @Override public void scrutinize(ItemUpdate update) { diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementScrutinizer.java index 071bd36d8..00e489d51 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/StatementScrutinizer.java @@ -4,7 +4,7 @@ import org.openrefine.wikidata.updates.ItemUpdate; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.Statement; -public abstract class StatementScrutinizer extends ItemEditScrutinizer { +public abstract class StatementScrutinizer extends ItemUpdateScrutinizer { @Override public void scrutinize(ItemUpdate update) { diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/UnsourcedScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/UnsourcedScrutinizer.java index 6aa0e60f1..56e86514f 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/UnsourcedScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/UnsourcedScrutinizer.java 
@@ -10,11 +10,13 @@ import org.wikidata.wdtk.datamodel.interfaces.Statement; * */ public class UnsourcedScrutinizer extends StatementScrutinizer { + + public static final String type = "unsourced-statements"; @Override public void scrutinize(Statement statement, EntityIdValue entityId, boolean added) { if(statement.getReferences().isEmpty() && added) { - warning("unsourced-statements"); + warning(type); } } diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java index 50145f3cc..1d8a1c484 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizer.java @@ -12,21 +12,27 @@ import org.wikidata.wdtk.datamodel.interfaces.Value; /** * Scrutinizes strings for trailing / leading whitespace, and others - * @author antonin + * + * @author Antonin Delpeuch * */ public class WhitespaceScrutinizer extends ValueScrutinizer { private Map _issuesMap; + public static final String leadingWhitespaceType = "leading-whitespace"; + public static final String trailingWhitespaceType = "trailing-whitespace"; + public static final String duplicateWhitespaceType = "duplicate-whitespace"; + public static final String nonPrintableCharsType = "non-printable-characters"; + public WhitespaceScrutinizer() { _issuesMap = new HashMap<>(); - _issuesMap.put("leading-whitespace", Pattern.compile("^\\s")); - _issuesMap.put("trailing-whitespace", Pattern.compile("\\s$")); - _issuesMap.put("duplicate-whitespace", Pattern.compile("\\s\\s")); + _issuesMap.put(leadingWhitespaceType, Pattern.compile("^\\s")); + _issuesMap.put(trailingWhitespaceType, Pattern.compile("\\s$")); + _issuesMap.put(duplicateWhitespaceType, Pattern.compile("\\s\\s")); // https://stackoverflow.com/questions/14565934/regular-expression-to-remove-all-non-printable-characters - _issuesMap.put("non-printable-characters", Pattern.compile("[\\x00\\x08\\x0B\\x0C\\x0E-\\x1F]")); + _issuesMap.put(nonPrintableCharsType, Pattern.compile("[\\x00\\x03\\x08\\x0B\\x0C\\x0E-\\x1F]")); } @Override diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java new file mode 100644 index 000000000..ac1b42e21 --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/MockConstraintFetcher.java @@ -0,0 +1,75 @@ +package org.openrefine.wikidata.qa; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Set; +import java.util.stream.Collectors; + +import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; + + +public class MockConstraintFetcher implements ConstraintFetcher { + + public static PropertyIdValue pidWithInverse = Datamodel.makeWikidataPropertyIdValue("P350"); + public static PropertyIdValue inversePid = Datamodel.makeWikidataPropertyIdValue("P57"); + public static PropertyIdValue allowedQualifierPid = Datamodel.makeWikidataPropertyIdValue("P34"); + public static PropertyIdValue mandatoryQualifierPid = Datamodel.makeWikidataPropertyIdValue("P97"); + + public static PropertyIdValue mainSnakPid = Datamodel.makeWikidataPropertyIdValue("P1234"); + public static PropertyIdValue qualifierPid = Datamodel.makeWikidataPropertyIdValue("P987"); + public static PropertyIdValue referencePid = 
Datamodel.makeWikidataPropertyIdValue("P384");
+
+    @Override
+    public String getFormatRegex(PropertyIdValue pid) {
+        return "[1-9]\\d+";
+    }
+
+    /**
+     * This constraint is purposely left inconsistent (the inverse
+     * constraint holds only on one side).
+     */
+    @Override
+    public PropertyIdValue getInversePid(PropertyIdValue pid) {
+        if (pidWithInverse.equals(pid)) {
+            return inversePid;
+        }
+        return null;
+    }
+
+    @Override
+    public boolean isForValuesOnly(PropertyIdValue pid) {
+        return mainSnakPid.equals(pid);
+    }
+
+    @Override
+    public boolean isForQualifiersOnly(PropertyIdValue pid) {
+        return qualifierPid.equals(pid);
+    }
+
+    @Override
+    public boolean isForReferencesOnly(PropertyIdValue pid) {
+        return referencePid.equals(pid);
+    }
+
+    @Override
+    public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
+        return Arrays.asList(allowedQualifierPid, mandatoryQualifierPid).stream().collect(Collectors.toSet());
+    }
+
+    @Override
+    public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
+        return Collections.singleton(mandatoryQualifierPid);
+    }
+
+    @Override
+    public boolean hasSingleValue(PropertyIdValue pid) {
+        return true;
+    }
+
+    @Override
+    public boolean hasDistinctValues(PropertyIdValue pid) {
+        return true;
+    }
+
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/QAWarningStoreTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/QAWarningStoreTest.java
new file mode 100644
index 000000000..2f6432347
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/QAWarningStoreTest.java
@@ -0,0 +1,44 @@
+package org.openrefine.wikidata.qa;
+
+import static org.junit.Assert.assertEquals;
+
+import org.openrefine.wikidata.testing.JacksonSerializationTest;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class QAWarningStoreTest {
+
+    public static String exampleJson = "{\"max_severity\":\"CRITICAL\",\"nb_warnings\":5,"
+            +"\"warnings\":[{\"type\":\"new-item-without-label\",\"bucketId\":null,"
+            +"\"severity\":\"CRITICAL\",\"count\":3},{\"type\":\"add-statements-with-invalid-format\","
+            +"\"bucketId\":\"P2427\",\"severity\":\"IMPORTANT\",\"count\":2}]}";
+
+    private QAWarningStore store;
+    private QAWarning otherWarning;
+
+    @BeforeMethod
+    public void setUp() {
+        store = new QAWarningStore();
+        store.addWarning(QAWarningTest.exampleWarning);
+        store.addWarning(QAWarningTest.exampleWarning);
+        otherWarning = new QAWarning("new-item-without-label", null, QAWarning.Severity.CRITICAL, 3);
+        store.addWarning(otherWarning);
+    }
+
+    @Test
+    public void testSerialize() {
+        JacksonSerializationTest.testSerialize(store, exampleJson);
+    }
+
+    @Test
+    public void testCount() {
+        assertEquals(5, store.getNbWarnings());
+        assertEquals(2, store.getWarnings().size());
+    }
+
+    @Test
+    public void testMaxSeverity() {
+        assertEquals(QAWarning.Severity.CRITICAL, store.getMaxSeverity());
+        assertEquals(QAWarning.Severity.INFO, (new QAWarningStore()).getMaxSeverity());
+    }
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/QAWarningTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/QAWarningTest.java
new file mode 100644
index 000000000..de2ed33d2
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/QAWarningTest.java
@@ -0,0 +1,50 @@
+package org.openrefine.wikidata.qa;
+
+import static org.junit.Assert.assertEquals;
+
+import org.openrefine.wikidata.testing.JacksonSerializationTest;
+import org.testng.annotations.Test;
+
+public class QAWarningTest {
+
public static QAWarning exampleWarning = new QAWarning("add-statements-with-invalid-format", + "P2427", + QAWarning.Severity.IMPORTANT, + 1); + public static String exampleJson = + "{\"severity\":\"IMPORTANT\","+ + "\"count\":1,\"bucketId\":\"P2427\",\"type\":\"add-statements-with-invalid-format\"}"; + + @Test + public void testSerialize() { + JacksonSerializationTest.testSerialize(exampleWarning, exampleJson); + } + + @Test + public void testAggregate() { + QAWarning firstWarning = new QAWarning("add-statements-with-invalid-format", + "P2427", + QAWarning.Severity.INFO, + 1); + firstWarning.setProperty("foo", "bar"); + assertEquals(exampleWarning.getAggregationId(), firstWarning.getAggregationId()); + QAWarning merged = firstWarning.aggregate(exampleWarning); + assertEquals(2, merged.getCount()); + assertEquals(exampleWarning.getAggregationId(), merged.getAggregationId()); + assertEquals(exampleWarning.getType(), merged.getType()); + assertEquals(exampleWarning.getSeverity(), merged.getSeverity()); + assertEquals("bar", merged.getProperties().get("foo")); + } + + @Test + public void testCompare() { + QAWarning otherWarning = new QAWarning("no-reference", + "no-reference", + QAWarning.Severity.WARNING, + 1); + assertEquals(1, otherWarning.compareTo(exampleWarning)); + assertEquals(-1, exampleWarning.compareTo(otherWarning)); + assertEquals(0, exampleWarning.compareTo(exampleWarning)); + } + +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/ConstraintFetcherTests.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/WikidataConstraintFetcherTests.java similarity index 96% rename from extensions/wikidata/tests/src/org/openrefine/wikidata/qa/ConstraintFetcherTests.java rename to extensions/wikidata/tests/src/org/openrefine/wikidata/qa/WikidataConstraintFetcherTests.java index 7df9ebbb4..9ed8bbd7a 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/ConstraintFetcherTests.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/WikidataConstraintFetcherTests.java @@ -7,7 +7,7 @@ import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import java.util.regex.Pattern; -public class ConstraintFetcherTests { +public class WikidataConstraintFetcherTests { private ConstraintFetcher fetcher; @@ -21,8 +21,8 @@ public class ConstraintFetcherTests { private PropertyIdValue referenceURL; private PropertyIdValue reasonForDeprecation; - public ConstraintFetcherTests() { - fetcher = new ConstraintFetcher(); + public WikidataConstraintFetcherTests() { + fetcher = new WikidataConstraintFetcher(); headOfGovernment = Datamodel.makeWikidataPropertyIdValue("P6"); startTime = Datamodel.makeWikidataPropertyIdValue("P580"); endTime = Datamodel.makeWikidataPropertyIdValue("P582"); diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizerTest.java new file mode 100644 index 000000000..6de687652 --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/DistinctValuesScrutinizerTest.java @@ -0,0 +1,29 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.openrefine.wikidata.testing.TestingDataGenerator; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.openrefine.wikidata.updates.ItemUpdateBuilder; +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; + +public class DistinctValuesScrutinizerTest 
extends StatementScrutinizerTest { + + @Override + public EditScrutinizer getScrutinizer() { + return new DistinctValuesScrutinizer(); + } + + @Test + public void testTrigger() { + ItemIdValue idA = TestingDataGenerator.existingId; + ItemIdValue idB = TestingDataGenerator.matchedId; + ItemUpdate updateA = new ItemUpdateBuilder(idA) + .addStatement(TestingDataGenerator.generateStatement(idA, idB)) + .build(); + ItemUpdate updateB = new ItemUpdateBuilder(idB) + .addStatement(TestingDataGenerator.generateStatement(idB, idB)) + .build(); + scrutinize(updateA, updateB); + assertWarningsRaised(DistinctValuesScrutinizer.type); + } +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizerTest.java new file mode 100644 index 000000000..953d13cdd --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizerTest.java @@ -0,0 +1,31 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; + +public class FormatScrutinizerTest extends ValueScrutinizerTest { + + @Override + public EditScrutinizer getScrutinizer() { + return new FormatScrutinizer(); + } + + @Test + public void testTrigger() { + scrutinize(Datamodel.makeStringValue("not a number")); + assertWarningsRaised(FormatScrutinizer.type); + } + + @Test + public void testNoIssue() { + scrutinize(Datamodel.makeStringValue("1234")); + assertNoWarningRaised(); + } + + @Test + public void testIncompleteMatch() { + scrutinize(Datamodel.makeStringValue("42 is a number")); + assertWarningsRaised(FormatScrutinizer.type); + } + +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstaintScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstaintScrutinizerTest.java new file mode 100644 index 000000000..bf26fbc25 --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/InverseConstaintScrutinizerTest.java @@ -0,0 +1,41 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.openrefine.wikidata.qa.MockConstraintFetcher; +import org.openrefine.wikidata.testing.TestingDataGenerator; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.openrefine.wikidata.updates.ItemUpdateBuilder; +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; + +public class InverseConstaintScrutinizerTest extends StatementScrutinizerTest { + + private ItemIdValue idA = TestingDataGenerator.existingId; + private ItemIdValue idB = TestingDataGenerator.newIdB; + private PropertyIdValue pidWithInverse = MockConstraintFetcher.pidWithInverse; + private PropertyIdValue inversePid = MockConstraintFetcher.inversePid; + + @Override + public EditScrutinizer getScrutinizer() { + return new InverseConstraintScrutinizer(); + } + + @Test + public void testTrigger() { + ItemUpdate update = new ItemUpdateBuilder(idA) + .addStatement(TestingDataGenerator.generateStatement(idA, pidWithInverse, idB)) + .build(); + scrutinize(update); + assertWarningsRaised(InverseConstraintScrutinizer.type); + } + + @Test + public void testNoSymmetricClosure() { + ItemUpdate update = new ItemUpdateBuilder(idA) + .addStatement(TestingDataGenerator.generateStatement(idA, inversePid, idB)) + .build(); 
+ scrutinize(update); + assertNoWarningRaised(); + } + +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizerTest.java new file mode 100644 index 000000000..a9a0cafd8 --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/NewItemScrutinizerTest.java @@ -0,0 +1,70 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import java.util.Collections; + +import org.openrefine.wikidata.testing.TestingDataGenerator; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.openrefine.wikidata.updates.ItemUpdateBuilder; +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.interfaces.Claim; +import org.wikidata.wdtk.datamodel.interfaces.Statement; +import org.wikidata.wdtk.datamodel.interfaces.StatementRank; + + +public class NewItemScrutinizerTest extends ScrutinizerTest { + + private Claim claim = Datamodel.makeClaim(TestingDataGenerator.newIdA, + Datamodel.makeValueSnak(Datamodel.makeWikidataPropertyIdValue("P31"), TestingDataGenerator.existingId), + Collections.emptyList()); + private Statement p31Statement = Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, ""); + + @Override + public EditScrutinizer getScrutinizer() { + return new NewItemScrutinizer(); + } + + @Test + public void testTrigger() { + ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.newIdA).build(); + scrutinize(update); + assertWarningsRaised( + NewItemScrutinizer.noDescType, + NewItemScrutinizer.noLabelType, + NewItemScrutinizer.noTypeType, + NewItemScrutinizer.newItemType); + } + + @Test + public void testEmptyItem() { + ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.existingId).build(); + scrutinize(update); + assertNoWarningRaised(); + } + + @Test + public void testGoodNewItem() { + + ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.newIdA) + .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr")) + .addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en")) + .addStatement(p31Statement) + .build(); + scrutinize(update); + assertWarningsRaised(NewItemScrutinizer.newItemType); + } + + @Test + public void testDeletedStatements() { + ItemUpdate update = new ItemUpdateBuilder(TestingDataGenerator.newIdA) + .addLabel(Datamodel.makeMonolingualTextValue("bonjour", "fr")) + .addDescription(Datamodel.makeMonolingualTextValue("interesting item", "en")) + .addStatement(p31Statement) + .deleteStatement(TestingDataGenerator.generateStatement(TestingDataGenerator.newIdA, + TestingDataGenerator.matchedId)) + .build(); + scrutinize(update); + assertWarningsRaised(NewItemScrutinizer.newItemType, NewItemScrutinizer.deletedStatementsType); + } + +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/NoEditsMadeScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/NoEditsMadeScrutinizerTest.java new file mode 100644 index 000000000..b2276e07a --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/NoEditsMadeScrutinizerTest.java @@ -0,0 +1,31 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import org.openrefine.wikidata.testing.TestingDataGenerator; +import org.openrefine.wikidata.updates.ItemUpdateBuilder; +import org.testng.annotations.Test; + +public class 
NoEditsMadeScrutinizerTest extends ScrutinizerTest { + + @Override + public EditScrutinizer getScrutinizer() { + return new NoEditsMadeScrutinizer(); + } + + @Test + public void testTrigger() { + scrutinize(); + assertWarningsRaised(NoEditsMadeScrutinizer.type); + } + + @Test + public void testNonNull() { + scrutinize(new ItemUpdateBuilder(TestingDataGenerator.newIdA).build()); + assertNoWarningRaised(); + } + + @Test + public void testNull() { + scrutinize(new ItemUpdateBuilder(TestingDataGenerator.existingId).build()); + assertWarningsRaised(NoEditsMadeScrutinizer.type); + } +} diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/QualifierCompatibilityScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/QualifierCompatibilityScrutinizerTest.java new file mode 100644 index 000000000..8c2fa7a09 --- /dev/null +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/QualifierCompatibilityScrutinizerTest.java @@ -0,0 +1,59 @@ +package org.openrefine.wikidata.qa.scrutinizers; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import org.openrefine.wikidata.qa.MockConstraintFetcher; +import org.openrefine.wikidata.testing.TestingDataGenerator; +import org.testng.annotations.Test; +import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.interfaces.Claim; +import org.wikidata.wdtk.datamodel.interfaces.Snak; +import org.wikidata.wdtk.datamodel.interfaces.SnakGroup; +import org.wikidata.wdtk.datamodel.interfaces.Statement; +import org.wikidata.wdtk.datamodel.interfaces.StatementRank; + +public class QualifierCompatibilityScrutinizerTest extends StatementScrutinizerTest { + private Snak disallowedQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.qualifierPid); + private Snak mandatoryQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.mandatoryQualifierPid); + private Snak allowedQualifier = Datamodel.makeNoValueSnak(MockConstraintFetcher.allowedQualifierPid); + + @Override + public EditScrutinizer getScrutinizer() { + return new QualifierCompatibilityScrutinizer(); + } + + @Test + public void testDisallowedQualifier() { + + scrutinize(makeStatement(disallowedQualifier,mandatoryQualifier)); + assertWarningsRaised(QualifierCompatibilityScrutinizer.disallowedQualifiersType); + } + + @Test + public void testMissingQualifier() { + scrutinize(makeStatement()); + assertWarningsRaised(QualifierCompatibilityScrutinizer.missingMandatoryQualifiersType); + } + + @Test + public void testGoodEdit() { + scrutinize(makeStatement(allowedQualifier,mandatoryQualifier)); + assertNoWarningRaised(); + } + + private Statement makeStatement(Snak... 
+        Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId,
+                Datamodel.makeNoValueSnak(MockConstraintFetcher.mainSnakPid), makeQualifiers(qualifiers));
+        return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
+    }
+    private List<SnakGroup> makeQualifiers(Snak[] qualifiers) {
+        List<Snak> snaks = Arrays.asList(qualifiers);
+        return snaks.stream()
+                .map((Snak q) -> Datamodel.makeSnakGroup(Collections.singletonList(q)))
+                .collect(Collectors.toList());
+    }
+
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/RestrictedPositionScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/RestrictedPositionScrutinizerTest.java
new file mode 100644
index 000000000..d660835c4
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/RestrictedPositionScrutinizerTest.java
@@ -0,0 +1,55 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.openrefine.wikidata.qa.MockConstraintFetcher;
+import org.openrefine.wikidata.testing.TestingDataGenerator;
+import org.testng.annotations.Test;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
+
+public class RestrictedPositionScrutinizerTest extends SnakScrutinizerTest {
+
+    private ItemIdValue qid = TestingDataGenerator.existingId;
+
+    @Override
+    public EditScrutinizer getScrutinizer() {
+        return new RestrictedPositionScrutinizer();
+    }
+
+    @Test
+    public void testTriggerMainSnak() {
+        scrutinize(TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.qualifierPid, qid));
+        assertWarningsRaised("property-restricted-to-qualifier-found-in-mainsnak");
+    }
+
+    @Test
+    public void testNoProblem() {
+        scrutinize(TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.mainSnakPid, qid));
+        assertNoWarningRaised();
+    }
+
+    @Test
+    public void testNotRestricted() {
+        scrutinize(TestingDataGenerator.generateStatement(qid, Datamodel.makeWikidataPropertyIdValue("P3748"), qid));
+        assertNoWarningRaised();
+    }
+
+    @Test
+    public void testTriggerReference() {
+        Snak snak = Datamodel.makeValueSnak(MockConstraintFetcher.mainSnakPid, qid);
+        List<SnakGroup> snakGroups = Collections.singletonList(Datamodel.makeSnakGroup(Collections.singletonList(snak)));
+        Statement statement = Datamodel.makeStatement(
+                TestingDataGenerator.generateStatement(qid, MockConstraintFetcher.mainSnakPid, qid).getClaim(),
+                Collections.singletonList(Datamodel.makeReference(snakGroups)),
+                StatementRank.NORMAL, "");
+        scrutinize(statement);
+        assertWarningsRaised("property-restricted-to-mainsnak-found-in-reference");
+    }
+
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ScrutinizerTest.java
new file mode 100644
index 000000000..fe0eba1e6
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ScrutinizerTest.java
@@ -0,0 +1,52 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.openrefine.wikidata.qa.ConstraintFetcher;
+import org.openrefine.wikidata.qa.MockConstraintFetcher;
+import org.openrefine.wikidata.qa.QAWarning;
+import org.openrefine.wikidata.qa.QAWarningStore;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.testng.annotations.BeforeMethod;
+
+public abstract class ScrutinizerTest {
+    public abstract EditScrutinizer getScrutinizer();
+
+    private EditScrutinizer scrutinizer;
+    private QAWarningStore store;
+    private ConstraintFetcher fetcher;
+
+    @BeforeMethod
+    public void setUp() {
+        store = new QAWarningStore();
+        fetcher = new MockConstraintFetcher();
+        scrutinizer = getScrutinizer();
+        scrutinizer.setStore(store);
+        scrutinizer.setFetcher(fetcher);
+    }
+
+    public void scrutinize(ItemUpdate... updates) {
+        scrutinizer.scrutinize(Arrays.asList(updates));
+    }
+
+    public void assertWarningsRaised(String... types) {
+        assertEquals(Arrays.asList(types).stream().collect(Collectors.toSet()), getWarningTypes());
+    }
+
+    public void assertWarningRaised(QAWarning warning) {
+        assertTrue(store.getWarnings().contains(warning));
+    }
+
+    public void assertNoWarningRaised() {
+        assertWarningsRaised();
+    }
+
+    public Set<String> getWarningTypes() {
+        return store.getWarnings().stream().map(w -> w.getType()).collect(Collectors.toSet());
+    }
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizerTest.java
new file mode 100644
index 000000000..a8cc2b812
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/SelfReferentialScrutinizerTest.java
@@ -0,0 +1,27 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.testing.TestingDataGenerator;
+import org.testng.annotations.Test;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+
+public class SelfReferentialScrutinizerTest extends StatementScrutinizerTest {
+
+    @Override
+    public EditScrutinizer getScrutinizer() {
+        return new SelfReferentialScrutinizer();
+    }
+
+    @Test
+    public void testTrigger() {
+        ItemIdValue id = TestingDataGenerator.matchedId;
+        scrutinize(TestingDataGenerator.generateStatement(id, id));
+        assertWarningsRaised(SelfReferentialScrutinizer.type);
+    }
+
+    @Test
+    public void testNoProblem() {
+        ItemIdValue id = TestingDataGenerator.matchedId;
+        scrutinize(TestingDataGenerator.generateStatement(id, TestingDataGenerator.existingId));
+        assertNoWarningRaised();
+    }
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/SingleValueScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/SingleValueScrutinizerTest.java
new file mode 100644
index 000000000..9d724a079
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/SingleValueScrutinizerTest.java
@@ -0,0 +1,41 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.testing.TestingDataGenerator;
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.testng.annotations.Test;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+
+public class SingleValueScrutinizerTest extends ScrutinizerTest {
+
+    @Override
+    public EditScrutinizer getScrutinizer() {
+        return new SingleValueScrutinizer();
+    }
+
+    @Test
+    public void testTrigger() {
+        ItemIdValue idA = TestingDataGenerator.existingId;
+        ItemIdValue idB = TestingDataGenerator.matchedId;
+        ItemUpdate update = new ItemUpdateBuilder(idA)
+                .addStatement(TestingDataGenerator.generateStatement(idA, idB))
+                .addStatement(TestingDataGenerator.generateStatement(idA, idB))
+                .build();
+        scrutinize(update);
+        assertWarningsRaised(SingleValueScrutinizer.type);
+    }
+
+    @Test
+    public void testNoIssue() {
+        ItemIdValue idA = TestingDataGenerator.existingId;
+        ItemIdValue idB = TestingDataGenerator.matchedId;
+        ItemUpdate updateA = new ItemUpdateBuilder(idA)
+                .addStatement(TestingDataGenerator.generateStatement(idA, idB))
+                .build();
+        ItemUpdate updateB = new ItemUpdateBuilder(idB)
+                .addStatement(TestingDataGenerator.generateStatement(idB, idB))
+                .build();
+        scrutinize(updateA, updateB);
+        assertNoWarningRaised();
+    }
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizerTest.java
new file mode 100644
index 000000000..7b4e5c6ab
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/SnakScrutinizerTest.java
@@ -0,0 +1,43 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.openrefine.wikidata.testing.TestingDataGenerator;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.Claim;
+import org.wikidata.wdtk.datamodel.interfaces.Snak;
+import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+import org.wikidata.wdtk.datamodel.interfaces.StatementRank;
+
+public abstract class SnakScrutinizerTest extends StatementScrutinizerTest {
+
+    public static Snak defaultMainSnak = Datamodel.makeNoValueSnak(Datamodel.makeWikidataPropertyIdValue("P3928"));
+
+    public void scrutinize(Snak snak) {
+        Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId, snak,
+                Collections.emptyList());
+        Statement statement = Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
+        scrutinize(statement);
+    }
+
+    public void scrutinizeAsQualifier(Snak snak) {
+        Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId, defaultMainSnak,
+                toSnakGroups(snak));
+        Statement statement = Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
+        scrutinize(statement);
+    }
+
+    public void scrutinizeAsReference(Snak snak) {
+        Claim claim = Datamodel.makeClaim(TestingDataGenerator.existingId, defaultMainSnak,
+                Collections.emptyList());
+        Statement statement = Datamodel.makeStatement(claim,
+                Collections.singletonList(Datamodel.makeReference(toSnakGroups(snak))), StatementRank.NORMAL, "");
+        scrutinize(statement);
+    }
+
+    private List<SnakGroup> toSnakGroups(Snak snak) {
+        return Collections.singletonList(Datamodel.makeSnakGroup(Collections.singletonList(snak)));
+    }
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/StatementScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/StatementScrutinizerTest.java
new file mode 100644
index 000000000..de9f81bb4
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/StatementScrutinizerTest.java
@@ -0,0 +1,16 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.updates.ItemUpdate;
+import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Statement;
+
+public abstract class StatementScrutinizerTest extends ScrutinizerTest {
+
+    public void scrutinize(Statement statement) {
+        ItemUpdate update = new ItemUpdateBuilder((ItemIdValue)statement.getClaim().getSubject())
+                .addStatement(statement).build();
+        scrutinize(update);
+    }
+
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/UnsourcedScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/UnsourcedScrutinizerTest.java
new file mode 100644
index 000000000..9cfd0abcc
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/UnsourcedScrutinizerTest.java
@@ -0,0 +1,20 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.testing.TestingDataGenerator;
+import org.testng.annotations.Test;
+
+public class UnsourcedScrutinizerTest extends StatementScrutinizerTest {
+
+    @Override
+    public EditScrutinizer getScrutinizer() {
+        return new UnsourcedScrutinizer();
+    }
+
+    @Test
+    public void testTrigger() {
+        scrutinize(TestingDataGenerator.generateStatement(TestingDataGenerator.existingId,
+                TestingDataGenerator.matchedId));
+        assertWarningsRaised(UnsourcedScrutinizer.type);
+    }
+
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java
new file mode 100644
index 000000000..3c96a924c
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/ValueScrutinizerTest.java
@@ -0,0 +1,21 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.openrefine.wikidata.testing.TestingDataGenerator;
+import org.openrefine.wikidata.updates.ItemUpdateBuilder;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
+import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
+import org.wikidata.wdtk.datamodel.interfaces.Value;
+
+public abstract class ValueScrutinizerTest extends SnakScrutinizerTest {
+
+    public static final PropertyIdValue defaultPid = Datamodel.makeWikidataPropertyIdValue("P328");
+
+    public void scrutinize(Value value) {
+        scrutinize(Datamodel.makeValueSnak(defaultPid, value));
+    }
+
+    public void scrutinizeLabel(MonolingualTextValue text) {
+        scrutinize(new ItemUpdateBuilder(TestingDataGenerator.existingId).addLabel(text).build());
+    }
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java
new file mode 100644
index 000000000..1e5ddd455
--- /dev/null
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/WhitespaceScrutinizerTest.java
@@ -0,0 +1,57 @@
+package org.openrefine.wikidata.qa.scrutinizers;
+
+import org.testng.annotations.Test;
+import org.wikidata.wdtk.datamodel.helpers.Datamodel;
+
+public class WhitespaceScrutinizerTest extends ValueScrutinizerTest {
+
+    @Override
+    public EditScrutinizer getScrutinizer() {
+        return new WhitespaceScrutinizer();
+    }
+
+    @Test
+    public void testLeadingWhitespace() {
+        scrutinize(Datamodel.makeStringValue(" a"));
+        assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
+    }
+
+    @Test
+    public void testTrailingWhitespace() {
+        scrutinize(Datamodel.makeStringValue("a\t"));
+        assertWarningsRaised(WhitespaceScrutinizer.trailingWhitespaceType);
+    }
+
+    @Test
+    public void testDuplicateWhitespace() {
+        scrutinize(Datamodel.makeStringValue("a\t b"));
+        assertWarningsRaised(WhitespaceScrutinizer.duplicateWhitespaceType);
+    }
+
+    @Test
+    public void testNonPrintableChars() {
+        scrutinize(Datamodel.makeStringValue("c\u0003"));
+        assertWarningsRaised(WhitespaceScrutinizer.nonPrintableCharsType);
+    }
+
+    @Test
+    public void testNoIssue() {
+        scrutinize(Datamodel.makeStringValue("a b"));
+        assertNoWarningRaised();
+    }
+
+    @Test
+    public void testMultipleIssues() {
+        scrutinize(Datamodel.makeStringValue(" a\t b "));
+        assertWarningsRaised(
+                WhitespaceScrutinizer.duplicateWhitespaceType,
+                WhitespaceScrutinizer.leadingWhitespaceType,
+                WhitespaceScrutinizer.trailingWhitespaceType);
+    }
+
+    @Test
+    public void testMonolingualTextValue() {
+        scrutinizeLabel(Datamodel.makeMonolingualTextValue(" a", "fr"));
+        assertWarningsRaised(WhitespaceScrutinizer.leadingWhitespaceType);
+    }
+}
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/testing/TestingDataGenerator.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/testing/TestingDataGenerator.java
index 0d8626890..193e5df95 100644
--- a/extensions/wikidata/tests/src/org/openrefine/wikidata/testing/TestingDataGenerator.java
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/testing/TestingDataGenerator.java
@@ -68,10 +68,19 @@ public class TestingDataGenerator {
         return new WbMonolingualExpr(new WbLanguageConstant(langCode, langLabel), new WbStringConstant(text));
     }
 
-    public static Statement generateStatement(ItemIdValue from, ItemIdValue to) {
+    public static Statement generateStatement(ItemIdValue from, PropertyIdValue pid, ItemIdValue to) {
         Claim claim = Datamodel.makeClaim(from, Datamodel.makeValueSnak(pid, to), Collections.emptyList());
         return Datamodel.makeStatement(claim, Collections.emptyList(), StatementRank.NORMAL, "");
     }
+
+    public static Statement generateStatement(ItemIdValue from, ItemIdValue to) {
+        return generateStatement(from, pid, to);
+    }
+
+    public static ItemIdValue newIdA = makeNewItemIdValue(1234L, "new item A");
+    public static ItemIdValue newIdB = makeNewItemIdValue(4567L, "new item B");
+    public static ItemIdValue matchedId = makeMatchedItemIdValue("Q89","eist");
+    public static ItemIdValue existingId = Datamodel.makeWikidataItemIdValue("Q43");
+
 }
diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/scheduler/PointerExtractorTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/scheduler/PointerExtractorTest.java
index 441c70d7c..61b4b1f49 100644
--- a/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/scheduler/PointerExtractorTest.java
+++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/updates/scheduler/PointerExtractorTest.java
@@ -12,7 +12,6 @@ import org.testng.annotations.Test;
 import org.wikidata.wdtk.datamodel.helpers.Datamodel;
 import org.wikidata.wdtk.datamodel.interfaces.Claim;
 import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
-import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
 import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
 import org.wikidata.wdtk.datamodel.interfaces.Reference;
 import org.wikidata.wdtk.datamodel.interfaces.Snak;
@@ -23,28 +22,23 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
 
 public class PointerExtractorTest {
 
-    private ItemIdValue existingId = Datamodel.makeWikidataItemIdValue("Q43");
-    private ItemIdValue matchedId = TestingDataGenerator.makeMatchedItemIdValue("Q89","eist");
-    private ItemIdValue newIdA = TestingDataGenerator.makeNewItemIdValue(1234L, "new item A");
-    private ItemIdValue newIdB = TestingDataGenerator.makeNewItemIdValue(4567L, "new item B");
-    private PropertyIdValue pid = Datamodel.makeWikidataPropertyIdValue("P89");
-    private Snak snakWithNew = Datamodel.makeValueSnak(pid, newIdA);
-    private Snak snakWithoutNew = Datamodel.makeValueSnak(pid, matchedId);
+    private Snak snakWithNew = Datamodel.makeValueSnak(pid, TestingDataGenerator.newIdA);
+    private Snak snakWithoutNew = Datamodel.makeValueSnak(pid, TestingDataGenerator.matchedId);
     private SnakGroup snakGroupWithNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithNew));
     private SnakGroup snakGroupWithoutNew = Datamodel.makeSnakGroup(Collections.singletonList(snakWithoutNew));
-    private Claim claimWithNew = Datamodel.makeClaim(existingId, snakWithNew, Collections.emptyList());
-    private Claim claimNewSubject = Datamodel.makeClaim(newIdB, snakWithoutNew, Collections.emptyList());
-    private Claim claimNewQualifier = Datamodel.makeClaim(matchedId, snakWithoutNew,
+    private Claim claimWithNew = Datamodel.makeClaim(TestingDataGenerator.existingId, snakWithNew, Collections.emptyList());
+    private Claim claimNewSubject = Datamodel.makeClaim(TestingDataGenerator.newIdB, snakWithoutNew, Collections.emptyList());
+    private Claim claimNewQualifier = Datamodel.makeClaim(TestingDataGenerator.matchedId, snakWithoutNew,
             Collections.singletonList(snakGroupWithNew));
 
     private static PointerExtractor e = new PointerExtractor();
 
     @Test
     public void testExtractEntityId() {
-        assertEquals(Collections.singleton(newIdA), e.extractPointers(newIdA));
-        assertEmpty(e.extractPointers(existingId));
-        assertEmpty(e.extractPointers(matchedId));
+        assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(TestingDataGenerator.newIdA));
+        assertEmpty(e.extractPointers(TestingDataGenerator.existingId));
+        assertEmpty(e.extractPointers(TestingDataGenerator.matchedId));
     }
 
     @Test
@@ -62,26 +56,26 @@ public class PointerExtractorTest {
     @Test
     public void testSnak() {
         assertEmpty(e.extractPointers(snakWithoutNew));
-        assertEquals(Collections.singleton(newIdA), e.extractPointers(snakWithNew));
+        assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(snakWithNew));
         assertEmpty(e.extractPointers(Datamodel.makeNoValueSnak(pid)));
     }
 
     @Test
     public void testSnakGroup() {
         assertEmpty(e.extractPointers(snakGroupWithoutNew));
-        assertEquals(Collections.singleton(newIdA), e.extractPointers(snakGroupWithNew));
+        assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(snakGroupWithNew));
     }
 
     @Test
     public void testStatement() {
         assertEmpty(e.extractPointers(Datamodel.makeStatement(claimNewSubject, Collections.emptyList(), StatementRank.NORMAL, "")));
-        assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimWithNew,
+        assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimWithNew,
                 Collections.emptyList(), StatementRank.NORMAL, "")));
-        assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimNewQualifier,
+        assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimNewQualifier,
                 Collections.emptyList(), StatementRank.NORMAL, "")));
         Reference reference = Datamodel.makeReference(Collections.singletonList(snakGroupWithNew));
-        assertEquals(Collections.singleton(newIdA), e.extractPointers(Datamodel.makeStatement(claimNewSubject,
+        assertEquals(Collections.singleton(TestingDataGenerator.newIdA), e.extractPointers(Datamodel.makeStatement(claimNewSubject,
                 Collections.singletonList(reference), StatementRank.NORMAL, "")));
     }