Implemented Format Scrutinizer tests using Mockito (#2849)
* Implemented Format Scrutinizer tests using Mockito. Updated implementation of the scrutinizer & tests. * Test cases updated in FormatScrutinizerTest.
This commit is contained in:
parent
9dfb9114c4
commit
cd0ed11dad
@ -27,10 +27,16 @@ import org.openrefine.wikidata.qa.QAWarning;
|
|||||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -43,9 +49,24 @@ import java.util.regex.Pattern;
|
|||||||
public class FormatScrutinizer extends SnakScrutinizer {
|
public class FormatScrutinizer extends SnakScrutinizer {
|
||||||
|
|
||||||
public static final String type = "add-statements-with-invalid-format";
|
public static final String type = "add-statements-with-invalid-format";
|
||||||
|
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
|
||||||
|
public static String FORMAT_REGEX_PID = "P1793";
|
||||||
|
|
||||||
private Map<PropertyIdValue, Pattern> _patterns;
|
private Map<PropertyIdValue, Set<Pattern>> _patterns;
|
||||||
|
|
||||||
|
class FormatConstraint {
|
||||||
|
String regularExpressionFormat = null;
|
||||||
|
|
||||||
|
FormatConstraint(Statement statement) {
|
||||||
|
List<SnakGroup> constraint = statement.getClaim().getQualifiers();
|
||||||
|
if (constraint != null) {
|
||||||
|
List<Value> regexes = _fetcher.findValues(constraint, FORMAT_REGEX_PID);
|
||||||
|
if (!regexes.isEmpty()) {
|
||||||
|
regularExpressionFormat = ((StringValue) regexes.get(0)).getString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
public FormatScrutinizer() {
|
public FormatScrutinizer() {
|
||||||
_patterns = new HashMap<>();
|
_patterns = new HashMap<>();
|
||||||
}
|
}
|
||||||
@ -58,17 +79,23 @@ public class FormatScrutinizer extends SnakScrutinizer {
|
|||||||
* the id of the property to fetch the constraints for
|
* the id of the property to fetch the constraints for
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
protected Pattern getPattern(PropertyIdValue pid) {
|
protected Set<Pattern> getPattern(PropertyIdValue pid) {
|
||||||
if (_patterns.containsKey(pid)) {
|
if (_patterns.containsKey(pid)) {
|
||||||
return _patterns.get(pid);
|
return _patterns.get(pid);
|
||||||
} else {
|
} else {
|
||||||
String regex = _fetcher.getFormatRegex(pid);
|
List<Statement> statementList = _fetcher.getConstraintsByType(pid, FORMAT_CONSTRAINT_QID);
|
||||||
Pattern pattern = null;
|
Set<Pattern> patterns = new HashSet<>();
|
||||||
if (regex != null) {
|
for (Statement statement: statementList) {
|
||||||
pattern = Pattern.compile(regex);
|
FormatConstraint constraint = new FormatConstraint(statement);
|
||||||
|
String regex = constraint.regularExpressionFormat;
|
||||||
|
Pattern pattern = null;
|
||||||
|
if (regex != null) {
|
||||||
|
pattern = Pattern.compile(regex);
|
||||||
|
}
|
||||||
|
patterns.add(pattern);
|
||||||
}
|
}
|
||||||
_patterns.put(pid, pattern);
|
_patterns.put(pid, patterns);
|
||||||
return pattern;
|
return patterns;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -77,20 +104,19 @@ public class FormatScrutinizer extends SnakScrutinizer {
|
|||||||
if (snak.getValue() instanceof StringValue) {
|
if (snak.getValue() instanceof StringValue) {
|
||||||
String value = ((StringValue) snak.getValue()).getString();
|
String value = ((StringValue) snak.getValue()).getString();
|
||||||
PropertyIdValue pid = snak.getPropertyId();
|
PropertyIdValue pid = snak.getPropertyId();
|
||||||
Pattern pattern = getPattern(pid);
|
Set<Pattern> patterns = getPattern(pid);
|
||||||
if (pattern == null) {
|
for (Pattern pattern : patterns) {
|
||||||
return;
|
if (!pattern.matcher(value).matches()) {
|
||||||
}
|
if (added) {
|
||||||
if (!pattern.matcher(value).matches()) {
|
QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
|
||||||
if (added) {
|
issue.setProperty("property_entity", pid);
|
||||||
QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
|
issue.setProperty("regex", pattern.toString());
|
||||||
issue.setProperty("property_entity", pid);
|
issue.setProperty("example_value", value);
|
||||||
issue.setProperty("regex", pattern.toString());
|
issue.setProperty("example_item_entity", entityId);
|
||||||
issue.setProperty("example_value", value);
|
addIssue(issue);
|
||||||
issue.setProperty("example_item_entity", entityId);
|
} else {
|
||||||
addIssue(issue);
|
info("remove-statements-with-invalid-format");
|
||||||
} else {
|
}
|
||||||
info("remove-statements-with-invalid-format");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,31 +23,105 @@
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
package org.openrefine.wikidata.qa.scrutinizers;
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||||
|
import org.openrefine.wikidata.testing.TestingData;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||||
|
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.implementation.StatementImpl;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ValueSnak;
|
||||||
|
|
||||||
public class FormatScrutinizerTest extends ValueScrutinizerTest {
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
public class FormatScrutinizerTest extends ScrutinizerTest {
|
||||||
|
|
||||||
|
public static PropertyIdValue propertyIdValue = Datamodel.makeWikidataPropertyIdValue("P18");
|
||||||
|
public static Value completeMatchValue = Datamodel.makeStringValue("image.png");
|
||||||
|
public static Value noMatchValue = Datamodel.makeStringValue("image");
|
||||||
|
public static Value incompleteMatchValue = Datamodel.makeStringValue(".jpg");
|
||||||
|
public static String regularExpression = "(?i).+\\.(jpg|jpeg|jpe|png|svg|tif|tiff|gif|xcf|pdf|djvu|webp)";
|
||||||
|
|
||||||
|
public static ItemIdValue entityIdValue = Datamodel.makeWikidataItemIdValue("Q21502404");
|
||||||
|
public static PropertyIdValue regularExpressionParameter = Datamodel.makeWikidataPropertyIdValue("P1793");
|
||||||
|
public static Value regularExpressionFormat = Datamodel.makeStringValue(regularExpression);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public EditScrutinizer getScrutinizer() {
|
public EditScrutinizer getScrutinizer() {
|
||||||
return new FormatScrutinizer();
|
return new FormatScrutinizer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTrigger() {
|
public void testTrigger() {
|
||||||
scrutinize(Datamodel.makeStringValue("not a number"));
|
ItemIdValue idA = TestingData.existingId;
|
||||||
|
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, noMatchValue);
|
||||||
|
Statement statement = new StatementImpl("P18", value, idA);
|
||||||
|
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||||
|
|
||||||
|
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
||||||
|
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||||
|
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||||
|
List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
|
||||||
|
List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);
|
||||||
|
|
||||||
|
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||||
|
when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList);
|
||||||
|
when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat));
|
||||||
|
setFetcher(fetcher);
|
||||||
|
scrutinize(updateA);
|
||||||
assertWarningsRaised(FormatScrutinizer.type);
|
assertWarningsRaised(FormatScrutinizer.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNoIssue() {
|
public void testNoIssue() {
|
||||||
scrutinize(Datamodel.makeStringValue("1234"));
|
ItemIdValue idA = TestingData.existingId;
|
||||||
|
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, completeMatchValue);
|
||||||
|
Statement statement = new StatementImpl("P18", value, idA);
|
||||||
|
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||||
|
|
||||||
|
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
||||||
|
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||||
|
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||||
|
List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
|
||||||
|
List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);
|
||||||
|
|
||||||
|
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||||
|
when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList);
|
||||||
|
when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat));
|
||||||
|
setFetcher(fetcher);
|
||||||
|
scrutinize(updateA);
|
||||||
assertNoWarningRaised();
|
assertNoWarningRaised();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIncompleteMatch() {
|
public void testIncompleteMatch() {
|
||||||
scrutinize(Datamodel.makeStringValue("42 is a number"));
|
ItemIdValue idA = TestingData.existingId;
|
||||||
|
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, incompleteMatchValue);
|
||||||
|
Statement statement = new StatementImpl("P18", value, idA);
|
||||||
|
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||||
|
|
||||||
|
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
||||||
|
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||||
|
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||||
|
List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
|
||||||
|
List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);
|
||||||
|
|
||||||
|
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||||
|
when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList);
|
||||||
|
when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat));
|
||||||
|
setFetcher(fetcher);
|
||||||
|
scrutinize(updateA);
|
||||||
assertWarningsRaised(FormatScrutinizer.type);
|
assertWarningsRaised(FormatScrutinizer.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user