Implemented Format Scrutinizer tests using Mockito (#2849)
* Implemented Format Scrutinizer tests using Mockito. Updated implementation of the scrutinizer & tests. * Test cases updated in FormatScrutinizerTest
This commit is contained in:
parent
9dfb9114c4
commit
cd0ed11dad
@ -27,10 +27,16 @@ import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
@ -43,9 +49,24 @@ import java.util.regex.Pattern;
|
||||
public class FormatScrutinizer extends SnakScrutinizer {
|
||||
|
||||
public static final String type = "add-statements-with-invalid-format";
|
||||
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
|
||||
public static String FORMAT_REGEX_PID = "P1793";
|
||||
|
||||
private Map<PropertyIdValue, Pattern> _patterns;
|
||||
private Map<PropertyIdValue, Set<Pattern>> _patterns;
|
||||
|
||||
class FormatConstraint {
|
||||
String regularExpressionFormat = null;
|
||||
|
||||
FormatConstraint(Statement statement) {
|
||||
List<SnakGroup> constraint = statement.getClaim().getQualifiers();
|
||||
if (constraint != null) {
|
||||
List<Value> regexes = _fetcher.findValues(constraint, FORMAT_REGEX_PID);
|
||||
if (!regexes.isEmpty()) {
|
||||
regularExpressionFormat = ((StringValue) regexes.get(0)).getString();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
public FormatScrutinizer() {
|
||||
_patterns = new HashMap<>();
|
||||
}
|
||||
@ -58,17 +79,23 @@ public class FormatScrutinizer extends SnakScrutinizer {
|
||||
* the id of the property to fetch the constraints for
|
||||
* @return
|
||||
*/
|
||||
protected Pattern getPattern(PropertyIdValue pid) {
|
||||
protected Set<Pattern> getPattern(PropertyIdValue pid) {
|
||||
if (_patterns.containsKey(pid)) {
|
||||
return _patterns.get(pid);
|
||||
} else {
|
||||
String regex = _fetcher.getFormatRegex(pid);
|
||||
Pattern pattern = null;
|
||||
if (regex != null) {
|
||||
pattern = Pattern.compile(regex);
|
||||
List<Statement> statementList = _fetcher.getConstraintsByType(pid, FORMAT_CONSTRAINT_QID);
|
||||
Set<Pattern> patterns = new HashSet<>();
|
||||
for (Statement statement: statementList) {
|
||||
FormatConstraint constraint = new FormatConstraint(statement);
|
||||
String regex = constraint.regularExpressionFormat;
|
||||
Pattern pattern = null;
|
||||
if (regex != null) {
|
||||
pattern = Pattern.compile(regex);
|
||||
}
|
||||
patterns.add(pattern);
|
||||
}
|
||||
_patterns.put(pid, pattern);
|
||||
return pattern;
|
||||
_patterns.put(pid, patterns);
|
||||
return patterns;
|
||||
}
|
||||
}
|
||||
|
||||
@ -77,20 +104,19 @@ public class FormatScrutinizer extends SnakScrutinizer {
|
||||
if (snak.getValue() instanceof StringValue) {
|
||||
String value = ((StringValue) snak.getValue()).getString();
|
||||
PropertyIdValue pid = snak.getPropertyId();
|
||||
Pattern pattern = getPattern(pid);
|
||||
if (pattern == null) {
|
||||
return;
|
||||
}
|
||||
if (!pattern.matcher(value).matches()) {
|
||||
if (added) {
|
||||
QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
|
||||
issue.setProperty("property_entity", pid);
|
||||
issue.setProperty("regex", pattern.toString());
|
||||
issue.setProperty("example_value", value);
|
||||
issue.setProperty("example_item_entity", entityId);
|
||||
addIssue(issue);
|
||||
} else {
|
||||
info("remove-statements-with-invalid-format");
|
||||
Set<Pattern> patterns = getPattern(pid);
|
||||
for (Pattern pattern : patterns) {
|
||||
if (!pattern.matcher(value).matches()) {
|
||||
if (added) {
|
||||
QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
|
||||
issue.setProperty("property_entity", pid);
|
||||
issue.setProperty("regex", pattern.toString());
|
||||
issue.setProperty("example_value", value);
|
||||
issue.setProperty("example_item_entity", entityId);
|
||||
addIssue(issue);
|
||||
} else {
|
||||
info("remove-statements-with-invalid-format");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -23,31 +23,105 @@
|
||||
******************************************************************************/
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||
import org.openrefine.wikidata.testing.TestingData;
|
||||
import org.openrefine.wikidata.updates.ItemUpdate;
|
||||
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.implementation.StatementImpl;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ValueSnak;
|
||||
|
||||
public class FormatScrutinizerTest extends ValueScrutinizerTest {
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
public class FormatScrutinizerTest extends ScrutinizerTest {
|
||||
|
||||
public static PropertyIdValue propertyIdValue = Datamodel.makeWikidataPropertyIdValue("P18");
|
||||
public static Value completeMatchValue = Datamodel.makeStringValue("image.png");
|
||||
public static Value noMatchValue = Datamodel.makeStringValue("image");
|
||||
public static Value incompleteMatchValue = Datamodel.makeStringValue(".jpg");
|
||||
public static String regularExpression = "(?i).+\\.(jpg|jpeg|jpe|png|svg|tif|tiff|gif|xcf|pdf|djvu|webp)";
|
||||
|
||||
public static ItemIdValue entityIdValue = Datamodel.makeWikidataItemIdValue("Q21502404");
|
||||
public static PropertyIdValue regularExpressionParameter = Datamodel.makeWikidataPropertyIdValue("P1793");
|
||||
public static Value regularExpressionFormat = Datamodel.makeStringValue(regularExpression);
|
||||
|
||||
@Override
|
||||
public EditScrutinizer getScrutinizer() {
|
||||
return new FormatScrutinizer();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTrigger() {
|
||||
scrutinize(Datamodel.makeStringValue("not a number"));
|
||||
ItemIdValue idA = TestingData.existingId;
|
||||
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, noMatchValue);
|
||||
Statement statement = new StatementImpl("P18", value, idA);
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||
|
||||
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
||||
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||
List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
|
||||
List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);
|
||||
|
||||
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||
when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList);
|
||||
when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat));
|
||||
setFetcher(fetcher);
|
||||
scrutinize(updateA);
|
||||
assertWarningsRaised(FormatScrutinizer.type);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoIssue() {
|
||||
scrutinize(Datamodel.makeStringValue("1234"));
|
||||
ItemIdValue idA = TestingData.existingId;
|
||||
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, completeMatchValue);
|
||||
Statement statement = new StatementImpl("P18", value, idA);
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||
|
||||
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
||||
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||
List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
|
||||
List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);
|
||||
|
||||
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||
when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList);
|
||||
when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat));
|
||||
setFetcher(fetcher);
|
||||
scrutinize(updateA);
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIncompleteMatch() {
|
||||
scrutinize(Datamodel.makeStringValue("42 is a number"));
|
||||
ItemIdValue idA = TestingData.existingId;
|
||||
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, incompleteMatchValue);
|
||||
Statement statement = new StatementImpl("P18", value, idA);
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||
|
||||
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
||||
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||
List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
|
||||
List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);
|
||||
|
||||
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||
when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList);
|
||||
when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat));
|
||||
setFetcher(fetcher);
|
||||
scrutinize(updateA);
|
||||
assertWarningsRaised(FormatScrutinizer.type);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user