Implemented Format Scrutinizer tests using Mockito (#2849)

* Implemented Format Scrutinizer tests using Mockito

Updated implementation of the scrutinizer & tests

* Testcases updated in FormatScrutinizerTest
This commit is contained in:
Ekta Mishra 2020-07-02 19:58:56 +05:30 committed by GitHub
parent 9dfb9114c4
commit cd0ed11dad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 126 additions and 26 deletions

View File

@ -27,10 +27,16 @@ import org.openrefine.wikidata.qa.QAWarning;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
/**
@ -43,9 +49,24 @@ import java.util.regex.Pattern;
public class FormatScrutinizer extends SnakScrutinizer {
// Warning type reported for added statements whose string value fails a format pattern.
public static final String type = "add-statements-with-invalid-format";
// Identifier handed to the fetcher to select the format constraint statements of a property.
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
// Identifier of the constraint qualifier whose string value holds the regular expression.
public static String FORMAT_REGEX_PID = "P1793";
// Cache of compiled patterns keyed by property id; populated by getPattern.
private Map<PropertyIdValue, Pattern> _patterns;
private Map<PropertyIdValue, Set<Pattern>> _patterns;
/**
 * Reads the regular expression out of a single format constraint statement.
 * The field stays {@code null} when the statement carries no qualifiers or
 * when no value is found for the regex qualifier.
 */
class FormatConstraint {
    String regularExpressionFormat = null;

    FormatConstraint(Statement statement) {
        List<SnakGroup> qualifiers = statement.getClaim().getQualifiers();
        if (qualifiers == null) {
            return;
        }
        List<Value> regexValues = _fetcher.findValues(qualifiers, FORMAT_REGEX_PID);
        if (regexValues.isEmpty()) {
            return;
        }
        // Only the first value is used; any further values are ignored.
        StringValue firstRegex = (StringValue) regexValues.get(0);
        regularExpressionFormat = firstRegex.getString();
    }
}
/**
 * Creates a scrutinizer whose pattern cache starts out empty.
 */
public FormatScrutinizer() {
    this._patterns = new HashMap<>();
}
@ -58,17 +79,23 @@ public class FormatScrutinizer extends SnakScrutinizer {
* the id of the property to fetch the constraints for
* @return
*/
protected Pattern getPattern(PropertyIdValue pid) {
protected Set<Pattern> getPattern(PropertyIdValue pid) {
if (_patterns.containsKey(pid)) {
return _patterns.get(pid);
} else {
String regex = _fetcher.getFormatRegex(pid);
Pattern pattern = null;
if (regex != null) {
pattern = Pattern.compile(regex);
List<Statement> statementList = _fetcher.getConstraintsByType(pid, FORMAT_CONSTRAINT_QID);
Set<Pattern> patterns = new HashSet<>();
for (Statement statement: statementList) {
FormatConstraint constraint = new FormatConstraint(statement);
String regex = constraint.regularExpressionFormat;
Pattern pattern = null;
if (regex != null) {
pattern = Pattern.compile(regex);
}
patterns.add(pattern);
}
_patterns.put(pid, pattern);
return pattern;
_patterns.put(pid, patterns);
return patterns;
}
}
@ -77,20 +104,19 @@ public class FormatScrutinizer extends SnakScrutinizer {
if (snak.getValue() instanceof StringValue) {
String value = ((StringValue) snak.getValue()).getString();
PropertyIdValue pid = snak.getPropertyId();
Pattern pattern = getPattern(pid);
if (pattern == null) {
return;
}
if (!pattern.matcher(value).matches()) {
if (added) {
QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
issue.setProperty("property_entity", pid);
issue.setProperty("regex", pattern.toString());
issue.setProperty("example_value", value);
issue.setProperty("example_item_entity", entityId);
addIssue(issue);
} else {
info("remove-statements-with-invalid-format");
Set<Pattern> patterns = getPattern(pid);
for (Pattern pattern : patterns) {
if (!pattern.matcher(value).matches()) {
if (added) {
QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
issue.setProperty("property_entity", pid);
issue.setProperty("regex", pattern.toString());
issue.setProperty("example_value", value);
issue.setProperty("example_item_entity", entityId);
addIssue(issue);
} else {
info("remove-statements-with-invalid-format");
}
}
}
}

View File

@ -23,31 +23,105 @@
******************************************************************************/
package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.openrefine.wikidata.testing.TestingData;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.implementation.StatementImpl;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import org.wikidata.wdtk.datamodel.interfaces.ValueSnak;
public class FormatScrutinizerTest extends ValueScrutinizerTest {
import java.util.Collections;
import java.util.List;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
public class FormatScrutinizerTest extends ScrutinizerTest {
// Property used by the statements under test.
public static PropertyIdValue propertyIdValue = Datamodel.makeWikidataPropertyIdValue("P18");
// Matches the regular expression below in full: a name followed by an allowed extension.
public static Value completeMatchValue = Datamodel.makeStringValue("image.png");
// Has no extension at all.
public static Value noMatchValue = Datamodel.makeStringValue("image");
// Has an allowed extension but nothing before the dot, which the leading ".+" requires.
public static Value incompleteMatchValue = Datamodel.makeStringValue(".jpg");
// Case-insensitive pattern: at least one character, a dot, then one of the listed extensions.
public static String regularExpression = "(?i).+\\.(jpg|jpeg|jpe|png|svg|tif|tiff|gif|xcf|pdf|djvu|webp)";
// Same id as FormatScrutinizer.FORMAT_CONSTRAINT_QID.
public static ItemIdValue entityIdValue = Datamodel.makeWikidataItemIdValue("Q21502404");
// Same id as FormatScrutinizer.FORMAT_REGEX_PID.
public static PropertyIdValue regularExpressionParameter = Datamodel.makeWikidataPropertyIdValue("P1793");
// The regular expression wrapped as a value, used as the qualifier value and as the mocked findValues result.
public static Value regularExpressionFormat = Datamodel.makeStringValue(regularExpression);
/**
 * Supplies the scrutinizer under test: a new FormatScrutinizer.
 */
@Override
public EditScrutinizer getScrutinizer() {
    EditScrutinizer scrutinizer = new FormatScrutinizer();
    return scrutinizer;
}
/**
 * A value without any file extension does not match the format regex,
 * so a format warning is expected.
 */
@Test
public void testTrigger() {
    // Statement under test: P18 with a non-matching string value.
    ItemIdValue idA = TestingData.existingId;
    ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, noMatchValue);
    Statement statement = new StatementImpl("P18", value, idA);
    ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();

    // Constraint statement carrying the regex as a qualifier.
    Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
    List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
    SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
    List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
    List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);

    // Stub the fetcher with the scrutinizer's own identifiers so the test
    // cannot drift from the values the production code actually queries.
    ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
    when(fetcher.getConstraintsByType(propertyIdValue, FormatScrutinizer.FORMAT_CONSTRAINT_QID))
            .thenReturn(statementList);
    when(fetcher.findValues(snakGroupList, FormatScrutinizer.FORMAT_REGEX_PID))
            .thenReturn(Collections.singletonList(regularExpressionFormat));
    setFetcher(fetcher);

    scrutinize(updateA);
    assertWarningsRaised(FormatScrutinizer.type);
}
/**
 * A value that matches the format regex in full raises no warning.
 */
@Test
public void testNoIssue() {
    // Statement under test: P18 with a fully matching string value.
    ItemIdValue idA = TestingData.existingId;
    ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, completeMatchValue);
    Statement statement = new StatementImpl("P18", value, idA);
    ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();

    // Constraint statement carrying the regex as a qualifier.
    Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
    List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
    SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
    List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
    List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);

    // Stub the fetcher with the scrutinizer's own identifiers so the test
    // cannot drift from the values the production code actually queries.
    ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
    when(fetcher.getConstraintsByType(propertyIdValue, FormatScrutinizer.FORMAT_CONSTRAINT_QID))
            .thenReturn(statementList);
    when(fetcher.findValues(snakGroupList, FormatScrutinizer.FORMAT_REGEX_PID))
            .thenReturn(Collections.singletonList(regularExpressionFormat));
    setFetcher(fetcher);

    scrutinize(updateA);
    assertNoWarningRaised();
}
/**
 * A value consisting only of an extension matches part of the regex but not
 * all of it (the leading ".+" needs at least one character), so a format
 * warning is expected.
 */
@Test
public void testIncompleteMatch() {
    // Statement under test: P18 with a partially matching string value.
    ItemIdValue idA = TestingData.existingId;
    ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, incompleteMatchValue);
    Statement statement = new StatementImpl("P18", value, idA);
    ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();

    // Constraint statement carrying the regex as a qualifier.
    Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
    List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
    SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
    List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
    List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);

    // Stub the fetcher with the scrutinizer's own identifiers so the test
    // cannot drift from the values the production code actually queries.
    ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
    when(fetcher.getConstraintsByType(propertyIdValue, FormatScrutinizer.FORMAT_CONSTRAINT_QID))
            .thenReturn(statementList);
    when(fetcher.findValues(snakGroupList, FormatScrutinizer.FORMAT_REGEX_PID))
            .thenReturn(Collections.singletonList(regularExpressionFormat));
    setFetcher(fetcher);

    scrutinize(updateA);
    assertWarningsRaised(FormatScrutinizer.type);
}