Implemented Format Scrutinizer tests using Mockito (#2849)

* Implemented Format Scrutinizer tests using Mockito

Updated implementation of the scrutinizer & tests

* Testcases updated in FormatScrutinizerTest
This commit is contained in:
Ekta Mishra 2020-07-02 19:58:56 +05:30 committed by GitHub
parent 9dfb9114c4
commit cd0ed11dad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 126 additions and 26 deletions

View File

@ -27,10 +27,16 @@ import org.openrefine.wikidata.qa.QAWarning;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak; import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.StringValue; import org.wikidata.wdtk.datamodel.interfaces.StringValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
@ -43,9 +49,24 @@ import java.util.regex.Pattern;
public class FormatScrutinizer extends SnakScrutinizer { public class FormatScrutinizer extends SnakScrutinizer {
public static final String type = "add-statements-with-invalid-format"; public static final String type = "add-statements-with-invalid-format";
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
public static String FORMAT_REGEX_PID = "P1793";
private Map<PropertyIdValue, Pattern> _patterns; private Map<PropertyIdValue, Set<Pattern>> _patterns;
class FormatConstraint {
String regularExpressionFormat = null;
FormatConstraint(Statement statement) {
List<SnakGroup> constraint = statement.getClaim().getQualifiers();
if (constraint != null) {
List<Value> regexes = _fetcher.findValues(constraint, FORMAT_REGEX_PID);
if (!regexes.isEmpty()) {
regularExpressionFormat = ((StringValue) regexes.get(0)).getString();
}
}
}
}
public FormatScrutinizer() { public FormatScrutinizer() {
_patterns = new HashMap<>(); _patterns = new HashMap<>();
} }
@ -58,17 +79,23 @@ public class FormatScrutinizer extends SnakScrutinizer {
* the id of the property to fetch the constraints for * the id of the property to fetch the constraints for
* @return * @return
*/ */
protected Pattern getPattern(PropertyIdValue pid) { protected Set<Pattern> getPattern(PropertyIdValue pid) {
if (_patterns.containsKey(pid)) { if (_patterns.containsKey(pid)) {
return _patterns.get(pid); return _patterns.get(pid);
} else { } else {
String regex = _fetcher.getFormatRegex(pid); List<Statement> statementList = _fetcher.getConstraintsByType(pid, FORMAT_CONSTRAINT_QID);
Pattern pattern = null; Set<Pattern> patterns = new HashSet<>();
if (regex != null) { for (Statement statement: statementList) {
pattern = Pattern.compile(regex); FormatConstraint constraint = new FormatConstraint(statement);
String regex = constraint.regularExpressionFormat;
Pattern pattern = null;
if (regex != null) {
pattern = Pattern.compile(regex);
}
patterns.add(pattern);
} }
_patterns.put(pid, pattern); _patterns.put(pid, patterns);
return pattern; return patterns;
} }
} }
@ -77,20 +104,19 @@ public class FormatScrutinizer extends SnakScrutinizer {
if (snak.getValue() instanceof StringValue) { if (snak.getValue() instanceof StringValue) {
String value = ((StringValue) snak.getValue()).getString(); String value = ((StringValue) snak.getValue()).getString();
PropertyIdValue pid = snak.getPropertyId(); PropertyIdValue pid = snak.getPropertyId();
Pattern pattern = getPattern(pid); Set<Pattern> patterns = getPattern(pid);
if (pattern == null) { for (Pattern pattern : patterns) {
return; if (!pattern.matcher(value).matches()) {
} if (added) {
if (!pattern.matcher(value).matches()) { QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1);
if (added) { issue.setProperty("property_entity", pid);
QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1); issue.setProperty("regex", pattern.toString());
issue.setProperty("property_entity", pid); issue.setProperty("example_value", value);
issue.setProperty("regex", pattern.toString()); issue.setProperty("example_item_entity", entityId);
issue.setProperty("example_value", value); addIssue(issue);
issue.setProperty("example_item_entity", entityId); } else {
addIssue(issue); info("remove-statements-with-invalid-format");
} else { }
info("remove-statements-with-invalid-format");
} }
} }
} }

View File

@ -23,31 +23,105 @@
******************************************************************************/ ******************************************************************************/
package org.openrefine.wikidata.qa.scrutinizers; package org.openrefine.wikidata.qa.scrutinizers;
import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.openrefine.wikidata.testing.TestingData;
import org.openrefine.wikidata.updates.ItemUpdate;
import org.openrefine.wikidata.updates.ItemUpdateBuilder;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.implementation.StatementImpl;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import org.wikidata.wdtk.datamodel.interfaces.ValueSnak;
public class FormatScrutinizerTest extends ValueScrutinizerTest { import java.util.Collections;
import java.util.List;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
public class FormatScrutinizerTest extends ScrutinizerTest {
public static PropertyIdValue propertyIdValue = Datamodel.makeWikidataPropertyIdValue("P18");
public static Value completeMatchValue = Datamodel.makeStringValue("image.png");
public static Value noMatchValue = Datamodel.makeStringValue("image");
public static Value incompleteMatchValue = Datamodel.makeStringValue(".jpg");
public static String regularExpression = "(?i).+\\.(jpg|jpeg|jpe|png|svg|tif|tiff|gif|xcf|pdf|djvu|webp)";
public static ItemIdValue entityIdValue = Datamodel.makeWikidataItemIdValue("Q21502404");
public static PropertyIdValue regularExpressionParameter = Datamodel.makeWikidataPropertyIdValue("P1793");
public static Value regularExpressionFormat = Datamodel.makeStringValue(regularExpression);
@Override @Override
public EditScrutinizer getScrutinizer() { public EditScrutinizer getScrutinizer() {
return new FormatScrutinizer(); return new FormatScrutinizer();
} }
@Test @Test
public void testTrigger() { public void testTrigger() {
scrutinize(Datamodel.makeStringValue("not a number")); ItemIdValue idA = TestingData.existingId;
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, noMatchValue);
Statement statement = new StatementImpl("P18", value, idA);
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList);
when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat));
setFetcher(fetcher);
scrutinize(updateA);
assertWarningsRaised(FormatScrutinizer.type); assertWarningsRaised(FormatScrutinizer.type);
} }
@Test @Test
public void testNoIssue() { public void testNoIssue() {
scrutinize(Datamodel.makeStringValue("1234")); ItemIdValue idA = TestingData.existingId;
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, completeMatchValue);
Statement statement = new StatementImpl("P18", value, idA);
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList);
when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat));
setFetcher(fetcher);
scrutinize(updateA);
assertNoWarningRaised(); assertNoWarningRaised();
} }
@Test @Test
public void testIncompleteMatch() { public void testIncompleteMatch() {
scrutinize(Datamodel.makeStringValue("42 is a number")); ItemIdValue idA = TestingData.existingId;
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, incompleteMatchValue);
Statement statement = new StatementImpl("P18", value, idA);
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
List<SnakGroup> snakGroupList = Collections.singletonList(qualifierSnakGroup);
List<Statement> statementList = constraintParameterStatementList(entityIdValue, snakGroupList);
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList);
when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat));
setFetcher(fetcher);
scrutinize(updateA);
assertWarningsRaised(FormatScrutinizer.type); assertWarningsRaised(FormatScrutinizer.type);
} }