From cd0ed11dadc6a639ae591aa3faac90f45b0968ac Mon Sep 17 00:00:00 2001 From: Ekta Mishra Date: Thu, 2 Jul 2020 19:58:56 +0530 Subject: [PATCH] Implemented Format Scrutinizer tests using Mockito (#2849) * Implemented Format Scrutinizer tests using Mockito Updated implementation of the scrutinizer & tests * Testcases updated in FormatScrutinizerTest --- .../qa/scrutinizers/FormatScrutinizer.java | 70 +++++++++++----- .../scrutinizers/FormatScrutinizerTest.java | 82 ++++++++++++++++++- 2 files changed, 126 insertions(+), 26 deletions(-) diff --git a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizer.java b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizer.java index bf06ead3b..687ac44f7 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizer.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizer.java @@ -27,10 +27,16 @@ import org.openrefine.wikidata.qa.QAWarning; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; import org.wikidata.wdtk.datamodel.interfaces.Snak; +import org.wikidata.wdtk.datamodel.interfaces.SnakGroup; +import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.StringValue; +import org.wikidata.wdtk.datamodel.interfaces.Value; import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Set; import java.util.regex.Pattern; /** @@ -43,9 +49,24 @@ import java.util.regex.Pattern; public class FormatScrutinizer extends SnakScrutinizer { public static final String type = "add-statements-with-invalid-format"; + public static String FORMAT_CONSTRAINT_QID = "Q21502404"; + public static String FORMAT_REGEX_PID = "P1793"; - private Map _patterns; + private Map> _patterns; + class FormatConstraint { + String regularExpressionFormat = null; + 
FormatConstraint(Statement statement) { + List constraint = statement.getClaim().getQualifiers(); + if (constraint != null) { + List regexes = _fetcher.findValues(constraint, FORMAT_REGEX_PID); + if (!regexes.isEmpty()) { + regularExpressionFormat = ((StringValue) regexes.get(0)).getString(); + } + } + } + } public FormatScrutinizer() { _patterns = new HashMap<>(); } @@ -58,17 +79,23 @@ public class FormatScrutinizer extends SnakScrutinizer { * the id of the property to fetch the constraints for * @return */ - protected Pattern getPattern(PropertyIdValue pid) { + protected Set getPattern(PropertyIdValue pid) { if (_patterns.containsKey(pid)) { return _patterns.get(pid); } else { - String regex = _fetcher.getFormatRegex(pid); - Pattern pattern = null; - if (regex != null) { - pattern = Pattern.compile(regex); + List statementList = _fetcher.getConstraintsByType(pid, FORMAT_CONSTRAINT_QID); + Set patterns = new HashSet<>(); + for (Statement statement: statementList) { + FormatConstraint constraint = new FormatConstraint(statement); + String regex = constraint.regularExpressionFormat; + Pattern pattern = null; + if (regex != null) { + pattern = Pattern.compile(regex); + } + patterns.add(pattern); } - _patterns.put(pid, pattern); - return pattern; + _patterns.put(pid, patterns); + return patterns; } } @@ -77,20 +104,19 @@ public class FormatScrutinizer extends SnakScrutinizer { if (snak.getValue() instanceof StringValue) { String value = ((StringValue) snak.getValue()).getString(); PropertyIdValue pid = snak.getPropertyId(); - Pattern pattern = getPattern(pid); - if (pattern == null) { - return; - } - if (!pattern.matcher(value).matches()) { - if (added) { - QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1); - issue.setProperty("property_entity", pid); - issue.setProperty("regex", pattern.toString()); - issue.setProperty("example_value", value); - issue.setProperty("example_item_entity", entityId); - addIssue(issue); - } else { - 
info("remove-statements-with-invalid-format"); + Set patterns = getPattern(pid); + for (Pattern pattern : patterns) { + if (!pattern.matcher(value).matches()) { + if (added) { + QAWarning issue = new QAWarning(type, pid.getId(), QAWarning.Severity.IMPORTANT, 1); + issue.setProperty("property_entity", pid); + issue.setProperty("regex", pattern.toString()); + issue.setProperty("example_value", value); + issue.setProperty("example_item_entity", entityId); + addIssue(issue); + } else { + info("remove-statements-with-invalid-format"); + } } } } diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizerTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizerTest.java index 9aca1f400..33b8bc681 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizerTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/qa/scrutinizers/FormatScrutinizerTest.java @@ -23,31 +23,105 @@ ******************************************************************************/ package org.openrefine.wikidata.qa.scrutinizers; +import org.openrefine.wikidata.qa.ConstraintFetcher; +import org.openrefine.wikidata.testing.TestingData; +import org.openrefine.wikidata.updates.ItemUpdate; +import org.openrefine.wikidata.updates.ItemUpdateBuilder; import org.testng.annotations.Test; import org.wikidata.wdtk.datamodel.helpers.Datamodel; +import org.wikidata.wdtk.datamodel.implementation.StatementImpl; +import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; +import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue; +import org.wikidata.wdtk.datamodel.interfaces.Snak; +import org.wikidata.wdtk.datamodel.interfaces.SnakGroup; +import org.wikidata.wdtk.datamodel.interfaces.Statement; +import org.wikidata.wdtk.datamodel.interfaces.Value; +import org.wikidata.wdtk.datamodel.interfaces.ValueSnak; -public class FormatScrutinizerTest extends ValueScrutinizerTest { +import 
java.util.Collections; +import java.util.List; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class FormatScrutinizerTest extends ScrutinizerTest { + + public static PropertyIdValue propertyIdValue = Datamodel.makeWikidataPropertyIdValue("P18"); + public static Value completeMatchValue = Datamodel.makeStringValue("image.png"); + public static Value noMatchValue = Datamodel.makeStringValue("image"); + public static Value incompleteMatchValue = Datamodel.makeStringValue(".jpg"); + public static String regularExpression = "(?i).+\\.(jpg|jpeg|jpe|png|svg|tif|tiff|gif|xcf|pdf|djvu|webp)"; + + public static ItemIdValue entityIdValue = Datamodel.makeWikidataItemIdValue("Q21502404"); + public static PropertyIdValue regularExpressionParameter = Datamodel.makeWikidataPropertyIdValue("P1793"); + public static Value regularExpressionFormat = Datamodel.makeStringValue(regularExpression); @Override public EditScrutinizer getScrutinizer() { return new FormatScrutinizer(); } + @Test public void testTrigger() { - scrutinize(Datamodel.makeStringValue("not a number")); + ItemIdValue idA = TestingData.existingId; + ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, noMatchValue); + Statement statement = new StatementImpl("P18", value, idA); + ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build(); + + Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat); + List qualifierSnakList = Collections.singletonList(qualifierSnak); + SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList); + List snakGroupList = Collections.singletonList(qualifierSnakGroup); + List statementList = constraintParameterStatementList(entityIdValue, snakGroupList); + + ConstraintFetcher fetcher = mock(ConstraintFetcher.class); + when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList); + when(fetcher.findValues(snakGroupList, 
"P1793")).thenReturn(Collections.singletonList(regularExpressionFormat)); + setFetcher(fetcher); + scrutinize(updateA); assertWarningsRaised(FormatScrutinizer.type); } @Test public void testNoIssue() { - scrutinize(Datamodel.makeStringValue("1234")); + ItemIdValue idA = TestingData.existingId; + ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, completeMatchValue); + Statement statement = new StatementImpl("P18", value, idA); + ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build(); + + Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat); + List qualifierSnakList = Collections.singletonList(qualifierSnak); + SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList); + List snakGroupList = Collections.singletonList(qualifierSnakGroup); + List statementList = constraintParameterStatementList(entityIdValue, snakGroupList); + + ConstraintFetcher fetcher = mock(ConstraintFetcher.class); + when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList); + when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat)); + setFetcher(fetcher); + scrutinize(updateA); assertNoWarningRaised(); } @Test public void testIncompleteMatch() { - scrutinize(Datamodel.makeStringValue("42 is a number")); + ItemIdValue idA = TestingData.existingId; + ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, incompleteMatchValue); + Statement statement = new StatementImpl("P18", value, idA); + ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build(); + + Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat); + List qualifierSnakList = Collections.singletonList(qualifierSnak); + SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList); + List snakGroupList = Collections.singletonList(qualifierSnakGroup); + List statementList = 
constraintParameterStatementList(entityIdValue, snakGroupList); + + ConstraintFetcher fetcher = mock(ConstraintFetcher.class); + when(fetcher.getConstraintsByType(propertyIdValue, "Q21502404")).thenReturn(statementList); + when(fetcher.findValues(snakGroupList, "P1793")).thenReturn(Collections.singletonList(regularExpressionFormat)); + setFetcher(fetcher); + scrutinize(updateA); assertWarningsRaised(FormatScrutinizer.type); }