Ignore invalid regexes from Wikibase format constraints. (#3721)
* Ignore invalid regexes from Wikibase format constraints. Closes #3274.
* Add logging
This commit is contained in:
parent
4c930fed9a
commit
343c8afbea
@ -24,6 +24,8 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import org.openrefine.wikidata.qa.QAWarning;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
@ -38,6 +40,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
/**
|
||||
* A scrutinizer that detects incorrect formats in text values (mostly
|
||||
@ -48,6 +51,8 @@ import java.util.regex.Pattern;
|
||||
*/
|
||||
public class FormatScrutinizer extends SnakScrutinizer {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(FormatScrutinizer.class);
|
||||
|
||||
public static final String type = "add-statements-with-invalid-format";
|
||||
public String formatConstraintQid;
|
||||
public String formatRegexPid;
|
||||
@ -97,9 +102,14 @@ public class FormatScrutinizer extends SnakScrutinizer {
|
||||
String regex = constraint.regularExpressionFormat;
|
||||
Pattern pattern = null;
|
||||
if (regex != null) {
|
||||
try {
|
||||
pattern = Pattern.compile(regex);
|
||||
}
|
||||
patterns.add(pattern);
|
||||
} catch(PatternSyntaxException e) {
|
||||
logger.info(String.format("Ignoring invalid format constraint for property %s. Regex %s is invalid: %s",
|
||||
pid.getId(), regex, e.getMessage()));
|
||||
}
|
||||
}
|
||||
}
|
||||
_patterns.put(pid, patterns);
|
||||
return patterns;
|
||||
|
@ -54,6 +54,7 @@ public class FormatScrutinizerTest extends ScrutinizerTest {
|
||||
public static Value noMatchValue = Datamodel.makeStringValue("image");
|
||||
public static Value incompleteMatchValue = Datamodel.makeStringValue(".jpg");
|
||||
public static String regularExpression = "(?i).+\\.(jpg|jpeg|jpe|png|svg|tif|tiff|gif|xcf|pdf|djvu|webp)";
|
||||
public static String invalidRegularExpression = "(?[A-Za-z]+)";
|
||||
|
||||
public static ItemIdValue entityIdValue = Datamodel.makeWikidataItemIdValue(FORMAT_CONSTRAINT_QID);
|
||||
public static PropertyIdValue regularExpressionParameter = Datamodel.makeWikidataPropertyIdValue(FORMAT_REGEX_PID);
|
||||
@ -72,11 +73,7 @@ public class FormatScrutinizerTest extends ScrutinizerTest {
|
||||
Statement statement = new StatementImpl("P18", value, idA);
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||
|
||||
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
||||
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||
List<SnakGroup> constraintQualifiers = Collections.singletonList(qualifierSnakGroup);
|
||||
List<Statement> constraintDefinitions = constraintParameterStatementList(entityIdValue, constraintQualifiers);
|
||||
List<Statement> constraintDefinitions = generateFormatConstraint(regularExpression);
|
||||
|
||||
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
||||
@ -92,11 +89,7 @@ public class FormatScrutinizerTest extends ScrutinizerTest {
|
||||
Statement statement = new StatementImpl("P18", value, idA);
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||
|
||||
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
||||
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||
List<SnakGroup> constraintQualifiers = Collections.singletonList(qualifierSnakGroup);
|
||||
List<Statement> constraintDefinitions = constraintParameterStatementList(entityIdValue, constraintQualifiers);
|
||||
List<Statement> constraintDefinitions = generateFormatConstraint(regularExpression);
|
||||
|
||||
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
||||
@ -112,11 +105,7 @@ public class FormatScrutinizerTest extends ScrutinizerTest {
|
||||
Statement statement = new StatementImpl("P18", value, idA);
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||
|
||||
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
||||
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||
List<SnakGroup> constraintQualifiers = Collections.singletonList(qualifierSnakGroup);
|
||||
List<Statement> constraintDefinitions = constraintParameterStatementList(entityIdValue, constraintQualifiers);
|
||||
List<Statement> constraintDefinitions = generateFormatConstraint(regularExpression);
|
||||
|
||||
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
||||
@ -125,4 +114,28 @@ public class FormatScrutinizerTest extends ScrutinizerTest {
|
||||
assertWarningsRaised(FormatScrutinizer.type);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvalidRegex() {
|
||||
ItemIdValue idA = TestingData.existingId;
|
||||
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, incompleteMatchValue);
|
||||
Statement statement = new StatementImpl("P18", value, idA);
|
||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||
|
||||
List<Statement> constraintDefinitions = generateFormatConstraint(invalidRegularExpression);
|
||||
|
||||
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
||||
setFetcher(fetcher);
|
||||
scrutinize(updateA);
|
||||
assertNoWarningRaised();
|
||||
}
|
||||
|
||||
protected List<Statement> generateFormatConstraint(String regex) {
|
||||
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, Datamodel.makeStringValue(regex));
|
||||
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||
List<SnakGroup> constraintQualifiers = Collections.singletonList(qualifierSnakGroup);
|
||||
return constraintParameterStatementList(entityIdValue, constraintQualifiers);
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user