Ignore invalid regexes from Wikibase format constraints. (#3721)
* Ignore invalid regexes from Wikibase format constraints. Closes #3274. * Add logging
This commit is contained in:
parent
4c930fed9a
commit
343c8afbea
@ -24,6 +24,8 @@
|
|||||||
package org.openrefine.wikidata.qa.scrutinizers;
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
import org.openrefine.wikidata.qa.QAWarning;
|
import org.openrefine.wikidata.qa.QAWarning;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
|
||||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
@ -38,6 +40,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.PatternSyntaxException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A scrutinizer that detects incorrect formats in text values (mostly
|
* A scrutinizer that detects incorrect formats in text values (mostly
|
||||||
@ -48,6 +51,8 @@ import java.util.regex.Pattern;
|
|||||||
*/
|
*/
|
||||||
public class FormatScrutinizer extends SnakScrutinizer {
|
public class FormatScrutinizer extends SnakScrutinizer {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(FormatScrutinizer.class);
|
||||||
|
|
||||||
public static final String type = "add-statements-with-invalid-format";
|
public static final String type = "add-statements-with-invalid-format";
|
||||||
public String formatConstraintQid;
|
public String formatConstraintQid;
|
||||||
public String formatRegexPid;
|
public String formatRegexPid;
|
||||||
@ -97,9 +102,14 @@ public class FormatScrutinizer extends SnakScrutinizer {
|
|||||||
String regex = constraint.regularExpressionFormat;
|
String regex = constraint.regularExpressionFormat;
|
||||||
Pattern pattern = null;
|
Pattern pattern = null;
|
||||||
if (regex != null) {
|
if (regex != null) {
|
||||||
pattern = Pattern.compile(regex);
|
try {
|
||||||
|
pattern = Pattern.compile(regex);
|
||||||
|
patterns.add(pattern);
|
||||||
|
} catch(PatternSyntaxException e) {
|
||||||
|
logger.info(String.format("Ignoring invalid format constraint for property %s. Regex %s is invalid: %s",
|
||||||
|
pid.getId(), regex, e.getMessage()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
patterns.add(pattern);
|
|
||||||
}
|
}
|
||||||
_patterns.put(pid, patterns);
|
_patterns.put(pid, patterns);
|
||||||
return patterns;
|
return patterns;
|
||||||
|
@ -54,6 +54,7 @@ public class FormatScrutinizerTest extends ScrutinizerTest {
|
|||||||
public static Value noMatchValue = Datamodel.makeStringValue("image");
|
public static Value noMatchValue = Datamodel.makeStringValue("image");
|
||||||
public static Value incompleteMatchValue = Datamodel.makeStringValue(".jpg");
|
public static Value incompleteMatchValue = Datamodel.makeStringValue(".jpg");
|
||||||
public static String regularExpression = "(?i).+\\.(jpg|jpeg|jpe|png|svg|tif|tiff|gif|xcf|pdf|djvu|webp)";
|
public static String regularExpression = "(?i).+\\.(jpg|jpeg|jpe|png|svg|tif|tiff|gif|xcf|pdf|djvu|webp)";
|
||||||
|
public static String invalidRegularExpression = "(?[A-Za-z]+)";
|
||||||
|
|
||||||
public static ItemIdValue entityIdValue = Datamodel.makeWikidataItemIdValue(FORMAT_CONSTRAINT_QID);
|
public static ItemIdValue entityIdValue = Datamodel.makeWikidataItemIdValue(FORMAT_CONSTRAINT_QID);
|
||||||
public static PropertyIdValue regularExpressionParameter = Datamodel.makeWikidataPropertyIdValue(FORMAT_REGEX_PID);
|
public static PropertyIdValue regularExpressionParameter = Datamodel.makeWikidataPropertyIdValue(FORMAT_REGEX_PID);
|
||||||
@ -72,11 +73,7 @@ public class FormatScrutinizerTest extends ScrutinizerTest {
|
|||||||
Statement statement = new StatementImpl("P18", value, idA);
|
Statement statement = new StatementImpl("P18", value, idA);
|
||||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||||
|
|
||||||
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
List<Statement> constraintDefinitions = generateFormatConstraint(regularExpression);
|
||||||
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
|
||||||
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
|
||||||
List<SnakGroup> constraintQualifiers = Collections.singletonList(qualifierSnakGroup);
|
|
||||||
List<Statement> constraintDefinitions = constraintParameterStatementList(entityIdValue, constraintQualifiers);
|
|
||||||
|
|
||||||
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||||
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
||||||
@ -92,11 +89,7 @@ public class FormatScrutinizerTest extends ScrutinizerTest {
|
|||||||
Statement statement = new StatementImpl("P18", value, idA);
|
Statement statement = new StatementImpl("P18", value, idA);
|
||||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||||
|
|
||||||
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
List<Statement> constraintDefinitions = generateFormatConstraint(regularExpression);
|
||||||
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
|
||||||
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
|
||||||
List<SnakGroup> constraintQualifiers = Collections.singletonList(qualifierSnakGroup);
|
|
||||||
List<Statement> constraintDefinitions = constraintParameterStatementList(entityIdValue, constraintQualifiers);
|
|
||||||
|
|
||||||
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||||
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
||||||
@ -112,11 +105,7 @@ public class FormatScrutinizerTest extends ScrutinizerTest {
|
|||||||
Statement statement = new StatementImpl("P18", value, idA);
|
Statement statement = new StatementImpl("P18", value, idA);
|
||||||
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||||
|
|
||||||
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, regularExpressionFormat);
|
List<Statement> constraintDefinitions = generateFormatConstraint(regularExpression);
|
||||||
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
|
||||||
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
|
||||||
List<SnakGroup> constraintQualifiers = Collections.singletonList(qualifierSnakGroup);
|
|
||||||
List<Statement> constraintDefinitions = constraintParameterStatementList(entityIdValue, constraintQualifiers);
|
|
||||||
|
|
||||||
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||||
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
||||||
@ -124,5 +113,29 @@ public class FormatScrutinizerTest extends ScrutinizerTest {
|
|||||||
scrutinize(updateA);
|
scrutinize(updateA);
|
||||||
assertWarningsRaised(FormatScrutinizer.type);
|
assertWarningsRaised(FormatScrutinizer.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInvalidRegex() {
|
||||||
|
ItemIdValue idA = TestingData.existingId;
|
||||||
|
ValueSnak value = Datamodel.makeValueSnak(propertyIdValue, incompleteMatchValue);
|
||||||
|
Statement statement = new StatementImpl("P18", value, idA);
|
||||||
|
ItemUpdate updateA = new ItemUpdateBuilder(idA).addStatement(statement).build();
|
||||||
|
|
||||||
|
List<Statement> constraintDefinitions = generateFormatConstraint(invalidRegularExpression);
|
||||||
|
|
||||||
|
ConstraintFetcher fetcher = mock(ConstraintFetcher.class);
|
||||||
|
when(fetcher.getConstraintsByType(propertyIdValue, FORMAT_CONSTRAINT_QID)).thenReturn(constraintDefinitions);
|
||||||
|
setFetcher(fetcher);
|
||||||
|
scrutinize(updateA);
|
||||||
|
assertNoWarningRaised();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected List<Statement> generateFormatConstraint(String regex) {
|
||||||
|
Snak qualifierSnak = Datamodel.makeValueSnak(regularExpressionParameter, Datamodel.makeStringValue(regex));
|
||||||
|
List<Snak> qualifierSnakList = Collections.singletonList(qualifierSnak);
|
||||||
|
SnakGroup qualifierSnakGroup = Datamodel.makeSnakGroup(qualifierSnakList);
|
||||||
|
List<SnakGroup> constraintQualifiers = Collections.singletonList(qualifierSnakGroup);
|
||||||
|
return constraintParameterStatementList(entityIdValue, constraintQualifiers);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user