Add format scrutinizer
This commit is contained in:
parent
b5f2085038
commit
0d5875b35b
@ -24,7 +24,7 @@
|
|||||||
},
|
},
|
||||||
"new-item-without-descriptions": {
|
"new-item-without-descriptions": {
|
||||||
"title": "Some new items will be created without any description.",
|
"title": "Some new items will be created without any description.",
|
||||||
"body": "Adding descriptions will make it easier to disambiguate them from namesakes."
|
"body": "Adding descriptions will make it easier to disambiguate the items from namesakes."
|
||||||
},
|
},
|
||||||
"new-item-with-deleted-statements": {
|
"new-item-with-deleted-statements": {
|
||||||
"title": "You are trying to delete statements on new items.",
|
"title": "You are trying to delete statements on new items.",
|
||||||
@ -37,6 +37,14 @@
|
|||||||
"statement-without-reference": {
|
"statement-without-reference": {
|
||||||
"title": "Some statements are not referenced.",
|
"title": "Some statements are not referenced.",
|
||||||
"body": "Please provide references for the statements that you add."
|
"body": "Please provide references for the statements that you add."
|
||||||
|
},
|
||||||
|
"add-statements-with-invalid-format": {
|
||||||
|
"title": "Invalid format for some text statements.",
|
||||||
|
"body": "Please consult the documentation of the properties to find out the correct format for their values."
|
||||||
|
},
|
||||||
|
"remove-statements-with-invalid-format": {
|
||||||
|
"title": "Statements with invalid format will be removed.",
|
||||||
|
"body": "If these statements currently exist on Wikidata, this will solve constraint violations."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,8 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
||||||
|
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
|
||||||
|
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
||||||
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
||||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||||
|
|
||||||
@ -23,6 +25,7 @@ public class EditInspector {
|
|||||||
|
|
||||||
// Register all known scrutinizers here
|
// Register all known scrutinizers here
|
||||||
register(new NewItemScrutinizer());
|
register(new NewItemScrutinizer());
|
||||||
|
register(new FormatConstraintScrutinizer());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -0,0 +1,60 @@
|
|||||||
|
package org.openrefine.wikidata.qa.scrutinizers;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||||
|
|
||||||
|
|
||||||
|
public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
||||||
|
|
||||||
|
private Map<String, Pattern> _patterns;
|
||||||
|
private ConstraintFetcher _fetcher;
|
||||||
|
|
||||||
|
public FormatConstraintScrutinizer() {
|
||||||
|
_patterns = new HashMap<>();
|
||||||
|
_fetcher = new ConstraintFetcher();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads the regex for a property and compiles it to a pattern
|
||||||
|
* (this is cached upstream, plus we are doing it only once per
|
||||||
|
* property and batch).
|
||||||
|
* @param pid the id of the property to fetch the constraints for
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected Pattern getPattern(String pid) {
|
||||||
|
if(_patterns.containsKey(pid)) {
|
||||||
|
return _patterns.get(pid);
|
||||||
|
} else {
|
||||||
|
String regex = _fetcher.getFormatRegex(pid);
|
||||||
|
Pattern pattern = null;
|
||||||
|
if (regex != null) {
|
||||||
|
pattern = Pattern.compile(regex);
|
||||||
|
}
|
||||||
|
_patterns.put(pid, pattern);
|
||||||
|
return pattern;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void scrutinize(Snak snak, boolean added) {
|
||||||
|
if(StringValue.class.isInstance(snak.getValue())) {
|
||||||
|
String value = ((StringValue) snak.getValue()).getString();
|
||||||
|
String pid = snak.getPropertyId().getId();
|
||||||
|
Pattern pattern = getPattern(pid);
|
||||||
|
if (!pattern.matcher(value).matches()) {
|
||||||
|
if (added) {
|
||||||
|
important("add-statements-with-invalid-format");
|
||||||
|
} else {
|
||||||
|
info("remove-statements-with-invalid-format");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user