Add format scrutinizer
This commit is contained in:
parent
b5f2085038
commit
0d5875b35b
@ -24,7 +24,7 @@
|
||||
},
|
||||
"new-item-without-descriptions": {
|
||||
"title": "Some new items will be created without any description.",
|
||||
"body": "Adding descriptions will make it easier to disambiguate them from namesakes."
|
||||
"body": "Adding descriptions will make it easier to disambiguate the items from namesakes."
|
||||
},
|
||||
"new-item-with-deleted-statements": {
|
||||
"title": "You are trying to delete statements on new items.",
|
||||
@ -37,6 +37,14 @@
|
||||
"statement-without-reference": {
|
||||
"title": "Some statements are not referenced.",
|
||||
"body": "Please provide references for the statements that you add."
|
||||
},
|
||||
"add-statements-with-invalid-format": {
|
||||
"title": "Invalid format for some text statements.",
|
||||
"body": "Please consult the documentation of the properties to find out the correct format for their values."
|
||||
},
|
||||
"remove-statements-with-invalid-format": {
|
||||
"title": "Statements with invalid format will be removed.",
|
||||
"body": "If these statements currently exist on Wikidata, this will solve constraint violations."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,8 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
|
||||
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
|
||||
import org.openrefine.wikidata.schema.ItemUpdate;
|
||||
|
||||
@ -23,6 +25,7 @@ public class EditInspector {
|
||||
|
||||
// Register all known scrutinizers here
|
||||
register(new NewItemScrutinizer());
|
||||
register(new FormatConstraintScrutinizer());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -0,0 +1,60 @@
|
||||
package org.openrefine.wikidata.qa.scrutinizers;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.openrefine.wikidata.qa.ConstraintFetcher;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||
|
||||
|
||||
public class FormatConstraintScrutinizer extends SnakScrutinizer {
|
||||
|
||||
private Map<String, Pattern> _patterns;
|
||||
private ConstraintFetcher _fetcher;
|
||||
|
||||
public FormatConstraintScrutinizer() {
|
||||
_patterns = new HashMap<>();
|
||||
_fetcher = new ConstraintFetcher();
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the regex for a property and compiles it to a pattern
|
||||
* (this is cached upstream, plus we are doing it only once per
|
||||
* property and batch).
|
||||
* @param pid the id of the property to fetch the constraints for
|
||||
* @return
|
||||
*/
|
||||
protected Pattern getPattern(String pid) {
|
||||
if(_patterns.containsKey(pid)) {
|
||||
return _patterns.get(pid);
|
||||
} else {
|
||||
String regex = _fetcher.getFormatRegex(pid);
|
||||
Pattern pattern = null;
|
||||
if (regex != null) {
|
||||
pattern = Pattern.compile(regex);
|
||||
}
|
||||
_patterns.put(pid, pattern);
|
||||
return pattern;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void scrutinize(Snak snak, boolean added) {
|
||||
if(StringValue.class.isInstance(snak.getValue())) {
|
||||
String value = ((StringValue) snak.getValue()).getString();
|
||||
String pid = snak.getPropertyId().getId();
|
||||
Pattern pattern = getPattern(pid);
|
||||
if (!pattern.matcher(value).matches()) {
|
||||
if (added) {
|
||||
important("add-statements-with-invalid-format");
|
||||
} else {
|
||||
info("remove-statements-with-invalid-format");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user