Add format scrutinizer

This commit is contained in:
Antonin Delpeuch 2018-01-09 10:21:50 +00:00
parent b5f2085038
commit 0d5875b35b
3 changed files with 72 additions and 1 deletions

View File

@ -24,7 +24,7 @@
},
"new-item-without-descriptions": {
"title": "Some new items will be created without any description.",
"body": "Adding descriptions will make it easier to disambiguate them from namesakes."
"body": "Adding descriptions will make it easier to disambiguate the items from namesakes."
},
"new-item-with-deleted-statements": {
"title": "You are trying to delete statements on new items.",
@ -37,6 +37,14 @@
"statement-without-reference": {
"title": "Some statements are not referenced.",
"body": "Please provide references for the statements that you add."
},
"add-statements-with-invalid-format": {
"title": "Invalid format for some text statements.",
"body": "Please consult the documentation of the properties to find out the correct format for their values."
},
"remove-statements-with-invalid-format": {
"title": "Statements with invalid format will be removed.",
"body": "If these statements currently exist on Wikidata, this will solve constraint violations."
}
}
}

View File

@ -5,6 +5,8 @@ import java.util.List;
import java.util.Map;
import org.openrefine.wikidata.qa.scrutinizers.EditScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.FormatConstraintScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.InverseConstraintScrutinizer;
import org.openrefine.wikidata.qa.scrutinizers.NewItemScrutinizer;
import org.openrefine.wikidata.schema.ItemUpdate;
@ -23,6 +25,7 @@ public class EditInspector {
// Register all known scrutinizers here
register(new NewItemScrutinizer());
register(new FormatConstraintScrutinizer());
}
/**

View File

@ -0,0 +1,60 @@
package org.openrefine.wikidata.qa.scrutinizers;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
/**
 * Scrutinizer that checks string-valued snaks against the format
 * regular expression registered for their property, and reports an
 * issue when the value does not match.
 */
public class FormatConstraintScrutinizer extends SnakScrutinizer {

    /**
     * Compiled format patterns, keyed by property id. A key mapped to
     * {@code null} records that no format regex was returned for that
     * property, so the fetcher is not queried again for it.
     */
    private final Map<String, Pattern> _patterns;
    private final ConstraintFetcher _fetcher;

    public FormatConstraintScrutinizer() {
        _patterns = new HashMap<>();
        _fetcher = new ConstraintFetcher();
    }

    /**
     * Loads the format regex for a property and compiles it to a pattern.
     * The result (including the absence of a regex) is memoized in this
     * instance, so the fetcher is consulted at most once per property.
     *
     * @param pid the id of the property to fetch the constraints for
     * @return the compiled pattern for the property, or {@code null} if
     *         the fetcher returned no format regex for it
     */
    protected Pattern getPattern(String pid) {
        // containsKey (rather than get() != null) so that a cached null,
        // meaning "no format constraint", is not fetched again
        if (_patterns.containsKey(pid)) {
            return _patterns.get(pid);
        }
        String regex = _fetcher.getFormatRegex(pid);
        Pattern pattern = null;
        if (regex != null) {
            pattern = Pattern.compile(regex);
        }
        _patterns.put(pid, pattern);
        return pattern;
    }

    /**
     * Checks the value of a snak against the format pattern of its
     * property. Snaks whose value is not a {@link StringValue}, and
     * properties without a format pattern, are ignored.
     *
     * @param snak  the snak to check
     * @param added when {@code true}, a mismatch is reported as the
     *              important issue "add-statements-with-invalid-format";
     *              otherwise as the informational issue
     *              "remove-statements-with-invalid-format"
     */
    @Override
    public void scrutinize(Snak snak, boolean added) {
        if (!(snak.getValue() instanceof StringValue)) {
            return;
        }
        String value = ((StringValue) snak.getValue()).getString();
        String pid = snak.getPropertyId().getId();
        Pattern pattern = getPattern(pid);
        if (pattern == null) {
            // No format constraint known for this property: nothing to
            // validate (previously this dereferenced null and threw).
            return;
        }
        if (!pattern.matcher(value).matches()) {
            if (added) {
                important("add-statements-with-invalid-format");
            } else {
                info("remove-statements-with-invalid-format");
            }
        }
    }
}