Move pids from Strings to PropertyIdValues, cleaner

This commit is contained in:
Antonin Delpeuch 2018-01-10 09:55:51 +00:00
parent fea53aa900
commit 0a0aacb0cd
5 changed files with 34 additions and 30 deletions

View File

@ -61,7 +61,7 @@ public class ConstraintFetcher {
* @param pid the property whose format constraint is looked up * @param pid the property whose format constraint is looked up
* @return a regular expression, which should be compatible with java.util.regex * @return a regular expression, which should be compatible with java.util.regex
*/ */
public String getFormatRegex(String pid) { public String getFormatRegex(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID); List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
if (specs != null) { if (specs != null) {
List<Value> regexes = findValues(specs, FORMAT_REGEX_PID); List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
@ -77,13 +77,13 @@ public class ConstraintFetcher {
* @param pid: the property to retrieve the inverse for * @param pid: the property to retrieve the inverse for
* @return the pid of the inverse property, or null if no inverse property is specified * @return the pid of the inverse property, or null if no inverse property is specified
*/ */
public String getInversePid(String pid) { public PropertyIdValue getInversePid(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID); List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID);
if(specs != null) { if(specs != null) {
List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID); List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID);
if (! inverses.isEmpty()) { if (! inverses.isEmpty()) {
return ((EntityIdValue)inverses.get(0)).getId(); return (PropertyIdValue)inverses.get(0);
} }
} }
return null; return null;
@ -92,21 +92,21 @@ public class ConstraintFetcher {
/** /**
* Is this property for values only? * Is this property for values only?
*/ */
public boolean isForValuesOnly(String pid) { public boolean isForValuesOnly(PropertyIdValue pid) {
return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null; return getSingleConstraint(pid, USED_ONLY_AS_VALUES_CONSTRAINT_QID) != null;
} }
/** /**
* Is this property for qualifiers only? * Is this property for qualifiers only?
*/ */
public boolean isForQualifiersOnly(String pid) { public boolean isForQualifiersOnly(PropertyIdValue pid) {
return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null; return getSingleConstraint(pid, USED_ONLY_AS_QUALIFIER_CONSTRAINT_QID) != null;
} }
/** /**
* Is this property for references only? * Is this property for references only?
*/ */
public boolean isForReferencesOnly(String pid) { public boolean isForReferencesOnly(PropertyIdValue pid) {
return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null; return getSingleConstraint(pid, USED_ONLY_AS_REFERENCE_CONSTRAINT_QID) != null;
} }
@ -114,7 +114,7 @@ public class ConstraintFetcher {
* Get the list of allowed qualifiers (as property ids) for this property (null if any) * Get the list of allowed qualifiers (as property ids) for this property (null if any)
*/ */
public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) { public Set<PropertyIdValue> allowedQualifiers(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid.getId(), ALLOWED_QUALIFIERS_CONSTRAINT_QID); List<SnakGroup> specs = getSingleConstraint(pid, ALLOWED_QUALIFIERS_CONSTRAINT_QID);
if (specs != null) { if (specs != null) {
List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID); List<Value> properties = findValues(specs, ALLOWED_QUALIFIERS_CONSTRAINT_PID);
@ -127,7 +127,7 @@ public class ConstraintFetcher {
* Get the list of mandatory qualifiers (as property ids) for this property (null if any) * Get the list of mandatory qualifiers (as property ids) for this property (null if any)
*/ */
public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) { public Set<PropertyIdValue> mandatoryQualifiers(PropertyIdValue pid) {
List<SnakGroup> specs = getSingleConstraint(pid.getId(), MANDATORY_QUALIFIERS_CONSTRAINT_QID); List<SnakGroup> specs = getSingleConstraint(pid, MANDATORY_QUALIFIERS_CONSTRAINT_QID);
if (specs != null) { if (specs != null) {
List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID); List<Value> properties = findValues(specs, MANDATORY_QUALIFIERS_CONSTRAINT_PID);
@ -143,7 +143,7 @@ public class ConstraintFetcher {
* @param qid: the type of the constraints * @param qid: the type of the constraints
* @return the list of qualifiers for the constraint, or null if it does not exist * @return the list of qualifiers for the constraint, or null if it does not exist
*/ */
protected List<SnakGroup> getSingleConstraint(String pid, String qid) { protected List<SnakGroup> getSingleConstraint(PropertyIdValue pid, String qid) {
Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null); Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null);
if (statement != null) { if (statement != null) {
return statement.getClaim().getQualifiers(); return statement.getClaim().getQualifiers();
@ -157,7 +157,7 @@ public class ConstraintFetcher {
* @param qid: the type of the constraints * @param qid: the type of the constraints
* @return the stream of matching constraint statements * @return the stream of matching constraint statements
*/ */
protected Stream<Statement> getConstraintsByType(String pid, String qid) { protected Stream<Statement> getConstraintsByType(PropertyIdValue pid, String qid) {
Stream<Statement> allConstraints = getConstraintStatements(pid) Stream<Statement> allConstraints = getConstraintStatements(pid)
.stream() .stream()
.filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid)); .filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid));
@ -169,7 +169,7 @@ public class ConstraintFetcher {
* @param pid : the id of the property to retrieve the constraints for * @param pid : the id of the property to retrieve the constraints for
* @return the list of constraint statements * @return the list of constraint statements
*/ */
protected List<Statement> getConstraintStatements(String pid) { protected List<Statement> getConstraintStatements(PropertyIdValue pid) {
PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid); PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID); StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
return group.getStatements(); return group.getStatements();

View File

@ -6,6 +6,7 @@ import java.util.regex.Pattern;
import org.openrefine.wikidata.qa.ConstraintFetcher; import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Snak; import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.StringValue; import org.wikidata.wdtk.datamodel.interfaces.StringValue;
@ -18,7 +19,7 @@ import org.wikidata.wdtk.datamodel.interfaces.StringValue;
*/ */
public class FormatConstraintScrutinizer extends SnakScrutinizer { public class FormatConstraintScrutinizer extends SnakScrutinizer {
private Map<String, Pattern> _patterns; private Map<PropertyIdValue, Pattern> _patterns;
private ConstraintFetcher _fetcher; private ConstraintFetcher _fetcher;
public FormatConstraintScrutinizer() { public FormatConstraintScrutinizer() {
@ -33,7 +34,7 @@ public class FormatConstraintScrutinizer extends SnakScrutinizer {
* @param pid the id of the property to fetch the constraints for * @param pid the id of the property to fetch the constraints for
* @return * @return
*/ */
protected Pattern getPattern(String pid) { protected Pattern getPattern(PropertyIdValue pid) {
if(_patterns.containsKey(pid)) { if(_patterns.containsKey(pid)) {
return _patterns.get(pid); return _patterns.get(pid);
} else { } else {
@ -51,7 +52,7 @@ public class FormatConstraintScrutinizer extends SnakScrutinizer {
public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) { public void scrutinize(Snak snak, EntityIdValue entityId, boolean added) {
if(StringValue.class.isInstance(snak.getValue())) { if(StringValue.class.isInstance(snak.getValue())) {
String value = ((StringValue) snak.getValue()).getString(); String value = ((StringValue) snak.getValue()).getString();
String pid = snak.getPropertyId().getId(); PropertyIdValue pid = snak.getPropertyId();
Pattern pattern = getPattern(pid); Pattern pattern = getPattern(pid);
if (!pattern.matcher(value).matches()) { if (!pattern.matcher(value).matches()) {
if (added) { if (added) {

View File

@ -9,6 +9,7 @@ import java.util.Set;
import org.openrefine.wikidata.qa.ConstraintFetcher; import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue; import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.Statement;
import org.wikidata.wdtk.datamodel.interfaces.Value; import org.wikidata.wdtk.datamodel.interfaces.Value;
@ -22,8 +23,8 @@ import org.wikidata.wdtk.datamodel.interfaces.Value;
public class InverseConstraintScrutinizer extends StatementScrutinizer { public class InverseConstraintScrutinizer extends StatementScrutinizer {
private ConstraintFetcher _fetcher; private ConstraintFetcher _fetcher;
private Map<String, String> _inverse; private Map<PropertyIdValue, PropertyIdValue> _inverse;
private Map<String, Map<EntityIdValue, Set<EntityIdValue> >> _statements; private Map<PropertyIdValue, Map<EntityIdValue, Set<EntityIdValue> >> _statements;
public InverseConstraintScrutinizer() { public InverseConstraintScrutinizer() {
_fetcher = new ConstraintFetcher(); _fetcher = new ConstraintFetcher();
@ -31,11 +32,11 @@ public class InverseConstraintScrutinizer extends StatementScrutinizer {
_statements = new HashMap<>(); _statements = new HashMap<>();
} }
protected String getInverseConstraint(String pid) { protected PropertyIdValue getInverseConstraint(PropertyIdValue pid) {
if (_inverse.containsKey(pid)) { if (_inverse.containsKey(pid)) {
return _inverse.get(pid); return _inverse.get(pid);
} else { } else {
String inversePid = _fetcher.getInversePid(pid); PropertyIdValue inversePid = _fetcher.getInversePid(pid);
_inverse.put(pid, inversePid); _inverse.put(pid, inversePid);
_statements.put(pid, new HashMap<EntityIdValue,Set<EntityIdValue>>()); _statements.put(pid, new HashMap<EntityIdValue,Set<EntityIdValue>>());
@ -57,8 +58,8 @@ public class InverseConstraintScrutinizer extends StatementScrutinizer {
Value mainSnakValue = statement.getClaim().getMainSnak().getValue(); Value mainSnakValue = statement.getClaim().getMainSnak().getValue();
if (ItemIdValue.class.isInstance(mainSnakValue)) { if (ItemIdValue.class.isInstance(mainSnakValue)) {
String pid = statement.getClaim().getMainSnak().getPropertyId().getId(); PropertyIdValue pid = statement.getClaim().getMainSnak().getPropertyId();
String inversePid = getInverseConstraint(pid); PropertyIdValue inversePid = getInverseConstraint(pid);
if (inversePid != null) { if (inversePid != null) {
EntityIdValue targetEntityId = (EntityIdValue) mainSnakValue; EntityIdValue targetEntityId = (EntityIdValue) mainSnakValue;
Set<EntityIdValue> currentValues = _statements.get(pid).get(entityId); Set<EntityIdValue> currentValues = _statements.get(pid).get(entityId);
@ -74,7 +75,7 @@ public class InverseConstraintScrutinizer extends StatementScrutinizer {
@Override @Override
public void batchIsFinished() { public void batchIsFinished() {
// For each pair of inverse properties (in each direction) // For each pair of inverse properties (in each direction)
for(Entry<String,String> propertyPair : _inverse.entrySet()) { for(Entry<PropertyIdValue,PropertyIdValue> propertyPair : _inverse.entrySet()) {
// Get the statements made for the first // Get the statements made for the first
for(Entry<EntityIdValue, Set<EntityIdValue>> itemLinks : _statements.get(propertyPair.getKey()).entrySet()) { for(Entry<EntityIdValue, Set<EntityIdValue>> itemLinks : _statements.get(propertyPair.getKey()).entrySet()) {
// For each outgoing link // For each outgoing link

View File

@ -8,6 +8,7 @@ import java.util.Set;
import org.openrefine.wikidata.qa.ConstraintFetcher; import org.openrefine.wikidata.qa.ConstraintFetcher;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue; import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;
import org.wikidata.wdtk.datamodel.interfaces.Reference; import org.wikidata.wdtk.datamodel.interfaces.Reference;
import org.wikidata.wdtk.datamodel.interfaces.Snak; import org.wikidata.wdtk.datamodel.interfaces.Snak;
import org.wikidata.wdtk.datamodel.interfaces.Statement; import org.wikidata.wdtk.datamodel.interfaces.Statement;
@ -21,8 +22,8 @@ public class RestrictedPositionScrutinizer extends StatementScrutinizer {
REFERENCE REFERENCE
} }
private Map<String, SnakPosition> _restrictedPids; private Map<PropertyIdValue, SnakPosition> _restrictedPids;
private Set<String> _unrestrictedPids; private Set<PropertyIdValue> _unrestrictedPids;
private ConstraintFetcher _fetcher; private ConstraintFetcher _fetcher;
public RestrictedPositionScrutinizer() { public RestrictedPositionScrutinizer() {
@ -31,7 +32,7 @@ public class RestrictedPositionScrutinizer extends StatementScrutinizer {
_fetcher = new ConstraintFetcher(); _fetcher = new ConstraintFetcher();
} }
SnakPosition positionRestriction(String pid) { SnakPosition positionRestriction(PropertyIdValue pid) {
if(_unrestrictedPids.contains(pid)) { if(_unrestrictedPids.contains(pid)) {
return null; return null;
} }
@ -79,7 +80,7 @@ public class RestrictedPositionScrutinizer extends StatementScrutinizer {
} }
public void scrutinize(Snak snak, EntityIdValue entityId, SnakPosition position, boolean added) { public void scrutinize(Snak snak, EntityIdValue entityId, SnakPosition position, boolean added) {
SnakPosition restriction = positionRestriction(snak.getPropertyId().getId()); SnakPosition restriction = positionRestriction(snak.getPropertyId());
if (restriction != null && position != restriction) { if (restriction != null && position != restriction) {
String positionStr = position.toString().toLowerCase(); String positionStr = position.toString().toLowerCase();
String restrictionStr = restriction.toString().toLowerCase(); String restrictionStr = restriction.toString().toLowerCase();

View File

@ -4,6 +4,7 @@ import java.util.concurrent.TimeUnit;
import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument; import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.wikibaseapi.ApiConnection; import org.wikidata.wdtk.wikibaseapi.ApiConnection;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher; import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
@ -14,7 +15,7 @@ import com.google.common.cache.LoadingCache;
public class EntityCache { public class EntityCache {
private static EntityCache _entityCache = new EntityCache(); private static EntityCache _entityCache = new EntityCache();
private LoadingCache<String, EntityDocument> _cache; private LoadingCache<EntityIdValue, EntityDocument> _cache;
private WikibaseDataFetcher _fetcher; private WikibaseDataFetcher _fetcher;
@ -26,15 +27,15 @@ public class EntityCache {
.maximumSize(4096) .maximumSize(4096)
.expireAfterWrite(1, TimeUnit.HOURS) .expireAfterWrite(1, TimeUnit.HOURS)
.build( .build(
new CacheLoader<String, EntityDocument>() { new CacheLoader<EntityIdValue, EntityDocument>() {
public EntityDocument load(String entityId) throws Exception { public EntityDocument load(EntityIdValue entityId) throws Exception {
EntityDocument doc = _fetcher.getEntityDocument(entityId); EntityDocument doc = _fetcher.getEntityDocument(entityId.getId());
return doc; return doc;
} }
}); });
} }
public static EntityDocument getEntityDocument(String qid) { public static EntityDocument getEntityDocument(EntityIdValue qid) {
return _entityCache._cache.apply(qid); return _entityCache._cache.apply(qid);
} }
} }