Add cached constraint fetcher and tests
This commit is contained in:
parent
508e9d22ad
commit
d347e5091f
@ -0,0 +1,131 @@
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.openrefine.wikidata.utils.EntityCache;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.PropertyDocument;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Snak;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.SnakGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Statement;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StatementGroup;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
/**
|
||||
* This class provides an abstraction over the way constraint
|
||||
* definitions are stored in Wikidata.
|
||||
*
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
public class ConstraintFetcher {
|
||||
public static String WIKIDATA_CONSTRAINT_PID = "P2302";
|
||||
|
||||
public static String FORMAT_CONSTRAINT_QID = "Q21502404";
|
||||
public static String FORMAT_REGEX_PID = "P1793";
|
||||
|
||||
public static String INVERSE_CONSTRAINT_QID = "Q21510855";
|
||||
public static String INVERSE_PROPERTY_PID = "P2306";
|
||||
|
||||
public static String SINGLE_VALUE_CONSRAINT_QID = "Q19474404";
|
||||
public static String DISTINCT_VALUES_CONSRAINT_QID = "Q21502410";
|
||||
public static String TYPE_CONSTRAINT_QID = "Q21503250";
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the regular expression for formatting a property, or null if
|
||||
* there is no such constraint
|
||||
* @param pid
|
||||
* @return the expression of a regular expression which should be compatible with java.util.regex
|
||||
*/
|
||||
public String getFormatRegex(String pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, FORMAT_CONSTRAINT_QID);
|
||||
if (specs != null) {
|
||||
List<Value> regexes = findValues(specs, FORMAT_REGEX_PID);
|
||||
if (! regexes.isEmpty()) {
|
||||
return ((StringValue)regexes.get(0)).getString();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the property that is the inverse of a given property
|
||||
* @param pid: the property to retrieve the inverse for
|
||||
* @return the pid of the inverse property
|
||||
*/
|
||||
public String getInversePid(String pid) {
|
||||
List<SnakGroup> specs = getSingleConstraint(pid, INVERSE_CONSTRAINT_QID);
|
||||
|
||||
if(specs != null) {
|
||||
List<Value> inverses = findValues(specs, INVERSE_PROPERTY_PID);
|
||||
if (! inverses.isEmpty()) {
|
||||
return ((EntityIdValue)inverses.get(0)).getId();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a single constraint for a particular type and a property, or null
|
||||
* if there is no such constraint
|
||||
* @param pid: the property to retrieve the constraints for
|
||||
* @param qid: the type of the constraints
|
||||
* @return the list of qualifiers for the constraint, or null if it does not exist
|
||||
*/
|
||||
protected List<SnakGroup> getSingleConstraint(String pid, String qid) {
|
||||
Statement statement = getConstraintsByType(pid, qid).findFirst().orElse(null);
|
||||
if (statement != null) {
|
||||
return statement.getClaim().getQualifiers();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the list of constraints of a particular type for a property
|
||||
* @param pid: the property to retrieve the constraints for
|
||||
* @param qid: the type of the constraints
|
||||
* @return the stream of matching constraint statements
|
||||
*/
|
||||
protected Stream<Statement> getConstraintsByType(String pid, String qid) {
|
||||
Stream<Statement> allConstraints = getConstraintStatements(pid)
|
||||
.stream()
|
||||
.filter(s -> ((EntityIdValue) s.getValue()).getId().equals(qid));
|
||||
return allConstraints;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all the constraint statements for a given property
|
||||
* @param pid : the id of the property to retrieve the constraints for
|
||||
* @return the list of constraint statements
|
||||
*/
|
||||
protected List<Statement> getConstraintStatements(String pid) {
|
||||
PropertyDocument doc = (PropertyDocument) EntityCache.getEntityDocument(pid);
|
||||
StatementGroup group = doc.findStatementGroup(WIKIDATA_CONSTRAINT_PID);
|
||||
return group.getStatements();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the values of a given property in qualifiers
|
||||
* @param groups: the qualifiers
|
||||
* @param pid: the property to filter on
|
||||
* @return
|
||||
*/
|
||||
protected List<Value> findValues(List<SnakGroup> groups, String pid) {
|
||||
List<Value> results = new ArrayList<>();
|
||||
for(SnakGroup group : groups) {
|
||||
if (group.getProperty().getId().equals(pid)) {
|
||||
for (Snak snak : group.getSnaks())
|
||||
results.add(snak.getValue());
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
package org.openrefine.wikidata.utils;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityDocument;
|
||||
import org.wikidata.wdtk.wikibaseapi.ApiConnection;
|
||||
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;
|
||||
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.cache.CacheLoader;
|
||||
import com.google.common.cache.LoadingCache;
|
||||
|
||||
public class EntityCache {
|
||||
private static EntityCache _entityCache = new EntityCache();
|
||||
|
||||
private LoadingCache<String, EntityDocument> _cache;
|
||||
private WikibaseDataFetcher _fetcher;
|
||||
|
||||
|
||||
private EntityCache() {
|
||||
ApiConnection connection = ApiConnection.getWikidataApiConnection();
|
||||
_fetcher = new WikibaseDataFetcher(connection, Datamodel.SITE_WIKIDATA);
|
||||
|
||||
_cache = CacheBuilder.newBuilder()
|
||||
.maximumSize(4096)
|
||||
.expireAfterWrite(1, TimeUnit.HOURS)
|
||||
.build(
|
||||
new CacheLoader<String, EntityDocument>() {
|
||||
public EntityDocument load(String entityId) throws Exception {
|
||||
EntityDocument doc = _fetcher.getEntityDocument(entityId);
|
||||
return doc;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static EntityDocument getEntityDocument(String qid) {
|
||||
return _entityCache._cache.apply(qid);
|
||||
}
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
package org.openrefine.wikidata.qa;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class ConstraintFetcherTests {
|
||||
|
||||
private ConstraintFetcher fetcher;
|
||||
|
||||
public ConstraintFetcherTests() {
|
||||
fetcher = new ConstraintFetcher();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetFormatConstraint() {
|
||||
String regex = fetcher.getFormatRegex("P2427");
|
||||
Pattern pattern = Pattern.compile(regex);
|
||||
|
||||
Assert.assertTrue(pattern.matcher("grid.470811.b").matches());
|
||||
Assert.assertFalse(pattern.matcher("501100006367").matches());
|
||||
|
||||
Assert.assertNull(fetcher.getFormatRegex("P31"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetInverseConstraint() {
|
||||
Assert.assertEquals(fetcher.getInversePid("P361"), "P527");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user