Refactor and test QS value printing
This commit is contained in:
parent
1837926cb1
commit
b48c78a617
@ -0,0 +1,129 @@
|
|||||||
|
package org.openrefine.wikidata.exporters;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prints a Wikibase value as a string as required by QuickStatements.
|
||||||
|
* Format documentation:
|
||||||
|
* https://www.wikidata.org/wiki/Help:QuickStatements
|
||||||
|
*
|
||||||
|
* @author Antonin Delpeuch
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class QSValuePrinter implements ValueVisitor<String> {
|
||||||
|
|
||||||
|
private final ReconEntityIdValue lastCreatedEntityIdValue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
*
|
||||||
|
* Creates a printer for a context where no entity was previously
|
||||||
|
* created with the "CREATE" command. Any new entity id will not
|
||||||
|
* be printed.
|
||||||
|
*/
|
||||||
|
public QSValuePrinter() {
|
||||||
|
lastCreatedEntityIdValue = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a printer for a context where an entity was previously
|
||||||
|
* created with the "CREATE" command. If this id is encountered,
|
||||||
|
* it will be printed as "LAST".
|
||||||
|
*
|
||||||
|
* @param lastCreatedEntityIdValue
|
||||||
|
* the virtual id of the last created entity
|
||||||
|
*/
|
||||||
|
public QSValuePrinter(ReconEntityIdValue lastCreatedEntityIdValue) {
|
||||||
|
this.lastCreatedEntityIdValue = lastCreatedEntityIdValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String visit(DatatypeIdValue value) {
|
||||||
|
// unsupported according to
|
||||||
|
// https://tools.wmflabs.org/wikidata-todo/quick_statements.php?
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String visit(EntityIdValue value) {
|
||||||
|
if (lastCreatedEntityIdValue != null && lastCreatedEntityIdValue.equals(value)) {
|
||||||
|
return "LAST";
|
||||||
|
} else if (ReconEntityIdValue.class.isInstance(value)) {
|
||||||
|
// oops, we are trying to print another newly created entity (not the last one)
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return value.getId();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String visit(GlobeCoordinatesValue value) {
|
||||||
|
return String.format(
|
||||||
|
Locale.US,
|
||||||
|
"@%f/%f",
|
||||||
|
value.getLatitude(),
|
||||||
|
value.getLongitude());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String visit(MonolingualTextValue value) {
|
||||||
|
return String.format(
|
||||||
|
"%s:\"%s\"",
|
||||||
|
value.getLanguageCode(),
|
||||||
|
value.getText());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String visit(QuantityValue value) {
|
||||||
|
String unitPrefix = "http://www.wikidata.org/entity/Q";
|
||||||
|
String unitIri = value.getUnit();
|
||||||
|
String unitRepresentation = "", boundsRepresentation = "";
|
||||||
|
if (!unitIri.isEmpty()) {
|
||||||
|
if (!unitIri.startsWith(unitPrefix))
|
||||||
|
return null; // QuickStatements only accepts Qids as units
|
||||||
|
unitRepresentation = "U"+unitIri.substring(unitPrefix.length());
|
||||||
|
}
|
||||||
|
if (value.getLowerBound() != null) {
|
||||||
|
// bounds are always null at the same time so we know they are both not null
|
||||||
|
BigDecimal lowerBound = value.getLowerBound();
|
||||||
|
BigDecimal upperBound = value.getUpperBound();
|
||||||
|
boundsRepresentation = String.format(Locale.US, "[%s,%s]",
|
||||||
|
lowerBound.toString(), upperBound.toString());
|
||||||
|
}
|
||||||
|
return String.format(
|
||||||
|
Locale.US,
|
||||||
|
"%s%s%s",
|
||||||
|
value.getNumericValue().toString(),
|
||||||
|
boundsRepresentation,
|
||||||
|
unitRepresentation);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String visit(StringValue value) {
|
||||||
|
return "\"" + value.getString() + "\"";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String visit(TimeValue value) {
|
||||||
|
return String.format(
|
||||||
|
"+%04d-%02d-%02dT%02d:%02d:%02dZ/%d",
|
||||||
|
value.getYear(),
|
||||||
|
value.getMonth(),
|
||||||
|
value.getDay(),
|
||||||
|
value.getHour(),
|
||||||
|
value.getMinute(),
|
||||||
|
value.getSecond(),
|
||||||
|
value.getPrecision());
|
||||||
|
}
|
||||||
|
}
|
@ -2,6 +2,7 @@ package org.openrefine.wikidata.exporters;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Writer;
|
import java.io.Writer;
|
||||||
|
import java.math.BigDecimal;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
@ -54,10 +55,15 @@ public class QuickStatementsExporter implements WriterExporter {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Exports a project and a schema to a QuickStatements file
|
* Exports a project and a schema to a QuickStatements file
|
||||||
* @param project: the project to translate
|
*
|
||||||
* @param engine: the engine used for evaluation of the edits
|
* @param project
|
||||||
* @param schema: the WikibaseSchema used for translation of tabular data to edits
|
* the project to translate
|
||||||
* @param writer: the writer to which the QS should be written
|
* @param engine
|
||||||
|
* the engine used for evaluation of the edits
|
||||||
|
* @param schema
|
||||||
|
* the WikibaseSchema used for translation of tabular data to edits
|
||||||
|
* @param writer
|
||||||
|
* the writer to which the QS should be written
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public void translateSchema(Project project, Engine engine, WikibaseSchema schema, Writer writer) throws IOException {
|
public void translateSchema(Project project, Engine engine, WikibaseSchema schema, Writer writer) throws IOException {
|
||||||
@ -106,7 +112,7 @@ public class QuickStatementsExporter implements WriterExporter {
|
|||||||
Claim claim = statement.getClaim();
|
Claim claim = statement.getClaim();
|
||||||
|
|
||||||
Value val = claim.getValue();
|
Value val = claim.getValue();
|
||||||
ValueVisitor<String> vv = new ValuePrinter();
|
ValueVisitor<String> vv = new QSValuePrinter();
|
||||||
String targetValue = val.accept(vv);
|
String targetValue = val.accept(vv);
|
||||||
if (targetValue != null) {
|
if (targetValue != null) {
|
||||||
if (! add) {
|
if (! add) {
|
||||||
@ -138,80 +144,12 @@ public class QuickStatementsExporter implements WriterExporter {
|
|||||||
pid = pid.replace('P', 'S');
|
pid = pid.replace('P', 'S');
|
||||||
}
|
}
|
||||||
Value val = s.getValue();
|
Value val = s.getValue();
|
||||||
ValueVisitor<String> vv = new ValuePrinter();
|
ValueVisitor<String> vv = new QSValuePrinter();
|
||||||
String valStr = val.accept(vv);
|
String valStr = val.accept(vv);
|
||||||
if(valStr != null) {
|
if(valStr != null) {
|
||||||
writer.write("\t" + pid + "\t" + valStr);
|
writer.write("\t" + pid + "\t" + valStr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class ValuePrinter implements ValueVisitor<String> {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String visit(DatatypeIdValue value) {
|
|
||||||
// unsupported according to
|
|
||||||
// https://tools.wmflabs.org/wikidata-todo/quick_statements.php?
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String visit(EntityIdValue value) {
|
|
||||||
if (value.equals(ItemIdValue.NULL)) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return value.getId();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String visit(GlobeCoordinatesValue value) {
|
|
||||||
return String.format(
|
|
||||||
Locale.US,
|
|
||||||
"@%f/%f",
|
|
||||||
value.getLatitude(),
|
|
||||||
value.getLongitude());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String visit(MonolingualTextValue value) {
|
|
||||||
return String.format(
|
|
||||||
"%s:\"%s\"",
|
|
||||||
value.getLanguageCode(),
|
|
||||||
value.getText());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String visit(QuantityValue value) {
|
|
||||||
String unitPrefix = "http://www.wikidata.org/entity/Q";
|
|
||||||
String unit = value.getUnit();
|
|
||||||
if (!unit.startsWith(unitPrefix))
|
|
||||||
return null; // QuickStatements only accepts Qids as units
|
|
||||||
// TODO test this for values without bounds
|
|
||||||
String unitID = "U"+unit.substring(unitPrefix.length());
|
|
||||||
return String.format(
|
|
||||||
Locale.US,
|
|
||||||
"[%f,%f]%s",
|
|
||||||
value.getLowerBound(),
|
|
||||||
value.getUpperBound(),
|
|
||||||
unitID);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String visit(StringValue value) {
|
|
||||||
return "\"" + value.getString() + "\"";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String visit(TimeValue value) {
|
|
||||||
return String.format(
|
|
||||||
"+%04d-%02d-%02dT%02d:%02d:%02dZ/%d",
|
|
||||||
value.getYear(),
|
|
||||||
value.getMonth(),
|
|
||||||
value.getDay(),
|
|
||||||
value.getHour(),
|
|
||||||
value.getMinute(),
|
|
||||||
value.getSecond(),
|
|
||||||
value.getPrecision());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@ import com.google.refine.model.Recon;
|
|||||||
*
|
*
|
||||||
* Storing the types also lets us perform some constraint checks
|
* Storing the types also lets us perform some constraint checks
|
||||||
* without re-fetching the types of many items.
|
* without re-fetching the types of many items.
|
||||||
|
*
|
||||||
* @author antonin
|
* @author antonin
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@ -116,7 +117,7 @@ public abstract class ReconEntityIdValue implements PrefetchedEntityIdValue {
|
|||||||
// This ensures compliance with OR's notion of new items
|
// This ensures compliance with OR's notion of new items
|
||||||
// (it is possible that two cells are reconciled to the same
|
// (it is possible that two cells are reconciled to the same
|
||||||
// new item, in which case they share the same internal recon id).
|
// new item, in which case they share the same internal recon id).
|
||||||
return getRecon().id == reconOther.getRecon().id;
|
return getRecon().judgmentHistoryEntry == reconOther.getRecon().judgmentHistoryEntry;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,120 @@
|
|||||||
|
package org.openrefine.wikidata.exporters;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertNull;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
||||||
|
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||||
|
|
||||||
|
import com.google.refine.model.Recon;
|
||||||
|
import com.google.refine.model.ReconCandidate;
|
||||||
|
|
||||||
|
public class QSValuePrinterTest {
|
||||||
|
|
||||||
|
private QSValuePrinter printer;
|
||||||
|
|
||||||
|
public QSValuePrinterTest() {
|
||||||
|
printer = new QSValuePrinter();
|
||||||
|
}
|
||||||
|
|
||||||
|
void assertPrints(String expectedFormat, Value datavalue) {
|
||||||
|
assertEquals(expectedFormat, datavalue.accept(printer));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entity id values
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printItemId() {
|
||||||
|
assertPrints("Q42", Datamodel.makeWikidataItemIdValue("Q42"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printPropertyId() {
|
||||||
|
assertPrints("P42", Datamodel.makeWikidataPropertyIdValue("P42"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printNewItemId() {
|
||||||
|
ReconEntityIdValue id = TestingDataGenerator.makeNewItemIdValue(12345L, "my new item");
|
||||||
|
assertNull(id.accept(printer));
|
||||||
|
// because no entity was previously created
|
||||||
|
|
||||||
|
QSValuePrinter printerAfterCreate = new QSValuePrinter(id);
|
||||||
|
ReconEntityIdValue equalId = TestingDataGenerator.makeNewItemIdValue(12345L, "my other new item");
|
||||||
|
assertEquals("LAST", printerAfterCreate.visit(equalId));
|
||||||
|
|
||||||
|
ReconEntityIdValue differentId = TestingDataGenerator.makeNewItemIdValue(34567L, "my new item");
|
||||||
|
assertNull(printerAfterCreate.visit(differentId));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Globe coordinates
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printGlobeCoordinate() {
|
||||||
|
// I don't see how to avoid the trailing zeros - in any case it's not a big deal because
|
||||||
|
// the precision is governed by a different parameter that QuickStatements does not support.
|
||||||
|
assertPrints("@43.261930/10.927080", Datamodel.makeGlobeCoordinatesValue(43.26193, 10.92708,
|
||||||
|
GlobeCoordinatesValue.PREC_DEGREE, GlobeCoordinatesValue.GLOBE_EARTH));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Monolingual text values
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printMonolingualTextValue() {
|
||||||
|
assertPrints("pl:\"Krzyżacy\"", Datamodel.makeMonolingualTextValue("Krzyżacy", "pl"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quantity values
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printSimpleQuantityValue() {
|
||||||
|
assertPrints("10.00", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
|
||||||
|
null, null, "1"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printQuantityValueWithUnit() {
|
||||||
|
assertPrints("10.00U11573", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
|
||||||
|
null, null, "http://www.wikidata.org/entity/Q11573"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printQuantityValueWithBounds() {
|
||||||
|
assertPrints("10.00[9.0,11.05]", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
|
||||||
|
new BigDecimal("9.0"), new BigDecimal("11.05"), "1"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printFullQuantity() {
|
||||||
|
assertPrints("10.00[9.0,11.05]U11573", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
|
||||||
|
new BigDecimal("9.0"), new BigDecimal("11.05"), "http://www.wikidata.org/entity/Q11573"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// String values
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printString() {
|
||||||
|
assertPrints("\"hello\"", Datamodel.makeStringValue("hello"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Time values
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printYear() {
|
||||||
|
assertPrints("+1586-00-00T00:00:00Z/9", Datamodel.makeTimeValue(1586L, (byte)0, (byte)0, (byte)0,
|
||||||
|
(byte)0, (byte)0, (byte)9, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void printDay() {
|
||||||
|
assertPrints("+1586-03-09T00:00:00Z/11", Datamodel.makeTimeValue(1586L, (byte)3, (byte)9, (byte)0,
|
||||||
|
(byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,40 @@
|
|||||||
|
package org.openrefine.wikidata.testing;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
||||||
|
import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
|
||||||
|
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||||
|
|
||||||
|
import com.google.refine.model.Cell;
|
||||||
|
import com.google.refine.model.Recon;
|
||||||
|
import com.google.refine.model.ReconCandidate;
|
||||||
|
|
||||||
|
public class TestingDataGenerator {
|
||||||
|
|
||||||
|
public static Recon makeNewItemRecon(long judgementId) {
|
||||||
|
Recon recon = Recon.makeWikidataRecon(judgementId);
|
||||||
|
recon.judgment = Recon.Judgment.New;
|
||||||
|
return recon;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Recon makeMatchedRecon(String qid, String name) {
|
||||||
|
Recon recon = Recon.makeWikidataRecon(123456L);
|
||||||
|
recon.match = new ReconCandidate(qid, name, new String[0], 100.0);
|
||||||
|
recon.candidates = Collections.singletonList(recon.match);
|
||||||
|
recon.judgment = Recon.Judgment.Matched;
|
||||||
|
return recon;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Cell makeNewItemCell(long judgementId, String name) {
|
||||||
|
return new Cell(name, makeNewItemRecon(judgementId));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Cell makeMatchedCell(String qid, String name) {
|
||||||
|
return new Cell(name, makeMatchedRecon(qid, name));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ReconEntityIdValue makeNewItemIdValue(long judgementId, String name) {
|
||||||
|
return new ReconItemIdValue(makeNewItemRecon(judgementId), name);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user