Refactor and test QS value printing
This commit is contained in:
parent
1837926cb1
commit
b48c78a617
@ -0,0 +1,129 @@
|
||||
package org.openrefine.wikidata.exporters;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
|
||||
|
||||
/**
|
||||
* Prints a Wikibase value as a string as required by QuickStatements.
|
||||
* Format documentation:
|
||||
* https://www.wikidata.org/wiki/Help:QuickStatements
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class QSValuePrinter implements ValueVisitor<String> {
|
||||
|
||||
private final ReconEntityIdValue lastCreatedEntityIdValue;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* Creates a printer for a context where no entity was previously
|
||||
* created with the "CREATE" command. Any new entity id will not
|
||||
* be printed.
|
||||
*/
|
||||
public QSValuePrinter() {
|
||||
lastCreatedEntityIdValue = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a printer for a context where an entity was previously
|
||||
* created with the "CREATE" command. If this id is encountered,
|
||||
* it will be printed as "LAST".
|
||||
*
|
||||
* @param lastCreatedEntityIdValue
|
||||
* the virtual id of the last created entity
|
||||
*/
|
||||
public QSValuePrinter(ReconEntityIdValue lastCreatedEntityIdValue) {
|
||||
this.lastCreatedEntityIdValue = lastCreatedEntityIdValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(DatatypeIdValue value) {
|
||||
// unsupported according to
|
||||
// https://tools.wmflabs.org/wikidata-todo/quick_statements.php?
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(EntityIdValue value) {
|
||||
if (lastCreatedEntityIdValue != null && lastCreatedEntityIdValue.equals(value)) {
|
||||
return "LAST";
|
||||
} else if (ReconEntityIdValue.class.isInstance(value)) {
|
||||
// oops, we are trying to print another newly created entity (not the last one)
|
||||
return null;
|
||||
}
|
||||
return value.getId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(GlobeCoordinatesValue value) {
|
||||
return String.format(
|
||||
Locale.US,
|
||||
"@%f/%f",
|
||||
value.getLatitude(),
|
||||
value.getLongitude());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(MonolingualTextValue value) {
|
||||
return String.format(
|
||||
"%s:\"%s\"",
|
||||
value.getLanguageCode(),
|
||||
value.getText());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(QuantityValue value) {
|
||||
String unitPrefix = "http://www.wikidata.org/entity/Q";
|
||||
String unitIri = value.getUnit();
|
||||
String unitRepresentation = "", boundsRepresentation = "";
|
||||
if (!unitIri.isEmpty()) {
|
||||
if (!unitIri.startsWith(unitPrefix))
|
||||
return null; // QuickStatements only accepts Qids as units
|
||||
unitRepresentation = "U"+unitIri.substring(unitPrefix.length());
|
||||
}
|
||||
if (value.getLowerBound() != null) {
|
||||
// bounds are always null at the same time so we know they are both not null
|
||||
BigDecimal lowerBound = value.getLowerBound();
|
||||
BigDecimal upperBound = value.getUpperBound();
|
||||
boundsRepresentation = String.format(Locale.US, "[%s,%s]",
|
||||
lowerBound.toString(), upperBound.toString());
|
||||
}
|
||||
return String.format(
|
||||
Locale.US,
|
||||
"%s%s%s",
|
||||
value.getNumericValue().toString(),
|
||||
boundsRepresentation,
|
||||
unitRepresentation);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(StringValue value) {
|
||||
return "\"" + value.getString() + "\"";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(TimeValue value) {
|
||||
return String.format(
|
||||
"+%04d-%02d-%02dT%02d:%02d:%02dZ/%d",
|
||||
value.getYear(),
|
||||
value.getMonth(),
|
||||
value.getDay(),
|
||||
value.getHour(),
|
||||
value.getMinute(),
|
||||
value.getSecond(),
|
||||
value.getPrecision());
|
||||
}
|
||||
}
|
@ -2,6 +2,7 @@ package org.openrefine.wikidata.exporters;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.math.BigDecimal;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Properties;
|
||||
@ -54,10 +55,15 @@ public class QuickStatementsExporter implements WriterExporter {
|
||||
|
||||
/**
|
||||
* Exports a project and a schema to a QuickStatements file
|
||||
* @param project: the project to translate
|
||||
* @param engine: the engine used for evaluation of the edits
|
||||
* @param schema: the WikibaseSchema used for translation of tabular data to edits
|
||||
* @param writer: the writer to which the QS should be written
|
||||
*
|
||||
* @param project
|
||||
* the project to translate
|
||||
* @param engine
|
||||
* the engine used for evaluation of the edits
|
||||
* @param schema
|
||||
* the WikibaseSchema used for translation of tabular data to edits
|
||||
* @param writer
|
||||
* the writer to which the QS should be written
|
||||
* @throws IOException
|
||||
*/
|
||||
public void translateSchema(Project project, Engine engine, WikibaseSchema schema, Writer writer) throws IOException {
|
||||
@ -106,7 +112,7 @@ public class QuickStatementsExporter implements WriterExporter {
|
||||
Claim claim = statement.getClaim();
|
||||
|
||||
Value val = claim.getValue();
|
||||
ValueVisitor<String> vv = new ValuePrinter();
|
||||
ValueVisitor<String> vv = new QSValuePrinter();
|
||||
String targetValue = val.accept(vv);
|
||||
if (targetValue != null) {
|
||||
if (! add) {
|
||||
@ -138,80 +144,12 @@ public class QuickStatementsExporter implements WriterExporter {
|
||||
pid = pid.replace('P', 'S');
|
||||
}
|
||||
Value val = s.getValue();
|
||||
ValueVisitor<String> vv = new ValuePrinter();
|
||||
ValueVisitor<String> vv = new QSValuePrinter();
|
||||
String valStr = val.accept(vv);
|
||||
if(valStr != null) {
|
||||
writer.write("\t" + pid + "\t" + valStr);
|
||||
}
|
||||
}
|
||||
|
||||
class ValuePrinter implements ValueVisitor<String> {
|
||||
|
||||
@Override
|
||||
public String visit(DatatypeIdValue value) {
|
||||
// unsupported according to
|
||||
// https://tools.wmflabs.org/wikidata-todo/quick_statements.php?
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(EntityIdValue value) {
|
||||
if (value.equals(ItemIdValue.NULL)) {
|
||||
return null;
|
||||
}
|
||||
return value.getId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(GlobeCoordinatesValue value) {
|
||||
return String.format(
|
||||
Locale.US,
|
||||
"@%f/%f",
|
||||
value.getLatitude(),
|
||||
value.getLongitude());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(MonolingualTextValue value) {
|
||||
return String.format(
|
||||
"%s:\"%s\"",
|
||||
value.getLanguageCode(),
|
||||
value.getText());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(QuantityValue value) {
|
||||
String unitPrefix = "http://www.wikidata.org/entity/Q";
|
||||
String unit = value.getUnit();
|
||||
if (!unit.startsWith(unitPrefix))
|
||||
return null; // QuickStatements only accepts Qids as units
|
||||
// TODO test this for values without bounds
|
||||
String unitID = "U"+unit.substring(unitPrefix.length());
|
||||
return String.format(
|
||||
Locale.US,
|
||||
"[%f,%f]%s",
|
||||
value.getLowerBound(),
|
||||
value.getUpperBound(),
|
||||
unitID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(StringValue value) {
|
||||
return "\"" + value.getString() + "\"";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(TimeValue value) {
|
||||
return String.format(
|
||||
"+%04d-%02d-%02dT%02d:%02d:%02dZ/%d",
|
||||
value.getYear(),
|
||||
value.getMonth(),
|
||||
value.getDay(),
|
||||
value.getHour(),
|
||||
value.getMinute(),
|
||||
value.getSecond(),
|
||||
value.getPrecision());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ import com.google.refine.model.Recon;
|
||||
*
|
||||
* Storing the types also lets us perform some constraint checks
|
||||
* without re-fetching the types of many items.
|
||||
*
|
||||
* @author antonin
|
||||
*
|
||||
*/
|
||||
@ -116,7 +117,7 @@ public abstract class ReconEntityIdValue implements PrefetchedEntityIdValue {
|
||||
// This ensures compliance with OR's notion of new items
|
||||
// (it is possible that two cells are reconciled to the same
|
||||
// new item, in which case they share the same internal recon id).
|
||||
return getRecon().id == reconOther.getRecon().id;
|
||||
return getRecon().judgmentHistoryEntry == reconOther.getRecon().judgmentHistoryEntry;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,120 @@
|
||||
package org.openrefine.wikidata.exporters;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
||||
import org.openrefine.wikidata.testing.TestingDataGenerator;
|
||||
import org.testng.annotations.Test;
|
||||
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.Value;
|
||||
|
||||
import com.google.refine.model.Recon;
|
||||
import com.google.refine.model.ReconCandidate;
|
||||
|
||||
public class QSValuePrinterTest {
|
||||
|
||||
private QSValuePrinter printer;
|
||||
|
||||
public QSValuePrinterTest() {
|
||||
printer = new QSValuePrinter();
|
||||
}
|
||||
|
||||
void assertPrints(String expectedFormat, Value datavalue) {
|
||||
assertEquals(expectedFormat, datavalue.accept(printer));
|
||||
}
|
||||
|
||||
// Entity id values
|
||||
|
||||
@Test
|
||||
public void printItemId() {
|
||||
assertPrints("Q42", Datamodel.makeWikidataItemIdValue("Q42"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void printPropertyId() {
|
||||
assertPrints("P42", Datamodel.makeWikidataPropertyIdValue("P42"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void printNewItemId() {
|
||||
ReconEntityIdValue id = TestingDataGenerator.makeNewItemIdValue(12345L, "my new item");
|
||||
assertNull(id.accept(printer));
|
||||
// because no entity was previously created
|
||||
|
||||
QSValuePrinter printerAfterCreate = new QSValuePrinter(id);
|
||||
ReconEntityIdValue equalId = TestingDataGenerator.makeNewItemIdValue(12345L, "my other new item");
|
||||
assertEquals("LAST", printerAfterCreate.visit(equalId));
|
||||
|
||||
ReconEntityIdValue differentId = TestingDataGenerator.makeNewItemIdValue(34567L, "my new item");
|
||||
assertNull(printerAfterCreate.visit(differentId));
|
||||
}
|
||||
|
||||
// Globe coordinates
|
||||
|
||||
@Test
|
||||
public void printGlobeCoordinate() {
|
||||
// I don't see how to avoid the trailing zeros - in any case it's not a big deal because
|
||||
// the precision is governed by a different parameter that QuickStatements does not support.
|
||||
assertPrints("@43.261930/10.927080", Datamodel.makeGlobeCoordinatesValue(43.26193, 10.92708,
|
||||
GlobeCoordinatesValue.PREC_DEGREE, GlobeCoordinatesValue.GLOBE_EARTH));
|
||||
}
|
||||
|
||||
// Monolingual text values
|
||||
|
||||
@Test
|
||||
public void printMonolingualTextValue() {
|
||||
assertPrints("pl:\"Krzyżacy\"", Datamodel.makeMonolingualTextValue("Krzyżacy", "pl"));
|
||||
}
|
||||
|
||||
// Quantity values
|
||||
|
||||
@Test
|
||||
public void printSimpleQuantityValue() {
|
||||
assertPrints("10.00", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
|
||||
null, null, "1"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void printQuantityValueWithUnit() {
|
||||
assertPrints("10.00U11573", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
|
||||
null, null, "http://www.wikidata.org/entity/Q11573"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void printQuantityValueWithBounds() {
|
||||
assertPrints("10.00[9.0,11.05]", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
|
||||
new BigDecimal("9.0"), new BigDecimal("11.05"), "1"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void printFullQuantity() {
|
||||
assertPrints("10.00[9.0,11.05]U11573", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
|
||||
new BigDecimal("9.0"), new BigDecimal("11.05"), "http://www.wikidata.org/entity/Q11573"));
|
||||
}
|
||||
|
||||
// String values
|
||||
|
||||
@Test
|
||||
public void printString() {
|
||||
assertPrints("\"hello\"", Datamodel.makeStringValue("hello"));
|
||||
}
|
||||
|
||||
// Time values
|
||||
|
||||
@Test
|
||||
public void printYear() {
|
||||
assertPrints("+1586-00-00T00:00:00Z/9", Datamodel.makeTimeValue(1586L, (byte)0, (byte)0, (byte)0,
|
||||
(byte)0, (byte)0, (byte)9, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void printDay() {
|
||||
assertPrints("+1586-03-09T00:00:00Z/11", Datamodel.makeTimeValue(1586L, (byte)3, (byte)9, (byte)0,
|
||||
(byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO));
|
||||
}
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
package org.openrefine.wikidata.testing;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
|
||||
import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
|
||||
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
|
||||
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Recon;
|
||||
import com.google.refine.model.ReconCandidate;
|
||||
|
||||
public class TestingDataGenerator {
|
||||
|
||||
public static Recon makeNewItemRecon(long judgementId) {
|
||||
Recon recon = Recon.makeWikidataRecon(judgementId);
|
||||
recon.judgment = Recon.Judgment.New;
|
||||
return recon;
|
||||
}
|
||||
|
||||
public static Recon makeMatchedRecon(String qid, String name) {
|
||||
Recon recon = Recon.makeWikidataRecon(123456L);
|
||||
recon.match = new ReconCandidate(qid, name, new String[0], 100.0);
|
||||
recon.candidates = Collections.singletonList(recon.match);
|
||||
recon.judgment = Recon.Judgment.Matched;
|
||||
return recon;
|
||||
}
|
||||
|
||||
public static Cell makeNewItemCell(long judgementId, String name) {
|
||||
return new Cell(name, makeNewItemRecon(judgementId));
|
||||
}
|
||||
|
||||
public static Cell makeMatchedCell(String qid, String name) {
|
||||
return new Cell(name, makeMatchedRecon(qid, name));
|
||||
}
|
||||
|
||||
public static ReconEntityIdValue makeNewItemIdValue(long judgementId, String name) {
|
||||
return new ReconItemIdValue(makeNewItemRecon(judgementId), name);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user