Refactor and test QS value printing

This commit is contained in:
Antonin Delpeuch 2018-02-26 16:36:46 +00:00
parent 1837926cb1
commit b48c78a617
5 changed files with 303 additions and 75 deletions

View File

@ -0,0 +1,129 @@
package org.openrefine.wikidata.exporters;
import java.math.BigDecimal;
import java.util.Locale;
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.DatatypeIdValue;
import org.wikidata.wdtk.datamodel.interfaces.EntityIdValue;
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.MonolingualTextValue;
import org.wikidata.wdtk.datamodel.interfaces.QuantityValue;
import org.wikidata.wdtk.datamodel.interfaces.StringValue;
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
import org.wikidata.wdtk.datamodel.interfaces.ValueVisitor;
/**
* Prints a Wikibase value as a string as required by QuickStatements.
* Format documentation:
* https://www.wikidata.org/wiki/Help:QuickStatements
*
* @author Antonin Delpeuch
*
*/
public class QSValuePrinter implements ValueVisitor<String> {

    /**
     * IRI prefix of Wikidata items. QuickStatements only accepts Qids as
     * units, so any unit IRI outside of this namespace cannot be printed.
     */
    private static final String UNIT_PREFIX = "http://www.wikidata.org/entity/Q";

    /**
     * Marker used for dimensionless quantities (in addition to the empty string).
     */
    private static final String NO_UNIT = "1";

    /**
     * Virtual id of the entity created by the last "CREATE" command,
     * or null if no entity was created in the current context.
     */
    private final ReconEntityIdValue lastCreatedEntityIdValue;

    /**
     * Constructor.
     *
     * Creates a printer for a context where no entity was previously
     * created with the "CREATE" command. Any new entity id will not
     * be printed.
     */
    public QSValuePrinter() {
        lastCreatedEntityIdValue = null;
    }

    /**
     * Creates a printer for a context where an entity was previously
     * created with the "CREATE" command. If this id is encountered,
     * it will be printed as "LAST".
     *
     * @param lastCreatedEntityIdValue
     *            the virtual id of the last created entity
     */
    public QSValuePrinter(ReconEntityIdValue lastCreatedEntityIdValue) {
        this.lastCreatedEntityIdValue = lastCreatedEntityIdValue;
    }

    /**
     * Datatype ids cannot be represented.
     *
     * @return always null
     */
    @Override
    public String visit(DatatypeIdValue value) {
        // unsupported according to
        // https://tools.wmflabs.org/wikidata-todo/quick_statements.php?
        return null;
    }

    /**
     * Prints an entity id.
     *
     * @return "LAST" if the value equals the last created entity, null for
     *         any other reconciled entity value, and the plain id otherwise
     */
    @Override
    public String visit(EntityIdValue value) {
        if (lastCreatedEntityIdValue != null && lastCreatedEntityIdValue.equals(value)) {
            return "LAST";
        }
        if (value instanceof ReconEntityIdValue) {
            // oops, we are trying to print another newly created entity (not the last one)
            return null;
        }
        return value.getId();
    }

    /**
     * Prints coordinates as "@latitude/longitude", each with six decimals.
     */
    @Override
    public String visit(GlobeCoordinatesValue value) {
        return String.format(
                Locale.US,
                "@%f/%f",
                value.getLatitude(),
                value.getLongitude());
    }

    /**
     * Prints a monolingual text as the language code, a colon and the
     * double-quoted text.
     */
    @Override
    public String visit(MonolingualTextValue value) {
        return String.format(
                "%s:\"%s\"",
                value.getLanguageCode(),
                value.getText());
    }

    /**
     * Prints a quantity as its amount, followed by optional "[lower,upper]"
     * bounds and an optional "U" + numeric Qid unit suffix.
     *
     * @return the printed quantity, or null if the unit is not a Wikidata item
     */
    @Override
    public String visit(QuantityValue value) {
        String unitIri = value.getUnit();
        String unitRepresentation = "";
        boolean hasUnit = unitIri != null && !unitIri.isEmpty() && !NO_UNIT.equals(unitIri);
        if (hasUnit) {
            if (!unitIri.startsWith(UNIT_PREFIX)) {
                return null; // QuickStatements only accepts Qids as units
            }
            unitRepresentation = "U" + unitIri.substring(UNIT_PREFIX.length());
        }

        // Bounds are expected to be both null or both set; only print them
        // when both are available so that a lone bound cannot cause a failure.
        BigDecimal lowerBound = value.getLowerBound();
        BigDecimal upperBound = value.getUpperBound();
        String boundsRepresentation = "";
        if (lowerBound != null && upperBound != null) {
            boundsRepresentation = "[" + lowerBound.toPlainString() + "," + upperBound.toPlainString() + "]";
        }

        // toPlainString() never switches to scientific notation (such as
        // "1E+3"), unlike toString(), and is locale independent.
        return value.getNumericValue().toPlainString() + boundsRepresentation + unitRepresentation;
    }

    /**
     * Prints a string surrounded by double quotes.
     */
    @Override
    public String visit(StringValue value) {
        return "\"" + value.getString() + "\"";
    }

    /**
     * Prints a time value as a signed, zero-padded timestamp followed by
     * "/" and the precision, for instance "+1586-03-09T00:00:00Z/11".
     */
    @Override
    public String visit(TimeValue value) {
        long year = value.getYear();
        // The sign is printed separately: combining a hard-coded "+" with a
        // signed "%04d" would render negative years as "+-500".
        String sign = year < 0 ? "-" : "+";
        return String.format(
                Locale.US, // fixed locale so that digits are never localized
                "%s%04d-%02d-%02dT%02d:%02d:%02dZ/%d",
                sign,
                Math.abs(year),
                value.getMonth(),
                value.getDay(),
                value.getHour(),
                value.getMinute(),
                value.getSecond(),
                value.getPrecision());
    }
}

View File

@ -2,6 +2,7 @@ package org.openrefine.wikidata.exporters;
import java.io.IOException; import java.io.IOException;
import java.io.Writer; import java.io.Writer;
import java.math.BigDecimal;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Properties; import java.util.Properties;
@ -54,10 +55,15 @@ public class QuickStatementsExporter implements WriterExporter {
/** /**
* Exports a project and a schema to a QuickStatements file * Exports a project and a schema to a QuickStatements file
* @param project: the project to translate *
* @param engine: the engine used for evaluation of the edits * @param project
* @param schema: the WikibaseSchema used for translation of tabular data to edits * the project to translate
* @param writer: the writer to which the QS should be written * @param engine
* the engine used for evaluation of the edits
* @param schema
* the WikibaseSchema used for translation of tabular data to edits
* @param writer
* the writer to which the QS should be written
* @throws IOException * @throws IOException
*/ */
public void translateSchema(Project project, Engine engine, WikibaseSchema schema, Writer writer) throws IOException { public void translateSchema(Project project, Engine engine, WikibaseSchema schema, Writer writer) throws IOException {
@ -106,7 +112,7 @@ public class QuickStatementsExporter implements WriterExporter {
Claim claim = statement.getClaim(); Claim claim = statement.getClaim();
Value val = claim.getValue(); Value val = claim.getValue();
ValueVisitor<String> vv = new ValuePrinter(); ValueVisitor<String> vv = new QSValuePrinter();
String targetValue = val.accept(vv); String targetValue = val.accept(vv);
if (targetValue != null) { if (targetValue != null) {
if (! add) { if (! add) {
@ -138,80 +144,12 @@ public class QuickStatementsExporter implements WriterExporter {
pid = pid.replace('P', 'S'); pid = pid.replace('P', 'S');
} }
Value val = s.getValue(); Value val = s.getValue();
ValueVisitor<String> vv = new ValuePrinter(); ValueVisitor<String> vv = new QSValuePrinter();
String valStr = val.accept(vv); String valStr = val.accept(vv);
if(valStr != null) { if(valStr != null) {
writer.write("\t" + pid + "\t" + valStr); writer.write("\t" + pid + "\t" + valStr);
} }
} }
/**
 * Visitor turning a value into the string written to the output.
 * Each visit method returns null when the value cannot be printed.
 */
class ValuePrinter implements ValueVisitor<String> {
/**
 * Datatype ids are not supported.
 *
 * @return always null
 */
@Override
public String visit(DatatypeIdValue value) {
// unsupported according to
// https://tools.wmflabs.org/wikidata-todo/quick_statements.php?
return null;
}
/**
 * Prints the id of an entity.
 *
 * @return null if the value equals ItemIdValue.NULL, its id otherwise
 */
@Override
public String visit(EntityIdValue value) {
if (value.equals(ItemIdValue.NULL)) {
return null;
}
return value.getId();
}
/**
 * Prints coordinates as "@latitude/longitude", formatted with the US locale.
 */
@Override
public String visit(GlobeCoordinatesValue value) {
return String.format(
Locale.US,
"@%f/%f",
value.getLatitude(),
value.getLongitude());
}
/**
 * Prints a monolingual text as the language code, a colon and the
 * double-quoted text.
 */
@Override
public String visit(MonolingualTextValue value) {
return String.format(
"%s:\"%s\"",
value.getLanguageCode(),
value.getText());
}
/**
 * Prints the bounds and the unit of a quantity as "[lower,upper]U" followed
 * by the part of the unit IRI that comes after the prefix.
 *
 * NOTE(review): getNumericValue() is never read here, so the amount itself
 * is not part of the output. Units that do not start with the prefix
 * (which includes an empty unit) make this method return null.
 *
 * @return the printed bounds and unit, or null if the unit is not supported
 */
@Override
public String visit(QuantityValue value) {
String unitPrefix = "http://www.wikidata.org/entity/Q";
String unit = value.getUnit();
if (!unit.startsWith(unitPrefix))
return null; // QuickStatements only accepts Qids as units
// TODO test this for values without bounds
String unitID = "U"+unit.substring(unitPrefix.length());
return String.format(
Locale.US,
"[%f,%f]%s",
value.getLowerBound(),
value.getUpperBound(),
unitID);
}
/**
 * Prints a string surrounded by double quotes.
 */
@Override
public String visit(StringValue value) {
return "\"" + value.getString() + "\"";
}
/**
 * Prints a time value as "+YYYY-MM-DDThh:mm:ssZ/precision".
 * The "+" is a literal part of the pattern, whatever the sign of the year.
 */
@Override
public String visit(TimeValue value) {
return String.format(
"+%04d-%02d-%02dT%02d:%02d:%02dZ/%d",
value.getYear(),
value.getMonth(),
value.getDay(),
value.getHour(),
value.getMinute(),
value.getSecond(),
value.getPrecision());
}
}
} }

View File

@ -22,6 +22,7 @@ import com.google.refine.model.Recon;
* *
* Storing the types also lets us perform some constraint checks * Storing the types also lets us perform some constraint checks
* without re-fetching the types of many items. * without re-fetching the types of many items.
*
* @author antonin * @author antonin
* *
*/ */
@ -116,7 +117,7 @@ public abstract class ReconEntityIdValue implements PrefetchedEntityIdValue {
// This ensures compliance with OR's notion of new items // This ensures compliance with OR's notion of new items
// (it is possible that two cells are reconciled to the same // (it is possible that two cells are reconciled to the same
// new item, in which case they share the same internal recon id). // new item, in which case they share the same internal recon id).
return getRecon().id == reconOther.getRecon().id; return getRecon().judgmentHistoryEntry == reconOther.getRecon().judgmentHistoryEntry;
} }
} }

View File

@ -0,0 +1,120 @@
package org.openrefine.wikidata.exporters;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import java.math.BigDecimal;
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
import org.openrefine.wikidata.testing.TestingDataGenerator;
import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
import org.wikidata.wdtk.datamodel.interfaces.TimeValue;
import org.wikidata.wdtk.datamodel.interfaces.Value;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconCandidate;
public class QSValuePrinterTest {
private QSValuePrinter printer;
public QSValuePrinterTest() {
printer = new QSValuePrinter();
}
void assertPrints(String expectedFormat, Value datavalue) {
assertEquals(expectedFormat, datavalue.accept(printer));
}
// Entity id values
@Test
public void printItemId() {
assertPrints("Q42", Datamodel.makeWikidataItemIdValue("Q42"));
}
@Test
public void printPropertyId() {
assertPrints("P42", Datamodel.makeWikidataPropertyIdValue("P42"));
}
@Test
public void printNewItemId() {
ReconEntityIdValue id = TestingDataGenerator.makeNewItemIdValue(12345L, "my new item");
assertNull(id.accept(printer));
// because no entity was previously created
QSValuePrinter printerAfterCreate = new QSValuePrinter(id);
ReconEntityIdValue equalId = TestingDataGenerator.makeNewItemIdValue(12345L, "my other new item");
assertEquals("LAST", printerAfterCreate.visit(equalId));
ReconEntityIdValue differentId = TestingDataGenerator.makeNewItemIdValue(34567L, "my new item");
assertNull(printerAfterCreate.visit(differentId));
}
// Globe coordinates
@Test
public void printGlobeCoordinate() {
// I don't see how to avoid the trailing zeros - in any case it's not a big deal because
// the precision is governed by a different parameter that QuickStatements does not support.
assertPrints("@43.261930/10.927080", Datamodel.makeGlobeCoordinatesValue(43.26193, 10.92708,
GlobeCoordinatesValue.PREC_DEGREE, GlobeCoordinatesValue.GLOBE_EARTH));
}
// Monolingual text values
@Test
public void printMonolingualTextValue() {
assertPrints("pl:\"Krzyżacy\"", Datamodel.makeMonolingualTextValue("Krzyżacy", "pl"));
}
// Quantity values
@Test
public void printSimpleQuantityValue() {
assertPrints("10.00", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
null, null, "1"));
}
@Test
public void printQuantityValueWithUnit() {
assertPrints("10.00U11573", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
null, null, "http://www.wikidata.org/entity/Q11573"));
}
@Test
public void printQuantityValueWithBounds() {
assertPrints("10.00[9.0,11.05]", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
new BigDecimal("9.0"), new BigDecimal("11.05"), "1"));
}
@Test
public void printFullQuantity() {
assertPrints("10.00[9.0,11.05]U11573", Datamodel.makeQuantityValue(new BigDecimal("10.00"),
new BigDecimal("9.0"), new BigDecimal("11.05"), "http://www.wikidata.org/entity/Q11573"));
}
// String values
@Test
public void printString() {
assertPrints("\"hello\"", Datamodel.makeStringValue("hello"));
}
// Time values
@Test
public void printYear() {
assertPrints("+1586-00-00T00:00:00Z/9", Datamodel.makeTimeValue(1586L, (byte)0, (byte)0, (byte)0,
(byte)0, (byte)0, (byte)9, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO));
}
@Test
public void printDay() {
assertPrints("+1586-03-09T00:00:00Z/11", Datamodel.makeTimeValue(1586L, (byte)3, (byte)9, (byte)0,
(byte)0, (byte)0, (byte)11, 0, 0, 0, TimeValue.CM_GREGORIAN_PRO));
}
}

View File

@ -0,0 +1,40 @@
package org.openrefine.wikidata.testing;
import java.util.Collections;
import org.openrefine.wikidata.schema.entityvalues.ReconEntityIdValue;
import org.openrefine.wikidata.schema.entityvalues.ReconItemIdValue;
import org.wikidata.wdtk.datamodel.interfaces.ItemIdValue;
import com.google.refine.model.Cell;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconCandidate;
/**
 * Factory methods building reconciliation objects, cells and entity ids
 * for use in tests.
 */
public class TestingDataGenerator {

    /**
     * Builds a Wikidata recon judged as a new item.
     *
     * @param judgementId
     *            the id passed to Recon.makeWikidataRecon
     */
    public static Recon makeNewItemRecon(long judgementId) {
        Recon newItemRecon = Recon.makeWikidataRecon(judgementId);
        newItemRecon.judgment = Recon.Judgment.New;
        return newItemRecon;
    }

    /**
     * Builds a Wikidata recon matched to a single candidate.
     *
     * @param qid
     *            the id of the matched candidate
     * @param name
     *            the name of the matched candidate
     */
    public static Recon makeMatchedRecon(String qid, String name) {
        Recon matchedRecon = Recon.makeWikidataRecon(123456L);
        ReconCandidate candidate = new ReconCandidate(qid, name, new String[0], 100.0);
        matchedRecon.match = candidate;
        matchedRecon.candidates = Collections.singletonList(candidate);
        matchedRecon.judgment = Recon.Judgment.Matched;
        return matchedRecon;
    }

    /**
     * Builds a cell holding the given name, reconciled to a new item.
     */
    public static Cell makeNewItemCell(long judgementId, String name) {
        Recon newItemRecon = makeNewItemRecon(judgementId);
        return new Cell(name, newItemRecon);
    }

    /**
     * Builds a cell holding the given name, matched to the given candidate id.
     */
    public static Cell makeMatchedCell(String qid, String name) {
        Recon matchedRecon = makeMatchedRecon(qid, name);
        return new Cell(name, matchedRecon);
    }

    /**
     * Builds the entity id value of a new item with the given name.
     */
    public static ReconEntityIdValue makeNewItemIdValue(long judgementId, String name) {
        Recon newItemRecon = makeNewItemRecon(judgementId);
        return new ReconItemIdValue(newItemRecon, name);
    }
}