Make schema evaluation null-proof; avoid adding null values in data extension. (#3723)

Null values in non-null cells should normally not happen, but sometimes they do and
we shouldn't fail miserably in those cases.

Closes #2880.

Co-authored-by: Tom Morris <tfmorris@gmail.com>
This commit is contained in:
Antonin Delpeuch 2021-05-15 08:51:26 +02:00 committed by GitHub
parent 787c272fe0
commit e664712a70
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 61 additions and 4 deletions

View File

@ -53,6 +53,9 @@ public class WbDateVariable extends WbVariableExpr<TimeValue> {
@Override
public TimeValue fromCell(Cell cell, ExpressionContext ctxt)
throws SkipSchemaExpressionException {
if (cell == null || cell.value == null) {
throw new SkipSchemaExpressionException();
}
try {
// parsed dates are accepted by converting them to strings
return WbDateConstant.parse(cell.value.toString());

View File

@ -46,6 +46,9 @@ public class WbLocationVariable extends WbVariableExpr<GlobeCoordinatesValue> {
@Override
public GlobeCoordinatesValue fromCell(Cell cell, ExpressionContext ctxt)
throws SkipSchemaExpressionException {
if (cell == null || cell.value == null) {
throw new SkipSchemaExpressionException();
}
String expr = cell.value.toString();
try {
return WbLocationConstant.parse(expr);

View File

@ -56,7 +56,7 @@ public class WbStringVariable extends WbVariableExpr<StringValue> {
@Override
public StringValue fromCell(Cell cell, ExpressionContext ctxt)
throws SkipSchemaExpressionException {
if (!cell.value.toString().isEmpty()) {
if (cell != null && !cell.value.toString().isEmpty()) {
String stringValue = cell.value.toString();
if (cell.value instanceof Double && ((Double)cell.value) % 1 == 0) {
stringValue = Long.toString(((Double)cell.value).longValue());

View File

@ -62,6 +62,16 @@ public class WbDateVariableTest extends WbVariableTest<TimeValue> {
isSkipped("invalid format");
}
@Test
public void testNullStringValue() {
isSkipped((String) null);
}
@Test
public void testNullCell() {
isSkipped((Cell) null);
}
@Test
public void testNumber() {
// numbers are evaluated as years

View File

@ -83,6 +83,16 @@ public class WbItemVariableTest extends WbVariableTest<ItemIdValue> {
isSkipped("some value");
}
@Test
public void testNullCell() {
isSkipped((Cell) null);
}
@Test
public void testNullStringValue() {
isSkipped((String) null);
}
@Test
public void testSerialize() {
JacksonSerializationTest.canonicalSerialization(WbExpression.class, variable,

View File

@ -26,6 +26,8 @@ package org.openrefine.wikidata.schema;
import org.openrefine.wikidata.testing.JacksonSerializationTest;
import org.testng.annotations.Test;
import com.google.refine.model.Cell;
public class WbLanguageVariableTest extends WbVariableTest<String> {
@Override
@ -47,6 +49,11 @@ public class WbLanguageVariableTest extends WbVariableTest<String> {
isSkipped("");
}
@Test
public void testNullCell() {
isSkipped((Cell) null);
}
@Test
public void testSerialize() {
JacksonSerializationTest.canonicalSerialization(WbExpression.class, variable,

View File

@ -28,6 +28,8 @@ import org.testng.annotations.Test;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue;
import com.google.refine.model.Cell;
public class WbLocationVariableTest extends WbVariableTest<GlobeCoordinatesValue> {
@Override
@ -63,6 +65,15 @@ public class WbLocationVariableTest extends WbVariableTest<GlobeCoordinatesValue
isSkipped("");
}
@Test
public void testNullStringValue() {
isSkipped((String) null);
}
public void testNullCell() {
isSkipped((Cell) null);
}
@Test
public void testSerialize() {
JacksonSerializationTest.canonicalSerialization(WbExpression.class, variable,

View File

@ -41,6 +41,19 @@ public class WbStringVariableTest extends WbVariableTest<StringValue> {
isSkipped("");
}
/**
* This should not normally happen: cell values should
* never be null (only whole cells can be null).
* But better safe than sorry!
*/
public void testNullStringValue() {
isSkipped((String) null);
}
public void testNullCell() {
isSkipped((Cell) null);
}
@Test
public void testSimpleString() {
evaluatesTo(Datamodel.makeStringValue("apfelstrudel"), "apfelstrudel");

View File

@ -270,7 +270,7 @@ public class DataExtensionChange implements Change {
}
cell = new Cell(rc.name, recon);
} else {
cell = new Cell((Serializable) value, null);
cell = value == null ? null : new Cell((Serializable) value, null);
}
row.setCell(_firstNewCellIndex + c, cell);

View File

@ -249,7 +249,7 @@ public class ExtendDataOperationTests extends RefineTest {
"{"
+ "\"rows\": {"
+ " \"Q794\": {\"P297\": [{\"str\": \"IR\"}]},"
+ " \"Q863\": {\"P297\": [{\"str\": \"TJ\"}]},"
+ " \"Q863\": {\"P297\": []},"
+ " \"Q30\": {\"P297\": [{\"str\": \"US\"}]},"
+ " \"Q17\": {\"P297\": [{\"str\": \"JP\"}]}"
+ "},"
@ -270,7 +270,7 @@ public class ExtendDataOperationTests extends RefineTest {
// Inspect rows
Assert.assertTrue("IR".equals(project.rows.get(0).getCellValue(1)), "Bad country code for Iran.");
Assert.assertTrue("JP".equals(project.rows.get(1).getCellValue(1)), "Bad country code for Japan.");
Assert.assertTrue("TJ".equals(project.rows.get(2).getCellValue(1)), "Bad country code for Tajikistan.");
Assert.assertNull(project.rows.get(2).getCell(1), "Expected a null country code.");
Assert.assertTrue("US".equals(project.rows.get(3).getCellValue(1)), "Bad country code for United States.");
// Make sure we did not create any recon stats for that column (no reconciled value)