From e664712a701d61e1ec110ffb20285081426eb296 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Sat, 15 May 2021 08:51:26 +0200 Subject: [PATCH] Make schema evaluation null-proof; avoid adding null values in data extension. (#3723) Null values in non-null cells should normally not happen, but sometimes they do and we shouldn't fail miserably in those cases. Closes #2880. Co-authored-by: Tom Morris --- .../openrefine/wikidata/schema/WbDateVariable.java | 3 +++ .../wikidata/schema/WbLocationVariable.java | 3 +++ .../wikidata/schema/WbStringVariable.java | 2 +- .../wikidata/schema/WbDateVariableTest.java | 10 ++++++++++ .../wikidata/schema/WbItemVariableTest.java | 10 ++++++++++ .../wikidata/schema/WbLanguageVariableTest.java | 7 +++++++ .../wikidata/schema/WbLocationVariableTest.java | 11 +++++++++++ .../wikidata/schema/WbStringVariableTest.java | 13 +++++++++++++ .../refine/model/changes/DataExtensionChange.java | 2 +- .../operations/recon/ExtendDataOperationTests.java | 4 ++-- 10 files changed, 61 insertions(+), 4 deletions(-) diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateVariable.java index 8de86d7ad..438a3bd8a 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateVariable.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbDateVariable.java @@ -53,6 +53,9 @@ public class WbDateVariable extends WbVariableExpr { @Override public TimeValue fromCell(Cell cell, ExpressionContext ctxt) throws SkipSchemaExpressionException { + if (cell == null || cell.value == null) { + throw new SkipSchemaExpressionException(); + } try { // parsed dates are accepted by converting them to strings return WbDateConstant.parse(cell.value.toString()); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLocationVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLocationVariable.java index 3765bf499..25b9b2db1 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLocationVariable.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbLocationVariable.java @@ -46,6 +46,9 @@ public class WbLocationVariable extends WbVariableExpr { @Override public GlobeCoordinatesValue fromCell(Cell cell, ExpressionContext ctxt) throws SkipSchemaExpressionException { + if (cell == null || cell.value == null) { + throw new SkipSchemaExpressionException(); + } String expr = cell.value.toString(); try { return WbLocationConstant.parse(expr); diff --git a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java index d1019d868..6ecbfa9c7 100644 --- a/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java +++ b/extensions/wikidata/src/org/openrefine/wikidata/schema/WbStringVariable.java @@ -56,7 +56,7 @@ public class WbStringVariable extends WbVariableExpr { @Override public StringValue fromCell(Cell cell, ExpressionContext ctxt) throws SkipSchemaExpressionException { - if (!cell.value.toString().isEmpty()) { + if (cell != null && !cell.value.toString().isEmpty()) { String stringValue = cell.value.toString(); if (cell.value instanceof Double && ((Double)cell.value) % 1 == 0) { stringValue = Long.toString(((Double)cell.value).longValue()); diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateVariableTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateVariableTest.java index 3cfa83c27..08e7b5a20 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateVariableTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbDateVariableTest.java @@ -61,6 +61,16 @@ public class WbDateVariableTest extends WbVariableTest { isSkipped(" 2018-XX"); isSkipped("invalid format"); } + + @Test + public void testNullStringValue() { + isSkipped((String) null); + } + + @Test + public void testNullCell() { + isSkipped((Cell) null); + } @Test public void testNumber() { diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbItemVariableTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbItemVariableTest.java index 8533ea0c9..bf9363aa8 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbItemVariableTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbItemVariableTest.java @@ -82,6 +82,16 @@ public class WbItemVariableTest extends WbVariableTest { public void testUnreconciledCell() { isSkipped("some value"); } + + @Test + public void testNullCell() { + isSkipped((Cell) null); + } + + @Test + public void testNullStringValue() { + isSkipped((String) null); + } @Test public void testSerialize() { diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbLanguageVariableTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbLanguageVariableTest.java index f6d48da2f..e1187ee32 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbLanguageVariableTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbLanguageVariableTest.java @@ -26,6 +26,8 @@ package org.openrefine.wikidata.schema; import org.openrefine.wikidata.testing.JacksonSerializationTest; import org.testng.annotations.Test; +import com.google.refine.model.Cell; + public class WbLanguageVariableTest extends WbVariableTest { @Override @@ -46,6 +48,11 @@ public class WbLanguageVariableTest extends WbVariableTest { isSkipped((String) null); isSkipped(""); } + + @Test + public void testNullCell() { + isSkipped((Cell) null); + } @Test public void testSerialize() { diff --git a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbLocationVariableTest.java b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbLocationVariableTest.java index fe142f3b6..5c7407a0b 100644 --- a/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbLocationVariableTest.java +++ b/extensions/wikidata/tests/src/org/openrefine/wikidata/schema/WbLocationVariableTest.java @@ -28,6 +28,8 @@ import org.testng.annotations.Test; import org.wikidata.wdtk.datamodel.helpers.Datamodel; import org.wikidata.wdtk.datamodel.interfaces.GlobeCoordinatesValue; +import com.google.refine.model.Cell; + public class WbLocationVariableTest extends WbVariableTest { @Override @@ -62,6 +64,15 @@ public class WbLocationVariableTest extends WbVariableTest { public void testEmpty() { isSkipped(""); } + + /** + * This should not normally happen: cell values should + * never be null (only whole cells can be null). + * But better safe than sorry! + */ + public void testNullStringValue() { + isSkipped((String) null); + } + + public void testNullCell() { + isSkipped((Cell) null); + } @Test public void testSimpleString() { diff --git a/main/src/com/google/refine/model/changes/DataExtensionChange.java b/main/src/com/google/refine/model/changes/DataExtensionChange.java index 0da9c0f6e..a3ee4c61d 100644 --- a/main/src/com/google/refine/model/changes/DataExtensionChange.java +++ b/main/src/com/google/refine/model/changes/DataExtensionChange.java @@ -270,7 +270,7 @@ public class DataExtensionChange implements Change { } cell = new Cell(rc.name, recon); } else { - cell = new Cell((Serializable) value, null); + cell = value == null ? null : new Cell((Serializable) value, null); } row.setCell(_firstNewCellIndex + c, cell); diff --git a/main/tests/server/src/com/google/refine/operations/recon/ExtendDataOperationTests.java b/main/tests/server/src/com/google/refine/operations/recon/ExtendDataOperationTests.java index 123d9311b..c1cc3dc9c 100644 --- a/main/tests/server/src/com/google/refine/operations/recon/ExtendDataOperationTests.java +++ b/main/tests/server/src/com/google/refine/operations/recon/ExtendDataOperationTests.java @@ -249,7 +249,7 @@ public class ExtendDataOperationTests extends RefineTest { "{" + "\"rows\": {" + " \"Q794\": {\"P297\": [{\"str\": \"IR\"}]}," - + " \"Q863\": {\"P297\": [{\"str\": \"TJ\"}]}," + + " \"Q863\": {\"P297\": []}," + " \"Q30\": {\"P297\": [{\"str\": \"US\"}]}," + " \"Q17\": {\"P297\": [{\"str\": \"JP\"}]}" + "}," @@ -270,7 +270,7 @@ public class ExtendDataOperationTests extends RefineTest { // Inspect rows Assert.assertTrue("IR".equals(project.rows.get(0).getCellValue(1)), "Bad country code for Iran."); Assert.assertTrue("JP".equals(project.rows.get(1).getCellValue(1)), "Bad country code for Japan."); - Assert.assertTrue("TJ".equals(project.rows.get(2).getCellValue(1)), "Bad country code for Tajikistan."); + Assert.assertNull(project.rows.get(2).getCell(1), "Expected a null country code."); Assert.assertTrue("US".equals(project.rows.get(3).getCellValue(1)), "Bad country code for United States."); // Make sure we did not create any recon stats for that column (no reconciled value)