Ensure null values are not cached in URL fetching operation. Closes #1219.

This commit is contained in:
Antonin Delpeuch 2017-08-01 13:05:29 +01:00
parent 8b2d21d7aa
commit 66eac0fae9
2 changed files with 51 additions and 4 deletions

View File

@ -202,7 +202,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
.expireAfterWrite(10, TimeUnit.MINUTES) .expireAfterWrite(10, TimeUnit.MINUTES)
.build( .build(
new CacheLoader<String, Serializable>() { new CacheLoader<String, Serializable>() {
public Serializable load(String urlString) { public Serializable load(String urlString) throws Exception {
Serializable result = fetch(urlString); Serializable result = fetch(urlString);
try { try {
// Always sleep for the delay, no matter how long the // Always sleep for the delay, no matter how long the
@ -214,8 +214,13 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
Thread.sleep(_delay); Thread.sleep(_delay);
} }
} catch (InterruptedException e) { } catch (InterruptedException e) {
return null; result = null;
} }
if (result == null) {
// the load method should not return any null value
throw new Exception("null result returned by fetch");
}
return result; return result;
} }
}); });
@ -304,7 +309,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
Serializable cachedFetch(String urlString) { Serializable cachedFetch(String urlString) {
try { try {
return _urlCache.get(urlString); return _urlCache.get(urlString);
} catch(ExecutionException e) { } catch(Exception e) {
return null; return null;
} }
} }

View File

@ -54,6 +54,7 @@ import com.google.refine.browsing.Engine;
import com.google.refine.browsing.RowVisitor; import com.google.refine.browsing.RowVisitor;
import com.google.refine.grel.Function; import com.google.refine.grel.Function;
import com.google.refine.io.FileProjectManager; import com.google.refine.io.FileProjectManager;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.model.Cell; import com.google.refine.model.Cell;
import com.google.refine.model.Column; import com.google.refine.model.Column;
import com.google.refine.model.ModelException; import com.google.refine.model.ModelException;
@ -161,4 +162,45 @@ public class UrlFetchingTests extends RefineTest {
} }
} }
/**
* Fetch invalid URLs
* https://github.com/OpenRefine/OpenRefine/issues/1219
*/
@Test
public void testInvalidUrl() throws Exception {
Row row0 = new Row(2);
row0.setCell(0, new Cell("auinrestrsc", null)); // malformed -> null
project.rows.add(row0);
Row row1 = new Row(2);
row1.setCell(0, new Cell("https://www.random.org/integers/?num=1&min=1&max=100&col=1&base=10&format=plain", null)); // fine
project.rows.add(row1);
Row row2 = new Row(2);
row2.setCell(0, new Cell("http://anursiebcuiesldcresturce.detur/anusclbc", null)); // well-formed but invalid
project.rows.add(row2);
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
"fruits",
"value",
OnError.StoreError,
"junk",
1,
50,
true);
ProcessManager pm = project.getProcessManager();
Process process = op.createProcess(project, options);
process.startPerforming(pm);
Assert.assertTrue(process.isRunning());
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
Assert.fail("Test interrupted");
}
Assert.assertFalse(process.isRunning());
int newCol = project.columnModel.getColumnByName("junk").getCellIndex();
// Inspect rows
Assert.assertEquals(project.rows.get(0).getCellValue(newCol), null);
Assert.assertTrue(project.rows.get(1).getCellValue(newCol) != null);
Assert.assertTrue(ExpressionUtils.isError(project.rows.get(2).getCellValue(newCol)));
}
} }