diff --git a/main/src/com/google/refine/model/recon/StandardReconConfig.java b/main/src/com/google/refine/model/recon/StandardReconConfig.java index e4d888532..afaa7a455 100644 --- a/main/src/com/google/refine/model/recon/StandardReconConfig.java +++ b/main/src/com/google/refine/model/recon/StandardReconConfig.java @@ -501,14 +501,8 @@ public class StandardReconConfig extends ReconConfig { logger.error("Failed to batch recon with load:\n" + queriesString, e); } - // TODO: This code prevents the retry mechanism in ReconOperation from working while (recons.size() < jobs.size()) { - Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace); - recon.service = service; - recon.identifierSpace = identifierSpace; - recon.schemaSpace = schemaSpace; - - recons.add(recon); + recons.add(null); } return recons; diff --git a/main/src/com/google/refine/operations/recon/ReconOperation.java b/main/src/com/google/refine/operations/recon/ReconOperation.java index 7fece544d..8f61e35d5 100644 --- a/main/src/com/google/refine/operations/recon/ReconOperation.java +++ b/main/src/com/google/refine/operations/recon/ReconOperation.java @@ -286,29 +286,21 @@ public class ReconOperation extends EngineDependentOperation { JobGroup group = jobToGroup.get(job); List entries = group.entries; - /* - * TODO: Not sure what this retry is meant to handle, but it's currently - * non-functional due the code at the end of StandardReconConfig#batchRecon() - * which tops up any missing entries. - */ - if (recon == null) { - group.trials++; - if (group.trials < 3) { - logger.warn("Re-trying job including cell containing: " + entries.get(0).cell.value); - continue; // try again next time - } - String msg = "Failed after 3 trials for job including cell containing: " + entries.get(0).cell.value; - logger.warn(msg); - recon = _reconConfig.createNewRecon(_historyEntryID); - } - jobToGroup.remove(job); jobs.remove(j); done++; - recon.judgmentBatchSize = entries.size(); + if (recon != null) { + recon.judgmentBatchSize = entries.size(); + } for (ReconEntry entry : entries) { + if (recon == null) { + // TODO add EvalError instead? That is not so convenient + // for users because they would lose the cell contents. + // Better leave the cell unreconciled so they can be reconciled again later. + continue; + } Cell oldCell = entry.cell; Cell newCell = new Cell(oldCell.value, recon); diff --git a/main/src/com/google/refine/util/HttpClient.java b/main/src/com/google/refine/util/HttpClient.java index a2aac410a..b141e0237 100644 --- a/main/src/com/google/refine/util/HttpClient.java +++ b/main/src/com/google/refine/util/HttpClient.java @@ -37,11 +37,15 @@ import org.apache.hc.core5.http.io.entity.EntityUtils; import org.apache.hc.core5.http.message.BasicNameValuePair; import org.apache.hc.core5.http.protocol.HttpContext; import org.apache.hc.core5.util.TimeValue; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.refine.RefineServlet; public class HttpClient { + final static Logger logger = LoggerFactory.getLogger("http-client"); + final private RequestConfig defaultRequestConfig; private HttpClientBuilder httpClientBuilder; private CloseableHttpClient httpClient; @@ -206,8 +210,10 @@ public class HttpClient { if (interval.compareTo(defaultInterval) == 0) { interval = TimeValue.of(((Double) (Math.pow(2, execCount - 1) * defaultInterval.getDuration())).longValue(), defaultInterval.getTimeUnit() ); + logger.warn("Retrying HTTP request after "+interval.toString()); return interval; } + logger.warn("Retrying HTTP request after "+interval.toString()); return interval; } diff --git a/main/tests/server/src/com/google/refine/RefineTest.java b/main/tests/server/src/com/google/refine/RefineTest.java index 99c1c900a..8c74026b1 100644 --- a/main/tests/server/src/com/google/refine/RefineTest.java +++ b/main/tests/server/src/com/google/refine/RefineTest.java @@ -72,6 +72,8 @@ import com.google.refine.model.Column; import com.google.refine.model.ModelException; import com.google.refine.model.Project; import com.google.refine.model.Row; +import com.google.refine.process.Process; +import com.google.refine.process.ProcessManager; import com.google.refine.util.TestUtils; import edu.mit.simile.butterfly.ButterflyModule; @@ -375,4 +377,19 @@ public class RefineTest extends PowerMockTestCase { when(coreModule.getName()).thenReturn("core"); return coreModule; } + + protected void runAndWait(ProcessManager processManager, Process process, int timeout) { + process.startPerforming(processManager); + Assert.assertTrue(process.isRunning()); + int time = 0; + try { + while (process.isRunning() && time < timeout) { + Thread.sleep(200); + time += 200; + } + } catch (InterruptedException e) { + Assert.fail("Test interrupted"); + } + Assert.assertFalse(process.isRunning(),"Process failed to complete within timeout " + timeout); + } } diff --git a/main/tests/server/src/com/google/refine/model/recon/StandardReconConfigTests.java b/main/tests/server/src/com/google/refine/model/recon/StandardReconConfigTests.java index 8ab54ec19..4491e65fd 100644 --- a/main/tests/server/src/com/google/refine/model/recon/StandardReconConfigTests.java +++ b/main/tests/server/src/com/google/refine/model/recon/StandardReconConfigTests.java @@ -45,13 +45,11 @@ import org.testng.annotations.Test; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.node.ArrayNode; - import com.google.refine.RefineTest; import com.google.refine.browsing.EngineConfig; import com.google.refine.model.Cell; import com.google.refine.model.Project; import com.google.refine.model.Recon; -import com.google.refine.model.ReconCandidate; import com.google.refine.model.Row; import com.google.refine.model.recon.StandardReconConfig.ColumnDetail; import com.google.refine.model.recon.StandardReconConfig.ReconResult; @@ -269,13 +267,13 @@ public class StandardReconConfigTests extends RefineTest { assertNotNull(request1); - // We won't have gotten a result, but we want to make sure things didn't die + // We won't have gotten a result, but we want to make sure things didn't die. Row row = project.rows.get(0); Cell cell = row.cells.get(1); - assertNotNull(cell.recon); - assertEquals(cell.recon.service, url.toString()); - ReconCandidate candidate = cell.recon.getBestCandidate(); - assertNull(candidate); + assertNotNull(cell.value); + assertNull(cell.recon); + // the recon object is left null, so that it can be told apart from + // empty recon objects (the service legitimally did not return any candidate) } } diff --git a/main/tests/server/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperationTests.java b/main/tests/server/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperationTests.java index f01e673bb..a5e75858b 100644 --- a/main/tests/server/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperationTests.java +++ b/main/tests/server/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperationTests.java @@ -121,20 +121,9 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest { private void runAndWait(EngineDependentOperation op, int timeout) throws Exception { ProcessManager pm = project.getProcessManager(); Process process = op.createProcess(project, options); - process.startPerforming(pm); - Assert.assertTrue(process.isRunning()); - int time = 0; - try { - while (process.isRunning() && time < timeout) { - Thread.sleep(200); - time += 200; - } - } catch (InterruptedException e) { - Assert.fail("Test interrupted"); - } - Assert.assertFalse(process.isRunning(),"Process failed to complete within timeout " + timeout); + runAndWait(pm, process, timeout); } - + @Test public void serializeColumnAdditionByFetchingURLsOperation() throws Exception { TestUtils.isSerializedTo(ParsingUtilities.mapper.readValue(json, ColumnAdditionByFetchingURLsOperation.class), json); diff --git a/main/tests/server/src/com/google/refine/operations/recon/ReconOperationTests.java b/main/tests/server/src/com/google/refine/operations/recon/ReconOperationTests.java index b57ea2d10..c199afc8e 100644 --- a/main/tests/server/src/com/google/refine/operations/recon/ReconOperationTests.java +++ b/main/tests/server/src/com/google/refine/operations/recon/ReconOperationTests.java @@ -27,18 +27,29 @@ package com.google.refine.operations.recon; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import java.util.Arrays; +import java.util.List; import java.util.Properties; +import org.mockito.Mockito; +import org.mockito.stubbing.OngoingStubbing; +import org.testng.Assert; import org.testng.annotations.BeforeSuite; +import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; import com.google.refine.RefineTest; +import com.google.refine.browsing.EngineConfig; +import com.google.refine.model.Column; import com.google.refine.model.Project; +import com.google.refine.model.Recon; import com.google.refine.model.recon.ReconConfig; +import com.google.refine.model.recon.ReconJob; import com.google.refine.model.recon.StandardReconConfig; import com.google.refine.operations.OperationRegistry; -import com.google.refine.operations.recon.ReconOperation; +import com.google.refine.process.Process; import com.google.refine.util.ParsingUtilities; import com.google.refine.util.TestUtils; @@ -59,7 +70,6 @@ public class ReconOperationTests extends RefineTest { + " \"limit\":0" + "}," + "\"engineConfig\":{\"mode\":\"row-based\",\"facets\":[]}}"; - private Project project = mock(Project.class); private String processJson = "" + " {\n" + @@ -90,6 +100,7 @@ public class ReconOperationTests extends RefineTest { " \"progress\" : 0,\n" + " \"status\" : \"pending\"\n" + " }"; + @BeforeSuite public void registerOperation() { @@ -105,7 +116,37 @@ public class ReconOperationTests extends RefineTest { @Test public void serializeReconProcess() throws Exception { ReconOperation op = ParsingUtilities.mapper.readValue(json, ReconOperation.class); - com.google.refine.process.Process process = op.createProcess(project, new Properties()); + Project project = mock(Project.class); + Process process = op.createProcess(project, new Properties()); TestUtils.isSerializedTo(process, String.format(processJson, process.hashCode())); } + + @Test + public void testFailingRecon() throws Exception { + Project project = createCSVProject("my recon test project", + "column\n" + + "valueA\n" + + "valueB\n" + + "valueC"); + StandardReconConfig reconConfig = mock(StandardReconConfig.class); + List reconList = Arrays.asList((Recon)null, (Recon)null, (Recon)null); + ReconJob reconJob = mock(ReconJob.class); + when(reconConfig.batchRecon(Mockito.any(), Mockito.anyLong())).thenReturn(reconList); + when(reconConfig.getBatchSize()).thenReturn(10); + when(reconConfig.createJob(Mockito.eq(project), Mockito.anyInt(), Mockito.any(), Mockito.any(), Mockito.any())) + .thenReturn(reconJob); + + ReconOperation op = new ReconOperation(EngineConfig.reconstruct("{}"), "column", reconConfig); + + Process process = op.createProcess(project, new Properties()); + runAndWait(project.getProcessManager(), process, 1000); + + Column column = project.columnModel.columns.get(0); + Assert.assertNotNull(column.getReconStats()); + Assert.assertEquals(column.getReconStats().matchedTopics, 0); + + Assert.assertNull(project.rows.get(0).getCell(0).recon); + Assert.assertNull(project.rows.get(1).getCell(0).recon); + Assert.assertNull(project.rows.get(2).getCell(0).recon); + } }