diff --git a/main/pom.xml b/main/pom.xml
index dc4930f16..1e22434d5 100644
--- a/main/pom.xml
+++ b/main/pom.xml
@@ -290,6 +290,11 @@
clojure
1.10.1
+
+ org.apache.httpcomponents.client5
+ httpclient5
+ 5.0.2
+
org.apache.httpcomponents
httpclient
diff --git a/main/src/com/google/refine/commands/recon/GuessTypesOfColumnCommand.java b/main/src/com/google/refine/commands/recon/GuessTypesOfColumnCommand.java
index 816c56acf..8d3ce9226 100644
--- a/main/src/com/google/refine/commands/recon/GuessTypesOfColumnCommand.java
+++ b/main/src/com/google/refine/commands/recon/GuessTypesOfColumnCommand.java
@@ -48,19 +48,6 @@ import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
-import org.apache.http.Consts;
-import org.apache.http.NameValuePair;
-import org.apache.http.StatusLine;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.entity.UrlEncodedFormEntity;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpPost;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClientBuilder;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.http.impl.client.LaxRedirectStrategy;
-import org.apache.http.message.BasicNameValuePair;
-
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
@@ -68,7 +55,6 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
-import com.google.refine.RefineServlet;
import com.google.refine.commands.Command;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.model.Column;
@@ -76,6 +62,7 @@ import com.google.refine.model.Project;
import com.google.refine.model.ReconType;
import com.google.refine.model.Row;
import com.google.refine.model.recon.StandardReconConfig.ReconResult;
+import com.google.refine.util.HttpClient;
import com.google.refine.util.ParsingUtilities;
public class GuessTypesOfColumnCommand extends Command {
@@ -180,61 +167,38 @@ public class GuessTypesOfColumnCommand extends Command {
}
String queriesString = ParsingUtilities.defaultWriter.writeValueAsString(queryMap);
+ String responseString;
try {
- RequestConfig defaultRequestConfig = RequestConfig.custom()
- .setConnectTimeout(30 * 1000)
- .build();
+ responseString = postQueries(serviceUrl, queriesString);
+ ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(responseString);
- HttpClientBuilder httpClientBuilder = HttpClients.custom()
- .setUserAgent(RefineServlet.getUserAgent())
- .setRedirectStrategy(new LaxRedirectStrategy())
- .setDefaultRequestConfig(defaultRequestConfig);
-
- CloseableHttpClient httpClient = httpClientBuilder.build();
- HttpPost request = new HttpPost(serviceUrl);
- List body = Collections.singletonList(
- new BasicNameValuePair("queries", queriesString));
- request.setEntity(new UrlEncodedFormEntity(body, Consts.UTF_8));
-
- try (CloseableHttpResponse response = httpClient.execute(request)) {
- StatusLine statusLine = response.getStatusLine();
- if (statusLine.getStatusCode() >= 400) {
- throw new IOException("Failed - code:"
- + Integer.toString(statusLine.getStatusCode())
- + " message: " + statusLine.getReasonPhrase());
+ Iterator iterator = o.iterator();
+ while (iterator.hasNext()) {
+ JsonNode o2 = iterator.next();
+ if (!(o2.has("result") && o2.get("result") instanceof ArrayNode)) {
+ continue;
}
-
- String s = ParsingUtilities.inputStreamToString(response.getEntity().getContent());
- ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(s);
- Iterator iterator = o.iterator();
- while (iterator.hasNext()) {
- JsonNode o2 = iterator.next();
- if (!(o2.has("result") && o2.get("result") instanceof ArrayNode)) {
- continue;
- }
+ ArrayNode results = (ArrayNode) o2.get("result");
+ List reconResults = ParsingUtilities.mapper.convertValue(results, new TypeReference>() {});
+ int count = reconResults.size();
- ArrayNode results = (ArrayNode) o2.get("result");
- List reconResults = ParsingUtilities.mapper.convertValue(results, new TypeReference>() {});
- int count = reconResults.size();
+ for (int j = 0; j < count; j++) {
+ ReconResult result = reconResults.get(j);
+ double score = 1.0 / (1 + j); // score by each result's rank
- for (int j = 0; j < count; j++) {
- ReconResult result = reconResults.get(j);
- double score = 1.0 / (1 + j); // score by each result's rank
+ List types = result.types;
+ int typeCount = types.size();
- List types = result.types;
- int typeCount = types.size();
-
- for (int t = 0; t < typeCount; t++) {
- ReconType type = types.get(t);
- double score2 = score * (typeCount - t) / typeCount;
- if (map.containsKey(type.id)) {
- TypeGroup tg = map.get(type.id);
- tg.score += score2;
- tg.count++;
- } else {
- map.put(type.id, new TypeGroup(type.id, type.name, score2));
- }
+ for (int t = 0; t < typeCount; t++) {
+ ReconType type = types.get(t);
+ double score2 = score * (typeCount - t) / typeCount;
+ if (map.containsKey(type.id)) {
+ TypeGroup tg = map.get(type.id);
+ tg.score += score2;
+ tg.count++;
+ } else {
+ map.put(type.id, new TypeGroup(type.id, type.name, score2));
}
}
}
@@ -243,7 +207,7 @@ public class GuessTypesOfColumnCommand extends Command {
logger.error("Failed to guess cell types for load\n" + queriesString, e);
throw e;
}
-
+
List types = new ArrayList(map.values());
Collections.sort(types, new Comparator() {
@Override
@@ -258,7 +222,12 @@ public class GuessTypesOfColumnCommand extends Command {
return types;
}
-
+
+ private String postQueries(String serviceUrl, String queriesString) throws IOException {
+ HttpClient client = new HttpClient();
+ return client.postNameValue(serviceUrl, "queries", queriesString);
+ }
+
static protected class TypeGroup {
@JsonProperty("id")
protected String id;
diff --git a/main/src/com/google/refine/importing/ImportingUtilities.java b/main/src/com/google/refine/importing/ImportingUtilities.java
index b2eb3bc19..8cba7db6b 100644
--- a/main/src/com/google/refine/importing/ImportingUtilities.java
+++ b/main/src/com/google/refine/importing/ImportingUtilities.java
@@ -69,19 +69,13 @@ import org.apache.commons.fileupload.ProgressListener;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.commons.fileupload.util.Streams;
-import org.apache.http.HttpEntity;
-import org.apache.http.auth.AuthScope;
-import org.apache.http.auth.UsernamePasswordCredentials;
-import org.apache.http.client.CredentialsProvider;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.entity.ContentType;
-import org.apache.http.impl.client.BasicCredentialsProvider;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClientBuilder;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.http.util.EntityUtils;
-import org.apache.http.StatusLine;
+import org.apache.hc.client5.http.ClientProtocolException;
+import org.apache.hc.core5.http.ClassicHttpResponse;
+import org.apache.hc.core5.http.ContentType;
+import org.apache.hc.core5.http.HttpEntity;
+import org.apache.hc.core5.http.HttpStatus;
+import org.apache.hc.core5.http.io.HttpClientResponseHandler;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -89,10 +83,10 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
-import com.google.refine.RefineServlet;
import com.google.refine.importing.ImportingManager.Format;
import com.google.refine.importing.UrlRewriter.Result;
import com.google.refine.model.Project;
+import com.google.refine.util.HttpClient;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
import java.util.stream.Collectors;
@@ -287,65 +281,56 @@ public class ImportingUtilities {
}
if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
- HttpClientBuilder clientbuilder = HttpClients.custom()
- .setUserAgent(RefineServlet.getUserAgent());
-// .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
+ final URL lastUrl = url;
+ final HttpClientResponseHandler responseHandler = new HttpClientResponseHandler() {
- String userinfo = url.getUserInfo();
- // HTTPS only - no sending password in the clear over HTTP
- if ("https".equals(url.getProtocol()) && userinfo != null) {
- int s = userinfo.indexOf(':');
- if (s > 0) {
- String user = userinfo.substring(0, s);
- String pw = userinfo.substring(s + 1, userinfo.length());
- CredentialsProvider credsProvider = new BasicCredentialsProvider();
- credsProvider.setCredentials(new AuthScope(url.getHost(), 443),
- new UsernamePasswordCredentials(user, pw));
- clientbuilder = clientbuilder.setDefaultCredentialsProvider(credsProvider);
- }
- }
+ @Override
+ public String handleResponse(final ClassicHttpResponse response) throws IOException {
+ final int status = response.getCode();
+ if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) {
+ final HttpEntity entity = response.getEntity();
+ if (entity == null) {
+ throw new IOException("No content found in " + lastUrl.toExternalForm());
+ }
- CloseableHttpClient httpclient = clientbuilder.build();
- HttpGet httpGet = new HttpGet(url.toURI());
- CloseableHttpResponse response = httpclient.execute(httpGet);
+ try {
+ InputStream stream2 = entity.getContent();
- try {
- HttpEntity entity = response.getEntity();
- if (entity == null) {
- throw new Exception("No content found in " + url.toString());
- }
- StatusLine status = response.getStatusLine();
- int statusCode = response.getStatusLine().getStatusCode();
- if (statusCode >= 400) {
- String errorString = ParsingUtilities.inputStreamToString(entity.getContent());
- String message = String.format("HTTP error %d : %s | %s", statusCode,
- status.getReasonPhrase(), errorString);
- throw new Exception(message);
- }
- InputStream stream2 = entity.getContent();
+ String mimeType = null;
+ String charset = null;
+ ContentType contentType = ContentType.parse(entity.getContentType());
+ if (contentType != null) {
+ mimeType = contentType.getMimeType();
+ Charset cs = contentType.getCharset();
+ if (cs != null) {
+ charset = cs.toString();
+ }
+ }
+ JSONUtilities.safePut(fileRecord, "declaredMimeType", mimeType);
+ JSONUtilities.safePut(fileRecord, "declaredEncoding", charset);
+ if (saveStream(stream2, lastUrl, rawDataDir, progress, update,
+ fileRecord, fileRecords,
+ entity.getContentLength())) {
+ return "saved"; // signal to increment archive count
+ }
- String mimeType = null;
- String charset = null;
- ContentType contentType = ContentType.get(entity);
- if (contentType != null) {
- mimeType = contentType.getMimeType();
- Charset cs = contentType.getCharset();
- if (cs != null) {
- charset = cs.toString();
+ } catch (final IOException ex) {
+ throw new ClientProtocolException(ex);
+ }
+ return null;
+ } else {
+ // String errorBody = EntityUtils.toString(response.getEntity());
+ throw new ClientProtocolException(String.format("HTTP error %d : %s for URL %s", status,
+ response.getReasonPhrase(), lastUrl.toExternalForm()));
}
}
- JSONUtilities.safePut(fileRecord, "declaredMimeType", mimeType);
- JSONUtilities.safePut(fileRecord, "declaredEncoding", charset);
- if (saveStream(stream2, url, rawDataDir, progress, update,
- fileRecord, fileRecords,
- entity.getContentLength())) {
- archiveCount++;
- }
- downloadCount++;
- EntityUtils.consume(entity);
- } finally {
- httpGet.reset();
- }
+ };
+
+ HttpClient httpClient = new HttpClient();
+ if (httpClient.getResponse(urlString, null, responseHandler) != null) {
+ archiveCount++;
+ };
+ downloadCount++;
} else {
// Fallback handling for non HTTP connections (only FTP?)
URLConnection urlConnection = url.openConnection();
@@ -418,7 +403,7 @@ public class ImportingUtilities {
private static boolean saveStream(InputStream stream, URL url, File rawDataDir, final Progress progress,
final SavingUpdate update, ObjectNode fileRecord, ArrayNode fileRecords, long length)
- throws IOException, Exception {
+ throws IOException {
String localname = url.getPath();
if (localname.isEmpty() || localname.endsWith("/")) {
localname = localname + "temp";
@@ -436,7 +421,7 @@ public class ImportingUtilities {
long actualLength = saveStreamToFile(stream, file, update);
JSONUtilities.safePut(fileRecord, "size", actualLength);
if (actualLength == 0) {
- throw new Exception("No content found in " + url.toString());
+ throw new IOException("No content found in " + url.toString());
} else if (length >= 0) {
update.totalExpectedSize += (actualLength - length);
} else {
diff --git a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java
index 86020c541..d88a96082 100644
--- a/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java
+++ b/main/src/com/google/refine/model/recon/ReconciledDataExtensionJob.java
@@ -37,30 +37,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.model.recon;
import java.io.IOException;
-import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
-import org.apache.http.Consts;
-import org.apache.http.NameValuePair;
-import org.apache.http.StatusLine;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.entity.UrlEncodedFormEntity;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpPost;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClientBuilder;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.http.impl.client.LaxRedirectStrategy;
-import org.apache.http.message.BasicNameValuePair;
-
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
@@ -69,14 +54,15 @@ import com.fasterxml.jackson.annotation.JsonView;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
-import com.google.refine.RefineServlet;
import com.google.refine.expr.functions.ToDate;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconType;
+import com.google.refine.util.HttpClient;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.JsonViews;
import com.google.refine.util.ParsingUtilities;
+
public class ReconciledDataExtensionJob {
@@ -170,15 +156,23 @@ public class ReconciledDataExtensionJob {
final public DataExtensionConfig extension;
final public String endpoint;
final public List columns = new ArrayList();
-
+
// not final: initialized lazily
- private static CloseableHttpClient httpClient = null;
-
+ private static HttpClient httpClient = null;
+
public ReconciledDataExtensionJob(DataExtensionConfig obj, String endpoint) {
this.extension = obj;
this.endpoint = endpoint;
}
-
+
+ /**
+ * @todo Although the HTTP code has been unified, there may still be opportunity
+ * to refactor a higher level querying library out of this which could be shared
+ * with StandardReconConfig
+ *
+ * It may also be possible to extract a library to query reconciliation services
+ * which could be used outside of OpenRefine.
+ */
public Map extend(
Set ids,
Map reconCandidateMap
@@ -187,7 +181,7 @@ public class ReconciledDataExtensionJob {
formulateQuery(ids, extension, writer);
String query = writer.toString();
- String response = performQuery(this.endpoint, query);
+ String response = postExtendQuery(this.endpoint, query);
ObjectNode o = ParsingUtilities.mapper.readValue(response, ObjectNode.class);
@@ -218,46 +212,17 @@ public class ReconciledDataExtensionJob {
return map;
}
- /**
- * @todo this should be refactored to be unified with the HTTP querying code
- * from StandardReconConfig. We should ideally extract a library to query
- * reconciliation services and expose it as such for others to reuse.
- */
-
- static protected String performQuery(String endpoint, String query) throws IOException {
- HttpPost request = new HttpPost(endpoint);
- List body = Collections.singletonList(
- new BasicNameValuePair("extend", query));
- request.setEntity(new UrlEncodedFormEntity(body, Consts.UTF_8));
-
- try (CloseableHttpResponse response = getHttpClient().execute(request)) {
- StatusLine statusLine = response.getStatusLine();
- if (statusLine.getStatusCode() >= 400) {
- throw new IOException("Data extension query failed - code: "
- + Integer.toString(statusLine.getStatusCode())
- + " message: " + statusLine.getReasonPhrase());
- } else {
- return ParsingUtilities.inputStreamToString(response.getEntity().getContent());
- }
- }
+ static protected String postExtendQuery(String endpoint, String query) throws IOException {
+ return getHttpClient().postNameValue(endpoint, "extend", query);
}
- private static CloseableHttpClient getHttpClient() {
- if (httpClient != null) {
- return httpClient;
+ private static HttpClient getHttpClient() {
+ if (httpClient == null) {
+ httpClient = new HttpClient();
}
- RequestConfig defaultRequestConfig = RequestConfig.custom()
- .setConnectTimeout(30 * 1000)
- .build();
-
- HttpClientBuilder httpClientBuilder = HttpClients.custom()
- .setUserAgent(RefineServlet.getUserAgent())
- .setRedirectStrategy(new LaxRedirectStrategy())
- .setDefaultRequestConfig(defaultRequestConfig);
- httpClient = httpClientBuilder.build();
return httpClient;
}
-
+
protected ReconciledDataExtensionJob.DataExtension collectResult(
ObjectNode record,
Map reconCandidateMap
diff --git a/main/src/com/google/refine/model/recon/StandardReconConfig.java b/main/src/com/google/refine/model/recon/StandardReconConfig.java
index c27e79915..e4d888532 100644
--- a/main/src/com/google/refine/model/recon/StandardReconConfig.java
+++ b/main/src/com/google/refine/model/recon/StandardReconConfig.java
@@ -45,18 +45,6 @@ import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
-import org.apache.http.Consts;
-import org.apache.http.NameValuePair;
-import org.apache.http.StatusLine;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.entity.UrlEncodedFormEntity;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpPost;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClientBuilder;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.http.impl.client.LaxRedirectStrategy;
-import org.apache.http.message.BasicNameValuePair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -69,7 +57,6 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
-import com.google.refine.RefineServlet;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
@@ -79,6 +66,7 @@ import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconType;
import com.google.refine.model.RecordModel.RowDependency;
import com.google.refine.model.Row;
+import com.google.refine.util.HttpClient;
import com.google.refine.util.ParsingUtilities;
public class StandardReconConfig extends ReconConfig {
@@ -164,7 +152,7 @@ public class StandardReconConfig extends ReconConfig {
final private int limit;
// initialized lazily
- private CloseableHttpClient httpClient = null;
+ private HttpClient httpClient = null;
@JsonCreator
public StandardReconConfig(
@@ -434,29 +422,29 @@ public class StandardReconConfig extends ReconConfig {
try {
job.code = ParsingUtilities.defaultWriter.writeValueAsString(query);
} catch (JsonProcessingException e) {
+ // FIXME: This error will get lost
e.printStackTrace();
return null; // TODO: Throw exception instead?
}
return job;
}
- private CloseableHttpClient getHttpClient() {
- if (httpClient != null) {
- return httpClient;
+ private HttpClient getHttpClient() {
+ if (httpClient == null) {
+ httpClient = new HttpClient();
}
- RequestConfig defaultRequestConfig = RequestConfig.custom()
- .setConnectTimeout(30 * 1000)
- .setSocketTimeout(60 * 1000)
- .build();
-
- HttpClientBuilder httpClientBuilder = HttpClients.custom()
- .setUserAgent(RefineServlet.getUserAgent())
- .setRedirectStrategy(new LaxRedirectStrategy())
- .setDefaultRequestConfig(defaultRequestConfig);
- httpClient = httpClientBuilder.build();
return httpClient;
}
-
+
+ private String postQueries(String url, String queriesString) throws IOException {
+ try {
+ return getHttpClient().postNameValue(url, "queries", queriesString);
+
+ } catch (IOException e) {
+ throw new IOException("Failed to batch recon with load:\n" + queriesString, e);
+ }
+ }
+
@Override
public List batchRecon(List jobs, long historyEntryID) {
List recons = new ArrayList(jobs.size());
@@ -475,51 +463,41 @@ public class StandardReconConfig extends ReconConfig {
stringWriter.write("}");
String queriesString = stringWriter.toString();
- HttpPost request = new HttpPost(service);
- List body = Collections.singletonList(
- new BasicNameValuePair("queries", queriesString));
- request.setEntity(new UrlEncodedFormEntity(body, Consts.UTF_8));
-
- try (CloseableHttpResponse response = getHttpClient().execute(request)) {
- StatusLine statusLine = response.getStatusLine();
- if (statusLine.getStatusCode() >= 400) {
- logger.error("Failed - code: "
- + Integer.toString(statusLine.getStatusCode())
- + " message: " + statusLine.getReasonPhrase());
+ try {
+ String responseString = postQueries(service, queriesString);
+ ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(responseString);
+
+ if (o == null) { // utility method returns null instead of throwing
+ logger.error("Failed to parse string as JSON: " + responseString);
} else {
- String s = ParsingUtilities.inputStreamToString(response.getEntity().getContent());
- ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(s);
- if (o == null) { // utility method returns null instead of throwing
- logger.error("Failed to parse string as JSON: " + s);
- } else {
- for (int i = 0; i < jobs.size(); i++) {
- StandardReconJob job = (StandardReconJob) jobs.get(i);
- Recon recon = null;
+ for (int i = 0; i < jobs.size(); i++) {
+ StandardReconJob job = (StandardReconJob) jobs.get(i);
+ Recon recon = null;
- String text = job.text;
- String key = "q" + i;
- if (o.has(key) && o.get(key) instanceof ObjectNode) {
- ObjectNode o2 = (ObjectNode) o.get(key);
- if (o2.has("result") && o2.get("result") instanceof ArrayNode) {
- ArrayNode results = (ArrayNode) o2.get("result");
+ String text = job.text;
+ String key = "q" + i;
+ if (o.has(key) && o.get(key) instanceof ObjectNode) {
+ ObjectNode o2 = (ObjectNode) o.get(key);
+ if (o2.has("result") && o2.get("result") instanceof ArrayNode) {
+ ArrayNode results = (ArrayNode) o2.get("result");
- recon = createReconServiceResults(text, results, historyEntryID);
- } else {
- logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
- }
+ recon = createReconServiceResults(text, results, historyEntryID);
} else {
// TODO: better error reporting
- logger.warn("Service error for text: " + text + "\n Job code: " + job.code);
+ logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
}
-
- if (recon != null) {
- recon.service = service;
- }
- recons.add(recon);
+ } else {
+ // TODO: better error reporting
+ logger.warn("Service error for text: " + text + "\n Job code: " + job.code);
}
+
+ if (recon != null) {
+ recon.service = service;
+ }
+ recons.add(recon);
}
}
- } catch (Exception e) {
+ } catch (IOException e) {
logger.error("Failed to batch recon with load:\n" + queriesString, e);
}
@@ -535,7 +513,7 @@ public class StandardReconConfig extends ReconConfig {
return recons;
}
-
+
@Override
public Recon createNewRecon(long historyEntryID) {
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
@@ -543,7 +521,7 @@ public class StandardReconConfig extends ReconConfig {
return recon;
}
- protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) throws IOException {
+ protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) {
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
List results = ParsingUtilities.mapper.convertValue(resultsList, new TypeReference>() {});
diff --git a/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java b/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java
index a3e5bfc6a..dc95f56dd 100644
--- a/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java
+++ b/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java
@@ -37,27 +37,13 @@ import static com.google.common.base.Strings.isNullOrEmpty;
import java.io.IOException;
import java.io.Serializable;
-import java.net.MalformedURLException;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
-import org.apache.http.Header;
-import org.apache.http.HttpEntity;
-import org.apache.http.StatusLine;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.entity.ContentType;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClientBuilder;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.http.message.BasicHeader;
-import org.apache.http.util.EntityUtils;
+import org.apache.hc.core5.http.Header;
+import org.apache.hc.core5.http.message.BasicHeader;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
@@ -65,7 +51,6 @@ import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
-import com.google.refine.RefineServlet;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.EngineConfig;
import com.google.refine.browsing.FilteredRows;
@@ -86,7 +71,7 @@ import com.google.refine.operations.EngineDependentOperation;
import com.google.refine.operations.OnError;
import com.google.refine.process.LongRunningProcess;
import com.google.refine.process.Process;
-import com.google.refine.util.ParsingUtilities;
+import com.google.refine.util.HttpClient;
public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation {
@@ -117,8 +102,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
final protected boolean _cacheResponses;
final protected List _httpHeadersJson;
private Header[] httpHeaders = new Header[0];
- final private RequestConfig defaultRequestConfig;
- private HttpClientBuilder httpClientBuilder;
+ private HttpClient _httpClient;
+
@JsonCreator
public ColumnAdditionByFetchingURLsOperation(
@@ -163,22 +148,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
}
}
httpHeaders = headers.toArray(httpHeaders);
+ _httpClient = new HttpClient(_delay);
- defaultRequestConfig = RequestConfig.custom()
- .setConnectTimeout(30 * 1000)
- .setConnectionRequestTimeout(30 * 1000)
- .setSocketTimeout(10 * 1000).build();
-
- // TODO: Placeholder for future Basic Auth implementation
-// CredentialsProvider credsProvider = new BasicCredentialsProvider();
-// credsProvider.setCredentials(new AuthScope(host, 443),
-// new UsernamePasswordCredentials(user, password));
-
- httpClientBuilder = HttpClients.custom()
- .setUserAgent(RefineServlet.getUserAgent())
- .setDefaultRequestConfig(defaultRequestConfig);
-// .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
-// .setDefaultCredentialsProvider(credsProvider);
}
@JsonProperty("newColumnName")
@@ -281,20 +252,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
.build(
new CacheLoader() {
public Serializable load(String urlString) throws Exception {
- Serializable result = fetch(urlString);
- try {
- // Always sleep for the delay, no matter how long the
- // request took. This is more responsible than substracting
- // the time spend requesting the URL, because it naturally
- // slows us down if the server is busy and takes a long time
- // to reply.
- if (_delay > 0) {
- Thread.sleep(_delay);
- }
- } catch (InterruptedException e) {
- result = null;
- }
-
+ Serializable result = fetch(urlString, httpHeaders);
if (result == null) {
// the load method should not return any null value
throw new Exception("null result returned by fetch");
@@ -335,9 +293,9 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
Serializable response = null;
if (_urlCache != null) {
- response = cachedFetch(urlString); // TODO: Why does this need a separate method?
+ response = cachedFetch(urlString);
} else {
- response = fetch(urlString);
+ response = fetch(urlString, httpHeaders);
}
if (response != null) {
@@ -380,68 +338,19 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
}
}
- Serializable fetch(String urlString) {
- HttpGet httpGet;
-
- try {
- // Use of URL constructor below is purely to get additional error checking to mimic
- // previous behavior for the tests.
- httpGet = new HttpGet(new URL(urlString).toURI());
- } catch (IllegalArgumentException | MalformedURLException | URISyntaxException e) {
- return null;
- }
-
- try {
- httpGet.setHeaders(httpHeaders);
- httpGet.setConfig(defaultRequestConfig);
-
- CloseableHttpClient httpclient = httpClientBuilder.build();
-
- CloseableHttpResponse response = null;
+ Serializable fetch(String urlString, Header[] headers) {
+ try { //HttpClients.createDefault()) {
try {
- response = httpclient.execute(httpGet);
-
- HttpEntity entity = response.getEntity();
- if (entity == null) {
- throw new Exception("No content found in " + httpGet.getURI().toString());
- }
-
- String encoding = null;
-
- if (entity.getContentEncoding() != null) {
- encoding = entity.getContentEncoding().getValue();
- } else {
- Charset charset = ContentType.getOrDefault(entity).getCharset();
- if (charset != null) {
- encoding = charset.name();
- }
- }
-
- String result = ParsingUtilities.inputStreamToString(
- entity.getContent(), (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding);
-
- EntityUtils.consume(entity);
- return result;
-
+ return _httpClient.getAsString(urlString, headers);
} catch (IOException e) {
- String message;
- if (response == null) {
- message = "Unknown HTTP error " + e.getLocalizedMessage();
- } else {
- StatusLine status = response.getStatusLine();
- HttpEntity errorEntity = response.getEntity();
- String errorString = ParsingUtilities.inputStreamToString(errorEntity.getContent());
- message = String.format("HTTP error %d : %s | %s", status.getStatusCode(),
- status.getReasonPhrase(),
- errorString);
- }
- return _onError == OnError.StoreError ? new EvalError(message) : null;
+ return _onError == OnError.StoreError ? new EvalError(e) : null;
}
} catch (Exception e) {
return _onError == OnError.StoreError ? new EvalError(e.getMessage()) : null;
}
}
+
RowVisitor createRowVisitor(List cellsAtRows) {
return new RowVisitor() {
int cellIndex;
@@ -497,4 +406,5 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
}.init(cellsAtRows);
}
}
+
}
diff --git a/main/src/com/google/refine/util/HttpClient.java b/main/src/com/google/refine/util/HttpClient.java
new file mode 100644
index 000000000..d55955b48
--- /dev/null
+++ b/main/src/com/google/refine/util/HttpClient.java
@@ -0,0 +1,208 @@
+package com.google.refine.util;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hc.client5.http.ClientProtocolException;
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.classic.methods.HttpPost;
+import org.apache.hc.client5.http.config.RequestConfig;
+import org.apache.hc.client5.http.entity.UrlEncodedFormEntity;
+import org.apache.hc.client5.http.impl.DefaultHttpRequestRetryStrategy;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
+import org.apache.hc.core5.http.ClassicHttpResponse;
+import org.apache.hc.core5.http.EntityDetails;
+import org.apache.hc.core5.http.Header;
+import org.apache.hc.core5.http.HttpEntity;
+import org.apache.hc.core5.http.HttpException;
+import org.apache.hc.core5.http.HttpRequest;
+import org.apache.hc.core5.http.HttpRequestInterceptor;
+import org.apache.hc.core5.http.HttpResponse;
+import org.apache.hc.core5.http.HttpStatus;
+import org.apache.hc.core5.http.NameValuePair;
+import org.apache.hc.core5.http.ParseException;
+import org.apache.hc.core5.http.io.HttpClientResponseHandler;
+import org.apache.hc.core5.http.io.SocketConfig;
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+import org.apache.hc.core5.http.message.BasicNameValuePair;
+import org.apache.hc.core5.http.protocol.HttpContext;
+import org.apache.hc.core5.util.TimeValue;
+
+import com.google.refine.RefineServlet;
+
+
+public class HttpClient {
+ final private RequestConfig defaultRequestConfig;
+ private HttpClientBuilder httpClientBuilder;
+ private CloseableHttpClient httpClient;
+ private int _delay;
+
+ public HttpClient() {
+ this(0);
+ }
+
+ public HttpClient(int delay) {
+ _delay = delay;
+ // Create a connection manager with a custom socket timeout
+ PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager();
+ final SocketConfig socketConfig = SocketConfig.custom()
+ .setSoTimeout(10, TimeUnit.SECONDS)
+ .build();
+ connManager.setDefaultSocketConfig(socketConfig);
+
+ defaultRequestConfig = RequestConfig.custom()
+ .setConnectTimeout(30, TimeUnit.SECONDS)
+ .setConnectionRequestTimeout(30, TimeUnit.SECONDS) // TODO: 60 seconds in some places in old code
+ .build();
+
+ httpClientBuilder = HttpClients.custom()
+ .setUserAgent(RefineServlet.getUserAgent())
+ .setDefaultRequestConfig(defaultRequestConfig)
+ .setConnectionManager(connManager)
+ // Default Apache HC retry is 1x @1 sec (or the value in Retry-Header)
+ .setRetryStrategy(new ExponentialBackoffRetryStrategy(3, TimeValue.ofMilliseconds(_delay)))
+// .setRedirectStrategy(new LaxRedirectStrategy()) // TODO: No longer needed since default doesn't exclude POST?
+// .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
+ .addRequestInterceptorFirst(new HttpRequestInterceptor() {
+
+ private long nextRequestTime = System.currentTimeMillis();
+
+ @Override
+ public void process(
+ final HttpRequest request,
+ final EntityDetails entity,
+ final HttpContext context) throws HttpException, IOException {
+
+ long delay = nextRequestTime - System.currentTimeMillis();
+ if (delay > 0) {
+ try {
+ Thread.sleep(delay);
+ } catch (InterruptedException e) {
+ }
+ }
+ nextRequestTime = System.currentTimeMillis() + _delay;
+
+ }
+ });
+
+ // TODO: Placeholder for future Basic Auth implementation
+// String userinfo = url.getUserInfo();
+// // HTTPS only - no sending password in the clear over HTTP
+// if ("https".equals(url.getProtocol()) && userinfo != null) {
+// int s = userinfo.indexOf(':');
+// if (s > 0) {
+// String user = userinfo.substring(0, s);
+// String pw = userinfo.substring(s + 1, userinfo.length());
+// CredentialsProvider credsProvider = new BasicCredentialsProvider();
+// credsProvider.setCredentials(new AuthScope(url.getHost(), 443),
+// new UsernamePasswordCredentials(user, pw.toCharArray()));
+// httpClientBuilder = httpClientBuilder.setDefaultCredentialsProvider(credsProvider);
+// }
+// }
+
+ httpClient = httpClientBuilder.build();
+ }
+
+ public String getAsString(String urlString, Header[] headers) throws IOException {
+
+ final HttpClientResponseHandler responseHandler = new HttpClientResponseHandler() {
+
+ @Override
+ public String handleResponse(final ClassicHttpResponse response) throws IOException {
+ final int status = response.getCode();
+ if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) {
+ final HttpEntity entity = response.getEntity();
+ if (entity == null) {
+ throw new IOException("No content found in " + urlString);
+ }
+ try {
+ return EntityUtils.toString(entity);
+ } catch (final ParseException ex) {
+ throw new ClientProtocolException(ex);
+ }
+ } else {
+ // String errorBody = EntityUtils.toString(response.getEntity());
+ throw new ClientProtocolException(String.format("HTTP error %d : %s for URL %s", status,
+ response.getReasonPhrase(), urlString));
+ }
+ }
+ };
+
+ return getResponse(urlString, headers, responseHandler);
+ }
+
+ public String getResponse(String urlString, Header[] headers, HttpClientResponseHandler responseHandler) throws IOException {
+ try {
+ // Use of URL constructor below is purely to get additional error checking to mimic
+ // previous behavior for the tests.
+ new URL(urlString).toURI();
+ } catch (IllegalArgumentException | MalformedURLException | URISyntaxException e) {
+ return null;
+ }
+
+ HttpGet httpGet = new HttpGet(urlString);
+
+ if (headers != null && headers.length > 0) {
+ httpGet.setHeaders(headers);
+ }
+ httpGet.setConfig(defaultRequestConfig); // FIXME: Redundant? already includes in client builder
+ return httpClient.execute(httpGet, responseHandler);
+ }
+
+ public String postNameValue(String serviceUrl, String name, String value) throws IOException {
+ HttpPost request = new HttpPost(serviceUrl);
+ List body = Collections.singletonList(
+ new BasicNameValuePair(name, value));
+ request.setEntity(new UrlEncodedFormEntity(body, StandardCharsets.UTF_8));
+
+ try (CloseableHttpResponse response = httpClient.execute(request)) {
+ String reasonPhrase = response.getReasonPhrase();
+ int statusCode = response.getCode();
+ if (statusCode >= 400) { // We should never see 3xx since they get handled automatically
+ throw new IOException(String.format("HTTP error %d : %s for URL %s", statusCode, reasonPhrase,
+ request.getRequestUri()));
+ }
+
+ return ParsingUtilities.inputStreamToString(response.getEntity().getContent());
+ }
+ }
+
+
+ /**
+ * Use binary exponential backoff strategy, instead of the default fixed
+ * retry interval, if the server doesn't provide a Retry-After time.
+ */
+ class ExponentialBackoffRetryStrategy extends DefaultHttpRequestRetryStrategy {
+
+ private final TimeValue defaultInterval;
+
+ public ExponentialBackoffRetryStrategy(final int maxRetries, final TimeValue defaultRetryInterval) {
+ super(maxRetries, defaultRetryInterval);
+ this.defaultInterval = defaultRetryInterval;
+ }
+
+ @Override
+ public TimeValue getRetryInterval(HttpResponse response, int execCount, HttpContext context) {
+ // Get the default implementation's interval
+ TimeValue interval = super.getRetryInterval(response, execCount, context);
+ // If it's the same as the default, there was no Retry-After, so use binary
+ // exponential backoff
+ if (interval.compareTo(defaultInterval) == 0) {
+ interval = TimeValue.of(((Double) (Math.pow(2, execCount) * defaultInterval.getDuration())).longValue(),
+ defaultInterval.getTimeUnit() );
+ return interval;
+ }
+ return interval;
+ }
+ }
+}
diff --git a/main/tests/server/src/com/google/refine/importing/ImportingUtilitiesTests.java b/main/tests/server/src/com/google/refine/importing/ImportingUtilitiesTests.java
index 091178dc0..52bdc5221 100644
--- a/main/tests/server/src/com/google/refine/importing/ImportingUtilitiesTests.java
+++ b/main/tests/server/src/com/google/refine/importing/ImportingUtilitiesTests.java
@@ -29,6 +29,7 @@ package com.google.refine.importing;
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
import java.io.File;
import java.io.IOException;
@@ -98,8 +99,6 @@ public class ImportingUtilitiesTests extends ImporterTest {
public void urlImporting() throws IOException {
String RESPONSE_BODY = "{code:401,message:Unauthorised}";
- String MESSAGE = String.format("HTTP error %d : %s | %s", 401,
- "Client Error", RESPONSE_BODY);
MockWebServer server = new MockWebServer();
MockResponse mockResponse = new MockResponse();
@@ -108,6 +107,8 @@ public class ImportingUtilitiesTests extends ImporterTest {
server.start();
server.enqueue(mockResponse);
HttpUrl url = server.url("/random");
+ String MESSAGE = String.format("HTTP error %d : %s for URL %s", 401,
+ "Client Error", url);
MultipartEntityBuilder builder = MultipartEntityBuilder.create();
StringBody stringBody = new StringBody(url.toString(), ContentType.MULTIPART_FORM_DATA);
@@ -145,9 +146,9 @@ public class ImportingUtilitiesTests extends ImporterTest {
return job.canceled;
}
});
- Assert.fail("No Exception was thrown");
+ fail("No Exception was thrown");
} catch (Exception exception) {
- Assert.assertEquals(MESSAGE, exception.getMessage());
+ assertEquals(exception.getMessage(), MESSAGE);
} finally {
server.close();
}
diff --git a/main/tests/server/src/com/google/refine/model/recon/StandardReconConfigTests.java b/main/tests/server/src/com/google/refine/model/recon/StandardReconConfigTests.java
index 16ef0efd4..d01a750c9 100644
--- a/main/tests/server/src/com/google/refine/model/recon/StandardReconConfigTests.java
+++ b/main/tests/server/src/com/google/refine/model/recon/StandardReconConfigTests.java
@@ -91,7 +91,7 @@ public class StandardReconConfigTests extends RefineTest {
return wordDistance(s1, s2);
}
- protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) throws IOException {
+ protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) {
return super.createReconServiceResults(text, resultsList, historyEntryID);
}
}
diff --git a/main/tests/server/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperationTests.java b/main/tests/server/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperationTests.java
index e5068af39..f01e673bb 100644
--- a/main/tests/server/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperationTests.java
+++ b/main/tests/server/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperationTests.java
@@ -33,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.operations.column;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -129,7 +132,7 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
} catch (InterruptedException e) {
Assert.fail("Test interrupted");
}
- Assert.assertFalse(process.isRunning());
+ Assert.assertFalse(process.isRunning(),"Process failed to complete within timeout " + timeout);
}
@Test
@@ -273,4 +276,102 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
}
}
+ @Test
+ public void testRetries() throws Exception {
+ try (MockWebServer server = new MockWebServer()) {
+ server.start();
+ HttpUrl url = server.url("/retries");
+
+ for (int i = 0; i < 2; i++) {
+ Row row = new Row(2);
+ row.setCell(0, new Cell("test" + (i + 1), null));
+ project.rows.add(row);
+ }
+
+ // Queue 5 error responses with 1 sec. Retry-After interval
+ for (int i = 0; i < 5; i++) {
+ server.enqueue(new MockResponse()
+ .setHeader("Retry-After", 1)
+ .setResponseCode(429)
+ .setBody(Integer.toString(i,10)));
+ }
+
+ server.enqueue(new MockResponse().setBody("success"));
+
+ EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
+ "fruits",
+ "\"" + url + "?city=\"+value",
+ OnError.StoreError,
+ "rand",
+ 1,
+ 100,
+ false,
+ null);
+
+ // 6 requests (4 retries @1 sec) + final response
+ long start = System.currentTimeMillis();
+ runAndWait(op, 4500);
+
+ // Make sure that our Retry-After headers were obeyed (4*1 sec vs 4*100msec)
+ long elapsed = System.currentTimeMillis() - start;
+ assertTrue(elapsed > 4000, "Retry-After retries didn't take long enough - elapsed = " + elapsed );
+
+ // 1st row fails after 4 tries (3 retries), 2nd row tries twice and gets value
+ assertTrue(project.rows.get(0).getCellValue(1).toString().contains("HTTP error 429"), "missing 429 error");
+ assertEquals(project.rows.get(1).getCellValue(1).toString(), "success");
+
+ server.shutdown();
+ }
+ }
+
+ @Test
+ public void testExponentialRetries() throws Exception {
+ try (MockWebServer server = new MockWebServer()) {
+ server.start();
+ HttpUrl url = server.url("/retries");
+
+ for (int i = 0; i < 3; i++) {
+ Row row = new Row(2);
+ row.setCell(0, new Cell("test" + (i + 1), null));
+ project.rows.add(row);
+ }
+
+ // Use 503 Server Unavailable with no Retry-After header this time
+ for (int i = 0; i < 5; i++) {
+ server.enqueue(new MockResponse()
+ .setResponseCode(503)
+ .setBody(Integer.toString(i,10)));
+ }
+ server.enqueue(new MockResponse().setBody("success"));
+
+ server.enqueue(new MockResponse().setBody("not found").setResponseCode(404));
+
+ ColumnAdditionByFetchingURLsOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
+ "fruits",
+ "\"" + url + "?city=\"+value",
+ OnError.StoreError,
+ "rand",
+ 1,
+ 100,
+ false,
+ null);
+
+ // 6 requests (4 retries 200, 400, 800, 200 msec) + final response
+ long start = System.currentTimeMillis();
+ runAndWait(op, 2500);
+
+ // Make sure that our exponential back off is working
+ long elapsed = System.currentTimeMillis() - start;
+ assertTrue(elapsed > 1600, "Exponential retries didn't take enough time - elapsed = " + elapsed);
+
+ // 1st row fails after 4 tries (3 retries), 2nd row tries twice and gets value, 3rd row is hard error
+ assertTrue(project.rows.get(0).getCellValue(1).toString().contains("HTTP error 503"), "Missing 503 error");
+ assertEquals(project.rows.get(1).getCellValue(1).toString(), "success");
+ assertTrue(project.rows.get(2).getCellValue(1).toString().contains("HTTP error 404"),"Missing 404 error");
+
+ server.shutdown();
+ }
+ }
+
+
}
diff --git a/main/tests/server/src/com/google/refine/operations/recon/ExtendDataOperationTests.java b/main/tests/server/src/com/google/refine/operations/recon/ExtendDataOperationTests.java
index 5a52537ef..123d9311b 100644
--- a/main/tests/server/src/com/google/refine/operations/recon/ExtendDataOperationTests.java
+++ b/main/tests/server/src/com/google/refine/operations/recon/ExtendDataOperationTests.java
@@ -38,9 +38,7 @@ import static org.mockito.Mockito.mock;
import static org.powermock.api.mockito.PowerMockito.mockStatic;
import java.io.IOException;
-import java.io.InputStream;
import java.io.StringWriter;
-import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -49,7 +47,6 @@ import java.util.Map;
import java.util.Properties;
import java.util.Set;
-import org.apache.commons.io.IOUtils;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.powermock.api.mockito.PowerMockito;
@@ -225,7 +222,6 @@ public class ExtendDataOperationTests extends RefineTest {
* Test to fetch simple strings
* @throws Exception
*/
-
@BeforeMethod
public void mockHttpCalls() throws Exception {
mockStatic(ReconciledDataExtensionJob.class);
@@ -236,9 +232,9 @@ public class ExtendDataOperationTests extends RefineTest {
return fakeHttpCall(invocation.getArgument(0), invocation.getArgument(1));
}
};
- PowerMockito.doAnswer(mockedResponse).when(ReconciledDataExtensionJob.class, "performQuery", anyString(), anyString());
+ PowerMockito.doAnswer(mockedResponse).when(ReconciledDataExtensionJob.class, "postExtendQuery", anyString(), anyString());
}
-
+
@AfterMethod
public void cleanupHttpMocks() {
mockedResponses.clear();