* Refactor HTTP code into a common utility class. Centralizes the six (slightly) different implementations on a common Apache HTTP Client 5 implementation, which implements our strategies for retries, timeouts, error handling, etc. Apache HTTP Client 5 adds support for Retry-After headers, HTTP/2, and a number of other features under the covers. Moves the request delay to a request interceptor and fixes the calculation of the delay (again). Increases retries from 1x to 3x and uses delay*2 as the default retry interval if there is no Retry-After header. Uses an exponential backoff strategy for multiple retries. * Reuses the HTTP client across requests. * Uses IOException instead of Exception for HTTP errors.
This commit is contained in:
parent
9e94d32b49
commit
14f43dc2cc
@ -290,6 +290,11 @@
|
|||||||
<artifactId>clojure</artifactId>
|
<artifactId>clojure</artifactId>
|
||||||
<version>1.10.1</version>
|
<version>1.10.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents.client5</groupId>
|
||||||
|
<artifactId>httpclient5</artifactId>
|
||||||
|
<version>5.0.2</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
<artifactId>httpclient</artifactId>
|
<artifactId>httpclient</artifactId>
|
||||||
|
@ -48,19 +48,6 @@ import javax.servlet.ServletException;
|
|||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
import javax.servlet.http.HttpServletResponse;
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
import org.apache.http.Consts;
|
|
||||||
import org.apache.http.NameValuePair;
|
|
||||||
import org.apache.http.StatusLine;
|
|
||||||
import org.apache.http.client.config.RequestConfig;
|
|
||||||
import org.apache.http.client.entity.UrlEncodedFormEntity;
|
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
||||||
import org.apache.http.client.methods.HttpPost;
|
|
||||||
import org.apache.http.impl.client.CloseableHttpClient;
|
|
||||||
import org.apache.http.impl.client.HttpClientBuilder;
|
|
||||||
import org.apache.http.impl.client.HttpClients;
|
|
||||||
import org.apache.http.impl.client.LaxRedirectStrategy;
|
|
||||||
import org.apache.http.message.BasicNameValuePair;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
import com.fasterxml.jackson.annotation.JsonInclude.Include;
|
import com.fasterxml.jackson.annotation.JsonInclude.Include;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
@ -68,7 +55,6 @@ import com.fasterxml.jackson.core.type.TypeReference;
|
|||||||
import com.fasterxml.jackson.databind.JsonNode;
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
import com.google.refine.RefineServlet;
|
|
||||||
import com.google.refine.commands.Command;
|
import com.google.refine.commands.Command;
|
||||||
import com.google.refine.expr.ExpressionUtils;
|
import com.google.refine.expr.ExpressionUtils;
|
||||||
import com.google.refine.model.Column;
|
import com.google.refine.model.Column;
|
||||||
@ -76,6 +62,7 @@ import com.google.refine.model.Project;
|
|||||||
import com.google.refine.model.ReconType;
|
import com.google.refine.model.ReconType;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
import com.google.refine.model.recon.StandardReconConfig.ReconResult;
|
import com.google.refine.model.recon.StandardReconConfig.ReconResult;
|
||||||
|
import com.google.refine.util.HttpClient;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
public class GuessTypesOfColumnCommand extends Command {
|
public class GuessTypesOfColumnCommand extends Command {
|
||||||
@ -180,32 +167,10 @@ public class GuessTypesOfColumnCommand extends Command {
|
|||||||
}
|
}
|
||||||
|
|
||||||
String queriesString = ParsingUtilities.defaultWriter.writeValueAsString(queryMap);
|
String queriesString = ParsingUtilities.defaultWriter.writeValueAsString(queryMap);
|
||||||
|
String responseString;
|
||||||
try {
|
try {
|
||||||
RequestConfig defaultRequestConfig = RequestConfig.custom()
|
responseString = postQueries(serviceUrl, queriesString);
|
||||||
.setConnectTimeout(30 * 1000)
|
ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(responseString);
|
||||||
.build();
|
|
||||||
|
|
||||||
HttpClientBuilder httpClientBuilder = HttpClients.custom()
|
|
||||||
.setUserAgent(RefineServlet.getUserAgent())
|
|
||||||
.setRedirectStrategy(new LaxRedirectStrategy())
|
|
||||||
.setDefaultRequestConfig(defaultRequestConfig);
|
|
||||||
|
|
||||||
CloseableHttpClient httpClient = httpClientBuilder.build();
|
|
||||||
HttpPost request = new HttpPost(serviceUrl);
|
|
||||||
List<NameValuePair> body = Collections.singletonList(
|
|
||||||
new BasicNameValuePair("queries", queriesString));
|
|
||||||
request.setEntity(new UrlEncodedFormEntity(body, Consts.UTF_8));
|
|
||||||
|
|
||||||
try (CloseableHttpResponse response = httpClient.execute(request)) {
|
|
||||||
StatusLine statusLine = response.getStatusLine();
|
|
||||||
if (statusLine.getStatusCode() >= 400) {
|
|
||||||
throw new IOException("Failed - code:"
|
|
||||||
+ Integer.toString(statusLine.getStatusCode())
|
|
||||||
+ " message: " + statusLine.getReasonPhrase());
|
|
||||||
}
|
|
||||||
|
|
||||||
String s = ParsingUtilities.inputStreamToString(response.getEntity().getContent());
|
|
||||||
ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(s);
|
|
||||||
|
|
||||||
Iterator<JsonNode> iterator = o.iterator();
|
Iterator<JsonNode> iterator = o.iterator();
|
||||||
while (iterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
@ -238,7 +203,6 @@ public class GuessTypesOfColumnCommand extends Command {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("Failed to guess cell types for load\n" + queriesString, e);
|
logger.error("Failed to guess cell types for load\n" + queriesString, e);
|
||||||
throw e;
|
throw e;
|
||||||
@ -259,6 +223,11 @@ public class GuessTypesOfColumnCommand extends Command {
|
|||||||
return types;
|
return types;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String postQueries(String serviceUrl, String queriesString) throws IOException {
|
||||||
|
HttpClient client = new HttpClient();
|
||||||
|
return client.postNameValue(serviceUrl, "queries", queriesString);
|
||||||
|
}
|
||||||
|
|
||||||
static protected class TypeGroup {
|
static protected class TypeGroup {
|
||||||
@JsonProperty("id")
|
@JsonProperty("id")
|
||||||
protected String id;
|
protected String id;
|
||||||
|
@ -69,19 +69,13 @@ import org.apache.commons.fileupload.ProgressListener;
|
|||||||
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
||||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||||
import org.apache.commons.fileupload.util.Streams;
|
import org.apache.commons.fileupload.util.Streams;
|
||||||
import org.apache.http.HttpEntity;
|
import org.apache.hc.client5.http.ClientProtocolException;
|
||||||
import org.apache.http.auth.AuthScope;
|
import org.apache.hc.core5.http.ClassicHttpResponse;
|
||||||
import org.apache.http.auth.UsernamePasswordCredentials;
|
import org.apache.hc.core5.http.ContentType;
|
||||||
import org.apache.http.client.CredentialsProvider;
|
import org.apache.hc.core5.http.HttpEntity;
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
import org.apache.hc.core5.http.HttpStatus;
|
||||||
import org.apache.http.client.methods.HttpGet;
|
import org.apache.hc.core5.http.io.HttpClientResponseHandler;
|
||||||
import org.apache.http.entity.ContentType;
|
|
||||||
import org.apache.http.impl.client.BasicCredentialsProvider;
|
|
||||||
import org.apache.http.impl.client.CloseableHttpClient;
|
|
||||||
import org.apache.http.impl.client.HttpClientBuilder;
|
|
||||||
import org.apache.http.impl.client.HttpClients;
|
|
||||||
import org.apache.http.util.EntityUtils;
|
|
||||||
import org.apache.http.StatusLine;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -89,10 +83,10 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
|
|||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
import com.google.refine.ProjectManager;
|
import com.google.refine.ProjectManager;
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
import com.google.refine.RefineServlet;
|
|
||||||
import com.google.refine.importing.ImportingManager.Format;
|
import com.google.refine.importing.ImportingManager.Format;
|
||||||
import com.google.refine.importing.UrlRewriter.Result;
|
import com.google.refine.importing.UrlRewriter.Result;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.util.HttpClient;
|
||||||
import com.google.refine.util.JSONUtilities;
|
import com.google.refine.util.JSONUtilities;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
@ -287,46 +281,24 @@ public class ImportingUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
|
if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
|
||||||
HttpClientBuilder clientbuilder = HttpClients.custom()
|
final URL lastUrl = url;
|
||||||
.setUserAgent(RefineServlet.getUserAgent());
|
final HttpClientResponseHandler<String> responseHandler = new HttpClientResponseHandler<String>() {
|
||||||
// .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
|
|
||||||
|
|
||||||
String userinfo = url.getUserInfo();
|
@Override
|
||||||
// HTTPS only - no sending password in the clear over HTTP
|
public String handleResponse(final ClassicHttpResponse response) throws IOException {
|
||||||
if ("https".equals(url.getProtocol()) && userinfo != null) {
|
final int status = response.getCode();
|
||||||
int s = userinfo.indexOf(':');
|
if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) {
|
||||||
if (s > 0) {
|
final HttpEntity entity = response.getEntity();
|
||||||
String user = userinfo.substring(0, s);
|
if (entity == null) {
|
||||||
String pw = userinfo.substring(s + 1, userinfo.length());
|
throw new IOException("No content found in " + lastUrl.toExternalForm());
|
||||||
CredentialsProvider credsProvider = new BasicCredentialsProvider();
|
|
||||||
credsProvider.setCredentials(new AuthScope(url.getHost(), 443),
|
|
||||||
new UsernamePasswordCredentials(user, pw));
|
|
||||||
clientbuilder = clientbuilder.setDefaultCredentialsProvider(credsProvider);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
CloseableHttpClient httpclient = clientbuilder.build();
|
|
||||||
HttpGet httpGet = new HttpGet(url.toURI());
|
|
||||||
CloseableHttpResponse response = httpclient.execute(httpGet);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
HttpEntity entity = response.getEntity();
|
|
||||||
if (entity == null) {
|
|
||||||
throw new Exception("No content found in " + url.toString());
|
|
||||||
}
|
|
||||||
StatusLine status = response.getStatusLine();
|
|
||||||
int statusCode = response.getStatusLine().getStatusCode();
|
|
||||||
if (statusCode >= 400) {
|
|
||||||
String errorString = ParsingUtilities.inputStreamToString(entity.getContent());
|
|
||||||
String message = String.format("HTTP error %d : %s | %s", statusCode,
|
|
||||||
status.getReasonPhrase(), errorString);
|
|
||||||
throw new Exception(message);
|
|
||||||
}
|
|
||||||
InputStream stream2 = entity.getContent();
|
InputStream stream2 = entity.getContent();
|
||||||
|
|
||||||
String mimeType = null;
|
String mimeType = null;
|
||||||
String charset = null;
|
String charset = null;
|
||||||
ContentType contentType = ContentType.get(entity);
|
ContentType contentType = ContentType.parse(entity.getContentType());
|
||||||
if (contentType != null) {
|
if (contentType != null) {
|
||||||
mimeType = contentType.getMimeType();
|
mimeType = contentType.getMimeType();
|
||||||
Charset cs = contentType.getCharset();
|
Charset cs = contentType.getCharset();
|
||||||
@ -336,16 +308,29 @@ public class ImportingUtilities {
|
|||||||
}
|
}
|
||||||
JSONUtilities.safePut(fileRecord, "declaredMimeType", mimeType);
|
JSONUtilities.safePut(fileRecord, "declaredMimeType", mimeType);
|
||||||
JSONUtilities.safePut(fileRecord, "declaredEncoding", charset);
|
JSONUtilities.safePut(fileRecord, "declaredEncoding", charset);
|
||||||
if (saveStream(stream2, url, rawDataDir, progress, update,
|
if (saveStream(stream2, lastUrl, rawDataDir, progress, update,
|
||||||
fileRecord, fileRecords,
|
fileRecord, fileRecords,
|
||||||
entity.getContentLength())) {
|
entity.getContentLength())) {
|
||||||
|
return "saved"; // signal to increment archive count
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (final IOException ex) {
|
||||||
|
throw new ClientProtocolException(ex);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
// String errorBody = EntityUtils.toString(response.getEntity());
|
||||||
|
throw new ClientProtocolException(String.format("HTTP error %d : %s for URL %s", status,
|
||||||
|
response.getReasonPhrase(), lastUrl.toExternalForm()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
HttpClient httpClient = new HttpClient();
|
||||||
|
if (httpClient.getResponse(urlString, null, responseHandler) != null) {
|
||||||
archiveCount++;
|
archiveCount++;
|
||||||
}
|
};
|
||||||
downloadCount++;
|
downloadCount++;
|
||||||
EntityUtils.consume(entity);
|
|
||||||
} finally {
|
|
||||||
httpGet.reset();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Fallback handling for non HTTP connections (only FTP?)
|
// Fallback handling for non HTTP connections (only FTP?)
|
||||||
URLConnection urlConnection = url.openConnection();
|
URLConnection urlConnection = url.openConnection();
|
||||||
@ -418,7 +403,7 @@ public class ImportingUtilities {
|
|||||||
|
|
||||||
private static boolean saveStream(InputStream stream, URL url, File rawDataDir, final Progress progress,
|
private static boolean saveStream(InputStream stream, URL url, File rawDataDir, final Progress progress,
|
||||||
final SavingUpdate update, ObjectNode fileRecord, ArrayNode fileRecords, long length)
|
final SavingUpdate update, ObjectNode fileRecord, ArrayNode fileRecords, long length)
|
||||||
throws IOException, Exception {
|
throws IOException {
|
||||||
String localname = url.getPath();
|
String localname = url.getPath();
|
||||||
if (localname.isEmpty() || localname.endsWith("/")) {
|
if (localname.isEmpty() || localname.endsWith("/")) {
|
||||||
localname = localname + "temp";
|
localname = localname + "temp";
|
||||||
@ -436,7 +421,7 @@ public class ImportingUtilities {
|
|||||||
long actualLength = saveStreamToFile(stream, file, update);
|
long actualLength = saveStreamToFile(stream, file, update);
|
||||||
JSONUtilities.safePut(fileRecord, "size", actualLength);
|
JSONUtilities.safePut(fileRecord, "size", actualLength);
|
||||||
if (actualLength == 0) {
|
if (actualLength == 0) {
|
||||||
throw new Exception("No content found in " + url.toString());
|
throw new IOException("No content found in " + url.toString());
|
||||||
} else if (length >= 0) {
|
} else if (length >= 0) {
|
||||||
update.totalExpectedSize += (actualLength - length);
|
update.totalExpectedSize += (actualLength - length);
|
||||||
} else {
|
} else {
|
||||||
|
@ -37,30 +37,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
package com.google.refine.model.recon;
|
package com.google.refine.model.recon;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.io.Writer;
|
import java.io.Writer;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.http.Consts;
|
|
||||||
import org.apache.http.NameValuePair;
|
|
||||||
import org.apache.http.StatusLine;
|
|
||||||
import org.apache.http.client.config.RequestConfig;
|
|
||||||
import org.apache.http.client.entity.UrlEncodedFormEntity;
|
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
||||||
import org.apache.http.client.methods.HttpPost;
|
|
||||||
import org.apache.http.impl.client.CloseableHttpClient;
|
|
||||||
import org.apache.http.impl.client.HttpClientBuilder;
|
|
||||||
import org.apache.http.impl.client.HttpClients;
|
|
||||||
import org.apache.http.impl.client.LaxRedirectStrategy;
|
|
||||||
import org.apache.http.message.BasicNameValuePair;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
import com.fasterxml.jackson.annotation.JsonInclude.Include;
|
import com.fasterxml.jackson.annotation.JsonInclude.Include;
|
||||||
@ -69,14 +54,15 @@ import com.fasterxml.jackson.annotation.JsonView;
|
|||||||
import com.fasterxml.jackson.core.type.TypeReference;
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
import com.google.refine.RefineServlet;
|
|
||||||
import com.google.refine.expr.functions.ToDate;
|
import com.google.refine.expr.functions.ToDate;
|
||||||
import com.google.refine.model.ReconCandidate;
|
import com.google.refine.model.ReconCandidate;
|
||||||
import com.google.refine.model.ReconType;
|
import com.google.refine.model.ReconType;
|
||||||
|
import com.google.refine.util.HttpClient;
|
||||||
import com.google.refine.util.JSONUtilities;
|
import com.google.refine.util.JSONUtilities;
|
||||||
import com.google.refine.util.JsonViews;
|
import com.google.refine.util.JsonViews;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
|
|
||||||
public class ReconciledDataExtensionJob {
|
public class ReconciledDataExtensionJob {
|
||||||
|
|
||||||
|
|
||||||
@ -172,13 +158,21 @@ public class ReconciledDataExtensionJob {
|
|||||||
final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
|
final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
|
||||||
|
|
||||||
// not final: initialized lazily
|
// not final: initialized lazily
|
||||||
private static CloseableHttpClient httpClient = null;
|
private static HttpClient httpClient = null;
|
||||||
|
|
||||||
public ReconciledDataExtensionJob(DataExtensionConfig obj, String endpoint) {
|
public ReconciledDataExtensionJob(DataExtensionConfig obj, String endpoint) {
|
||||||
this.extension = obj;
|
this.extension = obj;
|
||||||
this.endpoint = endpoint;
|
this.endpoint = endpoint;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @todo Although the HTTP code has been unified, there may still be opportunity
|
||||||
|
* to refactor a higher level querying library out of this which could be shared
|
||||||
|
* with StandardReconConfig
|
||||||
|
*
|
||||||
|
* It may also be possible to extract a library to query reconciliation services
|
||||||
|
* which could be used outside of OpenRefine.
|
||||||
|
*/
|
||||||
public Map<String, ReconciledDataExtensionJob.DataExtension> extend(
|
public Map<String, ReconciledDataExtensionJob.DataExtension> extend(
|
||||||
Set<String> ids,
|
Set<String> ids,
|
||||||
Map<String, ReconCandidate> reconCandidateMap
|
Map<String, ReconCandidate> reconCandidateMap
|
||||||
@ -187,7 +181,7 @@ public class ReconciledDataExtensionJob {
|
|||||||
formulateQuery(ids, extension, writer);
|
formulateQuery(ids, extension, writer);
|
||||||
|
|
||||||
String query = writer.toString();
|
String query = writer.toString();
|
||||||
String response = performQuery(this.endpoint, query);
|
String response = postExtendQuery(this.endpoint, query);
|
||||||
|
|
||||||
ObjectNode o = ParsingUtilities.mapper.readValue(response, ObjectNode.class);
|
ObjectNode o = ParsingUtilities.mapper.readValue(response, ObjectNode.class);
|
||||||
|
|
||||||
@ -218,43 +212,14 @@ public class ReconciledDataExtensionJob {
|
|||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
static protected String postExtendQuery(String endpoint, String query) throws IOException {
|
||||||
* @todo this should be refactored to be unified with the HTTP querying code
|
return getHttpClient().postNameValue(endpoint, "extend", query);
|
||||||
* from StandardReconConfig. We should ideally extract a library to query
|
|
||||||
* reconciliation services and expose it as such for others to reuse.
|
|
||||||
*/
|
|
||||||
|
|
||||||
static protected String performQuery(String endpoint, String query) throws IOException {
|
|
||||||
HttpPost request = new HttpPost(endpoint);
|
|
||||||
List<NameValuePair> body = Collections.singletonList(
|
|
||||||
new BasicNameValuePair("extend", query));
|
|
||||||
request.setEntity(new UrlEncodedFormEntity(body, Consts.UTF_8));
|
|
||||||
|
|
||||||
try (CloseableHttpResponse response = getHttpClient().execute(request)) {
|
|
||||||
StatusLine statusLine = response.getStatusLine();
|
|
||||||
if (statusLine.getStatusCode() >= 400) {
|
|
||||||
throw new IOException("Data extension query failed - code: "
|
|
||||||
+ Integer.toString(statusLine.getStatusCode())
|
|
||||||
+ " message: " + statusLine.getReasonPhrase());
|
|
||||||
} else {
|
|
||||||
return ParsingUtilities.inputStreamToString(response.getEntity().getContent());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static CloseableHttpClient getHttpClient() {
|
private static HttpClient getHttpClient() {
|
||||||
if (httpClient != null) {
|
if (httpClient == null) {
|
||||||
return httpClient;
|
httpClient = new HttpClient();
|
||||||
}
|
}
|
||||||
RequestConfig defaultRequestConfig = RequestConfig.custom()
|
|
||||||
.setConnectTimeout(30 * 1000)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
HttpClientBuilder httpClientBuilder = HttpClients.custom()
|
|
||||||
.setUserAgent(RefineServlet.getUserAgent())
|
|
||||||
.setRedirectStrategy(new LaxRedirectStrategy())
|
|
||||||
.setDefaultRequestConfig(defaultRequestConfig);
|
|
||||||
httpClient = httpClientBuilder.build();
|
|
||||||
return httpClient;
|
return httpClient;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,18 +45,6 @@ import java.util.Map;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.http.Consts;
|
|
||||||
import org.apache.http.NameValuePair;
|
|
||||||
import org.apache.http.StatusLine;
|
|
||||||
import org.apache.http.client.config.RequestConfig;
|
|
||||||
import org.apache.http.client.entity.UrlEncodedFormEntity;
|
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
||||||
import org.apache.http.client.methods.HttpPost;
|
|
||||||
import org.apache.http.impl.client.CloseableHttpClient;
|
|
||||||
import org.apache.http.impl.client.HttpClientBuilder;
|
|
||||||
import org.apache.http.impl.client.HttpClients;
|
|
||||||
import org.apache.http.impl.client.LaxRedirectStrategy;
|
|
||||||
import org.apache.http.message.BasicNameValuePair;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -69,7 +57,6 @@ import com.fasterxml.jackson.core.JsonProcessingException;
|
|||||||
import com.fasterxml.jackson.core.type.TypeReference;
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
import com.google.refine.RefineServlet;
|
|
||||||
import com.google.refine.expr.ExpressionUtils;
|
import com.google.refine.expr.ExpressionUtils;
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
@ -79,6 +66,7 @@ import com.google.refine.model.ReconCandidate;
|
|||||||
import com.google.refine.model.ReconType;
|
import com.google.refine.model.ReconType;
|
||||||
import com.google.refine.model.RecordModel.RowDependency;
|
import com.google.refine.model.RecordModel.RowDependency;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
|
import com.google.refine.util.HttpClient;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
public class StandardReconConfig extends ReconConfig {
|
public class StandardReconConfig extends ReconConfig {
|
||||||
@ -164,7 +152,7 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
final private int limit;
|
final private int limit;
|
||||||
|
|
||||||
// initialized lazily
|
// initialized lazily
|
||||||
private CloseableHttpClient httpClient = null;
|
private HttpClient httpClient = null;
|
||||||
|
|
||||||
@JsonCreator
|
@JsonCreator
|
||||||
public StandardReconConfig(
|
public StandardReconConfig(
|
||||||
@ -434,27 +422,27 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
try {
|
try {
|
||||||
job.code = ParsingUtilities.defaultWriter.writeValueAsString(query);
|
job.code = ParsingUtilities.defaultWriter.writeValueAsString(query);
|
||||||
} catch (JsonProcessingException e) {
|
} catch (JsonProcessingException e) {
|
||||||
|
// FIXME: This error will get lost
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
return null; // TODO: Throw exception instead?
|
return null; // TODO: Throw exception instead?
|
||||||
}
|
}
|
||||||
return job;
|
return job;
|
||||||
}
|
}
|
||||||
|
|
||||||
private CloseableHttpClient getHttpClient() {
|
private HttpClient getHttpClient() {
|
||||||
if (httpClient != null) {
|
if (httpClient == null) {
|
||||||
|
httpClient = new HttpClient();
|
||||||
|
}
|
||||||
return httpClient;
|
return httpClient;
|
||||||
}
|
}
|
||||||
RequestConfig defaultRequestConfig = RequestConfig.custom()
|
|
||||||
.setConnectTimeout(30 * 1000)
|
|
||||||
.setSocketTimeout(60 * 1000)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
HttpClientBuilder httpClientBuilder = HttpClients.custom()
|
private String postQueries(String url, String queriesString) throws IOException {
|
||||||
.setUserAgent(RefineServlet.getUserAgent())
|
try {
|
||||||
.setRedirectStrategy(new LaxRedirectStrategy())
|
return getHttpClient().postNameValue(url, "queries", queriesString);
|
||||||
.setDefaultRequestConfig(defaultRequestConfig);
|
|
||||||
httpClient = httpClientBuilder.build();
|
} catch (IOException e) {
|
||||||
return httpClient;
|
throw new IOException("Failed to batch recon with load:\n" + queriesString, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -475,22 +463,12 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
stringWriter.write("}");
|
stringWriter.write("}");
|
||||||
String queriesString = stringWriter.toString();
|
String queriesString = stringWriter.toString();
|
||||||
|
|
||||||
HttpPost request = new HttpPost(service);
|
try {
|
||||||
List<NameValuePair> body = Collections.singletonList(
|
String responseString = postQueries(service, queriesString);
|
||||||
new BasicNameValuePair("queries", queriesString));
|
ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(responseString);
|
||||||
request.setEntity(new UrlEncodedFormEntity(body, Consts.UTF_8));
|
|
||||||
|
|
||||||
try (CloseableHttpResponse response = getHttpClient().execute(request)) {
|
|
||||||
StatusLine statusLine = response.getStatusLine();
|
|
||||||
if (statusLine.getStatusCode() >= 400) {
|
|
||||||
logger.error("Failed - code: "
|
|
||||||
+ Integer.toString(statusLine.getStatusCode())
|
|
||||||
+ " message: " + statusLine.getReasonPhrase());
|
|
||||||
} else {
|
|
||||||
String s = ParsingUtilities.inputStreamToString(response.getEntity().getContent());
|
|
||||||
ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(s);
|
|
||||||
if (o == null) { // utility method returns null instead of throwing
|
if (o == null) { // utility method returns null instead of throwing
|
||||||
logger.error("Failed to parse string as JSON: " + s);
|
logger.error("Failed to parse string as JSON: " + responseString);
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < jobs.size(); i++) {
|
for (int i = 0; i < jobs.size(); i++) {
|
||||||
StandardReconJob job = (StandardReconJob) jobs.get(i);
|
StandardReconJob job = (StandardReconJob) jobs.get(i);
|
||||||
@ -505,6 +483,7 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
|
|
||||||
recon = createReconServiceResults(text, results, historyEntryID);
|
recon = createReconServiceResults(text, results, historyEntryID);
|
||||||
} else {
|
} else {
|
||||||
|
// TODO: better error reporting
|
||||||
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
|
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -518,8 +497,7 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
recons.add(recon);
|
recons.add(recon);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
} catch (IOException e) {
|
||||||
} catch (Exception e) {
|
|
||||||
logger.error("Failed to batch recon with load:\n" + queriesString, e);
|
logger.error("Failed to batch recon with load:\n" + queriesString, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -543,7 +521,7 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
return recon;
|
return recon;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) throws IOException {
|
protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) {
|
||||||
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
||||||
List<ReconResult> results = ParsingUtilities.mapper.convertValue(resultsList, new TypeReference<List<ReconResult>>() {});
|
List<ReconResult> results = ParsingUtilities.mapper.convertValue(resultsList, new TypeReference<List<ReconResult>>() {});
|
||||||
|
|
||||||
|
@ -37,27 +37,13 @@ import static com.google.common.base.Strings.isNullOrEmpty;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URISyntaxException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.nio.charset.Charset;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import org.apache.http.Header;
|
import org.apache.hc.core5.http.Header;
|
||||||
import org.apache.http.HttpEntity;
|
import org.apache.hc.core5.http.message.BasicHeader;
|
||||||
import org.apache.http.StatusLine;
|
|
||||||
import org.apache.http.client.config.RequestConfig;
|
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
||||||
import org.apache.http.client.methods.HttpGet;
|
|
||||||
import org.apache.http.entity.ContentType;
|
|
||||||
import org.apache.http.impl.client.CloseableHttpClient;
|
|
||||||
import org.apache.http.impl.client.HttpClientBuilder;
|
|
||||||
import org.apache.http.impl.client.HttpClients;
|
|
||||||
import org.apache.http.message.BasicHeader;
|
|
||||||
import org.apache.http.util.EntityUtils;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
@ -65,7 +51,6 @@ import com.google.common.cache.CacheBuilder;
|
|||||||
import com.google.common.cache.CacheLoader;
|
import com.google.common.cache.CacheLoader;
|
||||||
import com.google.common.cache.LoadingCache;
|
import com.google.common.cache.LoadingCache;
|
||||||
|
|
||||||
import com.google.refine.RefineServlet;
|
|
||||||
import com.google.refine.browsing.Engine;
|
import com.google.refine.browsing.Engine;
|
||||||
import com.google.refine.browsing.EngineConfig;
|
import com.google.refine.browsing.EngineConfig;
|
||||||
import com.google.refine.browsing.FilteredRows;
|
import com.google.refine.browsing.FilteredRows;
|
||||||
@ -86,7 +71,7 @@ import com.google.refine.operations.EngineDependentOperation;
|
|||||||
import com.google.refine.operations.OnError;
|
import com.google.refine.operations.OnError;
|
||||||
import com.google.refine.process.LongRunningProcess;
|
import com.google.refine.process.LongRunningProcess;
|
||||||
import com.google.refine.process.Process;
|
import com.google.refine.process.Process;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.HttpClient;
|
||||||
|
|
||||||
|
|
||||||
public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation {
|
public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation {
|
||||||
@ -117,8 +102,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
final protected boolean _cacheResponses;
|
final protected boolean _cacheResponses;
|
||||||
final protected List<HttpHeader> _httpHeadersJson;
|
final protected List<HttpHeader> _httpHeadersJson;
|
||||||
private Header[] httpHeaders = new Header[0];
|
private Header[] httpHeaders = new Header[0];
|
||||||
final private RequestConfig defaultRequestConfig;
|
private HttpClient _httpClient;
|
||||||
private HttpClientBuilder httpClientBuilder;
|
|
||||||
|
|
||||||
@JsonCreator
|
@JsonCreator
|
||||||
public ColumnAdditionByFetchingURLsOperation(
|
public ColumnAdditionByFetchingURLsOperation(
|
||||||
@ -163,22 +148,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
httpHeaders = headers.toArray(httpHeaders);
|
httpHeaders = headers.toArray(httpHeaders);
|
||||||
|
_httpClient = new HttpClient(_delay);
|
||||||
|
|
||||||
defaultRequestConfig = RequestConfig.custom()
|
|
||||||
.setConnectTimeout(30 * 1000)
|
|
||||||
.setConnectionRequestTimeout(30 * 1000)
|
|
||||||
.setSocketTimeout(10 * 1000).build();
|
|
||||||
|
|
||||||
// TODO: Placeholder for future Basic Auth implementation
|
|
||||||
// CredentialsProvider credsProvider = new BasicCredentialsProvider();
|
|
||||||
// credsProvider.setCredentials(new AuthScope(host, 443),
|
|
||||||
// new UsernamePasswordCredentials(user, password));
|
|
||||||
|
|
||||||
httpClientBuilder = HttpClients.custom()
|
|
||||||
.setUserAgent(RefineServlet.getUserAgent())
|
|
||||||
.setDefaultRequestConfig(defaultRequestConfig);
|
|
||||||
// .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
|
|
||||||
// .setDefaultCredentialsProvider(credsProvider);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonProperty("newColumnName")
|
@JsonProperty("newColumnName")
|
||||||
@ -281,20 +252,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
.build(
|
.build(
|
||||||
new CacheLoader<String, Serializable>() {
|
new CacheLoader<String, Serializable>() {
|
||||||
public Serializable load(String urlString) throws Exception {
|
public Serializable load(String urlString) throws Exception {
|
||||||
Serializable result = fetch(urlString);
|
Serializable result = fetch(urlString, httpHeaders);
|
||||||
try {
|
|
||||||
// Always sleep for the delay, no matter how long the
|
|
||||||
// request took. This is more responsible than substracting
|
|
||||||
// the time spend requesting the URL, because it naturally
|
|
||||||
// slows us down if the server is busy and takes a long time
|
|
||||||
// to reply.
|
|
||||||
if (_delay > 0) {
|
|
||||||
Thread.sleep(_delay);
|
|
||||||
}
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
result = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result == null) {
|
if (result == null) {
|
||||||
// the load method should not return any null value
|
// the load method should not return any null value
|
||||||
throw new Exception("null result returned by fetch");
|
throw new Exception("null result returned by fetch");
|
||||||
@ -335,9 +293,9 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
|
|
||||||
Serializable response = null;
|
Serializable response = null;
|
||||||
if (_urlCache != null) {
|
if (_urlCache != null) {
|
||||||
response = cachedFetch(urlString); // TODO: Why does this need a separate method?
|
response = cachedFetch(urlString);
|
||||||
} else {
|
} else {
|
||||||
response = fetch(urlString);
|
response = fetch(urlString, httpHeaders);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (response != null) {
|
if (response != null) {
|
||||||
@ -380,68 +338,19 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Serializable fetch(String urlString) {
|
Serializable fetch(String urlString, Header[] headers) {
|
||||||
HttpGet httpGet;
|
try { //HttpClients.createDefault()) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Use of URL constructor below is purely to get additional error checking to mimic
|
return _httpClient.getAsString(urlString, headers);
|
||||||
// previous behavior for the tests.
|
|
||||||
httpGet = new HttpGet(new URL(urlString).toURI());
|
|
||||||
} catch (IllegalArgumentException | MalformedURLException | URISyntaxException e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
httpGet.setHeaders(httpHeaders);
|
|
||||||
httpGet.setConfig(defaultRequestConfig);
|
|
||||||
|
|
||||||
CloseableHttpClient httpclient = httpClientBuilder.build();
|
|
||||||
|
|
||||||
CloseableHttpResponse response = null;
|
|
||||||
try {
|
|
||||||
response = httpclient.execute(httpGet);
|
|
||||||
|
|
||||||
HttpEntity entity = response.getEntity();
|
|
||||||
if (entity == null) {
|
|
||||||
throw new Exception("No content found in " + httpGet.getURI().toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
String encoding = null;
|
|
||||||
|
|
||||||
if (entity.getContentEncoding() != null) {
|
|
||||||
encoding = entity.getContentEncoding().getValue();
|
|
||||||
} else {
|
|
||||||
Charset charset = ContentType.getOrDefault(entity).getCharset();
|
|
||||||
if (charset != null) {
|
|
||||||
encoding = charset.name();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
String result = ParsingUtilities.inputStreamToString(
|
|
||||||
entity.getContent(), (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding);
|
|
||||||
|
|
||||||
EntityUtils.consume(entity);
|
|
||||||
return result;
|
|
||||||
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
String message;
|
return _onError == OnError.StoreError ? new EvalError(e) : null;
|
||||||
if (response == null) {
|
|
||||||
message = "Unknown HTTP error " + e.getLocalizedMessage();
|
|
||||||
} else {
|
|
||||||
StatusLine status = response.getStatusLine();
|
|
||||||
HttpEntity errorEntity = response.getEntity();
|
|
||||||
String errorString = ParsingUtilities.inputStreamToString(errorEntity.getContent());
|
|
||||||
message = String.format("HTTP error %d : %s | %s", status.getStatusCode(),
|
|
||||||
status.getReasonPhrase(),
|
|
||||||
errorString);
|
|
||||||
}
|
|
||||||
return _onError == OnError.StoreError ? new EvalError(message) : null;
|
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return _onError == OnError.StoreError ? new EvalError(e.getMessage()) : null;
|
return _onError == OnError.StoreError ? new EvalError(e.getMessage()) : null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
RowVisitor createRowVisitor(List<CellAtRow> cellsAtRows) {
|
RowVisitor createRowVisitor(List<CellAtRow> cellsAtRows) {
|
||||||
return new RowVisitor() {
|
return new RowVisitor() {
|
||||||
int cellIndex;
|
int cellIndex;
|
||||||
@ -497,4 +406,5 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
}.init(cellsAtRows);
|
}.init(cellsAtRows);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
208
main/src/com/google/refine/util/HttpClient.java
Normal file
208
main/src/com/google/refine/util/HttpClient.java
Normal file
@ -0,0 +1,208 @@
|
|||||||
|
package com.google.refine.util;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.hc.client5.http.ClientProtocolException;
|
||||||
|
import org.apache.hc.client5.http.classic.methods.HttpGet;
|
||||||
|
import org.apache.hc.client5.http.classic.methods.HttpPost;
|
||||||
|
import org.apache.hc.client5.http.config.RequestConfig;
|
||||||
|
import org.apache.hc.client5.http.entity.UrlEncodedFormEntity;
|
||||||
|
import org.apache.hc.client5.http.impl.DefaultHttpRequestRetryStrategy;
|
||||||
|
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
|
||||||
|
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
|
||||||
|
import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
|
||||||
|
import org.apache.hc.client5.http.impl.classic.HttpClients;
|
||||||
|
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
|
||||||
|
import org.apache.hc.core5.http.ClassicHttpResponse;
|
||||||
|
import org.apache.hc.core5.http.EntityDetails;
|
||||||
|
import org.apache.hc.core5.http.Header;
|
||||||
|
import org.apache.hc.core5.http.HttpEntity;
|
||||||
|
import org.apache.hc.core5.http.HttpException;
|
||||||
|
import org.apache.hc.core5.http.HttpRequest;
|
||||||
|
import org.apache.hc.core5.http.HttpRequestInterceptor;
|
||||||
|
import org.apache.hc.core5.http.HttpResponse;
|
||||||
|
import org.apache.hc.core5.http.HttpStatus;
|
||||||
|
import org.apache.hc.core5.http.NameValuePair;
|
||||||
|
import org.apache.hc.core5.http.ParseException;
|
||||||
|
import org.apache.hc.core5.http.io.HttpClientResponseHandler;
|
||||||
|
import org.apache.hc.core5.http.io.SocketConfig;
|
||||||
|
import org.apache.hc.core5.http.io.entity.EntityUtils;
|
||||||
|
import org.apache.hc.core5.http.message.BasicNameValuePair;
|
||||||
|
import org.apache.hc.core5.http.protocol.HttpContext;
|
||||||
|
import org.apache.hc.core5.util.TimeValue;
|
||||||
|
|
||||||
|
import com.google.refine.RefineServlet;
|
||||||
|
|
||||||
|
|
||||||
|
public class HttpClient {
|
||||||
|
final private RequestConfig defaultRequestConfig;
|
||||||
|
private HttpClientBuilder httpClientBuilder;
|
||||||
|
private CloseableHttpClient httpClient;
|
||||||
|
private int _delay;
|
||||||
|
|
||||||
|
public HttpClient() {
|
||||||
|
this(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public HttpClient(int delay) {
|
||||||
|
_delay = delay;
|
||||||
|
// Create a connection manager with a custom socket timeout
|
||||||
|
PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager();
|
||||||
|
final SocketConfig socketConfig = SocketConfig.custom()
|
||||||
|
.setSoTimeout(10, TimeUnit.SECONDS)
|
||||||
|
.build();
|
||||||
|
connManager.setDefaultSocketConfig(socketConfig);
|
||||||
|
|
||||||
|
defaultRequestConfig = RequestConfig.custom()
|
||||||
|
.setConnectTimeout(30, TimeUnit.SECONDS)
|
||||||
|
.setConnectionRequestTimeout(30, TimeUnit.SECONDS) // TODO: 60 seconds in some places in old code
|
||||||
|
.build();
|
||||||
|
|
||||||
|
httpClientBuilder = HttpClients.custom()
|
||||||
|
.setUserAgent(RefineServlet.getUserAgent())
|
||||||
|
.setDefaultRequestConfig(defaultRequestConfig)
|
||||||
|
.setConnectionManager(connManager)
|
||||||
|
// Default Apache HC retry is 1x @1 sec (or the value in Retry-Header)
|
||||||
|
.setRetryStrategy(new ExponentialBackoffRetryStrategy(3, TimeValue.ofMilliseconds(_delay)))
|
||||||
|
// .setRedirectStrategy(new LaxRedirectStrategy()) // TODO: No longer needed since default doesn't exclude POST?
|
||||||
|
// .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
|
||||||
|
.addRequestInterceptorFirst(new HttpRequestInterceptor() {
|
||||||
|
|
||||||
|
private long nextRequestTime = System.currentTimeMillis();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void process(
|
||||||
|
final HttpRequest request,
|
||||||
|
final EntityDetails entity,
|
||||||
|
final HttpContext context) throws HttpException, IOException {
|
||||||
|
|
||||||
|
long delay = nextRequestTime - System.currentTimeMillis();
|
||||||
|
if (delay > 0) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(delay);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nextRequestTime = System.currentTimeMillis() + _delay;
|
||||||
|
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// TODO: Placeholder for future Basic Auth implementation
|
||||||
|
// String userinfo = url.getUserInfo();
|
||||||
|
// // HTTPS only - no sending password in the clear over HTTP
|
||||||
|
// if ("https".equals(url.getProtocol()) && userinfo != null) {
|
||||||
|
// int s = userinfo.indexOf(':');
|
||||||
|
// if (s > 0) {
|
||||||
|
// String user = userinfo.substring(0, s);
|
||||||
|
// String pw = userinfo.substring(s + 1, userinfo.length());
|
||||||
|
// CredentialsProvider credsProvider = new BasicCredentialsProvider();
|
||||||
|
// credsProvider.setCredentials(new AuthScope(url.getHost(), 443),
|
||||||
|
// new UsernamePasswordCredentials(user, pw.toCharArray()));
|
||||||
|
// httpClientBuilder = httpClientBuilder.setDefaultCredentialsProvider(credsProvider);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
httpClient = httpClientBuilder.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAsString(String urlString, Header[] headers) throws IOException {
|
||||||
|
|
||||||
|
final HttpClientResponseHandler<String> responseHandler = new HttpClientResponseHandler<String>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String handleResponse(final ClassicHttpResponse response) throws IOException {
|
||||||
|
final int status = response.getCode();
|
||||||
|
if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) {
|
||||||
|
final HttpEntity entity = response.getEntity();
|
||||||
|
if (entity == null) {
|
||||||
|
throw new IOException("No content found in " + urlString);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
return EntityUtils.toString(entity);
|
||||||
|
} catch (final ParseException ex) {
|
||||||
|
throw new ClientProtocolException(ex);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// String errorBody = EntityUtils.toString(response.getEntity());
|
||||||
|
throw new ClientProtocolException(String.format("HTTP error %d : %s for URL %s", status,
|
||||||
|
response.getReasonPhrase(), urlString));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return getResponse(urlString, headers, responseHandler);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getResponse(String urlString, Header[] headers, HttpClientResponseHandler<String> responseHandler) throws IOException {
|
||||||
|
try {
|
||||||
|
// Use of URL constructor below is purely to get additional error checking to mimic
|
||||||
|
// previous behavior for the tests.
|
||||||
|
new URL(urlString).toURI();
|
||||||
|
} catch (IllegalArgumentException | MalformedURLException | URISyntaxException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
HttpGet httpGet = new HttpGet(urlString);
|
||||||
|
|
||||||
|
if (headers != null && headers.length > 0) {
|
||||||
|
httpGet.setHeaders(headers);
|
||||||
|
}
|
||||||
|
httpGet.setConfig(defaultRequestConfig); // FIXME: Redundant? already includes in client builder
|
||||||
|
return httpClient.execute(httpGet, responseHandler);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String postNameValue(String serviceUrl, String name, String value) throws IOException {
|
||||||
|
HttpPost request = new HttpPost(serviceUrl);
|
||||||
|
List<NameValuePair> body = Collections.singletonList(
|
||||||
|
new BasicNameValuePair(name, value));
|
||||||
|
request.setEntity(new UrlEncodedFormEntity(body, StandardCharsets.UTF_8));
|
||||||
|
|
||||||
|
try (CloseableHttpResponse response = httpClient.execute(request)) {
|
||||||
|
String reasonPhrase = response.getReasonPhrase();
|
||||||
|
int statusCode = response.getCode();
|
||||||
|
if (statusCode >= 400) { // We should never see 3xx since they get handled automatically
|
||||||
|
throw new IOException(String.format("HTTP error %d : %s for URL %s", statusCode, reasonPhrase,
|
||||||
|
request.getRequestUri()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return ParsingUtilities.inputStreamToString(response.getEntity().getContent());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use binary exponential backoff strategy, instead of the default fixed
|
||||||
|
* retry interval, if the server doesn't provide a Retry-After time.
|
||||||
|
*/
|
||||||
|
class ExponentialBackoffRetryStrategy extends DefaultHttpRequestRetryStrategy {
|
||||||
|
|
||||||
|
private final TimeValue defaultInterval;
|
||||||
|
|
||||||
|
public ExponentialBackoffRetryStrategy(final int maxRetries, final TimeValue defaultRetryInterval) {
|
||||||
|
super(maxRetries, defaultRetryInterval);
|
||||||
|
this.defaultInterval = defaultRetryInterval;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TimeValue getRetryInterval(HttpResponse response, int execCount, HttpContext context) {
|
||||||
|
// Get the default implementation's interval
|
||||||
|
TimeValue interval = super.getRetryInterval(response, execCount, context);
|
||||||
|
// If it's the same as the default, there was no Retry-After, so use binary
|
||||||
|
// exponential backoff
|
||||||
|
if (interval.compareTo(defaultInterval) == 0) {
|
||||||
|
interval = TimeValue.of(((Double) (Math.pow(2, execCount) * defaultInterval.getDuration())).longValue(),
|
||||||
|
defaultInterval.getTimeUnit() );
|
||||||
|
return interval;
|
||||||
|
}
|
||||||
|
return interval;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -29,6 +29,7 @@ package com.google.refine.importing;
|
|||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
import static org.testng.Assert.assertEquals;
|
import static org.testng.Assert.assertEquals;
|
||||||
import static org.testng.Assert.assertTrue;
|
import static org.testng.Assert.assertTrue;
|
||||||
|
import static org.testng.Assert.fail;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -98,8 +99,6 @@ public class ImportingUtilitiesTests extends ImporterTest {
|
|||||||
public void urlImporting() throws IOException {
|
public void urlImporting() throws IOException {
|
||||||
|
|
||||||
String RESPONSE_BODY = "{code:401,message:Unauthorised}";
|
String RESPONSE_BODY = "{code:401,message:Unauthorised}";
|
||||||
String MESSAGE = String.format("HTTP error %d : %s | %s", 401,
|
|
||||||
"Client Error", RESPONSE_BODY);
|
|
||||||
|
|
||||||
MockWebServer server = new MockWebServer();
|
MockWebServer server = new MockWebServer();
|
||||||
MockResponse mockResponse = new MockResponse();
|
MockResponse mockResponse = new MockResponse();
|
||||||
@ -108,6 +107,8 @@ public class ImportingUtilitiesTests extends ImporterTest {
|
|||||||
server.start();
|
server.start();
|
||||||
server.enqueue(mockResponse);
|
server.enqueue(mockResponse);
|
||||||
HttpUrl url = server.url("/random");
|
HttpUrl url = server.url("/random");
|
||||||
|
String MESSAGE = String.format("HTTP error %d : %s for URL %s", 401,
|
||||||
|
"Client Error", url);
|
||||||
|
|
||||||
MultipartEntityBuilder builder = MultipartEntityBuilder.create();
|
MultipartEntityBuilder builder = MultipartEntityBuilder.create();
|
||||||
StringBody stringBody = new StringBody(url.toString(), ContentType.MULTIPART_FORM_DATA);
|
StringBody stringBody = new StringBody(url.toString(), ContentType.MULTIPART_FORM_DATA);
|
||||||
@ -145,9 +146,9 @@ public class ImportingUtilitiesTests extends ImporterTest {
|
|||||||
return job.canceled;
|
return job.canceled;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
Assert.fail("No Exception was thrown");
|
fail("No Exception was thrown");
|
||||||
} catch (Exception exception) {
|
} catch (Exception exception) {
|
||||||
Assert.assertEquals(MESSAGE, exception.getMessage());
|
assertEquals(exception.getMessage(), MESSAGE);
|
||||||
} finally {
|
} finally {
|
||||||
server.close();
|
server.close();
|
||||||
}
|
}
|
||||||
|
@ -91,7 +91,7 @@ public class StandardReconConfigTests extends RefineTest {
|
|||||||
return wordDistance(s1, s2);
|
return wordDistance(s1, s2);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) throws IOException {
|
protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) {
|
||||||
return super.createReconServiceResults(text, resultsList, historyEntryID);
|
return super.createReconServiceResults(text, resultsList, historyEntryID);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -33,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.operations.column;
|
package com.google.refine.operations.column;
|
||||||
|
|
||||||
|
import static org.testng.Assert.assertEquals;
|
||||||
|
import static org.testng.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -129,7 +132,7 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
|
|||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
Assert.fail("Test interrupted");
|
Assert.fail("Test interrupted");
|
||||||
}
|
}
|
||||||
Assert.assertFalse(process.isRunning());
|
Assert.assertFalse(process.isRunning(),"Process failed to complete within timeout " + timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -273,4 +276,102 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRetries() throws Exception {
|
||||||
|
try (MockWebServer server = new MockWebServer()) {
|
||||||
|
server.start();
|
||||||
|
HttpUrl url = server.url("/retries");
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
Row row = new Row(2);
|
||||||
|
row.setCell(0, new Cell("test" + (i + 1), null));
|
||||||
|
project.rows.add(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Queue 5 error responses with 1 sec. Retry-After interval
|
||||||
|
for (int i = 0; i < 5; i++) {
|
||||||
|
server.enqueue(new MockResponse()
|
||||||
|
.setHeader("Retry-After", 1)
|
||||||
|
.setResponseCode(429)
|
||||||
|
.setBody(Integer.toString(i,10)));
|
||||||
|
}
|
||||||
|
|
||||||
|
server.enqueue(new MockResponse().setBody("success"));
|
||||||
|
|
||||||
|
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
||||||
|
"fruits",
|
||||||
|
"\"" + url + "?city=\"+value",
|
||||||
|
OnError.StoreError,
|
||||||
|
"rand",
|
||||||
|
1,
|
||||||
|
100,
|
||||||
|
false,
|
||||||
|
null);
|
||||||
|
|
||||||
|
// 6 requests (4 retries @1 sec) + final response
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
runAndWait(op, 4500);
|
||||||
|
|
||||||
|
// Make sure that our Retry-After headers were obeyed (4*1 sec vs 4*100msec)
|
||||||
|
long elapsed = System.currentTimeMillis() - start;
|
||||||
|
assertTrue(elapsed > 4000, "Retry-After retries didn't take long enough - elapsed = " + elapsed );
|
||||||
|
|
||||||
|
// 1st row fails after 4 tries (3 retries), 2nd row tries twice and gets value
|
||||||
|
assertTrue(project.rows.get(0).getCellValue(1).toString().contains("HTTP error 429"), "missing 429 error");
|
||||||
|
assertEquals(project.rows.get(1).getCellValue(1).toString(), "success");
|
||||||
|
|
||||||
|
server.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExponentialRetries() throws Exception {
|
||||||
|
try (MockWebServer server = new MockWebServer()) {
|
||||||
|
server.start();
|
||||||
|
HttpUrl url = server.url("/retries");
|
||||||
|
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
Row row = new Row(2);
|
||||||
|
row.setCell(0, new Cell("test" + (i + 1), null));
|
||||||
|
project.rows.add(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use 503 Server Unavailable with no Retry-After header this time
|
||||||
|
for (int i = 0; i < 5; i++) {
|
||||||
|
server.enqueue(new MockResponse()
|
||||||
|
.setResponseCode(503)
|
||||||
|
.setBody(Integer.toString(i,10)));
|
||||||
|
}
|
||||||
|
server.enqueue(new MockResponse().setBody("success"));
|
||||||
|
|
||||||
|
server.enqueue(new MockResponse().setBody("not found").setResponseCode(404));
|
||||||
|
|
||||||
|
ColumnAdditionByFetchingURLsOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
||||||
|
"fruits",
|
||||||
|
"\"" + url + "?city=\"+value",
|
||||||
|
OnError.StoreError,
|
||||||
|
"rand",
|
||||||
|
1,
|
||||||
|
100,
|
||||||
|
false,
|
||||||
|
null);
|
||||||
|
|
||||||
|
// 6 requests (4 retries 200, 400, 800, 200 msec) + final response
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
runAndWait(op, 2500);
|
||||||
|
|
||||||
|
// Make sure that our exponential back off is working
|
||||||
|
long elapsed = System.currentTimeMillis() - start;
|
||||||
|
assertTrue(elapsed > 1600, "Exponential retries didn't take enough time - elapsed = " + elapsed);
|
||||||
|
|
||||||
|
// 1st row fails after 4 tries (3 retries), 2nd row tries twice and gets value, 3rd row is hard error
|
||||||
|
assertTrue(project.rows.get(0).getCellValue(1).toString().contains("HTTP error 503"), "Missing 503 error");
|
||||||
|
assertEquals(project.rows.get(1).getCellValue(1).toString(), "success");
|
||||||
|
assertTrue(project.rows.get(2).getCellValue(1).toString().contains("HTTP error 404"),"Missing 404 error");
|
||||||
|
|
||||||
|
server.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -38,9 +38,7 @@ import static org.mockito.Mockito.mock;
|
|||||||
import static org.powermock.api.mockito.PowerMockito.mockStatic;
|
import static org.powermock.api.mockito.PowerMockito.mockStatic;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -49,7 +47,6 @@ import java.util.Map;
|
|||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.mockito.invocation.InvocationOnMock;
|
import org.mockito.invocation.InvocationOnMock;
|
||||||
import org.mockito.stubbing.Answer;
|
import org.mockito.stubbing.Answer;
|
||||||
import org.powermock.api.mockito.PowerMockito;
|
import org.powermock.api.mockito.PowerMockito;
|
||||||
@ -225,7 +222,6 @@ public class ExtendDataOperationTests extends RefineTest {
|
|||||||
* Test to fetch simple strings
|
* Test to fetch simple strings
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void mockHttpCalls() throws Exception {
|
public void mockHttpCalls() throws Exception {
|
||||||
mockStatic(ReconciledDataExtensionJob.class);
|
mockStatic(ReconciledDataExtensionJob.class);
|
||||||
@ -236,7 +232,7 @@ public class ExtendDataOperationTests extends RefineTest {
|
|||||||
return fakeHttpCall(invocation.getArgument(0), invocation.getArgument(1));
|
return fakeHttpCall(invocation.getArgument(0), invocation.getArgument(1));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
PowerMockito.doAnswer(mockedResponse).when(ReconciledDataExtensionJob.class, "performQuery", anyString(), anyString());
|
PowerMockito.doAnswer(mockedResponse).when(ReconciledDataExtensionJob.class, "postExtendQuery", anyString(), anyString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterMethod
|
@AfterMethod
|
||||||
|
Loading…
Reference in New Issue
Block a user