Use Apache HTTP Components for Fetch URL (#2692)
* Use mockwebserver instead of live network for tests. Fixes #2680. Fixes #1904.
* Remove use of deprecated methods.
* Convert to use Apache HTTP Components client library. Fixes #1410 by virtue of redirect following being a built-in capability of the library, along with retries with binary backoff, built-in decompression, etc.
* Address review comments.
This commit is contained in:
parent
983c8bd422
commit
749704518c
@ -377,6 +377,12 @@
|
|||||||
<artifactId>powermock-api-mockito2</artifactId>
|
<artifactId>powermock-api-mockito2</artifactId>
|
||||||
<version>${powermock.version}</version>
|
<version>${powermock.version}</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.squareup.okhttp3</groupId>
|
||||||
|
<artifactId>mockwebserver</artifactId>
|
||||||
|
<version>4.7.2</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
@ -69,12 +69,15 @@ import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
|||||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||||
import org.apache.commons.fileupload.util.Streams;
|
import org.apache.commons.fileupload.util.Streams;
|
||||||
import org.apache.http.HttpEntity;
|
import org.apache.http.HttpEntity;
|
||||||
import org.apache.http.HttpResponse;
|
|
||||||
import org.apache.http.auth.AuthScope;
|
import org.apache.http.auth.AuthScope;
|
||||||
import org.apache.http.auth.UsernamePasswordCredentials;
|
import org.apache.http.auth.UsernamePasswordCredentials;
|
||||||
|
import org.apache.http.client.CredentialsProvider;
|
||||||
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
import org.apache.http.client.methods.HttpGet;
|
import org.apache.http.client.methods.HttpGet;
|
||||||
import org.apache.http.impl.client.DecompressingHttpClient;
|
import org.apache.http.impl.client.BasicCredentialsProvider;
|
||||||
import org.apache.http.impl.client.DefaultHttpClient;
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClientBuilder;
|
||||||
|
import org.apache.http.impl.client.HttpClients;
|
||||||
import org.apache.http.util.EntityUtils;
|
import org.apache.http.util.EntityUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@ -218,7 +221,6 @@ public class ImportingUtilities {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
List<FileItem> tempFiles = (List<FileItem>)upload.parseRequest(request);
|
List<FileItem> tempFiles = (List<FileItem>)upload.parseRequest(request);
|
||||||
|
|
||||||
progress.setProgress("Uploading data ...", -1);
|
progress.setProgress("Uploading data ...", -1);
|
||||||
@ -280,28 +282,28 @@ public class ImportingUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
|
if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
|
||||||
DefaultHttpClient client = new DefaultHttpClient();
|
HttpClientBuilder clientbuilder = HttpClients.custom()
|
||||||
DecompressingHttpClient httpclient =
|
.setUserAgent(RefineServlet.getUserAgent());
|
||||||
new DecompressingHttpClient(client);
|
// .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
|
||||||
HttpGet httpGet = new HttpGet(url.toURI());
|
|
||||||
httpGet.setHeader("User-Agent", RefineServlet.getUserAgent());
|
String userinfo = url.getUserInfo();
|
||||||
if ("https".equals(url.getProtocol())) {
|
// HTTPS only - no sending password in the clear over HTTP
|
||||||
// HTTPS only - no sending password in the clear over HTTP
|
if ("https".equals(url.getProtocol()) && userinfo != null) {
|
||||||
String userinfo = url.getUserInfo();
|
int s = userinfo.indexOf(':');
|
||||||
if (userinfo != null) {
|
if (s > 0) {
|
||||||
int s = userinfo.indexOf(':');
|
String user = userinfo.substring(0, s);
|
||||||
if (s > 0) {
|
String pw = userinfo.substring(s + 1, userinfo.length());
|
||||||
String user = userinfo.substring(0, s);
|
CredentialsProvider credsProvider = new BasicCredentialsProvider();
|
||||||
String pw = userinfo.substring(s + 1, userinfo.length());
|
credsProvider.setCredentials(new AuthScope(url.getHost(), 443),
|
||||||
client.getCredentialsProvider().setCredentials(
|
new UsernamePasswordCredentials(user, pw));
|
||||||
new AuthScope(url.getHost(), 443),
|
clientbuilder = clientbuilder.setDefaultCredentialsProvider(credsProvider);
|
||||||
new UsernamePasswordCredentials(user, pw));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
HttpResponse response = httpclient.execute(httpGet);
|
CloseableHttpClient httpclient = clientbuilder.build();
|
||||||
|
HttpGet httpGet = new HttpGet(url.toURI());
|
||||||
|
CloseableHttpResponse response = httpclient.execute(httpGet);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
response.getStatusLine();
|
response.getStatusLine();
|
||||||
HttpEntity entity = response.getEntity();
|
HttpEntity entity = response.getEntity();
|
||||||
@ -327,7 +329,7 @@ public class ImportingUtilities {
|
|||||||
downloadCount++;
|
downloadCount++;
|
||||||
EntityUtils.consume(entity);
|
EntityUtils.consume(entity);
|
||||||
} finally {
|
} finally {
|
||||||
httpGet.releaseConnection();
|
httpGet.reset();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Fallback handling for non HTTP connections (only FTP?)
|
// Fallback handling for non HTTP connections (only FTP?)
|
||||||
@ -355,7 +357,6 @@ public class ImportingUtilities {
|
|||||||
parameters.put(name, value);
|
parameters.put(name, value);
|
||||||
// TODO: We really want to store this on the request so it's available for everyone
|
// TODO: We really want to store this on the request so it's available for everyone
|
||||||
// request.getParameterMap().put(name, value);
|
// request.getParameterMap().put(name, value);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else { // is file content
|
} else { // is file content
|
||||||
|
@ -33,23 +33,39 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.operations.column;
|
package com.google.refine.operations.column;
|
||||||
|
|
||||||
|
import static com.google.common.base.Strings.isNullOrEmpty;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.net.HttpURLConnection;
|
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLConnection;
|
import java.nio.charset.Charset;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.http.Header;
|
||||||
|
import org.apache.http.HttpEntity;
|
||||||
|
import org.apache.http.StatusLine;
|
||||||
|
import org.apache.http.client.config.RequestConfig;
|
||||||
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
|
import org.apache.http.client.methods.HttpGet;
|
||||||
|
import org.apache.http.entity.ContentType;
|
||||||
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClientBuilder;
|
||||||
|
import org.apache.http.impl.client.HttpClients;
|
||||||
|
import org.apache.http.message.BasicHeader;
|
||||||
|
import org.apache.http.util.EntityUtils;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.google.common.cache.CacheBuilder;
|
import com.google.common.cache.CacheBuilder;
|
||||||
import com.google.common.cache.CacheLoader;
|
import com.google.common.cache.CacheLoader;
|
||||||
import com.google.common.cache.LoadingCache;
|
import com.google.common.cache.LoadingCache;
|
||||||
|
|
||||||
|
import com.google.refine.RefineServlet;
|
||||||
import com.google.refine.browsing.Engine;
|
import com.google.refine.browsing.Engine;
|
||||||
import com.google.refine.browsing.EngineConfig;
|
import com.google.refine.browsing.EngineConfig;
|
||||||
import com.google.refine.browsing.FilteredRows;
|
import com.google.refine.browsing.FilteredRows;
|
||||||
@ -100,6 +116,9 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
final protected int _delay;
|
final protected int _delay;
|
||||||
final protected boolean _cacheResponses;
|
final protected boolean _cacheResponses;
|
||||||
final protected List<HttpHeader> _httpHeadersJson;
|
final protected List<HttpHeader> _httpHeadersJson;
|
||||||
|
private Header[] httpHeaders = new Header[0];
|
||||||
|
final private RequestConfig defaultRequestConfig;
|
||||||
|
private HttpClientBuilder httpClientBuilder;
|
||||||
|
|
||||||
@JsonCreator
|
@JsonCreator
|
||||||
public ColumnAdditionByFetchingURLsOperation(
|
public ColumnAdditionByFetchingURLsOperation(
|
||||||
@ -134,13 +153,39 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
_delay = delay;
|
_delay = delay;
|
||||||
_cacheResponses = cacheResponses;
|
_cacheResponses = cacheResponses;
|
||||||
_httpHeadersJson = httpHeadersJson;
|
_httpHeadersJson = httpHeadersJson;
|
||||||
|
|
||||||
|
List<Header> headers = new ArrayList<Header>();
|
||||||
|
if (_httpHeadersJson != null) {
|
||||||
|
for (HttpHeader header : _httpHeadersJson) {
|
||||||
|
if (!isNullOrEmpty(header.name) && !isNullOrEmpty(header.value)) {
|
||||||
|
headers.add(new BasicHeader(header.name, header.value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
httpHeaders = headers.toArray(httpHeaders);
|
||||||
|
|
||||||
|
defaultRequestConfig = RequestConfig.custom()
|
||||||
|
.setConnectTimeout(30 * 1000)
|
||||||
|
.setConnectionRequestTimeout(30 * 1000)
|
||||||
|
.setSocketTimeout(10 * 1000).build();
|
||||||
|
|
||||||
|
// TODO: Placeholder for future Basic Auth implementation
|
||||||
|
// CredentialsProvider credsProvider = new BasicCredentialsProvider();
|
||||||
|
// credsProvider.setCredentials(new AuthScope(host, 443),
|
||||||
|
// new UsernamePasswordCredentials(user, password));
|
||||||
|
|
||||||
|
httpClientBuilder = HttpClients.custom()
|
||||||
|
.setUserAgent(RefineServlet.getUserAgent())
|
||||||
|
.setDefaultRequestConfig(defaultRequestConfig);
|
||||||
|
// .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
|
||||||
|
// .setDefaultCredentialsProvider(credsProvider);
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonProperty("newColumnName")
|
@JsonProperty("newColumnName")
|
||||||
public String getNewColumnName() {
|
public String getNewColumnName() {
|
||||||
return _newColumnName;
|
return _newColumnName;
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonProperty("columnInsertIndex")
|
@JsonProperty("columnInsertIndex")
|
||||||
public int getColumnInsertIndex() {
|
public int getColumnInsertIndex() {
|
||||||
return _columnInsertIndex;
|
return _columnInsertIndex;
|
||||||
@ -282,14 +327,15 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
FilteredRows filteredRows = _engine.getAllFilteredRows();
|
FilteredRows filteredRows = _engine.getAllFilteredRows();
|
||||||
filteredRows.accept(_project, createRowVisitor(urls));
|
filteredRows.accept(_project, createRowVisitor(urls));
|
||||||
|
|
||||||
List<CellAtRow> responseBodies = new ArrayList<CellAtRow>(urls.size());
|
int count = urls.size();
|
||||||
for (int i = 0; i < urls.size(); i++) {
|
List<CellAtRow> responseBodies = new ArrayList<CellAtRow>(count);
|
||||||
CellAtRow urlData = urls.get(i);
|
int i = 0;
|
||||||
|
for (CellAtRow urlData : urls) {
|
||||||
String urlString = urlData.cell.value.toString();
|
String urlString = urlData.cell.value.toString();
|
||||||
|
|
||||||
Serializable response = null;
|
Serializable response = null;
|
||||||
if (_urlCache != null) {
|
if (_urlCache != null) {
|
||||||
response = cachedFetch(urlString);
|
response = cachedFetch(urlString); // TODO: Why does this need a separate method?
|
||||||
} else {
|
} else {
|
||||||
response = fetch(urlString);
|
response = fetch(urlString);
|
||||||
}
|
}
|
||||||
@ -302,7 +348,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
responseBodies.add(cellAtRow);
|
responseBodies.add(cellAtRow);
|
||||||
}
|
}
|
||||||
|
|
||||||
_progress = i * 100 / urls.size();
|
_progress = i++ * 100 / count;
|
||||||
|
|
||||||
if (_canceled) {
|
if (_canceled) {
|
||||||
break;
|
break;
|
||||||
@ -335,68 +381,64 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
}
|
}
|
||||||
|
|
||||||
Serializable fetch(String urlString) {
|
Serializable fetch(String urlString) {
|
||||||
URL url = null;
|
HttpGet httpGet;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
url = new URL(urlString);
|
// Use of URL constructor below is purely to get additional error checking to mimic
|
||||||
} catch (MalformedURLException e) {
|
// previous behavior for the tests.
|
||||||
|
httpGet = new HttpGet(new URL(urlString).toURI());
|
||||||
|
} catch (IllegalArgumentException | MalformedURLException | URISyntaxException e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
URLConnection urlConnection = url.openConnection();
|
httpGet.setHeaders(httpHeaders);
|
||||||
urlConnection.setRequestProperty("Accept-Encoding", "gzip");
|
httpGet.setConfig(defaultRequestConfig);
|
||||||
if (_httpHeadersJson != null) {
|
|
||||||
for (int i = 0; i < _httpHeadersJson.size(); i++) {
|
|
||||||
String headerLabel = _httpHeadersJson.get(i).name;
|
|
||||||
String headerValue = _httpHeadersJson.get(i).value;
|
|
||||||
if (headerValue != null && !headerValue.isEmpty()) {
|
|
||||||
urlConnection.setRequestProperty(headerLabel, headerValue);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
CloseableHttpClient httpclient = httpClientBuilder.build();
|
||||||
|
|
||||||
|
CloseableHttpResponse response = null;
|
||||||
try {
|
try {
|
||||||
InputStream is = urlConnection.getInputStream();
|
response = httpclient.execute(httpGet);
|
||||||
try {
|
|
||||||
String encoding = urlConnection.getContentEncoding();
|
|
||||||
if (encoding == null) {
|
|
||||||
String contentType = urlConnection.getContentType();
|
|
||||||
if (contentType != null) {
|
|
||||||
final String charsetEqual = "charset=";
|
|
||||||
int c = contentType.lastIndexOf(charsetEqual);
|
|
||||||
if (c > 0) {
|
|
||||||
encoding = contentType.substring(c + charsetEqual.length());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ParsingUtilities.inputStreamToString(
|
|
||||||
is, (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding);
|
|
||||||
|
|
||||||
} finally {
|
HttpEntity entity = response.getEntity();
|
||||||
is.close();
|
if (entity == null) {
|
||||||
|
throw new Exception("No content found in " + httpGet.getURI().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String encoding = null;
|
||||||
|
|
||||||
|
if (entity.getContentEncoding() != null) {
|
||||||
|
encoding = entity.getContentEncoding().getValue();
|
||||||
|
} else {
|
||||||
|
Charset charset = ContentType.getOrDefault(entity).getCharset();
|
||||||
|
if (charset != null) {
|
||||||
|
encoding = charset.name();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
String result = ParsingUtilities.inputStreamToString(
|
||||||
|
entity.getContent(), (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding);
|
||||||
|
|
||||||
|
EntityUtils.consume(entity);
|
||||||
|
return result;
|
||||||
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
String message;
|
String message;
|
||||||
if (urlConnection instanceof HttpURLConnection) {
|
if (response == null) {
|
||||||
int status = ((HttpURLConnection)urlConnection).getResponseCode();
|
message = "Unknown HTTP error " + e.getLocalizedMessage();
|
||||||
String errorString = "";
|
|
||||||
InputStream errorStream = ((HttpURLConnection)urlConnection).getErrorStream();
|
|
||||||
if (errorStream != null) {
|
|
||||||
errorString = ParsingUtilities.inputStreamToString(errorStream);
|
|
||||||
}
|
|
||||||
message = String.format("HTTP error %d : %s | %s",status,
|
|
||||||
((HttpURLConnection)urlConnection).getResponseMessage(),
|
|
||||||
errorString);
|
|
||||||
} else {
|
} else {
|
||||||
message = e.toString();
|
StatusLine status = response.getStatusLine();
|
||||||
|
HttpEntity errorEntity = response.getEntity();
|
||||||
|
String errorString = ParsingUtilities.inputStreamToString(errorEntity.getContent());
|
||||||
|
message = String.format("HTTP error %d : %s | %s", status.getStatusCode(),
|
||||||
|
status.getReasonPhrase(),
|
||||||
|
errorString);
|
||||||
}
|
}
|
||||||
return _onError == OnError.StoreError ?
|
return _onError == OnError.StoreError ? new EvalError(message) : null;
|
||||||
new EvalError(message) : null;
|
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return _onError == OnError.StoreError ?
|
return _onError == OnError.StoreError ? new EvalError(e.getMessage()) : null;
|
||||||
new EvalError(e.getMessage()) : null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,10 +34,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
package com.google.refine.operations.column;
|
package com.google.refine.operations.column;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetAddress;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
@ -45,7 +45,6 @@ import org.testng.annotations.BeforeMethod;
|
|||||||
import org.testng.annotations.BeforeTest;
|
import org.testng.annotations.BeforeTest;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
|
||||||
import com.google.refine.RefineTest;
|
import com.google.refine.RefineTest;
|
||||||
import com.google.refine.browsing.EngineConfig;
|
import com.google.refine.browsing.EngineConfig;
|
||||||
import com.google.refine.expr.ExpressionUtils;
|
import com.google.refine.expr.ExpressionUtils;
|
||||||
@ -64,11 +63,17 @@ import com.google.refine.process.ProcessManager;
|
|||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
import com.google.refine.util.TestUtils;
|
import com.google.refine.util.TestUtils;
|
||||||
|
|
||||||
|
import okhttp3.HttpUrl;
|
||||||
|
import okhttp3.mockwebserver.MockResponse;
|
||||||
|
import okhttp3.mockwebserver.MockWebServer;
|
||||||
|
import okhttp3.mockwebserver.RecordedRequest;
|
||||||
|
|
||||||
|
|
||||||
public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
|
public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
|
||||||
|
|
||||||
static final String ENGINE_JSON_URLS = "{\"mode\":\"row-based\"}";
|
static final String ENGINE_JSON_URLS = "{\"mode\":\"row-based\"}";
|
||||||
|
|
||||||
|
// This is only used for serialization tests. The URL is never fetched.
|
||||||
private String json = "{\"op\":\"core/column-addition-by-fetching-urls\","
|
private String json = "{\"op\":\"core/column-addition-by-fetching-urls\","
|
||||||
+ "\"description\":\"Create column employments at index 2 by fetching URLs based on column orcid using expression grel:\\\"https://pub.orcid.org/\\\"+value+\\\"/employments\\\"\","
|
+ "\"description\":\"Create column employments at index 2 by fetching URLs based on column orcid using expression grel:\\\"https://pub.orcid.org/\\\"+value+\\\"/employments\\\"\","
|
||||||
+ "\"engineConfig\":{\"mode\":\"row-based\",\"facets\":[]},"
|
+ "\"engineConfig\":{\"mode\":\"row-based\",\"facets\":[]},"
|
||||||
@ -111,18 +116,23 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
|
|||||||
project = createProjectWithColumns("UrlFetchingTests", "fruits");
|
project = createProjectWithColumns("UrlFetchingTests", "fruits");
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isHostReachable(String host, int timeout){
|
private void runAndWait(EngineDependentOperation op, int timeout) throws Exception {
|
||||||
boolean state = false;
|
ProcessManager pm = project.getProcessManager();
|
||||||
|
Process process = op.createProcess(project, options);
|
||||||
|
process.startPerforming(pm);
|
||||||
|
Assert.assertTrue(process.isRunning());
|
||||||
|
int time = 0;
|
||||||
try {
|
try {
|
||||||
state = InetAddress.getByName(host).isReachable(timeout);
|
while (process.isRunning() && time < timeout) {
|
||||||
} catch (IOException e) {
|
Thread.sleep(200);
|
||||||
// e.printStackTrace();
|
time += 200;
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Assert.fail("Test interrupted");
|
||||||
}
|
}
|
||||||
|
Assert.assertFalse(process.isRunning());
|
||||||
return state;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void serializeColumnAdditionByFetchingURLsOperation() throws Exception {
|
public void serializeColumnAdditionByFetchingURLsOperation() throws Exception {
|
||||||
TestUtils.isSerializedTo(ParsingUtilities.mapper.readValue(json, ColumnAdditionByFetchingURLsOperation.class), json);
|
TestUtils.isSerializedTo(ParsingUtilities.mapper.readValue(json, ColumnAdditionByFetchingURLsOperation.class), json);
|
||||||
@ -138,54 +148,45 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
|
|||||||
/**
|
/**
|
||||||
* Test for caching
|
* Test for caching
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUrlCaching() throws Exception {
|
public void testUrlCaching() throws Exception {
|
||||||
if (!isHostReachable("www.random.org", 5000))
|
try (MockWebServer server = new MockWebServer()) {
|
||||||
return;
|
server.start();
|
||||||
|
HttpUrl url = server.url("/random");
|
||||||
for (int i = 0; i < 100; i++) {
|
|
||||||
Row row = new Row(2);
|
Random rand = new Random();
|
||||||
row.setCell(0, new Cell(i < 5 ? "apple":"orange", null));
|
for (int i = 0; i < 100; i++) {
|
||||||
project.rows.add(row);
|
Row row = new Row(2);
|
||||||
}
|
row.setCell(0, new Cell(i < 5 ? "apple":"orange", null));
|
||||||
|
project.rows.add(row);
|
||||||
|
// We won't need them all, but queue 100 random responses
|
||||||
|
server.enqueue(new MockResponse().setBody(Integer.toString(rand.nextInt(100))));
|
||||||
|
}
|
||||||
|
|
||||||
|
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
||||||
|
"fruits",
|
||||||
|
"\"" + url + "?city=\"+value",
|
||||||
|
OnError.StoreError,
|
||||||
|
"rand",
|
||||||
|
1,
|
||||||
|
500,
|
||||||
|
true,
|
||||||
|
null);
|
||||||
|
|
||||||
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
|
||||||
"fruits",
|
|
||||||
"\"https://www.random.org/integers/?num=1&min=1&max=100&col=1&base=10&format=plain&rnd=new&city=\"+value",
|
|
||||||
OnError.StoreError,
|
|
||||||
"rand",
|
|
||||||
1,
|
|
||||||
500,
|
|
||||||
true,
|
|
||||||
null);
|
|
||||||
ProcessManager pm = project.getProcessManager();
|
|
||||||
Process process = op.createProcess(project, options);
|
|
||||||
process.startPerforming(pm);
|
|
||||||
Assert.assertTrue(process.isRunning());
|
|
||||||
try {
|
|
||||||
// We have 100 rows and 500 ms per row but only two distinct
|
// We have 100 rows and 500 ms per row but only two distinct
|
||||||
// values so we should not wait more than ~2000 ms to get the
|
// values so we should not wait much more than ~1000 ms to get the
|
||||||
// results. Just to make sure the test passes with plenty of
|
// results.
|
||||||
// net latency we sleep for longer (but still less than
|
runAndWait(op, 1500);
|
||||||
// 50,000ms).
|
|
||||||
Thread.sleep(5000);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
Assert.fail("Test interrupted");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Inspect rows
|
||||||
// Inspect rows
|
String ref_val = (String)project.rows.get(0).getCellValue(1).toString();
|
||||||
String ref_val = (String)project.rows.get(0).getCellValue(1).toString();
|
Assert.assertFalse(ref_val.equals("apple")); // just to make sure I picked the right column
|
||||||
if (ref_val.startsWith("HTTP error"))
|
for (int i = 1; i < 4; i++) {
|
||||||
return;
|
// all random values should be equal due to caching
|
||||||
Assert.assertFalse(ref_val.equals("apple")); // just to make sure I picked the right column
|
Assert.assertEquals(project.rows.get(i).getCellValue(1).toString(), ref_val);
|
||||||
for (int i = 1; i < 4; i++) {
|
}
|
||||||
System.out.println("value:" + project.rows.get(i).getCellValue(1));
|
server.shutdown();
|
||||||
// all random values should be equal due to caching
|
|
||||||
Assert.assertEquals(project.rows.get(i).getCellValue(1).toString(), ref_val);
|
|
||||||
}
|
}
|
||||||
Assert.assertFalse(process.isRunning());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -195,17 +196,64 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testInvalidUrl() throws Exception {
|
public void testInvalidUrl() throws Exception {
|
||||||
Row row0 = new Row(2);
|
try (MockWebServer server = new MockWebServer()) {
|
||||||
row0.setCell(0, new Cell("auinrestrsc", null)); // malformed -> null
|
server.start();
|
||||||
project.rows.add(row0);
|
HttpUrl url = server.url("/random");
|
||||||
Row row1 = new Row(2);
|
server.enqueue(new MockResponse());
|
||||||
row1.setCell(0, new Cell("https://www.random.org/integers/?num=1&min=1&max=100&col=1&base=10&format=plain", null)); // fine
|
|
||||||
project.rows.add(row1);
|
|
||||||
Row row2 = new Row(2);
|
|
||||||
row2.setCell(0, new Cell("http://anursiebcuiesldcresturce.detur/anusclbc", null)); // well-formed but invalid
|
|
||||||
project.rows.add(row2);
|
|
||||||
|
|
||||||
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
Row row0 = new Row(2);
|
||||||
|
row0.setCell(0, new Cell("auinrestrsc", null)); // malformed -> null
|
||||||
|
project.rows.add(row0);
|
||||||
|
Row row1 = new Row(2);
|
||||||
|
row1.setCell(0, new Cell(url.toString(), null)); // fine
|
||||||
|
project.rows.add(row1);
|
||||||
|
Row row2 = new Row(2);
|
||||||
|
// well-formed but not resolvable.
|
||||||
|
row2.setCell(0, new Cell("http://domain.invalid/random", null));
|
||||||
|
project.rows.add(row2);
|
||||||
|
|
||||||
|
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
||||||
|
"fruits",
|
||||||
|
"value",
|
||||||
|
OnError.StoreError,
|
||||||
|
"junk",
|
||||||
|
1,
|
||||||
|
50,
|
||||||
|
true,
|
||||||
|
null);
|
||||||
|
|
||||||
|
runAndWait(op, 1000);
|
||||||
|
|
||||||
|
int newCol = project.columnModel.getColumnByName("junk").getCellIndex();
|
||||||
|
// Inspect rows
|
||||||
|
Assert.assertEquals(project.rows.get(0).getCellValue(newCol), null);
|
||||||
|
Assert.assertTrue(project.rows.get(1).getCellValue(newCol) != null);
|
||||||
|
Assert.assertTrue(ExpressionUtils.isError(project.rows.get(2).getCellValue(newCol)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHttpHeaders() throws Exception {
|
||||||
|
try (MockWebServer server = new MockWebServer()) {
|
||||||
|
server.start();
|
||||||
|
HttpUrl url = server.url("/checkheader");
|
||||||
|
|
||||||
|
Row row0 = new Row(2);
|
||||||
|
row0.setCell(0, new Cell(url.toString(), null));
|
||||||
|
project.rows.add(row0);
|
||||||
|
|
||||||
|
String userAgentValue = "OpenRefine";
|
||||||
|
String authorizationValue = "Basic";
|
||||||
|
String acceptValue = "*/*";
|
||||||
|
List<HttpHeader> headers = new ArrayList<>();
|
||||||
|
headers.add(new HttpHeader("authorization", authorizationValue));
|
||||||
|
headers.add(new HttpHeader("user-agent", userAgentValue));
|
||||||
|
headers.add(new HttpHeader("accept", acceptValue));
|
||||||
|
|
||||||
|
server.enqueue(new MockResponse().setBody("first"));
|
||||||
|
server.enqueue(new MockResponse().setBody("second"));
|
||||||
|
|
||||||
|
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
||||||
"fruits",
|
"fruits",
|
||||||
"value",
|
"value",
|
||||||
OnError.StoreError,
|
OnError.StoreError,
|
||||||
@ -213,89 +261,17 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
|
|||||||
1,
|
1,
|
||||||
50,
|
50,
|
||||||
true,
|
true,
|
||||||
null);
|
headers);
|
||||||
|
|
||||||
ProcessManager pm = project.getProcessManager();
|
runAndWait(op, 1000);
|
||||||
Process process = op.createProcess(project, options);
|
|
||||||
process.startPerforming(pm);
|
RecordedRequest request = server.takeRequest();
|
||||||
Assert.assertTrue(process.isRunning());
|
Assert.assertEquals(request.getHeader("user-agent"), userAgentValue);
|
||||||
try {
|
Assert.assertEquals(request.getHeader("authorization"), authorizationValue);
|
||||||
Thread.sleep(5000);
|
Assert.assertEquals(request.getHeader("accept"), acceptValue);
|
||||||
} catch (InterruptedException e) {
|
|
||||||
Assert.fail("Test interrupted");
|
server.shutdown();
|
||||||
}
|
}
|
||||||
Assert.assertFalse(process.isRunning());
|
|
||||||
|
|
||||||
int newCol = project.columnModel.getColumnByName("junk").getCellIndex();
|
|
||||||
// Inspect rows
|
|
||||||
Assert.assertEquals(project.rows.get(0).getCellValue(newCol), null);
|
|
||||||
Assert.assertTrue(project.rows.get(1).getCellValue(newCol) != null);
|
|
||||||
Assert.assertTrue(ExpressionUtils.isError(project.rows.get(2).getCellValue(newCol)));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testHttpHeaders() throws Exception {
|
|
||||||
Row row0 = new Row(2);
|
|
||||||
row0.setCell(0, new Cell("http://headers.jsontest.com", null));
|
|
||||||
/*
|
|
||||||
http://headers.jsontest.com is a service which returns the HTTP request headers
|
|
||||||
as JSON. For example:
|
|
||||||
{
|
|
||||||
"X-Cloud-Trace-Context": "579a1a2ee5c778dfc0810a3bf131ba4e/11053223648711966807",
|
|
||||||
"Authorization": "Basic",
|
|
||||||
"Host": "headers.jsontest.com",
|
|
||||||
"User-Agent": "OpenRefine",
|
|
||||||
"Accept": "*"
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
project.rows.add(row0);
|
|
||||||
|
|
||||||
String userAgentValue = "OpenRefine";
|
|
||||||
String authorizationValue = "Basic";
|
|
||||||
String acceptValue = "*/*";
|
|
||||||
List<HttpHeader> headers = new ArrayList<>();
|
|
||||||
headers.add(new HttpHeader("authorization", authorizationValue));
|
|
||||||
headers.add(new HttpHeader("user-agent", userAgentValue));
|
|
||||||
headers.add(new HttpHeader("accept", acceptValue));
|
|
||||||
|
|
||||||
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
|
||||||
"fruits",
|
|
||||||
"value",
|
|
||||||
OnError.StoreError,
|
|
||||||
"junk",
|
|
||||||
1,
|
|
||||||
50,
|
|
||||||
true,
|
|
||||||
headers);
|
|
||||||
ProcessManager pm = project.getProcessManager();
|
|
||||||
Process process = op.createProcess(project, options);
|
|
||||||
process.startPerforming(pm);
|
|
||||||
Assert.assertTrue(process.isRunning());
|
|
||||||
try {
|
|
||||||
Thread.sleep(5000);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
Assert.fail("Test interrupted");
|
|
||||||
}
|
|
||||||
Assert.assertFalse(process.isRunning());
|
|
||||||
|
|
||||||
int newCol = project.columnModel.getColumnByName("junk").getCellIndex();
|
|
||||||
ObjectNode headersUsed = null;
|
|
||||||
|
|
||||||
// sometime, we got response:
|
|
||||||
// Error
|
|
||||||
// Over Quota
|
|
||||||
// This application is temporarily over its serving quota. Please try again later.
|
|
||||||
try {
|
|
||||||
String response = project.rows.get(0).getCellValue(newCol).toString();
|
|
||||||
headersUsed = ParsingUtilities.mapper.readValue(response, ObjectNode.class);
|
|
||||||
} catch (IOException ex) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Inspect the results we got from remote service
|
|
||||||
Assert.assertEquals(headersUsed.get("user-agent").asText(), userAgentValue);
|
|
||||||
Assert.assertEquals(headersUsed.get("authorization").asText(), authorizationValue);
|
|
||||||
Assert.assertEquals(headersUsed.get("accept").asText(), acceptValue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user