Get and set any HTTP headers submitted when using Add Column by Fetching URLs

This commit is contained in:
Owen Stephens 2018-01-12 14:15:30 +00:00
parent 4fb1867980
commit 7464e83766
2 changed files with 42 additions and 12 deletions

View File

@ -36,6 +36,8 @@ package com.google.refine.commands.column;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import org.json.JSONObject; import org.json.JSONObject;
import org.json.JSONArray;
import java.util.Enumeration;
import com.google.refine.commands.EngineDependentCommand; import com.google.refine.commands.EngineDependentCommand;
import com.google.refine.model.AbstractOperation; import com.google.refine.model.AbstractOperation;
@ -46,7 +48,7 @@ import com.google.refine.operations.column.ColumnAdditionByFetchingURLsOperation
public class AddColumnByFetchingURLsCommand extends EngineDependentCommand { public class AddColumnByFetchingURLsCommand extends EngineDependentCommand {
@Override @Override
protected AbstractOperation createOperation(Project project, protected AbstractOperation createOperation(Project project,
HttpServletRequest request, JSONObject engineConfig) throws Exception { HttpServletRequest request, JSONObject engineConfig) throws Exception {
String baseColumnName = request.getParameter("baseColumnName"); String baseColumnName = request.getParameter("baseColumnName");
String urlExpression = request.getParameter("urlExpression"); String urlExpression = request.getParameter("urlExpression");
@ -55,7 +57,8 @@ public class AddColumnByFetchingURLsCommand extends EngineDependentCommand {
int delay = Integer.parseInt(request.getParameter("delay")); int delay = Integer.parseInt(request.getParameter("delay"));
String onError = request.getParameter("onError"); String onError = request.getParameter("onError");
boolean cacheResponses = Boolean.parseBoolean(request.getParameter("cacheResponses")); boolean cacheResponses = Boolean.parseBoolean(request.getParameter("cacheResponses"));
JSONArray httpHeadersJson = new JSONArray(request.getParameter("httpHeaders"));
return new ColumnAdditionByFetchingURLsOperation( return new ColumnAdditionByFetchingURLsOperation(
engineConfig, engineConfig,
baseColumnName, baseColumnName,
@ -64,7 +67,8 @@ public class AddColumnByFetchingURLsCommand extends EngineDependentCommand {
newColumnName, newColumnName,
columnInsertIndex, columnInsertIndex,
delay, delay,
cacheResponses cacheResponses,
httpHeadersJson
); );
} }

View File

@ -42,12 +42,15 @@ import java.net.URL;
import java.net.URLConnection; import java.net.URLConnection;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.util.Properties; import java.util.Properties;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.json.JSONArray;
import org.json.JSONWriter; import org.json.JSONWriter;
import com.google.refine.browsing.Engine; import com.google.refine.browsing.Engine;
@ -66,6 +69,8 @@ import com.google.refine.model.Project;
import com.google.refine.model.Row; import com.google.refine.model.Row;
import com.google.refine.model.changes.CellAtRow; import com.google.refine.model.changes.CellAtRow;
import com.google.refine.model.changes.ColumnAdditionChange; import com.google.refine.model.changes.ColumnAdditionChange;
import com.google.refine.commands.HttpHeadersSupport;
import com.google.refine.commands.HttpHeadersSupport.HttpHeaderInfo;
import com.google.refine.operations.EngineDependentOperation; import com.google.refine.operations.EngineDependentOperation;
import com.google.refine.operations.OnError; import com.google.refine.operations.OnError;
import com.google.refine.operations.OperationRegistry; import com.google.refine.operations.OperationRegistry;
@ -77,6 +82,7 @@ import com.google.common.cache.CacheBuilder;
import com.google.common.cache.LoadingCache; import com.google.common.cache.LoadingCache;
import com.google.common.cache.CacheLoader; import com.google.common.cache.CacheLoader;
public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation { public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation {
final protected String _baseColumnName; final protected String _baseColumnName;
final protected String _urlExpression; final protected String _urlExpression;
@ -86,6 +92,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
final protected int _columnInsertIndex; final protected int _columnInsertIndex;
final protected int _delay; final protected int _delay;
final protected boolean _cacheResponses; final protected boolean _cacheResponses;
final protected JSONArray _httpHeadersJson;
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
JSONObject engineConfig = obj.getJSONObject("engineConfig"); JSONObject engineConfig = obj.getJSONObject("engineConfig");
@ -98,7 +105,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
obj.getString("newColumnName"), obj.getString("newColumnName"),
obj.getInt("columnInsertIndex"), obj.getInt("columnInsertIndex"),
obj.getInt("delay"), obj.getInt("delay"),
obj.optBoolean("cacheResponses", false) // false for retro-compatibility obj.optBoolean("cacheResponses", false), // false for retro-compatibility
obj.optJSONArray("httpHeadersJson") // will be null if it doesn't exist for retro-compatibility
); );
} }
@ -110,7 +118,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
String newColumnName, String newColumnName,
int columnInsertIndex, int columnInsertIndex,
int delay, int delay,
boolean cacheResponses boolean cacheResponses,
JSONArray httpHeadersJson
) { ) {
super(engineConfig); super(engineConfig);
@ -123,6 +132,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
_delay = delay; _delay = delay;
_cacheResponses = cacheResponses; _cacheResponses = cacheResponses;
_httpHeadersJson = httpHeadersJson;
} }
@Override @Override
@ -140,6 +150,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
writer.key("onError"); writer.value(TextTransformOperation.onErrorToString(_onError)); writer.key("onError"); writer.value(TextTransformOperation.onErrorToString(_onError));
writer.key("delay"); writer.value(_delay); writer.key("delay"); writer.value(_delay);
writer.key("cacheResponses"); writer.value(_cacheResponses); writer.key("cacheResponses"); writer.value(_cacheResponses);
writer.key("httpHeadersJson"); writer.value(_httpHeadersJson);
writer.endObject(); writer.endObject();
} }
@ -171,7 +182,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
engine, engine,
eval, eval,
getBriefDescription(null), getBriefDescription(null),
_cacheResponses _cacheResponses,
_httpHeadersJson
); );
} }
@ -188,7 +200,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
Engine engine, Engine engine,
Evaluable eval, Evaluable eval,
String description, String description,
boolean cacheResponses boolean cacheResponses,
JSONArray httpHeadersJson
) throws JSONException { ) throws JSONException {
super(description); super(description);
_project = project; _project = project;
@ -217,13 +230,13 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
result = null; result = null;
} }
if (result == null) { if (result == null) {
// the load method should not return any null value // the load method should not return any null value
throw new Exception("null result returned by fetch"); throw new Exception("null result returned by fetch");
} }
return result; return result;
} }
}); });
} }
} }
@ -324,6 +337,19 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
try { try {
URLConnection urlConnection = url.openConnection(); URLConnection urlConnection = url.openConnection();
if (_httpHeadersJson != null) {
Map<String, String> httpHeaders = new HashMap<>();
for (int i = 0; i < _httpHeadersJson.length(); i++) {
String headerLabel = _httpHeadersJson.getJSONObject(i).getString("name");
String headerValue = _httpHeadersJson.getJSONObject(i).getString("value");
httpHeaders.put(headerLabel, headerValue);
}
for (String headerLabel : HttpHeadersSupport.getHttpHeaderLabels()) {
HttpHeaderInfo info = HttpHeadersSupport.getHttpHeaderInfo(headerLabel);
urlConnection.setRequestProperty(info.header, httpHeaders.get(headerLabel));
}
}
// urlConnection.setRequestProperty(_headerKey, _headerValue); // urlConnection.setRequestProperty(_headerKey, _headerValue);
try { try {