Add caching in URL fetching

This commit is contained in:
Antonin Delpeuch 2017-03-07 20:21:27 +00:00
parent 5d8d372244
commit 782a2f5b48

View File

@ -43,6 +43,8 @@ import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Map;
import java.util.HashMap;
import org.json.JSONException;
import org.json.JSONObject;
@ -170,6 +172,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
final protected Evaluable _eval;
final protected long _historyEntryID;
protected int _cellIndex;
protected Map<URL, Serializable> _urlCache;
public ColumnAdditionByFetchingURLsProcess(
Project project,
@ -182,6 +185,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
_engine = engine;
_eval = eval;
_historyEntryID = HistoryEntry.allocateID();
_urlCache = new HashMap<URL, Serializable>();
}
@Override
@ -222,27 +226,21 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
List<CellAtRow> responseBodies = new ArrayList<CellAtRow>(urls.size());
for (int i = 0; i < urls.size(); i++) {
CellAtRow urlData = urls.get(i);
long start = System.currentTimeMillis();
CellAtRow cellAtRow = fetch(urlData);
CellAtRow cellAtRow = cachedFetch(urlData);
if (cellAtRow != null) {
responseBodies.add(cellAtRow);
}
_progress = i * 100 / urls.size();
try {
long delay = _delay - (System.currentTimeMillis() - start);
if (delay > 0) {
Thread.sleep(delay);
}
} catch (InterruptedException e) {
if (_canceled) {
break;
}
}
}
_urlCache.clear();
if (!_canceled) {
HistoryEntry historyEntry = new HistoryEntry(
_historyEntryID,
_project,
@ -259,16 +257,45 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
}
}
CellAtRow fetch(CellAtRow urlData) {
/**
 * Fetches the content behind the URL stored in the given cell, reusing the
 * response kept in {@code _urlCache} when the same URL has already been
 * fetched during this run.
 *
 * <p>Only non-null results of {@link #fetch(URL)} are cached. After every
 * real network fetch (successful or not) the thread sleeps for
 * {@code _delay} milliseconds; cache hits incur no delay.
 *
 * <p>NOTE(review): {@code _urlCache} is keyed by {@link URL}, whose
 * {@code equals}/{@code hashCode} may resolve host names; keying by the
 * URL string would avoid that, but requires changing the field declaration.
 *
 * @param urlData the row/cell pair whose cell value is the URL to fetch
 * @return a new {@code CellAtRow} for the same row holding the fetched
 *         (or cached) result, or {@code null} when the URL is malformed,
 *         when {@code fetch} produced no result, or when the process has
 *         been cancelled
 */
CellAtRow cachedFetch(CellAtRow urlData) {
    // Once the process is cancelled there is no point in hitting the network
    // any more: the run loop appears to use the collected responses only when
    // the process was not cancelled.
    if (_canceled) {
        return null;
    }

    String urlString = urlData.cell.value.toString();
    URL url;
    try {
        url = new URL(urlString);
    } catch (MalformedURLException e) {
        // Not a URL at all: nothing to fetch for this row.
        return null;
    }

    Serializable cellResult = _urlCache.get(url);
    if (cellResult == null) {
        cellResult = fetch(url);
        if (cellResult != null) {
            _urlCache.put(url, cellResult);
        }

        try {
            // Always sleep for the delay, no matter how long the
            // request took. This is more responsible than subtracting
            // the time spent requesting the URL, because it naturally
            // slows us down if the server is busy and takes a long time
            // to reply.
            if (_delay > 0) {
                Thread.sleep(_delay);
            }
        } catch (InterruptedException e) {
            if (_canceled) {
                // Cancellation: keep the interrupt status visible to the
                // caller and drop the result.
                Thread.currentThread().interrupt();
                return null;
            }
            // Interrupted without being cancelled: only the politeness delay
            // was cut short. The response was fetched (and cached), so keep
            // it instead of silently losing this row's value.
        }
    }

    if (cellResult == null) {
        return null;
    }
    return new CellAtRow(urlData.row, new Cell(cellResult, null));
}
Serializable fetch(URL url) {
try {
URLConnection urlConnection = url.openConnection();
// urlConnection.setRequestProperty(_headerKey, _headerValue);
@ -287,12 +314,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
}
}
}
return new CellAtRow(
urlData.row,
new Cell(
ParsingUtilities.inputStreamToString(
is, (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding),
null));
return ParsingUtilities.inputStreamToString(
is, (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding);
} finally {
is.close();
@ -313,11 +336,11 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
message = e.toString();
}
return _onError == OnError.StoreError ?
new CellAtRow(urlData.row, new Cell(new EvalError(message), null)) : null;
new EvalError(message) : null;
}
} catch (Exception e) {
return _onError == OnError.StoreError ?
new CellAtRow(urlData.row, new Cell(new EvalError(e.getMessage()), null)) : null;
new EvalError(e.getMessage()) : null;
}
}