Add caching in URL fetching
This commit is contained in:
parent
5d8d372244
commit
782a2f5b48
@ -43,6 +43,8 @@ import java.net.URLConnection;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
@ -170,6 +172,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
||||
final protected Evaluable _eval;
|
||||
final protected long _historyEntryID;
|
||||
protected int _cellIndex;
|
||||
protected Map<URL, Serializable> _urlCache;
|
||||
|
||||
public ColumnAdditionByFetchingURLsProcess(
|
||||
Project project,
|
||||
@ -182,6 +185,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
||||
_engine = engine;
|
||||
_eval = eval;
|
||||
_historyEntryID = HistoryEntry.allocateID();
|
||||
_urlCache = new HashMap<URL, Serializable>();
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -222,27 +226,21 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
||||
List<CellAtRow> responseBodies = new ArrayList<CellAtRow>(urls.size());
|
||||
for (int i = 0; i < urls.size(); i++) {
|
||||
CellAtRow urlData = urls.get(i);
|
||||
long start = System.currentTimeMillis();
|
||||
CellAtRow cellAtRow = fetch(urlData);
|
||||
CellAtRow cellAtRow = cachedFetch(urlData);
|
||||
if (cellAtRow != null) {
|
||||
responseBodies.add(cellAtRow);
|
||||
}
|
||||
|
||||
_progress = i * 100 / urls.size();
|
||||
try {
|
||||
long delay = _delay - (System.currentTimeMillis() - start);
|
||||
if (delay > 0) {
|
||||
Thread.sleep(delay);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
|
||||
if (_canceled) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_urlCache.clear();
|
||||
|
||||
if (!_canceled) {
|
||||
|
||||
HistoryEntry historyEntry = new HistoryEntry(
|
||||
_historyEntryID,
|
||||
_project,
|
||||
@ -259,16 +257,45 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
||||
}
|
||||
}
|
||||
|
||||
CellAtRow fetch(CellAtRow urlData) {
|
||||
CellAtRow cachedFetch(CellAtRow urlData) {
|
||||
String urlString = urlData.cell.value.toString();
|
||||
URL url = null;
|
||||
|
||||
try {
|
||||
url = new URL(urlString);
|
||||
} catch (MalformedURLException e) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Serializable cellResult = _urlCache.get(url);
|
||||
if (cellResult == null) {
|
||||
cellResult = fetch(url);
|
||||
if (cellResult != null) {
|
||||
_urlCache.put(url, cellResult);
|
||||
}
|
||||
|
||||
try {
|
||||
// Always sleep for the delay, no matter how long the
|
||||
// request took. This is more responsible than substracting
|
||||
// the time spend requesting the URL, because it naturally
|
||||
// slows us down if the server is busy and takes a long time
|
||||
// to reply.
|
||||
if (_delay > 0) {
|
||||
Thread.sleep(_delay);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
if (cellResult != null) {
|
||||
return new CellAtRow(
|
||||
urlData.row,
|
||||
new Cell(cellResult, null));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
Serializable fetch(URL url) {
|
||||
try {
|
||||
URLConnection urlConnection = url.openConnection();
|
||||
// urlConnection.setRequestProperty(_headerKey, _headerValue);
|
||||
@ -287,12 +314,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
||||
}
|
||||
}
|
||||
}
|
||||
return new CellAtRow(
|
||||
urlData.row,
|
||||
new Cell(
|
||||
ParsingUtilities.inputStreamToString(
|
||||
is, (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding),
|
||||
null));
|
||||
return ParsingUtilities.inputStreamToString(
|
||||
is, (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding);
|
||||
|
||||
} finally {
|
||||
is.close();
|
||||
@ -313,11 +336,11 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
||||
message = e.toString();
|
||||
}
|
||||
return _onError == OnError.StoreError ?
|
||||
new CellAtRow(urlData.row, new Cell(new EvalError(message), null)) : null;
|
||||
new EvalError(message) : null;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
return _onError == OnError.StoreError ?
|
||||
new CellAtRow(urlData.row, new Cell(new EvalError(e.getMessage()), null)) : null;
|
||||
new EvalError(e.getMessage()) : null;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user