Move to Guava's cache for ColumnAdditionByFetchingURLsOperation
This commit is contained in:
parent
a9c4b0af16
commit
32c232c2d6
@ -43,8 +43,8 @@ import java.net.URLConnection;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
import java.util.Map;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.HashMap;
|
import java.util.concurrent.ExecutionException;
|
||||||
|
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
@ -73,6 +73,9 @@ import com.google.refine.operations.cell.TextTransformOperation;
|
|||||||
import com.google.refine.process.LongRunningProcess;
|
import com.google.refine.process.LongRunningProcess;
|
||||||
import com.google.refine.process.Process;
|
import com.google.refine.process.Process;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
import com.google.common.cache.CacheBuilder;
|
||||||
|
import com.google.common.cache.LoadingCache;
|
||||||
|
import com.google.common.cache.CacheLoader;
|
||||||
|
|
||||||
public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation {
|
public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation {
|
||||||
final protected String _baseColumnName;
|
final protected String _baseColumnName;
|
||||||
@ -172,7 +175,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
final protected Evaluable _eval;
|
final protected Evaluable _eval;
|
||||||
final protected long _historyEntryID;
|
final protected long _historyEntryID;
|
||||||
protected int _cellIndex;
|
protected int _cellIndex;
|
||||||
protected Map<String, Serializable> _urlCache;
|
protected LoadingCache<String, Serializable> _urlCache;
|
||||||
|
|
||||||
public ColumnAdditionByFetchingURLsProcess(
|
public ColumnAdditionByFetchingURLsProcess(
|
||||||
Project project,
|
Project project,
|
||||||
@ -185,7 +188,28 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
_engine = engine;
|
_engine = engine;
|
||||||
_eval = eval;
|
_eval = eval;
|
||||||
_historyEntryID = HistoryEntry.allocateID();
|
_historyEntryID = HistoryEntry.allocateID();
|
||||||
_urlCache = new HashMap<String, Serializable>();
|
_urlCache = CacheBuilder.newBuilder()
|
||||||
|
.maximumSize(2048)
|
||||||
|
.expireAfterWrite(10, TimeUnit.MINUTES)
|
||||||
|
.build(
|
||||||
|
new CacheLoader<String, Serializable>() {
|
||||||
|
public Serializable load(String urlString) {
|
||||||
|
Serializable result = fetch(urlString);
|
||||||
|
try {
|
||||||
|
// Always sleep for the delay, no matter how long the
|
||||||
|
// request took. This is more responsible than substracting
|
||||||
|
// the time spend requesting the URL, because it naturally
|
||||||
|
// slows us down if the server is busy and takes a long time
|
||||||
|
// to reply.
|
||||||
|
if (_delay > 0) {
|
||||||
|
Thread.sleep(_delay);
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -238,8 +262,6 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_urlCache.clear();
|
|
||||||
|
|
||||||
if (!_canceled) {
|
if (!_canceled) {
|
||||||
HistoryEntry historyEntry = new HistoryEntry(
|
HistoryEntry historyEntry = new HistoryEntry(
|
||||||
_historyEntryID,
|
_historyEntryID,
|
||||||
@ -260,25 +282,10 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
CellAtRow cachedFetch(CellAtRow urlData) {
|
CellAtRow cachedFetch(CellAtRow urlData) {
|
||||||
String urlString = urlData.cell.value.toString();
|
String urlString = urlData.cell.value.toString();
|
||||||
|
|
||||||
Serializable cellResult = _urlCache.get(urlString);
|
Serializable cellResult = null;
|
||||||
if (cellResult == null) {
|
try {
|
||||||
cellResult = fetch(urlString);
|
cellResult = _urlCache.get(urlString);
|
||||||
if (cellResult != null) {
|
} catch(ExecutionException e) {
|
||||||
_urlCache.put(urlString, cellResult);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Always sleep for the delay, no matter how long the
|
|
||||||
// request took. This is more responsible than substracting
|
|
||||||
// the time spend requesting the URL, because it naturally
|
|
||||||
// slows us down if the server is busy and takes a long time
|
|
||||||
// to reply.
|
|
||||||
if (_delay > 0) {
|
|
||||||
Thread.sleep(_delay);
|
|
||||||
}
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cellResult != null) {
|
if (cellResult != null) {
|
||||||
|
@ -128,12 +128,6 @@ public class UrlFetchingTests extends RefineTest {
|
|||||||
row.setCell(0, new Cell(i < 5 ? "apple":"orange", null));
|
row.setCell(0, new Cell(i < 5 ? "apple":"orange", null));
|
||||||
project.rows.add(row);
|
project.rows.add(row);
|
||||||
}
|
}
|
||||||
//engine.getAllRows().accept(project, new CountingRowVisitor(5)) ;
|
|
||||||
//engine.getAllFilteredRows().accept(project, new CountingRowVisitor(4));
|
|
||||||
// Function fc = new FacetCount();
|
|
||||||
// Integer count = (Integer) fc.call(bindings, new Object[] {"a", "value", "Column A"});
|
|
||||||
// Assert.assertEquals(count.intValue(), 4);
|
|
||||||
// EngineDependentOperation op = new RowRemovalOperation(engine_config);
|
|
||||||
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
||||||
"fruits",
|
"fruits",
|
||||||
"\"https://www.random.org/integers/?num=1&min=1&max=100&col=1&base=10&format=plain&rnd=new&city=\"+value",
|
"\"https://www.random.org/integers/?num=1&min=1&max=100&col=1&base=10&format=plain&rnd=new&city=\"+value",
|
||||||
|
Loading…
Reference in New Issue
Block a user