Merge pull request #1178 from wetneb/url_caching
Add caching in URL fetching
This commit is contained in:
commit
912600f0bd
@ -54,6 +54,7 @@ public class AddColumnByFetchingURLsCommand extends EngineDependentCommand {
|
|||||||
int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex"));
|
int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex"));
|
||||||
int delay = Integer.parseInt(request.getParameter("delay"));
|
int delay = Integer.parseInt(request.getParameter("delay"));
|
||||||
String onError = request.getParameter("onError");
|
String onError = request.getParameter("onError");
|
||||||
|
boolean cacheResponses = Boolean.parseBoolean(request.getParameter("cacheResponses"));
|
||||||
|
|
||||||
return new ColumnAdditionByFetchingURLsOperation(
|
return new ColumnAdditionByFetchingURLsOperation(
|
||||||
engineConfig,
|
engineConfig,
|
||||||
@ -62,7 +63,8 @@ public class AddColumnByFetchingURLsCommand extends EngineDependentCommand {
|
|||||||
TextTransformOperation.stringToOnError(onError),
|
TextTransformOperation.stringToOnError(onError),
|
||||||
newColumnName,
|
newColumnName,
|
||||||
columnInsertIndex,
|
columnInsertIndex,
|
||||||
delay
|
delay,
|
||||||
|
cacheResponses
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,8 +23,8 @@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
@ -43,6 +43,8 @@ import java.net.URLConnection;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
@ -71,19 +73,23 @@ import com.google.refine.operations.cell.TextTransformOperation;
|
|||||||
import com.google.refine.process.LongRunningProcess;
|
import com.google.refine.process.LongRunningProcess;
|
||||||
import com.google.refine.process.Process;
|
import com.google.refine.process.Process;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
import com.google.common.cache.CacheBuilder;
|
||||||
|
import com.google.common.cache.LoadingCache;
|
||||||
|
import com.google.common.cache.CacheLoader;
|
||||||
|
|
||||||
public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation {
|
public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation {
|
||||||
final protected String _baseColumnName;
|
final protected String _baseColumnName;
|
||||||
final protected String _urlExpression;
|
final protected String _urlExpression;
|
||||||
final protected OnError _onError;
|
final protected OnError _onError;
|
||||||
|
|
||||||
final protected String _newColumnName;
|
final protected String _newColumnName;
|
||||||
final protected int _columnInsertIndex;
|
final protected int _columnInsertIndex;
|
||||||
final protected int _delay;
|
final protected int _delay;
|
||||||
|
final protected boolean _cacheResponses;
|
||||||
|
|
||||||
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
||||||
JSONObject engineConfig = obj.getJSONObject("engineConfig");
|
JSONObject engineConfig = obj.getJSONObject("engineConfig");
|
||||||
|
|
||||||
return new ColumnAdditionByFetchingURLsOperation(
|
return new ColumnAdditionByFetchingURLsOperation(
|
||||||
engineConfig,
|
engineConfig,
|
||||||
obj.getString("baseColumnName"),
|
obj.getString("baseColumnName"),
|
||||||
@ -91,35 +97,38 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
TextTransformOperation.stringToOnError(obj.getString("onError")),
|
TextTransformOperation.stringToOnError(obj.getString("onError")),
|
||||||
obj.getString("newColumnName"),
|
obj.getString("newColumnName"),
|
||||||
obj.getInt("columnInsertIndex"),
|
obj.getInt("columnInsertIndex"),
|
||||||
obj.getInt("delay")
|
obj.getInt("delay"),
|
||||||
|
obj.optBoolean("cacheResponses", false) // false for retro-compatibility
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
public ColumnAdditionByFetchingURLsOperation(
|
public ColumnAdditionByFetchingURLsOperation(
|
||||||
JSONObject engineConfig,
|
JSONObject engineConfig,
|
||||||
String baseColumnName,
|
String baseColumnName,
|
||||||
String urlExpression,
|
String urlExpression,
|
||||||
OnError onError,
|
OnError onError,
|
||||||
String newColumnName,
|
String newColumnName,
|
||||||
int columnInsertIndex,
|
int columnInsertIndex,
|
||||||
int delay
|
int delay,
|
||||||
|
boolean cacheResponses
|
||||||
) {
|
) {
|
||||||
super(engineConfig);
|
super(engineConfig);
|
||||||
|
|
||||||
_baseColumnName = baseColumnName;
|
_baseColumnName = baseColumnName;
|
||||||
_urlExpression = urlExpression;
|
_urlExpression = urlExpression;
|
||||||
_onError = onError;
|
_onError = onError;
|
||||||
|
|
||||||
_newColumnName = newColumnName;
|
_newColumnName = newColumnName;
|
||||||
_columnInsertIndex = columnInsertIndex;
|
_columnInsertIndex = columnInsertIndex;
|
||||||
|
|
||||||
_delay = delay;
|
_delay = delay;
|
||||||
|
_cacheResponses = cacheResponses;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void write(JSONWriter writer, Properties options)
|
public void write(JSONWriter writer, Properties options)
|
||||||
throws JSONException {
|
throws JSONException {
|
||||||
|
|
||||||
writer.object();
|
writer.object();
|
||||||
writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
|
writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
|
||||||
writer.key("description"); writer.value(getBriefDescription(null));
|
writer.key("description"); writer.value(getBriefDescription(null));
|
||||||
@ -130,64 +139,93 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
writer.key("urlExpression"); writer.value(_urlExpression);
|
writer.key("urlExpression"); writer.value(_urlExpression);
|
||||||
writer.key("onError"); writer.value(TextTransformOperation.onErrorToString(_onError));
|
writer.key("onError"); writer.value(TextTransformOperation.onErrorToString(_onError));
|
||||||
writer.key("delay"); writer.value(_delay);
|
writer.key("delay"); writer.value(_delay);
|
||||||
|
writer.key("cacheResponses"); writer.value(_cacheResponses);
|
||||||
writer.endObject();
|
writer.endObject();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected String getBriefDescription(Project project) {
|
protected String getBriefDescription(Project project) {
|
||||||
return "Create column " + _newColumnName +
|
return "Create column " + _newColumnName +
|
||||||
" at index " + _columnInsertIndex +
|
" at index " + _columnInsertIndex +
|
||||||
" by fetching URLs based on column " + _baseColumnName +
|
" by fetching URLs based on column " + _baseColumnName +
|
||||||
" using expression " + _urlExpression;
|
" using expression " + _urlExpression;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String createDescription(Column column, List<CellAtRow> cellsAtRows) {
|
protected String createDescription(Column column, List<CellAtRow> cellsAtRows) {
|
||||||
return "Create new column " + _newColumnName +
|
return "Create new column " + _newColumnName +
|
||||||
", filling " + cellsAtRows.size() +
|
", filling " + cellsAtRows.size() +
|
||||||
" rows by fetching URLs based on column " + column.getName() +
|
" rows by fetching URLs based on column " + column.getName() +
|
||||||
" and formulated as " + _urlExpression;
|
" and formulated as " + _urlExpression;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Process createProcess(Project project, Properties options) throws Exception {
|
public Process createProcess(Project project, Properties options) throws Exception {
|
||||||
Engine engine = createEngine(project);
|
Engine engine = createEngine(project);
|
||||||
engine.initializeFromJSON(_engineConfig);
|
engine.initializeFromJSON(_engineConfig);
|
||||||
|
|
||||||
Evaluable eval = MetaParser.parse(_urlExpression);
|
Evaluable eval = MetaParser.parse(_urlExpression);
|
||||||
|
|
||||||
return new ColumnAdditionByFetchingURLsProcess(
|
return new ColumnAdditionByFetchingURLsProcess(
|
||||||
project,
|
project,
|
||||||
engine,
|
engine,
|
||||||
eval,
|
eval,
|
||||||
getBriefDescription(null)
|
getBriefDescription(null),
|
||||||
|
_cacheResponses
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
public class ColumnAdditionByFetchingURLsProcess extends LongRunningProcess implements Runnable {
|
public class ColumnAdditionByFetchingURLsProcess extends LongRunningProcess implements Runnable {
|
||||||
final protected Project _project;
|
final protected Project _project;
|
||||||
final protected Engine _engine;
|
final protected Engine _engine;
|
||||||
final protected Evaluable _eval;
|
final protected Evaluable _eval;
|
||||||
final protected long _historyEntryID;
|
final protected long _historyEntryID;
|
||||||
protected int _cellIndex;
|
protected int _cellIndex;
|
||||||
|
protected LoadingCache<String, Serializable> _urlCache;
|
||||||
|
|
||||||
public ColumnAdditionByFetchingURLsProcess(
|
public ColumnAdditionByFetchingURLsProcess(
|
||||||
Project project,
|
Project project,
|
||||||
Engine engine,
|
Engine engine,
|
||||||
Evaluable eval,
|
Evaluable eval,
|
||||||
String description
|
String description,
|
||||||
|
boolean cacheResponses
|
||||||
) throws JSONException {
|
) throws JSONException {
|
||||||
super(description);
|
super(description);
|
||||||
_project = project;
|
_project = project;
|
||||||
_engine = engine;
|
_engine = engine;
|
||||||
_eval = eval;
|
_eval = eval;
|
||||||
_historyEntryID = HistoryEntry.allocateID();
|
_historyEntryID = HistoryEntry.allocateID();
|
||||||
|
_urlCache = null;
|
||||||
|
if (cacheResponses) {
|
||||||
|
_urlCache = CacheBuilder.newBuilder()
|
||||||
|
.maximumSize(2048)
|
||||||
|
.expireAfterWrite(10, TimeUnit.MINUTES)
|
||||||
|
.build(
|
||||||
|
new CacheLoader<String, Serializable>() {
|
||||||
|
public Serializable load(String urlString) {
|
||||||
|
Serializable result = fetch(urlString);
|
||||||
|
try {
|
||||||
|
// Always sleep for the delay, no matter how long the
|
||||||
|
// request took. This is more responsible than substracting
|
||||||
|
// the time spend requesting the URL, because it naturally
|
||||||
|
// slows us down if the server is busy and takes a long time
|
||||||
|
// to reply.
|
||||||
|
if (_delay > 0) {
|
||||||
|
Thread.sleep(_delay);
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void write(JSONWriter writer, Properties options)
|
public void write(JSONWriter writer, Properties options)
|
||||||
throws JSONException {
|
throws JSONException {
|
||||||
|
|
||||||
writer.object();
|
writer.object();
|
||||||
writer.key("id"); writer.value(hashCode());
|
writer.key("id"); writer.value(hashCode());
|
||||||
writer.key("description"); writer.value(_description);
|
writer.key("description"); writer.value(_description);
|
||||||
@ -196,12 +234,12 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
writer.key("progress"); writer.value(_progress);
|
writer.key("progress"); writer.value(_progress);
|
||||||
writer.endObject();
|
writer.endObject();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Runnable getRunnable() {
|
protected Runnable getRunnable() {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
Column column = _project.columnModel.getColumnByName(_baseColumnName);
|
Column column = _project.columnModel.getColumnByName(_baseColumnName);
|
||||||
@ -213,66 +251,76 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
_project.processManager.onFailedProcess(this, new Exception("Another column already named " + _newColumnName));
|
_project.processManager.onFailedProcess(this, new Exception("Another column already named " + _newColumnName));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
List<CellAtRow> urls = new ArrayList<CellAtRow>(_project.rows.size());
|
List<CellAtRow> urls = new ArrayList<CellAtRow>(_project.rows.size());
|
||||||
|
|
||||||
FilteredRows filteredRows = _engine.getAllFilteredRows();
|
FilteredRows filteredRows = _engine.getAllFilteredRows();
|
||||||
filteredRows.accept(_project, createRowVisitor(urls));
|
filteredRows.accept(_project, createRowVisitor(urls));
|
||||||
|
|
||||||
List<CellAtRow> responseBodies = new ArrayList<CellAtRow>(urls.size());
|
List<CellAtRow> responseBodies = new ArrayList<CellAtRow>(urls.size());
|
||||||
for (int i = 0; i < urls.size(); i++) {
|
for (int i = 0; i < urls.size(); i++) {
|
||||||
CellAtRow urlData = urls.get(i);
|
CellAtRow urlData = urls.get(i);
|
||||||
long start = System.currentTimeMillis();
|
String urlString = urlData.cell.value.toString();
|
||||||
CellAtRow cellAtRow = fetch(urlData);
|
|
||||||
if (cellAtRow != null) {
|
Serializable response = null;
|
||||||
|
if (_urlCache != null) {
|
||||||
|
response = cachedFetch(urlString);
|
||||||
|
} else {
|
||||||
|
response = fetch(urlString);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (response != null) {
|
||||||
|
CellAtRow cellAtRow = new CellAtRow(
|
||||||
|
urlData.row,
|
||||||
|
new Cell(response, null));
|
||||||
|
|
||||||
responseBodies.add(cellAtRow);
|
responseBodies.add(cellAtRow);
|
||||||
}
|
}
|
||||||
|
|
||||||
_progress = i * 100 / urls.size();
|
_progress = i * 100 / urls.size();
|
||||||
try {
|
|
||||||
long delay = _delay - (System.currentTimeMillis() - start);
|
if (_canceled) {
|
||||||
if (delay > 0) {
|
break;
|
||||||
Thread.sleep(delay);
|
|
||||||
}
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
if (_canceled) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!_canceled) {
|
if (!_canceled) {
|
||||||
|
|
||||||
HistoryEntry historyEntry = new HistoryEntry(
|
HistoryEntry historyEntry = new HistoryEntry(
|
||||||
_historyEntryID,
|
_historyEntryID,
|
||||||
_project,
|
_project,
|
||||||
_description,
|
_description,
|
||||||
ColumnAdditionByFetchingURLsOperation.this,
|
ColumnAdditionByFetchingURLsOperation.this,
|
||||||
new ColumnAdditionChange(
|
new ColumnAdditionChange(
|
||||||
_newColumnName,
|
_newColumnName,
|
||||||
_columnInsertIndex,
|
_columnInsertIndex,
|
||||||
responseBodies)
|
responseBodies)
|
||||||
);
|
);
|
||||||
|
|
||||||
_project.history.addEntry(historyEntry);
|
_project.history.addEntry(historyEntry);
|
||||||
_project.processManager.onDoneProcess(this);
|
_project.processManager.onDoneProcess(this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CellAtRow fetch(CellAtRow urlData) {
|
Serializable cachedFetch(String urlString) {
|
||||||
String urlString = urlData.cell.value.toString();
|
try {
|
||||||
|
return _urlCache.get(urlString);
|
||||||
|
} catch(ExecutionException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Serializable fetch(String urlString) {
|
||||||
URL url = null;
|
URL url = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
url = new URL(urlString);
|
url = new URL(urlString);
|
||||||
} catch (MalformedURLException e) {
|
} catch (MalformedURLException e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
URLConnection urlConnection = url.openConnection();
|
URLConnection urlConnection = url.openConnection();
|
||||||
// urlConnection.setRequestProperty(_headerKey, _headerValue);
|
// urlConnection.setRequestProperty(_headerKey, _headerValue);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
InputStream is = urlConnection.getInputStream();
|
InputStream is = urlConnection.getInputStream();
|
||||||
try {
|
try {
|
||||||
@ -287,12 +335,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new CellAtRow(
|
return ParsingUtilities.inputStreamToString(
|
||||||
urlData.row,
|
is, (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding);
|
||||||
new Cell(
|
|
||||||
ParsingUtilities.inputStreamToString(
|
|
||||||
is, (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding),
|
|
||||||
null));
|
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
is.close();
|
is.close();
|
||||||
@ -313,11 +357,11 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
message = e.toString();
|
message = e.toString();
|
||||||
}
|
}
|
||||||
return _onError == OnError.StoreError ?
|
return _onError == OnError.StoreError ?
|
||||||
new CellAtRow(urlData.row, new Cell(new EvalError(message), null)) : null;
|
new EvalError(message) : null;
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return _onError == OnError.StoreError ?
|
return _onError == OnError.StoreError ?
|
||||||
new CellAtRow(urlData.row, new Cell(new EvalError(e.getMessage()), null)) : null;
|
new EvalError(e.getMessage()) : null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -326,33 +370,33 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
int cellIndex;
|
int cellIndex;
|
||||||
Properties bindings;
|
Properties bindings;
|
||||||
List<CellAtRow> cellsAtRows;
|
List<CellAtRow> cellsAtRows;
|
||||||
|
|
||||||
public RowVisitor init(List<CellAtRow> cellsAtRows) {
|
public RowVisitor init(List<CellAtRow> cellsAtRows) {
|
||||||
Column column = _project.columnModel.getColumnByName(_baseColumnName);
|
Column column = _project.columnModel.getColumnByName(_baseColumnName);
|
||||||
|
|
||||||
this.cellIndex = column.getCellIndex();
|
this.cellIndex = column.getCellIndex();
|
||||||
this.bindings = ExpressionUtils.createBindings(_project);
|
this.bindings = ExpressionUtils.createBindings(_project);
|
||||||
this.cellsAtRows = cellsAtRows;
|
this.cellsAtRows = cellsAtRows;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void start(Project project) {
|
public void start(Project project) {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void end(Project project) {
|
public void end(Project project) {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean visit(Project project, int rowIndex, Row row) {
|
public boolean visit(Project project, int rowIndex, Row row) {
|
||||||
Cell cell = row.getCell(cellIndex);
|
Cell cell = row.getCell(cellIndex);
|
||||||
Cell newCell = null;
|
Cell newCell = null;
|
||||||
|
|
||||||
ExpressionUtils.bind(bindings, row, rowIndex, _baseColumnName, cell);
|
ExpressionUtils.bind(bindings, row, rowIndex, _baseColumnName, cell);
|
||||||
|
|
||||||
Object o = _eval.evaluate(bindings);
|
Object o = _eval.evaluate(bindings);
|
||||||
if (o != null) {
|
if (o != null) {
|
||||||
if (o instanceof Cell) {
|
if (o instanceof Cell) {
|
||||||
@ -366,11 +410,11 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newCell != null) {
|
if (newCell != null) {
|
||||||
cellsAtRows.add(new CellAtRow(rowIndex, newCell));
|
cellsAtRows.add(new CellAtRow(rowIndex, newCell));
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}.init(cellsAtRows);
|
}.init(cellsAtRows);
|
||||||
|
@ -0,0 +1,164 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.tests.model;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.AfterMethod;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.BeforeTest;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectManager;
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.browsing.Engine;
|
||||||
|
import com.google.refine.browsing.RowVisitor;
|
||||||
|
import com.google.refine.grel.Function;
|
||||||
|
import com.google.refine.io.FileProjectManager;
|
||||||
|
import com.google.refine.model.Cell;
|
||||||
|
import com.google.refine.model.Column;
|
||||||
|
import com.google.refine.model.ModelException;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.model.Row;
|
||||||
|
import com.google.refine.process.Process;
|
||||||
|
import com.google.refine.process.ProcessManager;
|
||||||
|
import com.google.refine.operations.OnError;
|
||||||
|
import com.google.refine.operations.EngineDependentOperation;
|
||||||
|
import com.google.refine.operations.column.ColumnAdditionByFetchingURLsOperation;
|
||||||
|
import com.google.refine.tests.RefineTest;
|
||||||
|
import com.google.refine.tests.util.TestUtils;
|
||||||
|
|
||||||
|
|
||||||
|
public class UrlFetchingTests extends RefineTest {
|
||||||
|
|
||||||
|
static final String ENGINE_JSON_URLS = "{\"mode\":\"row-based\"}}";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@BeforeTest
|
||||||
|
public void init() {
|
||||||
|
logger = LoggerFactory.getLogger(this.getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
// dependencies
|
||||||
|
Project project;
|
||||||
|
Properties options;
|
||||||
|
JSONObject engine_config;
|
||||||
|
Engine engine;
|
||||||
|
Properties bindings;
|
||||||
|
|
||||||
|
@BeforeMethod
|
||||||
|
public void SetUp() throws JSONException, IOException, ModelException {
|
||||||
|
File dir = TestUtils.createTempDirectory("openrefine-test-workspace-dir");
|
||||||
|
FileProjectManager.initialize(dir);
|
||||||
|
project = new Project();
|
||||||
|
ProjectMetadata pm = new ProjectMetadata();
|
||||||
|
pm.setName("URL Fetching Test Project");
|
||||||
|
ProjectManager.singleton.registerProject(project, pm);
|
||||||
|
|
||||||
|
int index = project.columnModel.allocateNewCellIndex();
|
||||||
|
Column column = new Column(index,"fruits");
|
||||||
|
project.columnModel.addColumn(index, column, true);
|
||||||
|
|
||||||
|
options = mock(Properties.class);
|
||||||
|
engine = new Engine(project);
|
||||||
|
engine_config = new JSONObject(ENGINE_JSON_URLS);
|
||||||
|
engine.initializeFromJSON(engine_config);
|
||||||
|
engine.setMode(Engine.Mode.RowBased);
|
||||||
|
|
||||||
|
bindings = new Properties();
|
||||||
|
bindings.put("project", project);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterMethod
|
||||||
|
public void TearDown() {
|
||||||
|
project = null;
|
||||||
|
options = null;
|
||||||
|
engine = null;
|
||||||
|
bindings = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for caching
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUrlCaching() throws Exception {
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
Row row = new Row(2);
|
||||||
|
row.setCell(0, new Cell(i < 5 ? "apple":"orange", null));
|
||||||
|
project.rows.add(row);
|
||||||
|
}
|
||||||
|
EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(engine_config,
|
||||||
|
"fruits",
|
||||||
|
"\"https://www.random.org/integers/?num=1&min=1&max=100&col=1&base=10&format=plain&rnd=new&city=\"+value",
|
||||||
|
OnError.SetToBlank,
|
||||||
|
"rand",
|
||||||
|
1,
|
||||||
|
500,
|
||||||
|
true);
|
||||||
|
ProcessManager pm = project.getProcessManager();
|
||||||
|
Process process = op.createProcess(project, options);
|
||||||
|
process.startPerforming(pm);
|
||||||
|
Assert.assertTrue(process.isRunning());
|
||||||
|
try {
|
||||||
|
// We have 100 rows and 500 ms per row but only two distinct
|
||||||
|
// values so we should not wait more than ~2000 ms to get the
|
||||||
|
// results. Just to make sure the test passes with plenty of
|
||||||
|
// net latency we sleep for longer (but still less than
|
||||||
|
// 50,000ms).
|
||||||
|
Thread.sleep(5000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Assert.fail("Test interrupted");
|
||||||
|
}
|
||||||
|
Assert.assertFalse(process.isRunning());
|
||||||
|
|
||||||
|
// Inspect rows
|
||||||
|
String ref_val = (String)project.rows.get(0).getCellValue(1);
|
||||||
|
Assert.assertTrue(ref_val != "apple"); // just to make sure I picked the right column
|
||||||
|
for (int i = 1; i < 4; i++) {
|
||||||
|
// all random values should be equal due to caching
|
||||||
|
Assert.assertEquals(project.rows.get(i).getCellValue(1), ref_val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -500,6 +500,7 @@
|
|||||||
"on-error": "On error",
|
"on-error": "On error",
|
||||||
"set-blank": "set to blank",
|
"set-blank": "set to blank",
|
||||||
"store-err": "store error",
|
"store-err": "store error",
|
||||||
|
"cache-responses": "Cache responses",
|
||||||
"copy-val": "copy value from original column",
|
"copy-val": "copy value from original column",
|
||||||
"warning-col-name": "You must enter a column name.",
|
"warning-col-name": "You must enter a column name.",
|
||||||
"add-col-fetch": "Add column by fetching URLs based on column",
|
"add-col-fetch": "Add column by fetching URLs based on column",
|
||||||
|
@ -500,6 +500,7 @@
|
|||||||
"on-error": "On error",
|
"on-error": "On error",
|
||||||
"set-blank": "set to blank",
|
"set-blank": "set to blank",
|
||||||
"store-err": "store error",
|
"store-err": "store error",
|
||||||
|
"cache-responses": "Cache responses",
|
||||||
"copy-val": "copy value from original column",
|
"copy-val": "copy value from original column",
|
||||||
"warning-col-name": "You must enter a column name.",
|
"warning-col-name": "You must enter a column name.",
|
||||||
"add-col-fetch": "Add column by fetching URLs based on column",
|
"add-col-fetch": "Add column by fetching URLs based on column",
|
||||||
|
@ -500,6 +500,7 @@
|
|||||||
"on-error": "En cas d’erreur",
|
"on-error": "En cas d’erreur",
|
||||||
"set-blank": "vider la cellule",
|
"set-blank": "vider la cellule",
|
||||||
"store-err": "conserver l’erreur",
|
"store-err": "conserver l’erreur",
|
||||||
|
"cache-responses": "Mettre les réponses en cache",
|
||||||
"copy-val": "copier la valeur depuis la colonne originale",
|
"copy-val": "copier la valeur depuis la colonne originale",
|
||||||
"warning-col-name": "Vous devez indiquer un nom de colonne.",
|
"warning-col-name": "Vous devez indiquer un nom de colonne.",
|
||||||
"add-col-fetch": "Ajouter une colonne en moissonnant les données depuis les URL d’une colonne",
|
"add-col-fetch": "Ajouter une colonne en moissonnant les données depuis les URL d’une colonne",
|
||||||
|
@ -12,11 +12,14 @@
|
|||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td width="1%" style="white-space: pre;" bind="or_views_onErr"></td>
|
<td width="1%" style="white-space: pre;" bind="or_views_onErr"></td>
|
||||||
<td colspan="3">
|
<td>
|
||||||
<input type="radio" name="dialog-onerror-choice" value="set-to-blank" checked id="$add-column-error-set-to-blank"/>
|
<input type="radio" name="dialog-onerror-choice" value="set-to-blank" checked id="$add-column-error-set-to-blank"/>
|
||||||
<label for="$add-column-error-set-to-blank" bind="or_views_setBlank"></label>
|
<label for="$add-column-error-set-to-blank" bind="or_views_setBlank"></label>
|
||||||
<input type="radio" name="dialog-onerror-choice" value="store-error" id="$add-column-error-store-error" />
|
<input type="radio" name="dialog-onerror-choice" value="store-error" id="$add-column-error-store-error" />
|
||||||
<label for="$add-column-error-store-error" bind="or_views_storeErr"></label></td>
|
<label for="$add-column-error-store-error" bind="or_views_storeErr"></label></td>
|
||||||
|
<td colspan="2">
|
||||||
|
<input type="checkbox" name="dialog-cache-responses" id="$add-column-cache-responses" checked="checked" />
|
||||||
|
<label for="$add-column-cache-responses" bind="or_views_cacheResponses"></label></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr><td colspan="4"><h3><span bind="or_views_urlFetch"></span></h3></td></tr>
|
<tr><td colspan="4"><h3><span bind="or_views_urlFetch"></span></h3></td></tr>
|
||||||
<tr><td colspan="4">$EXPRESSION_PREVIEW_WIDGET$</td></tr>
|
<tr><td colspan="4">$EXPRESSION_PREVIEW_WIDGET$</td></tr>
|
||||||
@ -27,4 +30,4 @@
|
|||||||
<button class="button" bind="cancelButton"></button>
|
<button class="button" bind="cancelButton"></button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -103,6 +103,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
|||||||
elmts.or_views_onErr.text($.i18n._('core-views')["on-error"]);
|
elmts.or_views_onErr.text($.i18n._('core-views')["on-error"]);
|
||||||
elmts.or_views_setBlank.text($.i18n._('core-views')["set-blank"]);
|
elmts.or_views_setBlank.text($.i18n._('core-views')["set-blank"]);
|
||||||
elmts.or_views_storeErr.text($.i18n._('core-views')["store-err"]);
|
elmts.or_views_storeErr.text($.i18n._('core-views')["store-err"]);
|
||||||
|
elmts.or_views_cacheResponses.text($.i18n._('core-views')["cache-responses"]);
|
||||||
elmts.or_views_urlFetch.text($.i18n._('core-views')["url-fetch"]);
|
elmts.or_views_urlFetch.text($.i18n._('core-views')["url-fetch"]);
|
||||||
elmts.okButton.html($.i18n._('core-buttons')["ok"]);
|
elmts.okButton.html($.i18n._('core-buttons')["ok"]);
|
||||||
elmts.cancelButton.text($.i18n._('core-buttons')["cancel"]);
|
elmts.cancelButton.text($.i18n._('core-buttons')["cancel"]);
|
||||||
@ -135,7 +136,8 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
|||||||
newColumnName: columnName,
|
newColumnName: columnName,
|
||||||
columnInsertIndex: columnIndex + 1,
|
columnInsertIndex: columnIndex + 1,
|
||||||
delay: elmts.throttleDelayInput[0].value,
|
delay: elmts.throttleDelayInput[0].value,
|
||||||
onError: $('input[name="dialog-onerror-choice"]:checked')[0].value
|
onError: $('input[name="dialog-onerror-choice"]:checked')[0].value,
|
||||||
|
cacheResponses: $('input[name="dialog-cache-responses"]')[0].checked,
|
||||||
},
|
},
|
||||||
null,
|
null,
|
||||||
{ modelsChanged: true }
|
{ modelsChanged: true }
|
||||||
|
Loading…
Reference in New Issue
Block a user