Added command Add Column by Fetching URLs.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1203 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
9041ebf7b9
commit
1f69fba43c
@ -0,0 +1,36 @@
|
||||
package com.google.gridworks.commands.column;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.gridworks.commands.EngineDependentCommand;
|
||||
import com.google.gridworks.model.AbstractOperation;
|
||||
import com.google.gridworks.model.Project;
|
||||
import com.google.gridworks.operations.cell.TextTransformOperation;
|
||||
import com.google.gridworks.operations.column.ColumnAdditionByFetchingURLsOperation;
|
||||
|
||||
public class AddColumnByFetchingURLsCommand extends EngineDependentCommand {
|
||||
@Override
|
||||
protected AbstractOperation createOperation(Project project,
|
||||
HttpServletRequest request, JSONObject engineConfig) throws Exception {
|
||||
|
||||
String baseColumnName = request.getParameter("baseColumnName");
|
||||
String urlExpression = request.getParameter("urlExpression");
|
||||
String newColumnName = request.getParameter("newColumnName");
|
||||
int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex"));
|
||||
int delay = Integer.parseInt(request.getParameter("delay"));
|
||||
String onError = request.getParameter("onError");
|
||||
|
||||
return new ColumnAdditionByFetchingURLsOperation(
|
||||
engineConfig,
|
||||
baseColumnName,
|
||||
urlExpression,
|
||||
TextTransformOperation.stringToOnError(onError),
|
||||
newColumnName,
|
||||
columnInsertIndex,
|
||||
delay
|
||||
);
|
||||
}
|
||||
|
||||
}
|
@ -16,6 +16,7 @@ import com.google.gridworks.operations.cell.MultiValuedCellSplitOperation;
|
||||
import com.google.gridworks.operations.cell.TextTransformOperation;
|
||||
import com.google.gridworks.operations.cell.TransposeColumnsIntoRowsOperation;
|
||||
import com.google.gridworks.operations.cell.TransposeRowsIntoColumnsOperation;
|
||||
import com.google.gridworks.operations.column.ColumnAdditionByFetchingURLsOperation;
|
||||
import com.google.gridworks.operations.column.ColumnAdditionOperation;
|
||||
import com.google.gridworks.operations.column.ColumnMoveOperation;
|
||||
import com.google.gridworks.operations.column.ColumnRemovalOperation;
|
||||
@ -66,6 +67,7 @@ public abstract class OperationRegistry {
|
||||
register("column-move", ColumnMoveOperation.class);
|
||||
register("column-split", ColumnSplitOperation.class);
|
||||
register("extend-data", ExtendDataOperation.class);
|
||||
register("column-addition-by-fetching-urls", ColumnAdditionByFetchingURLsOperation.class);
|
||||
|
||||
register("row-removal", RowRemovalOperation.class);
|
||||
register("row-star", RowStarOperation.class);
|
||||
|
@ -0,0 +1,291 @@
|
||||
package com.google.gridworks.operations.column;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.gridworks.browsing.Engine;
|
||||
import com.google.gridworks.browsing.FilteredRows;
|
||||
import com.google.gridworks.browsing.RowVisitor;
|
||||
import com.google.gridworks.expr.EvalError;
|
||||
import com.google.gridworks.expr.Evaluable;
|
||||
import com.google.gridworks.expr.ExpressionUtils;
|
||||
import com.google.gridworks.expr.MetaParser;
|
||||
import com.google.gridworks.expr.WrappedCell;
|
||||
import com.google.gridworks.history.HistoryEntry;
|
||||
import com.google.gridworks.model.AbstractOperation;
|
||||
import com.google.gridworks.model.Cell;
|
||||
import com.google.gridworks.model.Column;
|
||||
import com.google.gridworks.model.Project;
|
||||
import com.google.gridworks.model.Row;
|
||||
import com.google.gridworks.model.changes.CellAtRow;
|
||||
import com.google.gridworks.model.changes.ColumnAdditionChange;
|
||||
import com.google.gridworks.operations.EngineDependentOperation;
|
||||
import com.google.gridworks.operations.OnError;
|
||||
import com.google.gridworks.operations.OperationRegistry;
|
||||
import com.google.gridworks.operations.cell.TextTransformOperation;
|
||||
import com.google.gridworks.process.LongRunningProcess;
|
||||
import com.google.gridworks.process.Process;
|
||||
import com.google.gridworks.util.ParsingUtilities;
|
||||
|
||||
public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation {
|
||||
final protected String _baseColumnName;
|
||||
final protected String _urlExpression;
|
||||
final protected OnError _onError;
|
||||
|
||||
final protected String _newColumnName;
|
||||
final protected int _columnInsertIndex;
|
||||
final protected int _delay;
|
||||
|
||||
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
||||
JSONObject engineConfig = obj.getJSONObject("engineConfig");
|
||||
|
||||
return new ColumnAdditionByFetchingURLsOperation(
|
||||
engineConfig,
|
||||
obj.getString("baseColumnName"),
|
||||
obj.getString("urlExpression"),
|
||||
TextTransformOperation.stringToOnError(obj.getString("onError")),
|
||||
obj.getString("newColumnName"),
|
||||
obj.getInt("columnInsertIndex"),
|
||||
obj.getInt("delay")
|
||||
);
|
||||
}
|
||||
|
||||
public ColumnAdditionByFetchingURLsOperation(
|
||||
JSONObject engineConfig,
|
||||
String baseColumnName,
|
||||
String urlExpression,
|
||||
OnError onError,
|
||||
String newColumnName,
|
||||
int columnInsertIndex,
|
||||
int delay
|
||||
) {
|
||||
super(engineConfig);
|
||||
|
||||
_baseColumnName = baseColumnName;
|
||||
_urlExpression = urlExpression;
|
||||
_onError = onError;
|
||||
|
||||
_newColumnName = newColumnName;
|
||||
_columnInsertIndex = columnInsertIndex;
|
||||
|
||||
_delay = delay;
|
||||
}
|
||||
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
|
||||
writer.key("description"); writer.value(getBriefDescription(null));
|
||||
writer.key("engineConfig"); writer.value(getEngineConfig());
|
||||
writer.key("newColumnName"); writer.value(_newColumnName);
|
||||
writer.key("columnInsertIndex"); writer.value(_columnInsertIndex);
|
||||
writer.key("baseColumnName"); writer.value(_baseColumnName);
|
||||
writer.key("urlExpression"); writer.value(_urlExpression);
|
||||
writer.key("onError"); writer.value(TextTransformOperation.onErrorToString(_onError));
|
||||
writer.key("delay"); writer.value(_delay);
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
protected String getBriefDescription(Project project) {
|
||||
return "Create column " + _newColumnName +
|
||||
" at index " + _columnInsertIndex +
|
||||
" by fetching URLs based on column " + _baseColumnName +
|
||||
" using expression " + _urlExpression;
|
||||
}
|
||||
|
||||
protected String createDescription(Column column, List<CellAtRow> cellsAtRows) {
|
||||
return "Create new column " + _newColumnName +
|
||||
", filling " + cellsAtRows.size() +
|
||||
" rows by fetching URLs based on column " + column.getName() +
|
||||
" and formulated as " + _urlExpression;
|
||||
}
|
||||
|
||||
|
||||
public Process createProcess(Project project, Properties options) throws Exception {
|
||||
Column column = project.columnModel.getColumnByName(_baseColumnName);
|
||||
if (column == null) {
|
||||
throw new Exception("No column named " + _baseColumnName);
|
||||
}
|
||||
if (project.columnModel.getColumnByName(_newColumnName) != null) {
|
||||
throw new Exception("Another column already named " + _newColumnName);
|
||||
}
|
||||
|
||||
Engine engine = createEngine(project);
|
||||
engine.initializeFromJSON(_engineConfig);
|
||||
|
||||
Evaluable eval = MetaParser.parse(_urlExpression);
|
||||
|
||||
return new ColumnAdditionByFetchingURLsProcess(
|
||||
project,
|
||||
engine,
|
||||
eval,
|
||||
getBriefDescription(null)
|
||||
);
|
||||
}
|
||||
|
||||
public class ColumnAdditionByFetchingURLsProcess extends LongRunningProcess implements Runnable {
|
||||
final protected Project _project;
|
||||
final protected Engine _engine;
|
||||
final protected Evaluable _eval;
|
||||
final protected long _historyEntryID;
|
||||
protected int _cellIndex;
|
||||
|
||||
public ColumnAdditionByFetchingURLsProcess(
|
||||
Project project,
|
||||
Engine engine,
|
||||
Evaluable eval,
|
||||
String description
|
||||
) throws JSONException {
|
||||
super(description);
|
||||
_project = project;
|
||||
_engine = engine;
|
||||
_eval = eval;
|
||||
_historyEntryID = HistoryEntry.allocateID();
|
||||
}
|
||||
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(hashCode());
|
||||
writer.key("description"); writer.value(_description);
|
||||
writer.key("immediate"); writer.value(false);
|
||||
writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done"));
|
||||
writer.key("progress"); writer.value(_progress);
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
protected Runnable getRunnable() {
|
||||
return this;
|
||||
}
|
||||
|
||||
public void run() {
|
||||
List<CellAtRow> urls = new ArrayList<CellAtRow>(_project.rows.size());
|
||||
|
||||
FilteredRows filteredRows = _engine.getAllFilteredRows();
|
||||
filteredRows.accept(_project, createRowVisitor(urls));
|
||||
|
||||
List<CellAtRow> responseBodies = new ArrayList<CellAtRow>(urls.size());
|
||||
for (int i = 0; i < urls.size(); i++) {
|
||||
CellAtRow urlData = urls.get(i);
|
||||
CellAtRow cellAtRow = fetch(urlData);
|
||||
if (cellAtRow != null) {
|
||||
responseBodies.add(cellAtRow);
|
||||
}
|
||||
|
||||
_progress = i * 100 / urls.size();
|
||||
try {
|
||||
Thread.sleep(_delay);
|
||||
} catch (InterruptedException e) {
|
||||
if (_canceled) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!_canceled) {
|
||||
|
||||
HistoryEntry historyEntry = new HistoryEntry(
|
||||
_historyEntryID,
|
||||
_project,
|
||||
_description,
|
||||
ColumnAdditionByFetchingURLsOperation.this,
|
||||
new ColumnAdditionChange(
|
||||
_newColumnName,
|
||||
_columnInsertIndex,
|
||||
responseBodies)
|
||||
);
|
||||
|
||||
_project.history.addEntry(historyEntry);
|
||||
_project.processManager.onDoneProcess(this);
|
||||
}
|
||||
}
|
||||
|
||||
CellAtRow fetch(CellAtRow urlData) {
|
||||
String urlString = urlData.cell.value.toString();
|
||||
URL url = null;
|
||||
|
||||
try {
|
||||
url = new URL(urlString);
|
||||
} catch (MalformedURLException e) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
InputStream is = url.openStream();
|
||||
try {
|
||||
return new CellAtRow(urlData.row, new Cell(ParsingUtilities.inputStreamToString(is), null));
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
return _onError == OnError.StoreError ?
|
||||
new CellAtRow(urlData.row, new Cell(new EvalError(e.getMessage()), null)) : null;
|
||||
}
|
||||
}
|
||||
|
||||
RowVisitor createRowVisitor(List<CellAtRow> cellsAtRows) {
|
||||
return new RowVisitor() {
|
||||
int cellIndex;
|
||||
Properties bindings;
|
||||
List<CellAtRow> cellsAtRows;
|
||||
|
||||
public RowVisitor init(List<CellAtRow> cellsAtRows) {
|
||||
Column column = _project.columnModel.getColumnByName(_baseColumnName);
|
||||
|
||||
this.cellIndex = column.getCellIndex();
|
||||
this.bindings = ExpressionUtils.createBindings(_project);
|
||||
this.cellsAtRows = cellsAtRows;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(Project project) {
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end(Project project) {
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
public boolean visit(Project project, int rowIndex, Row row) {
|
||||
Cell cell = row.getCell(cellIndex);
|
||||
Cell newCell = null;
|
||||
|
||||
ExpressionUtils.bind(bindings, row, rowIndex, _baseColumnName, cell);
|
||||
|
||||
Object o = _eval.evaluate(bindings);
|
||||
if (o != null) {
|
||||
if (o instanceof Cell) {
|
||||
newCell = (Cell) o;
|
||||
} else if (o instanceof WrappedCell) {
|
||||
newCell = ((WrappedCell) o).cell;
|
||||
} else {
|
||||
Serializable v = ExpressionUtils.wrapStorable(o);
|
||||
if (ExpressionUtils.isNonBlankData(v)) {
|
||||
newCell = new Cell(v.toString(), null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (newCell != null) {
|
||||
cellsAtRows.add(new CellAtRow(rowIndex, newCell));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}.init(cellsAtRows);
|
||||
}
|
||||
}
|
||||
}
|
@ -55,6 +55,7 @@ function registerCommands() {
|
||||
GS.registerCommand(module, "move-column", new Packages.com.google.gridworks.commands.column.MoveColumnCommand());
|
||||
GS.registerCommand(module, "split-column", new Packages.com.google.gridworks.commands.column.SplitColumnCommand());
|
||||
GS.registerCommand(module, "extend-data", new Packages.com.google.gridworks.commands.column.ExtendDataCommand());
|
||||
GS.registerCommand(module, "add-column-by-fetching-urls", new Packages.com.google.gridworks.commands.column.AddColumnByFetchingURLsCommand());
|
||||
|
||||
GS.registerCommand(module, "denormalize", new Packages.com.google.gridworks.commands.row.DenormalizeCommand());
|
||||
|
||||
|
@ -0,0 +1,25 @@
|
||||
<div class="dialog-frame" style="width: 700px;">
|
||||
<div class="dialog-header" bind="dialogHeader"></div>
|
||||
<div class="dialog-body" bind="dialogBody">
|
||||
<div class="grid-layout layout-normal layout-full"><table cols="2">
|
||||
<tr>
|
||||
<td width="1%" style="white-space: pre;">New column name</td>
|
||||
<td><input bind="columnNameInput" size="40" /></td>
|
||||
|
||||
<td width="1%" style="white-space: pre;">Throttle delay</td>
|
||||
<td><input bind="throttleDelayInput" size="10" value="5000" /> milliseconds</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="1%" style="white-space: pre;">On error</td>
|
||||
<td colspan="3"><input type="radio" name="dialog-onerror-choice" value="set-to-blank" checked /> set to blank
|
||||
<input type="radio" name="dialog-onerror-choice" value="store-error" /> store error</td>
|
||||
</tr>
|
||||
<tr><td colspan="4"><h3>Formulate the URLs to fetch:</h3></td></tr>
|
||||
<tr><td colspan="4">$EXPRESSION_PREVIEW_WIDGET$</td></tr>
|
||||
</table></div>
|
||||
</div>
|
||||
<div class="dialog-footer" bind="dialogFooter">
|
||||
<button bind="okButton"> OK </button>
|
||||
<button bind="cancelButton">Cancel</button>
|
||||
</div>
|
||||
</div>
|
@ -44,6 +44,51 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
||||
);
|
||||
};
|
||||
|
||||
// Shows the "Add column by fetching URLs" dialog for the current column and,
// on OK, posts the "add-column-by-fetching-urls" process with the values
// entered by the user.
var doAddColumnByFetchingURLs = function() {
    var frame = $(
        DOM.loadHTML("core", "scripts/views/data-table/add-column-by-fetching-urls-dialog.html")
            .replace("$EXPRESSION_PREVIEW_WIDGET$", ExpressionPreviewDialog.generateWidgetHtml()));

    var elmts = DOM.bind(frame);
    elmts.dialogHeader.text("Add column by fetching URLs based on column " + column.name);

    var level = DialogSystem.showDialog(frame);
    var dismiss = function() { DialogSystem.dismissUntil(level - 1); };

    elmts.cancelButton.click(dismiss);
    elmts.okButton.click(function() {
        var columnName = $.trim(elmts.columnNameInput[0].value);
        if (!columnName.length) {
            alert("You must enter a column name.");
            return;
        }

        // The server-side command parses the delay as a Java int, so reject
        // anything that is not a plain non-negative 32-bit integer while the
        // dialog is still open, and send the trimmed text.
        var delayText = $.trim(elmts.throttleDelayInput[0].value);
        if (!/^\d+$/.test(delayText) || parseInt(delayText, 10) > 2147483647) {
            alert("The throttle delay must be a non-negative whole number of milliseconds.");
            return;
        }

        Gridworks.postCoreProcess(
            "add-column-by-fetching-urls",
            {
                baseColumnName: column.name,
                urlExpression: previewWidget.getExpression(true),
                newColumnName: columnName,
                columnInsertIndex: columnIndex + 1,
                delay: delayText,
                onError: $('input[name="dialog-onerror-choice"]:checked')[0].value
            },
            null,
            { modelsChanged: true }
        );
        dismiss();
    });

    // The widget is created after the handlers are registered; the OK handler
    // only reads previewWidget when clicked, by which time it is assigned.
    var o = DataTableView.sampleVisibleRows(column);
    var previewWidget = new ExpressionPreviewDialog.Widget(
        elmts,
        column.cellIndex,
        o.rowIndices,
        o.values,
        null
    );
};
|
||||
|
||||
var doAddColumnFromFreebase = function() {
|
||||
var o = DataTableView.sampleVisibleRows(column);
|
||||
new ExtendDataPreviewDialog(
|
||||
@ -184,6 +229,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
||||
label: "Split into Several Columns ...",
|
||||
click: doSplitColumn
|
||||
},
|
||||
{},
|
||||
{
|
||||
label: "Add Column Based on This Column ...",
|
||||
click: doAddColumn
|
||||
@ -192,6 +238,10 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
|
||||
label: "Add Columns From Freebase ...",
|
||||
click: doAddColumnFromFreebase
|
||||
},
|
||||
{
|
||||
label: "Add Column By Fetching URLs ...",
|
||||
click: doAddColumnByFetchingURLs
|
||||
},
|
||||
{},
|
||||
{
|
||||
label: "Rename This Column",
|
||||
|
Loading…
Reference in New Issue
Block a user