package com.google.refine.operations.cell; import java.io.Serializable; import java.util.List; import java.util.Properties; import org.json.JSONException; import org.json.JSONObject; import org.json.JSONWriter; import com.google.refine.browsing.RowVisitor; import com.google.refine.expr.Evaluable; import com.google.refine.expr.ExpressionUtils; import com.google.refine.expr.MetaParser; import com.google.refine.expr.WrappedCell; import com.google.refine.model.AbstractOperation; import com.google.refine.model.Cell; import com.google.refine.model.Column; import com.google.refine.model.Project; import com.google.refine.model.Row; import com.google.refine.model.changes.CellChange; import com.google.refine.operations.EngineDependentMassCellOperation; import com.google.refine.operations.OnError; import com.google.refine.operations.OperationRegistry; public class TextTransformOperation extends EngineDependentMassCellOperation { final protected String _expression; final protected OnError _onError; final protected boolean _repeat; final protected int _repeatCount; static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { JSONObject engineConfig = obj.getJSONObject("engineConfig"); return new TextTransformOperation( engineConfig, obj.getString("columnName"), obj.getString("expression"), stringToOnError(obj.getString("onError")), obj.getBoolean("repeat"), obj.getInt("repeatCount") ); } static public OnError stringToOnError(String s) { if ("set-to-blank".equalsIgnoreCase(s)) { return OnError.SetToBlank; } else if ("store-error".equalsIgnoreCase(s)) { return OnError.StoreError; } else { return OnError.KeepOriginal; } } static public String onErrorToString(OnError onError) { if (onError == OnError.SetToBlank) { return "set-to-blank"; } else if (onError == OnError.StoreError) { return "store-error"; } else { return "keep-original"; } } public TextTransformOperation( JSONObject engineConfig, String columnName, String expression, OnError onError, boolean repeat, int repeatCount ) { super(engineConfig, columnName, true); _expression = expression; _onError = onError; _repeat = repeat; _repeatCount = repeatCount; } public void write(JSONWriter writer, Properties options) throws JSONException { writer.object(); writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); writer.key("description"); writer.value(getBriefDescription(null)); writer.key("engineConfig"); writer.value(getEngineConfig()); writer.key("columnName"); writer.value(_columnName); writer.key("expression"); writer.value(_expression); writer.key("onError"); writer.value(onErrorToString(_onError)); writer.key("repeat"); writer.value(_repeat); writer.key("repeatCount"); writer.value(_repeatCount); writer.endObject(); } protected String getBriefDescription(Project project) { return "Text transform on cells in column " + _columnName + " using expression " + _expression; } protected String createDescription(Column column, List cellChanges) { return "Text transform on " + cellChanges.size() + " cells in column " + column.getName() + ": " + _expression; } protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { Column column = project.columnModel.getColumnByName(_columnName); Evaluable eval = MetaParser.parse(_expression); Properties bindings = ExpressionUtils.createBindings(project); return new RowVisitor() { int cellIndex; Properties bindings; List cellChanges; Evaluable eval; public RowVisitor init(int cellIndex, Properties bindings, List cellChanges, Evaluable eval) { this.cellIndex = cellIndex; this.bindings = bindings; this.cellChanges = cellChanges; this.eval = eval; return this; } @Override public void start(Project project) { // nothing to do } @Override public void end(Project project) { // nothing to do } public boolean visit(Project project, int rowIndex, Row row) { Cell cell = row.getCell(cellIndex); Cell newCell = null; Object oldValue = cell != null ? cell.value : null; ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); Object o = eval.evaluate(bindings); if (o == null) { if (oldValue != null) { CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, null); cellChanges.add(cellChange); } } else { if (o instanceof Cell) { newCell = (Cell) o; } else if (o instanceof WrappedCell) { newCell = ((WrappedCell) o).cell; } else { Serializable newValue = ExpressionUtils.wrapStorable(o); if (ExpressionUtils.isError(newValue)) { if (_onError == OnError.KeepOriginal) { return false; } else if (_onError == OnError.SetToBlank) { newValue = null; } } if (!ExpressionUtils.sameValue(oldValue, newValue)) { newCell = new Cell(newValue, (cell != null) ? cell.recon : null); if (_repeat) { for (int i = 0; i < _repeatCount; i++) { ExpressionUtils.bind(bindings, row, rowIndex, _columnName, newCell); newValue = ExpressionUtils.wrapStorable(eval.evaluate(bindings)); if (ExpressionUtils.isError(newValue)) { break; } else if (ExpressionUtils.sameValue(newCell.value, newValue)) { break; } newCell = new Cell(newValue, newCell.recon); } } } } if (newCell != null) { CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); cellChanges.add(cellChange); } } return false; } }.init(column.getCellIndex(), bindings, cellChanges, eval); } }