diff --git a/src/main/java/com/metaweb/gridworks/browsing/Engine.java b/src/main/java/com/metaweb/gridworks/browsing/Engine.java index 989b3f02b..eeb460c37 100644 --- a/src/main/java/com/metaweb/gridworks/browsing/Engine.java +++ b/src/main/java/com/metaweb/gridworks/browsing/Engine.java @@ -73,7 +73,7 @@ public class Engine implements Jsonizable { facet.computeChoices(_project, filteredRows); } } - + @Override public void write(JSONWriter writer, Properties options) throws JSONException { diff --git a/src/main/java/com/metaweb/gridworks/commands/Command.java b/src/main/java/com/metaweb/gridworks/commands/Command.java index ef7d4f2d5..a6d716f96 100644 --- a/src/main/java/com/metaweb/gridworks/commands/Command.java +++ b/src/main/java/com/metaweb/gridworks/commands/Command.java @@ -170,6 +170,14 @@ public abstract class Command { return a; } + protected JSONObject getEngineConfig(HttpServletRequest request) throws Exception { + String json = request.getParameter("engine"); + if (json != null) { + return jsonStringToObject(json); + } + return null; + } + protected Engine getEngine(HttpServletRequest request, Project project) throws Exception { Engine engine = new Engine(project); String json = request.getParameter("engine"); diff --git a/src/main/java/com/metaweb/gridworks/commands/EngineDependentCommand.java b/src/main/java/com/metaweb/gridworks/commands/EngineDependentCommand.java new file mode 100644 index 000000000..ac6d63830 --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/commands/EngineDependentCommand.java @@ -0,0 +1,38 @@ +package com.metaweb.gridworks.commands; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONObject; + +import com.metaweb.gridworks.model.AbstractOperation; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.process.Process; + +abstract public class EngineDependentCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + AbstractOperation op = createOperation(request, getEngineConfig(request)); + Process process = op.createProcess(project, new Properties()); + + boolean done = project.processManager.queueProcess(process); + + respond(response, "{ \"code\" : " + (done ? "\"ok\"" : "\"pending\"") + " }"); + + } catch (Exception e) { + respondException(response, e); + } + } + + abstract protected AbstractOperation createOperation( + HttpServletRequest request, JSONObject engineConfig) throws Exception; +} diff --git a/src/main/java/com/metaweb/gridworks/commands/edit/DoTextTransformCommand.java b/src/main/java/com/metaweb/gridworks/commands/edit/DoTextTransformCommand.java index dc8d3d3ee..1b6e75bf2 100644 --- a/src/main/java/com/metaweb/gridworks/commands/edit/DoTextTransformCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/edit/DoTextTransformCommand.java @@ -1,99 +1,22 @@ package com.metaweb.gridworks.commands.edit; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; + +import org.json.JSONObject; -import com.metaweb.gridworks.browsing.Engine; -import com.metaweb.gridworks.browsing.FilteredRows; -import com.metaweb.gridworks.browsing.RowVisitor; -import com.metaweb.gridworks.commands.Command; -import com.metaweb.gridworks.expr.Evaluable; -import com.metaweb.gridworks.expr.ExpressionUtils; -import com.metaweb.gridworks.expr.Parser; -import com.metaweb.gridworks.history.HistoryEntry; -import com.metaweb.gridworks.model.Cell; -import com.metaweb.gridworks.model.Column; -import com.metaweb.gridworks.model.Project; -import com.metaweb.gridworks.model.Row; -import com.metaweb.gridworks.model.changes.CellChange; -import com.metaweb.gridworks.model.changes.MassCellChange; -import com.metaweb.gridworks.process.QuickHistoryEntryProcess; +import com.metaweb.gridworks.commands.EngineDependentCommand; +import com.metaweb.gridworks.model.AbstractOperation; +import com.metaweb.gridworks.model.operations.TextTransformOperation; -public class DoTextTransformCommand extends Command { - +public class DoTextTransformCommand extends EngineDependentCommand { @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { + protected AbstractOperation createOperation(HttpServletRequest request, + JSONObject engineConfig) throws Exception { - try { - Project project = getProject(request); - Engine engine = getEngine(request, project); - - int cellIndex = Integer.parseInt(request.getParameter("cell")); - Column column = project.columnModel.getColumnByCellIndex(cellIndex); - if (column == null) { - respond(response, "{ \"code\" : \"error\", \"message\" : \"No such column\" }"); - return; - } - - String columnName = column.getHeaderLabel(); - String expression = request.getParameter("expression"); - - Evaluable eval = new Parser(expression).getExpression(); - Properties bindings = ExpressionUtils.createBindings(project); - - List cellChanges = new ArrayList(project.rows.size()); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, new RowVisitor() { - int cellIndex; - Properties bindings; - List cellChanges; - Evaluable eval; - - public RowVisitor init(int cellIndex, Properties bindings, List cellChanges, Evaluable eval) { - this.cellIndex = cellIndex; - this.bindings = bindings; - this.cellChanges = cellChanges; - this.eval = eval; - return this; - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - if (cell.value != null) { - ExpressionUtils.bind(bindings, row, cell); - - Cell newCell = new Cell(eval.evaluate(bindings), cell.recon); - - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); - cellChanges.add(cellChange); - } - } - return false; - } - }.init(cellIndex, bindings, cellChanges, eval)); - - MassCellChange massCellChange = new MassCellChange(cellChanges, cellIndex); - HistoryEntry historyEntry = new HistoryEntry( - project, "Text transform on " + columnName + ": " + expression, massCellChange); - - boolean done = project.processManager.queueProcess( - new QuickHistoryEntryProcess(project, historyEntry)); - - respond(response, "{ \"code\" : " + (done ? "\"ok\"" : "\"pending\"") + " }"); - - } catch (Exception e) { - respondException(response, e); - } + int cellIndex = Integer.parseInt(request.getParameter("cell")); + String expression = request.getParameter("expression"); + + return new TextTransformOperation(engineConfig, cellIndex, expression); } } diff --git a/src/main/java/com/metaweb/gridworks/commands/recon/ApproveNewReconcileCommand.java b/src/main/java/com/metaweb/gridworks/commands/recon/ApproveNewReconcileCommand.java index e6a37b16e..7ed058c26 100644 --- a/src/main/java/com/metaweb/gridworks/commands/recon/ApproveNewReconcileCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/recon/ApproveNewReconcileCommand.java @@ -1,88 +1,21 @@ package com.metaweb.gridworks.commands.recon; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import com.metaweb.gridworks.browsing.Engine; -import com.metaweb.gridworks.browsing.FilteredRows; -import com.metaweb.gridworks.browsing.RowVisitor; -import com.metaweb.gridworks.commands.Command; -import com.metaweb.gridworks.history.HistoryEntry; -import com.metaweb.gridworks.model.Cell; -import com.metaweb.gridworks.model.Column; -import com.metaweb.gridworks.model.Project; -import com.metaweb.gridworks.model.Recon; -import com.metaweb.gridworks.model.Row; -import com.metaweb.gridworks.model.Recon.Judgment; -import com.metaweb.gridworks.model.changes.CellChange; -import com.metaweb.gridworks.model.changes.MassCellChange; -import com.metaweb.gridworks.process.QuickHistoryEntryProcess; +import org.json.JSONObject; -public class ApproveNewReconcileCommand extends Command { +import com.metaweb.gridworks.commands.EngineDependentCommand; +import com.metaweb.gridworks.model.AbstractOperation; +import com.metaweb.gridworks.model.operations.ApproveNewReconOperation; + +public class ApproveNewReconcileCommand extends EngineDependentCommand { @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { + protected AbstractOperation createOperation(HttpServletRequest request, + JSONObject engineConfig) throws Exception { - try { - Project project = getProject(request); - Engine engine = getEngine(request, project); - - int cellIndex = Integer.parseInt(request.getParameter("cell")); - Column column = project.columnModel.getColumnByCellIndex(cellIndex); - if (column == null) { - respond(response, "{ \"code\" : \"error\", \"message\" : \"No such column\" }"); - return; - } - - String columnName = column.getHeaderLabel(); - List cellChanges = new ArrayList(project.rows.size()); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, new RowVisitor() { - int cellIndex; - List cellChanges; - - public RowVisitor init(int cellIndex, List cellChanges) { - this.cellIndex = cellIndex; - this.cellChanges = cellChanges; - return this; - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - - Cell newCell = new Cell( - cell.value, - cell.recon != null ? cell.recon.dup() : new Recon() - ); - newCell.recon.match = null; - newCell.recon.judgment = Judgment.New; - - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); - cellChanges.add(cellChange); - } - return false; - } - }.init(cellIndex, cellChanges)); - - MassCellChange massCellChange = new MassCellChange(cellChanges, cellIndex); - HistoryEntry historyEntry = new HistoryEntry( - project, "Approve new topics for " + columnName, massCellChange); - - boolean done = project.processManager.queueProcess( - new QuickHistoryEntryProcess(project, historyEntry)); - - respond(response, "{ \"code\" : " + (done ? "\"ok\"" : "\"pending\"") + " }"); - } catch (Exception e) { - respondException(response, e); - } + int cellIndex = Integer.parseInt(request.getParameter("cell")); + + return new ApproveNewReconOperation(engineConfig, cellIndex); } } diff --git a/src/main/java/com/metaweb/gridworks/commands/recon/ApproveReconcileCommand.java b/src/main/java/com/metaweb/gridworks/commands/recon/ApproveReconcileCommand.java index 95a1cbfad..41145278e 100644 --- a/src/main/java/com/metaweb/gridworks/commands/recon/ApproveReconcileCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/recon/ApproveReconcileCommand.java @@ -1,88 +1,21 @@ package com.metaweb.gridworks.commands.recon; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import com.metaweb.gridworks.browsing.Engine; -import com.metaweb.gridworks.browsing.FilteredRows; -import com.metaweb.gridworks.browsing.RowVisitor; -import com.metaweb.gridworks.commands.Command; -import com.metaweb.gridworks.history.HistoryEntry; -import com.metaweb.gridworks.model.Cell; -import com.metaweb.gridworks.model.Column; -import com.metaweb.gridworks.model.Project; -import com.metaweb.gridworks.model.Row; -import com.metaweb.gridworks.model.Recon.Judgment; -import com.metaweb.gridworks.model.changes.CellChange; -import com.metaweb.gridworks.model.changes.MassCellChange; -import com.metaweb.gridworks.process.QuickHistoryEntryProcess; +import org.json.JSONObject; -public class ApproveReconcileCommand extends Command { +import com.metaweb.gridworks.commands.EngineDependentCommand; +import com.metaweb.gridworks.model.AbstractOperation; +import com.metaweb.gridworks.model.operations.ApproveReconOperation; + +public class ApproveReconcileCommand extends EngineDependentCommand { @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { + protected AbstractOperation createOperation(HttpServletRequest request, + JSONObject engineConfig) throws Exception { - try { - Project project = getProject(request); - Engine engine = getEngine(request, project); - - int cellIndex = Integer.parseInt(request.getParameter("cell")); - Column column = project.columnModel.getColumnByCellIndex(cellIndex); - if (column == null) { - respond(response, "{ \"code\" : \"error\", \"message\" : \"No such column\" }"); - return; - } - - String columnName = column.getHeaderLabel(); - List cellChanges = new ArrayList(project.rows.size()); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, new RowVisitor() { - int cellIndex; - List cellChanges; - - public RowVisitor init(int cellIndex, List cellChanges) { - this.cellIndex = cellIndex; - this.cellChanges = cellChanges; - return this; - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - if (cell.recon != null && cell.recon.candidates.size() > 0) { - Cell newCell = new Cell( - cell.value, - cell.recon.dup() - ); - newCell.recon.match = newCell.recon.candidates.get(0); - newCell.recon.judgment = Judgment.Approve; - - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); - cellChanges.add(cellChange); - } - } - return false; - } - }.init(cellIndex, cellChanges)); - - MassCellChange massCellChange = new MassCellChange(cellChanges, cellIndex); - HistoryEntry historyEntry = new HistoryEntry( - project, "Approve best recon candidates for " + columnName, massCellChange); - - boolean done = project.processManager.queueProcess( - new QuickHistoryEntryProcess(project, historyEntry)); - - respond(response, "{ \"code\" : " + (done ? "\"ok\"" : "\"pending\"") + " }"); - } catch (Exception e) { - respondException(response, e); - } + int cellIndex = Integer.parseInt(request.getParameter("cell")); + + return new ApproveReconOperation(engineConfig, cellIndex); } } diff --git a/src/main/java/com/metaweb/gridworks/commands/recon/DiscardReconcileCommand.java b/src/main/java/com/metaweb/gridworks/commands/recon/DiscardReconcileCommand.java index f78343efc..c54741c0c 100644 --- a/src/main/java/com/metaweb/gridworks/commands/recon/DiscardReconcileCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/recon/DiscardReconcileCommand.java @@ -1,81 +1,20 @@ package com.metaweb.gridworks.commands.recon; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import com.metaweb.gridworks.browsing.Engine; -import com.metaweb.gridworks.browsing.FilteredRows; -import com.metaweb.gridworks.browsing.RowVisitor; -import com.metaweb.gridworks.commands.Command; -import com.metaweb.gridworks.history.HistoryEntry; -import com.metaweb.gridworks.model.Cell; -import com.metaweb.gridworks.model.Column; -import com.metaweb.gridworks.model.Project; -import com.metaweb.gridworks.model.Row; -import com.metaweb.gridworks.model.changes.CellChange; -import com.metaweb.gridworks.model.changes.MassCellChange; -import com.metaweb.gridworks.process.QuickHistoryEntryProcess; +import org.json.JSONObject; -public class DiscardReconcileCommand extends Command { - +import com.metaweb.gridworks.commands.EngineDependentCommand; +import com.metaweb.gridworks.model.AbstractOperation; +import com.metaweb.gridworks.model.operations.DiscardReconOperation; + +public class DiscardReconcileCommand extends EngineDependentCommand { @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { + protected AbstractOperation createOperation(HttpServletRequest request, + JSONObject engineConfig) throws Exception { - try { - Project project = getProject(request); - Engine engine = getEngine(request, project); - - int cellIndex = Integer.parseInt(request.getParameter("cell")); - Column column = project.columnModel.getColumnByCellIndex(cellIndex); - if (column == null) { - respond(response, "{ \"code\" : \"error\", \"message\" : \"No such column\" }"); - return; - } - - String columnName = column.getHeaderLabel(); - List cellChanges = new ArrayList(project.rows.size()); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, new RowVisitor() { - int cellIndex; - List cellChanges; - - public RowVisitor init(int cellIndex, List cellChanges) { - this.cellIndex = cellIndex; - this.cellChanges = cellChanges; - return this; - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - - Cell newCell = new Cell(cell.value, null); - - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); - cellChanges.add(cellChange); - } - return false; - } - }.init(cellIndex, cellChanges)); - - MassCellChange massCellChange = new MassCellChange(cellChanges, cellIndex); - HistoryEntry historyEntry = new HistoryEntry( - project, "Discard recon results for " + columnName, massCellChange); - - boolean done = project.processManager.queueProcess( - new QuickHistoryEntryProcess(project, historyEntry)); - - respond(response, "{ \"code\" : " + (done ? "\"ok\"" : "\"pending\"") + " }"); - } catch (Exception e) { - respondException(response, e); - } + int cellIndex = Integer.parseInt(request.getParameter("cell")); + + return new DiscardReconOperation(engineConfig, cellIndex); } } diff --git a/src/main/java/com/metaweb/gridworks/commands/recon/ReconcileCommand.java b/src/main/java/com/metaweb/gridworks/commands/recon/ReconcileCommand.java index 9746e9350..c85586e88 100644 --- a/src/main/java/com/metaweb/gridworks/commands/recon/ReconcileCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/recon/ReconcileCommand.java @@ -1,83 +1,22 @@ package com.metaweb.gridworks.commands.recon; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import com.metaweb.gridworks.browsing.Engine; -import com.metaweb.gridworks.browsing.FilteredRows; -import com.metaweb.gridworks.browsing.RowVisitor; -import com.metaweb.gridworks.commands.Command; -import com.metaweb.gridworks.model.Cell; -import com.metaweb.gridworks.model.Column; -import com.metaweb.gridworks.model.Project; -import com.metaweb.gridworks.model.Row; -import com.metaweb.gridworks.process.ReconProcess; -import com.metaweb.gridworks.process.ReconProcess.ReconEntry; +import org.json.JSONObject; -public class ReconcileCommand extends Command { +import com.metaweb.gridworks.commands.EngineDependentCommand; +import com.metaweb.gridworks.model.AbstractOperation; +import com.metaweb.gridworks.model.operations.ReconOperation; + +public class ReconcileCommand extends EngineDependentCommand { @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { + protected AbstractOperation createOperation(HttpServletRequest request, + JSONObject engineConfig) throws Exception { - try { - Project project = getProject(request); - Engine engine = getEngine(request, project); - - int cellIndex = Integer.parseInt(request.getParameter("cell")); - Column column = project.columnModel.getColumnByCellIndex(cellIndex); - if (column == null) { - respond(response, "{ \"code\" : \"error\", \"message\" : \"No such column\" }"); - return; - } - - String columnName = column.getHeaderLabel(); - String typeID = request.getParameter("type"); - - List entries = new ArrayList(project.rows.size()); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, new RowVisitor() { - int cellIndex; - List entries; - - public RowVisitor init(int cellIndex, List entries) { - this.cellIndex = cellIndex; - this.entries = entries; - return this; - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - if (cell.value != null) { - entries.add(new ReconEntry(rowIndex, cell)); - } - } - return false; - } - }.init(cellIndex, entries)); - - ReconProcess process = new ReconProcess( - project, - "Reconcile " + columnName + " to type " + typeID, - cellIndex, - entries, - typeID - ); - - boolean done = project.processManager.queueProcess(process); - - respond(response, "{ \"code\" : " + (done ? "\"ok\"" : "\"pending\"") + " }"); - - } catch (Exception e) { - respondException(response, e); - } + int cellIndex = Integer.parseInt(request.getParameter("cell")); + String typeID = request.getParameter("type"); + + return new ReconOperation(engineConfig, cellIndex, typeID); } } diff --git a/src/main/java/com/metaweb/gridworks/history/HistoryEntry.java b/src/main/java/com/metaweb/gridworks/history/HistoryEntry.java index 125877f51..4102418c0 100644 --- a/src/main/java/com/metaweb/gridworks/history/HistoryEntry.java +++ b/src/main/java/com/metaweb/gridworks/history/HistoryEntry.java @@ -16,22 +16,25 @@ import org.json.JSONWriter; import com.metaweb.gridworks.Jsonizable; import com.metaweb.gridworks.ProjectManager; +import com.metaweb.gridworks.model.AbstractOperation; import com.metaweb.gridworks.model.Project; public class HistoryEntry implements Serializable, Jsonizable { private static final long serialVersionUID = 532766467813930262L; - public long id; - public long projectID; - public String description; - public Date time; + final public long id; + final public long projectID; + final public String description; + final public AbstractOperation operation; + final public Date time; transient protected Change _change; - public HistoryEntry(Project project, String description, Change change) { + public HistoryEntry(Project project, String description, AbstractOperation operation, Change change) { this.id = Math.round(Math.random() * 1000000) + System.currentTimeMillis(); this.projectID = project.id; this.description = description; + this.operation = operation; this.time = new Date(); _change = change; diff --git a/src/main/java/com/metaweb/gridworks/model/AbstractOperation.java b/src/main/java/com/metaweb/gridworks/model/AbstractOperation.java new file mode 100644 index 000000000..2b3fbdbc1 --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/model/AbstractOperation.java @@ -0,0 +1,15 @@ +package com.metaweb.gridworks.model; + +import java.io.Serializable; +import java.util.Properties; + +import com.metaweb.gridworks.Jsonizable; +import com.metaweb.gridworks.process.Process; + +/* + * An abstract operation can be applied to different but similar + * projects. + */ +public interface AbstractOperation extends Serializable, Jsonizable { + public Process createProcess(Project project, Properties options) throws Exception; +} diff --git a/src/main/java/com/metaweb/gridworks/model/operations/ApproveNewReconOperation.java b/src/main/java/com/metaweb/gridworks/model/operations/ApproveNewReconOperation.java new file mode 100644 index 000000000..f8fa211ae --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/model/operations/ApproveNewReconOperation.java @@ -0,0 +1,73 @@ +package com.metaweb.gridworks.model.operations; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.metaweb.gridworks.browsing.RowVisitor; +import com.metaweb.gridworks.model.Cell; +import com.metaweb.gridworks.model.Column; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.model.Recon; +import com.metaweb.gridworks.model.Row; +import com.metaweb.gridworks.model.Recon.Judgment; +import com.metaweb.gridworks.model.changes.CellChange; + +public class ApproveNewReconOperation extends EngineDependentMassCellOperation { + private static final long serialVersionUID = -5205694623711144436L; + + public ApproveNewReconOperation(JSONObject engineConfig, int cellIndex) { + super(engineConfig, cellIndex); + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + // TODO Auto-generated method stub + + } + + @Override + protected String createDescription(Column column, + List cellChanges) { + + return "Approve new topics for " + cellChanges.size() + + " cells in column " + column.getHeaderLabel(); + } + + @Override + protected RowVisitor createRowVisitor(Project project, List cellChanges) throws Exception { + // TODO Auto-generated method stub + return new RowVisitor() { + int cellIndex; + List cellChanges; + + public RowVisitor init(int cellIndex, List cellChanges) { + this.cellIndex = cellIndex; + this.cellChanges = cellChanges; + return this; + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + if (cellIndex < row.cells.size()) { + Cell cell = row.cells.get(cellIndex); + + Cell newCell = new Cell( + cell.value, + cell.recon != null ? cell.recon.dup() : new Recon() + ); + newCell.recon.match = null; + newCell.recon.judgment = Judgment.New; + + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); + cellChanges.add(cellChange); + } + return false; + } + }.init(_cellIndex, cellChanges); + } +} diff --git a/src/main/java/com/metaweb/gridworks/model/operations/ApproveReconOperation.java b/src/main/java/com/metaweb/gridworks/model/operations/ApproveReconOperation.java new file mode 100644 index 000000000..c4ab636c7 --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/model/operations/ApproveReconOperation.java @@ -0,0 +1,73 @@ +package com.metaweb.gridworks.model.operations; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.metaweb.gridworks.browsing.RowVisitor; +import com.metaweb.gridworks.model.Cell; +import com.metaweb.gridworks.model.Column; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.model.Row; +import com.metaweb.gridworks.model.Recon.Judgment; +import com.metaweb.gridworks.model.changes.CellChange; + +public class ApproveReconOperation extends EngineDependentMassCellOperation { + private static final long serialVersionUID = 5393888241057341155L; + + public ApproveReconOperation(JSONObject engineConfig, int cellIndex) { + super(engineConfig, cellIndex); + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + // TODO Auto-generated method stub + + } + + @Override + protected String createDescription(Column column, + List cellChanges) { + + return "Approve best candidates for " + cellChanges.size() + + " cells in column " + column.getHeaderLabel(); + } + + @Override + protected RowVisitor createRowVisitor(Project project, List cellChanges) throws Exception { + // TODO Auto-generated method stub + return new RowVisitor() { + int cellIndex; + List cellChanges; + + public RowVisitor init(int cellIndex, List cellChanges) { + this.cellIndex = cellIndex; + this.cellChanges = cellChanges; + return this; + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + if (cellIndex < row.cells.size()) { + Cell cell = row.cells.get(cellIndex); + if (cell.recon != null && cell.recon.candidates.size() > 0) { + Cell newCell = new Cell( + cell.value, + cell.recon.dup() + ); + newCell.recon.match = newCell.recon.candidates.get(0); + newCell.recon.judgment = Judgment.Approve; + + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); + cellChanges.add(cellChange); + } + } + return false; + } + }.init(_cellIndex, cellChanges); + } +} diff --git a/src/main/java/com/metaweb/gridworks/model/operations/DiscardReconOperation.java b/src/main/java/com/metaweb/gridworks/model/operations/DiscardReconOperation.java new file mode 100644 index 000000000..4c2999d23 --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/model/operations/DiscardReconOperation.java @@ -0,0 +1,66 @@ +package com.metaweb.gridworks.model.operations; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.metaweb.gridworks.browsing.RowVisitor; +import com.metaweb.gridworks.model.Cell; +import com.metaweb.gridworks.model.Column; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.model.Row; +import com.metaweb.gridworks.model.changes.CellChange; + +public class DiscardReconOperation extends EngineDependentMassCellOperation { + private static final long serialVersionUID = 6799029731665369179L; + + public DiscardReconOperation(JSONObject engineConfig, int cellIndex) { + super(engineConfig, cellIndex); + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + // TODO Auto-generated method stub + + } + + @Override + protected String createDescription(Column column, + List cellChanges) { + + return "Discard recon results for " + cellChanges.size() + + " cells in column " + column.getHeaderLabel(); + } + + @Override + protected RowVisitor createRowVisitor(Project project, List cellChanges) throws Exception { + // TODO Auto-generated method stub + return new RowVisitor() { + int cellIndex; + List cellChanges; + + public RowVisitor init(int cellIndex, List cellChanges) { + this.cellIndex = cellIndex; + this.cellChanges = cellChanges; + return this; + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + if (cellIndex < row.cells.size()) { + Cell cell = row.cells.get(cellIndex); + + Cell newCell = new Cell(cell.value, null); + + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); + cellChanges.add(cellChange); + } + return false; + } + }.init(_cellIndex, cellChanges); + } +} diff --git a/src/main/java/com/metaweb/gridworks/model/operations/EngineDependentMassCellOperation.java b/src/main/java/com/metaweb/gridworks/model/operations/EngineDependentMassCellOperation.java new file mode 100644 index 000000000..02abbb707 --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/model/operations/EngineDependentMassCellOperation.java @@ -0,0 +1,55 @@ +package com.metaweb.gridworks.model.operations; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONObject; + +import com.metaweb.gridworks.browsing.Engine; +import com.metaweb.gridworks.browsing.FilteredRows; +import com.metaweb.gridworks.browsing.RowVisitor; +import com.metaweb.gridworks.history.HistoryEntry; +import com.metaweb.gridworks.model.Column; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.model.changes.CellChange; +import com.metaweb.gridworks.model.changes.MassCellChange; +import com.metaweb.gridworks.process.Process; +import com.metaweb.gridworks.process.QuickHistoryEntryProcess; + +abstract public class EngineDependentMassCellOperation extends EngineDependentOperation { + private static final long serialVersionUID = -8962461328087299452L; + + final protected int _cellIndex; + + protected EngineDependentMassCellOperation(JSONObject engineConfig, int cellIndex) { + super(engineConfig); + _cellIndex = cellIndex; + } + + @Override + public Process createProcess(Project project, Properties options) throws Exception { + Engine engine = createEngine(project); + + Column column = project.columnModel.getColumnByCellIndex(_cellIndex); + if (column == null) { + throw new Exception("No column corresponding to cell index " + _cellIndex); + } + + List cellChanges = new ArrayList(project.rows.size()); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, createRowVisitor(project, cellChanges)); + + String description = createDescription(column, cellChanges); + + MassCellChange massCellChange = new MassCellChange(cellChanges, _cellIndex); + HistoryEntry historyEntry = new HistoryEntry( + project, description, this, massCellChange); + + return new QuickHistoryEntryProcess(project, historyEntry); + } + + abstract protected RowVisitor createRowVisitor(Project project, List cellChanges) throws Exception; + abstract protected String createDescription(Column column, List cellChanges); +} diff --git a/src/main/java/com/metaweb/gridworks/model/operations/EngineDependentOperation.java b/src/main/java/com/metaweb/gridworks/model/operations/EngineDependentOperation.java new file mode 100644 index 000000000..d1d994e8c --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/model/operations/EngineDependentOperation.java @@ -0,0 +1,37 @@ +package com.metaweb.gridworks.model.operations; + +import org.json.JSONException; +import org.json.JSONObject; + +import com.metaweb.gridworks.browsing.Engine; +import com.metaweb.gridworks.model.AbstractOperation; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.util.ParsingUtilities; + +abstract public class EngineDependentOperation implements AbstractOperation { + final private String _engineConfigString; + + transient protected JSONObject _engineConfig; + + protected EngineDependentOperation(JSONObject engineConfig) { + _engineConfig = engineConfig; + _engineConfigString = engineConfig.toString(); + } + + protected Engine createEngine(Project project) throws Exception { + Engine engine = new Engine(project); + engine.initializeFromJSON(getEngineConfig()); + return engine; + } + + protected JSONObject getEngineConfig() { + if (_engineConfig == null) { + try { + _engineConfig = ParsingUtilities.evaluateJsonStringToObject(_engineConfigString); + } catch (JSONException e) { + // ignore + } + } + return _engineConfig; + } +} diff --git a/src/main/java/com/metaweb/gridworks/model/operations/ReconOperation.java b/src/main/java/com/metaweb/gridworks/model/operations/ReconOperation.java new file mode 100644 index 000000000..841d9cd06 --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/model/operations/ReconOperation.java @@ -0,0 +1,344 @@ +package com.metaweb.gridworks.model.operations; + +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.metaweb.gridworks.browsing.Engine; +import com.metaweb.gridworks.browsing.FilteredRows; +import com.metaweb.gridworks.browsing.RowVisitor; +import com.metaweb.gridworks.history.HistoryEntry; +import com.metaweb.gridworks.model.Cell; +import com.metaweb.gridworks.model.Column; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.model.Recon; +import com.metaweb.gridworks.model.ReconCandidate; +import com.metaweb.gridworks.model.Row; +import com.metaweb.gridworks.model.changes.CellChange; +import com.metaweb.gridworks.model.changes.MassCellChange; +import com.metaweb.gridworks.process.LongRunningProcess; +import com.metaweb.gridworks.process.Process; +import com.metaweb.gridworks.util.ParsingUtilities; + +public class ReconOperation extends EngineDependentOperation { + private static final long serialVersionUID = 838795186905314865L; + + final protected int _cellIndex; + final protected String _typeID; + + public ReconOperation(JSONObject engineConfig, int cellIndex, String typeID) { + super(engineConfig); + _cellIndex = cellIndex; + _typeID = typeID; + } + + @Override + public Process createProcess(Project project, Properties options) throws Exception { + Engine engine = createEngine(project); + + Column column = project.columnModel.getColumnByCellIndex(_cellIndex); + if (column == null) { + throw new Exception("No column corresponding to cell index " + _cellIndex); + } + + List entries = new ArrayList(project.rows.size()); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, new RowVisitor() { + int cellIndex; + List entries; + + public RowVisitor init(int cellIndex, List entries) { + this.cellIndex = cellIndex; + this.entries = entries; + return this; + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + if (cellIndex < row.cells.size()) { + Cell cell = row.cells.get(cellIndex); + if (cell.value != null) { + entries.add(new ReconEntry(rowIndex, cell)); + } + } + return false; + } + }.init(_cellIndex, entries)); + + String description = + "Reconcile " + entries.size() + + " cells in column " + column.getHeaderLabel() + + " to type " + _typeID; + + return new ReconProcess(project, description, entries); + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + // TODO Auto-generated method stub + + } + + static protected class ReconEntry { + final public int rowIndex; + final public Cell cell; + + public ReconEntry(int rowIndex, Cell cell) { + this.rowIndex = rowIndex; + this.cell = cell; + } + } + + public class ReconProcess extends LongRunningProcess implements Runnable { + final protected Project _project; + final protected List _entries; + + public ReconProcess(Project project, String description, List entries) { + super(description); + _project = project; + _entries = entries; + } + + @Override + protected Runnable getRunnable() { + return this; + } + + @Override + public void run() { + Map> valueToEntries = new HashMap>(); + + for (ReconEntry entry : _entries) { + Object value = entry.cell.value; + if (value != null && value instanceof String) { + List entries2; + if (valueToEntries.containsKey(value)) { + entries2 = valueToEntries.get(value); + } else { + entries2 = new LinkedList(); + valueToEntries.put((String) value, entries2); + } + entries2.add(entry); + } + } + + List cellChanges = new ArrayList(_entries.size()); + List values = new ArrayList(valueToEntries.keySet()); + for (int i = 0; i < values.size(); i += 10) { + try { + recon(valueToEntries, values, i, Math.min(i + 10, values.size()), cellChanges); + } catch (JSONException e1) { + e1.printStackTrace(); + } + _progress = i * 100 / values.size(); + + try { + Thread.sleep(50); + } catch (InterruptedException e) { + if (_canceled) { + break; + } + } + } + + MassCellChange massCellChange = new MassCellChange(cellChanges, _cellIndex); + HistoryEntry historyEntry = new HistoryEntry( + _project, + _description, + ReconOperation.this, + massCellChange + ); + + _project.history.addEntry(historyEntry); + _project.processManager.onDoneProcess(this); + } + + protected void recon( + Map> valueToEntries, + List values, + int from, + int to, + List cellChanges + ) throws JSONException { + + StringWriter stringWriter = new StringWriter(); + JSONWriter jsonWriter = new JSONWriter(stringWriter); + + jsonWriter.object(); + for (int i = 0; from + i < to; i++) { + jsonWriter.key("q" + i + ":search"); + + jsonWriter.object(); + + jsonWriter.key("query"); jsonWriter.value(values.get(from + i)); + jsonWriter.key("limit"); jsonWriter.value(5); + jsonWriter.key("type"); jsonWriter.value(_typeID); + jsonWriter.key("type_strict"); jsonWriter.value("should"); + jsonWriter.key("indent"); jsonWriter.value(1); + jsonWriter.key("type_exclude"); jsonWriter.value("/common/image"); + jsonWriter.key("domain_exclude"); jsonWriter.value("/freebase"); + + jsonWriter.endObject(); + } + jsonWriter.endObject(); + + StringBuffer sb = new StringBuffer(); + sb.append("http://api.freebase.com/api/service/search?indent=1&queries="); + sb.append(ParsingUtilities.encode(stringWriter.toString())); + + try { + URL url = new URL(sb.toString()); + URLConnection connection = url.openConnection(); + connection.setConnectTimeout(5000); + connection.connect(); + + InputStream is = connection.getInputStream(); + try { + String s = ParsingUtilities.inputStreamToString(is); + JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); + + for (int i = 0; from + i < to; i++) { + String value = values.get(from + i); + String key = "q" + i + ":search"; + if (!o.has(key)) { + continue; + } + + JSONObject o2 = o.getJSONObject(key); + if (!(o2.has("result"))) { + continue; + } + + JSONArray results = o2.getJSONArray("result"); + + Recon recon = createRecon(value, results); + for (ReconEntry entry : valueToEntries.get(value)) { + Cell oldCell = entry.cell; + + Cell newCell = new Cell(oldCell.value, recon); + + CellChange cellChange = new CellChange( + entry.rowIndex, + _cellIndex, + oldCell, + newCell + ); + cellChanges.add(cellChange); + } + } + } finally { + is.close(); + } + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + protected Recon createRecon(String text, JSONArray results) throws JSONException { + Recon recon = new Recon(); + + int length = results.length(); + for (int i = 0; i < length && recon.candidates.size() < 3; i++) { + JSONObject result = results.getJSONObject(i); + if (!result.has("name")) { + continue; + } + + JSONArray types = result.getJSONArray("type"); + String[] typeIDs = new String[types.length()]; + for (int j = 0; j < typeIDs.length; j++) { + typeIDs[j] = types.getJSONObject(j).getString("id"); + } + + ReconCandidate candidate = new ReconCandidate( + result.getString("id"), + result.getString("guid"), + result.getString("name"), + typeIDs, + result.getDouble("relevance:score") + ); + + // best match + if (i == 0) { + recon.features.put("nameMatch", text.equalsIgnoreCase(candidate.topicName)); + recon.features.put("nameLevenshtein", StringUtils.getLevenshteinDistance(text, candidate.topicName)); + recon.features.put("nameWordDistance", wordDistance(text, candidate.topicName)); + + recon.features.put("typeMatch", false); + for (String typeID : candidate.typeIDs) { + if (_typeID.equals(typeID)) { + recon.features.put("typeMatch", true); + break; + } + } + } + + recon.candidates.add(candidate); + } + + return recon; + } + } + + static protected double wordDistance(String s1, String s2) { + Set words1 = breakWords(s1); + Set words2 = breakWords(s2); + return words1.size() >= words2.size() ? wordDistance(words1, words2) : wordDistance(words2, words1); + } + + static protected double wordDistance(Set longWords, Set shortWords) { + double common = 0; + for (String word : shortWords) { + if (longWords.contains(word)) { + common++; + } + } + return common / longWords.size(); + } + + static protected Set s_stopWords; + static { + s_stopWords = new HashSet(); + s_stopWords.add("the"); + s_stopWords.add("a"); + s_stopWords.add("and"); + s_stopWords.add("of"); + s_stopWords.add("on"); + s_stopWords.add("in"); + s_stopWords.add("at"); + s_stopWords.add("by"); + } + + static protected Set breakWords(String s) { + String[] words = s.toLowerCase().split("\\s+"); + + Set set = new HashSet(words.length); + for (String word : words) { + if (!s_stopWords.contains(word)) { + set.add(word); + } + } + return set; + } +} diff --git a/src/main/java/com/metaweb/gridworks/model/operations/TextTransformOperation.java b/src/main/java/com/metaweb/gridworks/model/operations/TextTransformOperation.java new file mode 100644 index 000000000..2bb5ad549 --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/model/operations/TextTransformOperation.java @@ -0,0 +1,81 @@ +package com.metaweb.gridworks.model.operations; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.metaweb.gridworks.browsing.RowVisitor; +import com.metaweb.gridworks.expr.Evaluable; +import com.metaweb.gridworks.expr.ExpressionUtils; +import com.metaweb.gridworks.expr.Parser; +import com.metaweb.gridworks.model.Cell; +import com.metaweb.gridworks.model.Column; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.model.Row; +import com.metaweb.gridworks.model.changes.CellChange; + +public class TextTransformOperation extends EngineDependentMassCellOperation { + private static final long serialVersionUID = -7698202759999537298L; + + final protected String _expression; + + public TextTransformOperation(JSONObject engineConfig, int cellIndex, String expression) { + super(engineConfig, cellIndex); + _expression = expression; + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + // TODO Auto-generated method stub + + } + + @Override + protected String createDescription(Column column, + List cellChanges) { + + return "Text transform on " + cellChanges.size() + + " cells in column " + column.getHeaderLabel() + ": " + _expression; + } + + @Override + protected RowVisitor createRowVisitor(Project project, List cellChanges) throws Exception { + Evaluable eval = new Parser(_expression).getExpression(); + Properties bindings = ExpressionUtils.createBindings(project); + + return new RowVisitor() { + int cellIndex; + Properties bindings; + List cellChanges; + Evaluable eval; + + public RowVisitor init(int cellIndex, Properties bindings, List cellChanges, Evaluable eval) { + this.cellIndex = cellIndex; + this.bindings = bindings; + this.cellChanges = cellChanges; + this.eval = eval; + return this; + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + if (cellIndex < row.cells.size()) { + Cell cell = row.cells.get(cellIndex); + if (cell.value != null) { + ExpressionUtils.bind(bindings, row, cell); + + Cell newCell = new Cell(eval.evaluate(bindings), cell.recon); + + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); + cellChanges.add(cellChange); + } + } + return false; + } + }.init(_cellIndex, bindings, cellChanges, eval); + } +} diff --git a/src/main/java/com/metaweb/gridworks/process/ReconProcess.java b/src/main/java/com/metaweb/gridworks/process/ReconProcess.java deleted file mode 100644 index 266a8f27e..000000000 --- a/src/main/java/com/metaweb/gridworks/process/ReconProcess.java +++ /dev/null @@ -1,274 +0,0 @@ -package com.metaweb.gridworks.process; - -import java.io.IOException; -import java.io.InputStream; -import java.io.StringWriter; -import java.io.UnsupportedEncodingException; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.metaweb.gridworks.history.HistoryEntry; -import com.metaweb.gridworks.model.Cell; -import com.metaweb.gridworks.model.Project; -import com.metaweb.gridworks.model.Recon; -import com.metaweb.gridworks.model.ReconCandidate; -import com.metaweb.gridworks.model.changes.CellChange; -import com.metaweb.gridworks.model.changes.MassCellChange; -import com.metaweb.gridworks.util.ParsingUtilities; - -public class ReconProcess extends LongRunningProcess implements Runnable { - static public class ReconEntry { - final public int rowIndex; - final public Cell cell; - - public ReconEntry(int rowIndex, Cell cell) { - this.rowIndex = rowIndex; - this.cell = cell; - } - } - - final protected Project _project; - final protected int _cellIndex; - final protected List _entries; - final protected String _typeID; - - public ReconProcess(Project project, String description, int cellIndex, List entries, String typeID) { - super(description); - _project = project; - _cellIndex = cellIndex; - _entries = entries; - _typeID = typeID; - } - - @Override - protected Runnable getRunnable() { - return this; - } - - @Override - public void run() { - Map> valueToEntries = new HashMap>(); - - for (ReconEntry entry : _entries) { - Object value = entry.cell.value; - if (value != null && value instanceof String) { - List entries2; - if (valueToEntries.containsKey(value)) { - entries2 = valueToEntries.get(value); - } else { - entries2 = new LinkedList(); - valueToEntries.put((String) value, entries2); - } - entries2.add(entry); - } - } - - List cellChanges = new ArrayList(_entries.size()); - List values = new ArrayList(valueToEntries.keySet()); - for (int i = 0; i < values.size(); i += 10) { - try { - recon(valueToEntries, values, i, Math.min(i + 10, values.size()), cellChanges); - } catch (JSONException e1) { - e1.printStackTrace(); - } - _progress = i * 100 / values.size(); - - try { - Thread.sleep(50); - } catch (InterruptedException e) { - if (_canceled) { - break; - } - } - } - - MassCellChange massCellChange = new MassCellChange(cellChanges, _cellIndex); - HistoryEntry historyEntry = new HistoryEntry(_project, _description, massCellChange); - - _project.history.addEntry(historyEntry); - - _project.processManager.onDoneProcess(this); - } - - protected void recon( - Map> valueToEntries, - List values, - int from, - int to, - List cellChanges - ) throws JSONException { - - StringWriter stringWriter = new StringWriter(); - JSONWriter jsonWriter = new JSONWriter(stringWriter); - - jsonWriter.object(); - for (int i = 0; from + i < to; i++) { - jsonWriter.key("q" + i + ":search"); - - jsonWriter.object(); - - jsonWriter.key("query"); jsonWriter.value(values.get(from + i)); - jsonWriter.key("limit"); jsonWriter.value(5); - jsonWriter.key("type"); jsonWriter.value(_typeID); - jsonWriter.key("type_strict"); jsonWriter.value("should"); - jsonWriter.key("indent"); jsonWriter.value(1); - jsonWriter.key("type_exclude"); jsonWriter.value("/common/image"); - jsonWriter.key("domain_exclude"); jsonWriter.value("/freebase"); - - jsonWriter.endObject(); - } - jsonWriter.endObject(); - - StringBuffer sb = new StringBuffer(); - sb.append("http://api.freebase.com/api/service/search?indent=1&queries="); - sb.append(ParsingUtilities.encode(stringWriter.toString())); - - try { - URL url = new URL(sb.toString()); - URLConnection connection = url.openConnection(); - connection.setConnectTimeout(5000); - connection.connect(); - - InputStream is = connection.getInputStream(); - try { - String s = ParsingUtilities.inputStreamToString(is); - JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); - - for (int i = 0; from + i < to; i++) { - String value = values.get(from + i); - String key = "q" + i + ":search"; - if (!o.has(key)) { - continue; - } - - JSONObject o2 = o.getJSONObject(key); - if (!(o2.has("result"))) { - continue; - } - - JSONArray results = o2.getJSONArray("result"); - - Recon recon = createRecon(value, results); - for (ReconEntry entry : valueToEntries.get(value)) { - Cell oldCell = entry.cell; - - Cell newCell = new Cell(oldCell.value, recon); - - CellChange cellChange = new CellChange( - entry.rowIndex, - _cellIndex, - oldCell, - newCell - ); - cellChanges.add(cellChange); - } - } - } finally { - is.close(); - } - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - protected Recon createRecon(String text, JSONArray results) throws JSONException { - Recon recon = new Recon(); - - int length = results.length(); - for (int i = 0; i < length && recon.candidates.size() < 3; i++) { - JSONObject result = results.getJSONObject(i); - if (!result.has("name")) { - continue; - } - - JSONArray types = result.getJSONArray("type"); - String[] typeIDs = new String[types.length()]; - for (int j = 0; j < typeIDs.length; j++) { - typeIDs[j] = types.getJSONObject(j).getString("id"); - } - - ReconCandidate candidate = new ReconCandidate( - result.getString("id"), - result.getString("guid"), - result.getString("name"), - typeIDs, - result.getDouble("relevance:score") - ); - - // best match - if (i == 0) { - recon.features.put("nameMatch", text.equalsIgnoreCase(candidate.topicName)); - recon.features.put("nameLevenshtein", StringUtils.getLevenshteinDistance(text, candidate.topicName)); - recon.features.put("nameWordDistance", wordDistance(text, candidate.topicName)); - - recon.features.put("typeMatch", false); - for (String typeID : candidate.typeIDs) { - if (_typeID.equals(typeID)) { - recon.features.put("typeMatch", true); - break; - } - } - } - - recon.candidates.add(candidate); - } - - return recon; - } - - protected double wordDistance(String s1, String s2) { - Set words1 = breakWords(s1); - Set words2 = breakWords(s2); - return words1.size() >= words2.size() ? wordDistance(words1, words2) : wordDistance(words2, words1); - } - - protected double wordDistance(Set longWords, Set shortWords) { - double common = 0; - for (String word : shortWords) { - if (longWords.contains(word)) { - common++; - } - } - return common / longWords.size(); - } - - static protected Set s_stopWords; - static { - s_stopWords = new HashSet(); - s_stopWords.add("the"); - s_stopWords.add("a"); - s_stopWords.add("and"); - s_stopWords.add("of"); - s_stopWords.add("on"); - s_stopWords.add("in"); - s_stopWords.add("at"); - s_stopWords.add("by"); - } - - protected Set breakWords(String s) { - String[] words = s.toLowerCase().split("\\s+"); - - Set set = new HashSet(words.length); - for (String word : words) { - if (!s_stopWords.contains(word)) { - set.add(word); - } - } - return set; - } -}