Added commands for searching for specific topics to match cells with.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@113 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-02-20 00:47:08 +00:00
parent ea2c904704
commit b730dfd8f9
8 changed files with 302 additions and 49 deletions

View File

@ -37,6 +37,7 @@ import com.metaweb.gridworks.commands.recon.ApproveNewReconcileCommand;
import com.metaweb.gridworks.commands.recon.ApproveReconcileCommand; import com.metaweb.gridworks.commands.recon.ApproveReconcileCommand;
import com.metaweb.gridworks.commands.recon.DiscardReconcileCommand; import com.metaweb.gridworks.commands.recon.DiscardReconcileCommand;
import com.metaweb.gridworks.commands.recon.JudgeOneCellCommand; import com.metaweb.gridworks.commands.recon.JudgeOneCellCommand;
import com.metaweb.gridworks.commands.recon.MatchSpecificTopicCommand;
import com.metaweb.gridworks.commands.recon.ReconcileCommand; import com.metaweb.gridworks.commands.recon.ReconcileCommand;
import com.metaweb.gridworks.commands.util.GetExpressionLanguageInfoCommand; import com.metaweb.gridworks.commands.util.GetExpressionLanguageInfoCommand;
import com.metaweb.gridworks.commands.util.GuessTypesOfColumnCommand; import com.metaweb.gridworks.commands.util.GuessTypesOfColumnCommand;
@ -73,10 +74,11 @@ public class GridworksServlet extends HttpServlet {
_commands.put("split-multi-value-cells", new SplitMultiValueCellsCommand()); _commands.put("split-multi-value-cells", new SplitMultiValueCellsCommand());
_commands.put("reconcile", new ReconcileCommand()); _commands.put("reconcile", new ReconcileCommand());
_commands.put("approve-reconcile", new ApproveReconcileCommand()); _commands.put("recon-approve-best-matches", new ApproveReconcileCommand());
_commands.put("approve-new-reconcile", new ApproveNewReconcileCommand()); _commands.put("recon-mark-new-topics", new ApproveNewReconcileCommand());
_commands.put("discard-reconcile", new DiscardReconcileCommand()); _commands.put("recon-discard-judgments", new DiscardReconcileCommand());
_commands.put("judge-one-cell", new JudgeOneCellCommand()); _commands.put("recon-match-specific-topic-to-cells", new MatchSpecificTopicCommand());
_commands.put("recon-judge-one-cell", new JudgeOneCellCommand());
_commands.put("save-protograph", new SaveProtographCommand()); _commands.put("save-protograph", new SaveProtographCommand());

View File

@ -31,16 +31,35 @@ public class JudgeOneCellCommand extends Command {
int rowIndex = Integer.parseInt(request.getParameter("row")); int rowIndex = Integer.parseInt(request.getParameter("row"));
int cellIndex = Integer.parseInt(request.getParameter("cell")); int cellIndex = Integer.parseInt(request.getParameter("cell"));
String judgment = request.getParameter("judgment"); String judgment = request.getParameter("judgment");
String candidateID = request.getParameter("candidate");
JudgeOneCellProcess process = new JudgeOneCellProcess( JudgeOneCellProcess process = null;
project,
"Judge one cell's recon result", if (judgment != null) {
rowIndex, process = new JudgeOneCellProcess(
cellIndex, project,
judgment, "Judge one cell's recon result",
candidateID judgment,
); rowIndex,
cellIndex,
request.getParameter("candidate")
);
} else {
ReconCandidate match = new ReconCandidate(
request.getParameter("topicID"),
request.getParameter("topicGUID"),
request.getParameter("topicName"),
request.getParameter("types").split(","),
100
);
process = new JudgeOneCellProcess(
project,
"Judge one cell's recon result",
rowIndex,
cellIndex,
match
);
}
boolean done = project.processManager.queueProcess(process); boolean done = project.processManager.queueProcess(process);
if (done) { if (done) {
@ -62,15 +81,26 @@ public class JudgeOneCellCommand extends Command {
final int cellIndex; final int cellIndex;
final String judgment; final String judgment;
final String candidateID; final String candidateID;
final ReconCandidate match;
Cell newCell; Cell newCell;
JudgeOneCellProcess(Project project, String briefDescription, int rowIndex, int cellIndex, String judgment, String candidateID) { JudgeOneCellProcess(Project project, String briefDescription, String judgment, int rowIndex, int cellIndex, String candidateID) {
super(project, briefDescription); super(project, briefDescription);
this.rowIndex = rowIndex; this.rowIndex = rowIndex;
this.cellIndex = cellIndex; this.cellIndex = cellIndex;
this.judgment = judgment; this.judgment = judgment;
this.candidateID = candidateID; this.candidateID = candidateID;
this.match = null;
}
JudgeOneCellProcess(Project project, String briefDescription, int rowIndex, int cellIndex, ReconCandidate match) {
super(project, briefDescription);
this.rowIndex = rowIndex;
this.cellIndex = cellIndex;
this.judgment = null;
this.candidateID = null;
this.match = match;
} }
protected HistoryEntry createHistoryEntry() throws Exception { protected HistoryEntry createHistoryEntry() throws Exception {
@ -96,35 +126,44 @@ public class JudgeOneCellCommand extends Command {
String description = null; String description = null;
if ("match".equals(judgment)) { if (match != null) {
ReconCandidate match = null;
if (cell.recon != null) {
for (ReconCandidate c : cell.recon.candidates) {
if (candidateID.equals(c.topicID)) {
match = c;
break;
}
}
}
if (match == null) {
throw new Exception("No such recon candidate");
}
newCell.recon.judgment = Recon.Judgment.Matched; newCell.recon.judgment = Recon.Judgment.Matched;
newCell.recon.match = match; newCell.recon.match = this.match;
description = "Match " + match.topicName + description = "Match " + this.match.topicName +
" (" + match.topicID + ") to " + " (" + match.topicID + ") to " +
cellDescription; cellDescription;
} else {
if ("match".equals(judgment)) {
ReconCandidate match = null;
} else if ("new".equals(judgment)) { if (cell.recon != null) {
newCell.recon.judgment = Recon.Judgment.New; for (ReconCandidate c : cell.recon.candidates) {
description = "Mark to create new topic for " + cellDescription; if (candidateID.equals(c.topicID)) {
} else if ("discard".equals(judgment)) { match = c;
newCell.recon.judgment = Recon.Judgment.None; break;
newCell.recon.match = null; }
description = "Discard recon judgment for " + cellDescription; }
}
if (match == null) {
throw new Exception("No such recon candidate");
}
newCell.recon.judgment = Recon.Judgment.Matched;
newCell.recon.match = match;
description = "Match " + match.topicName +
" (" + match.topicID + ") to " +
cellDescription;
} else if ("new".equals(judgment)) {
newCell.recon.judgment = Recon.Judgment.New;
description = "Mark to create new topic for " + cellDescription;
} else if ("discard".equals(judgment)) {
newCell.recon.judgment = Recon.Judgment.None;
newCell.recon.match = null;
description = "Discard recon judgment for " + cellDescription;
}
} }
Change change = new CellChange(rowIndex, cellIndex, cell, newCell); Change change = new CellChange(rowIndex, cellIndex, cell, newCell);

View File

@ -0,0 +1,29 @@
package com.metaweb.gridworks.commands.recon;
import javax.servlet.http.HttpServletRequest;
import org.json.JSONObject;
import com.metaweb.gridworks.commands.EngineDependentCommand;
import com.metaweb.gridworks.model.AbstractOperation;
import com.metaweb.gridworks.model.ReconCandidate;
import com.metaweb.gridworks.model.operations.MatchSpecificTopicReconOperation;
/**
 * Command that builds a {@link MatchSpecificTopicReconOperation} from the
 * parameters of an HTTP request, so that cells of one column can be matched
 * to a single topic chosen by the caller.
 */
public class MatchSpecificTopicCommand extends EngineDependentCommand {

    /**
     * Creates the operation described by the request.
     *
     * Request parameters read here:
     * <ul>
     *   <li>{@code columnName} - name of the column to operate on</li>
     *   <li>{@code topicID}, {@code topicGUID}, {@code topicName} - identity of the topic to match</li>
     *   <li>{@code types} - comma-separated type ids of the topic; may be absent or empty</li>
     * </ul>
     *
     * @param request      the incoming HTTP request carrying the parameters above
     * @param engineConfig engine configuration handed through to the operation
     * @return the operation that performs the match
     * @throws Exception declared by the overridden method
     */
    @Override
    protected AbstractOperation createOperation(HttpServletRequest request,
            JSONObject engineConfig) throws Exception {

        String columnName = request.getParameter("columnName");

        ReconCandidate match = new ReconCandidate(
            request.getParameter("topicID"),
            request.getParameter("topicGUID"),
            request.getParameter("topicName"),
            parseTypeIDs(request.getParameter("types")),
            100 // NOTE(review): presumably the candidate's score; confirm against ReconCandidate
        );

        return new MatchSpecificTopicReconOperation(engineConfig, columnName, match);
    }

    /**
     * Splits a comma-separated list of type ids.
     *
     * A missing or empty parameter yields an empty array: calling
     * {@code split} directly would throw on {@code null} and would turn
     * {@code ""} into a one-element array holding an empty type id.
     */
    private static String[] parseTypeIDs(String types) {
        if (types == null || types.length() == 0) {
            return new String[0];
        }
        return types.split(",");
    }
}

View File

@ -9,8 +9,10 @@ import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
@ -76,12 +78,18 @@ public class GuessTypesOfColumnCommand extends Command {
int cellIndex = column.getCellIndex(); int cellIndex = column.getCellIndex();
List<String> samples = new ArrayList<String>(10); List<String> samples = new ArrayList<String>(10);
Set<String> sampleSet = new HashSet<String>();
for (Row row : project.rows) { for (Row row : project.rows) {
Object value = row.getCellValue(cellIndex); Object value = row.getCellValue(cellIndex);
if (!ExpressionUtils.isBlank(value)) { if (!ExpressionUtils.isBlank(value)) {
samples.add(value.toString()); String s = value.toString().trim();
if (samples.size() >= 10) { if (!sampleSet.contains(s)) {
break; samples.add(s);
sampleSet.add(s);
if (samples.size() >= 10) {
break;
}
} }
} }
} }

View File

@ -0,0 +1,89 @@
package com.metaweb.gridworks.model.operations;
import java.util.List;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.metaweb.gridworks.browsing.RowVisitor;
import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Column;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Recon;
import com.metaweb.gridworks.model.ReconCandidate;
import com.metaweb.gridworks.model.Row;
import com.metaweb.gridworks.model.Recon.Judgment;
import com.metaweb.gridworks.model.changes.CellChange;
/**
 * Mass-cell operation that marks cells of one column as matched to a single,
 * caller-supplied recon candidate.
 *
 * For every visited row that has a non-null cell in the target column, a
 * replacement cell is produced whose recon judgment is
 * {@link Judgment#Matched} and whose match is {@link #match}.
 */
public class MatchSpecificTopicReconOperation extends EngineDependentMassCellOperation {
    private static final long serialVersionUID = -5205694623711144436L;

    /** Candidate assigned as the match of every cell this operation changes. */
    final protected ReconCandidate match;

    /**
     * @param engineConfig engine configuration passed to the superclass
     * @param columnName   name of the column whose cells are to be matched
     * @param match        the candidate to assign to those cells
     */
    public MatchSpecificTopicReconOperation(JSONObject engineConfig, String columnName, ReconCandidate match) {
        super(engineConfig, columnName, false);
        this.match = match;
    }

    /**
     * Writes this operation as a JSON object with the keys {@code op},
     * {@code description}, {@code engineConfig} and {@code columnName}.
     *
     * NOTE(review): the matched topic itself is only present inside the
     * description text, not as separate keys; confirm whether that is intended.
     */
    public void write(JSONWriter writer, Properties options)
            throws JSONException {

        writer.object();
        writer.key("op"); writer.value("recon-match-specific-topic-to-cells");
        writer.key("description"); writer.value(describeMatch() + " to cells in column " + _columnName);
        writer.key("engineConfig"); writer.value(getEngineConfig());
        writer.key("columnName"); writer.value(_columnName);
        writer.endObject();
    }

    /** Returns a short description naming the topic and the target column. */
    protected String getBriefDescription() {
        return describeMatch() + " to cells in column " + _columnName;
    }

    /**
     * Returns a description that also states how many cells were changed.
     *
     * @param column      the column that was operated on
     * @param cellChanges the changes that were collected for that column
     */
    protected String createDescription(Column column,
            List<CellChange> cellChanges) {

        return describeMatch() + " to " + cellChanges.size() +
            " cells in column " + column.getHeaderLabel();
    }

    /**
     * Creates the visitor that records one {@link CellChange} per visited row.
     *
     * @param project     the project being operated on
     * @param cellChanges list that receives the generated changes
     * @throws Exception if the lookup of the configured column yields {@code null}
     */
    protected RowVisitor createRowVisitor(Project project, List<CellChange> cellChanges) throws Exception {
        Column column = project.columnModel.getColumnByName(_columnName);
        if (column == null) {
            // Fail with a readable message instead of a NullPointerException below.
            throw new Exception("No column named " + _columnName);
        }

        return new RowVisitor() {
            int cellIndex;
            List<CellChange> cellChanges;

            public RowVisitor init(int cellIndex, List<CellChange> cellChanges) {
                this.cellIndex = cellIndex;
                this.cellChanges = cellChanges;
                return this;
            }

            public boolean visit(Project project, int rowIndex, Row row, boolean contextual) {
                if (cellIndex < row.cells.size()) {
                    Cell cell = row.cells.get(cellIndex);
                    // A null slot has no value to match, so it is skipped.
                    if (cell != null) {
                        // Copy any existing recon so the old cell stays untouched.
                        Cell newCell = new Cell(
                            cell.value,
                            cell.recon != null ? cell.recon.dup() : new Recon()
                        );
                        newCell.recon.match = match;
                        newCell.recon.judgment = Judgment.Matched;

                        cellChanges.add(new CellChange(rowIndex, cellIndex, cell, newCell));
                    }
                }
                return false;
            }
        }.init(column.getCellIndex(), cellChanges);
    }

    /** Common prefix of all descriptions: names the topic and its id. */
    private String describeMatch() {
        return "Match specific topic " +
            match.topicName + " (" +
            match.topicID + ")";
    }
}

View File

@ -86,6 +86,15 @@ DataTableCellUI.prototype._render = function() {
renderCandidate(candidates[i], i); renderCandidate(candidates[i], i);
} }
} }
$('<a href="javascript:{}"></a>')
.addClass("data-table-recon-search")
.click(function(evt) {
self._searchForMatch();
return false;
})
.text("search for match")
.appendTo($('<div>').appendTo(divContent));
} }
} }
}; };
@ -107,7 +116,41 @@ DataTableCellUI.prototype._doJudgment = function(judgment, params) {
params.row = this._rowIndex; params.row = this._rowIndex;
params.cell = this._cellIndex; params.cell = this._cellIndex;
params.judgment = judgment; params.judgment = judgment;
this.doPostThenUpdate("judge-one-cell", params); this.doPostThenUpdate("recon-judge-one-cell", params);
};
/*
 *  Shows a dialog with a suggest-enabled text box, pre-filled with this
 *  cell's value. When a suggestion is selected, the chosen topic is posted
 *  to "recon-judge-one-cell" for this row/cell (without a judgment
 *  parameter) and the dialog is dismissed. Cancel just dismisses the dialog.
 */
DataTableCellUI.prototype._searchForMatch = function() {
    var cellUI = this;
    var cellValue = this._cell.v;

    // Assigned once the dialog is shown; the handlers below only run after that.
    var dialogLevel;
    var closeDialog = function() {
        DialogSystem.dismissUntil(dialogLevel - 1);
    };

    var frame = DialogSystem.createDialog();
    frame.width("200px");

    $('<div></div>').addClass("dialog-header").text("Search for Match").appendTo(frame);
    var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
    var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);

    $('<p></p>').text("Search Freebase for topic to match " + cellValue).appendTo(body);

    var inputHolder = $('<p></p>').appendTo(body);
    var input = $('<input />').attr("value", cellValue).appendTo(inputHolder);

    var onTopicSelected = function(e, data) {
        var typeIDs = $.map(data.type, function(elmt) { return elmt.id; });

        cellUI.doPostThenUpdate("recon-judge-one-cell", {
            row: cellUI._rowIndex,
            cell: cellUI._cellIndex,
            topicID: data.id,
            topicGUID: data.guid,
            topicName: data.name,
            types: typeIDs.join(",")
        });

        closeDialog();
    };
    input.suggest({}).bind("fb-select", onTopicSelected);

    $('<button></button>').text("Cancel").click(function() {
        closeDialog();
    }).appendTo(footer);

    dialogLevel = DialogSystem.showDialog(frame);
    input[0].focus();
};
DataTableCellUI.prototype.createUpdateFunction = function(onBefore) { DataTableCellUI.prototype.createUpdateFunction = function(onBefore) {

View File

@ -214,6 +214,14 @@ DataTableColumnHeaderUI.prototype._createMenuForColumnHeader = function(elmt) {
click: function() { click: function() {
self._doDiscardReconResults(); self._doDiscardReconResults();
} }
},
{},
{
label: "Match Filtered Cells to ...",
tooltip: "Search for a topic to match all filtered cells to",
click: function() {
self._doSearchToMatch();
}
} }
] ]
}, },
@ -395,25 +403,60 @@ DataTableColumnHeaderUI.prototype._doReconcile = function() {
DataTableColumnHeaderUI.prototype._doDiscardReconResults = function() { DataTableColumnHeaderUI.prototype._doDiscardReconResults = function() {
this._dataTableView.doPostThenUpdate( this._dataTableView.doPostThenUpdate(
"discard-reconcile", "recon-discard-judgments",
{ columnName: this._column.headerLabel } { columnName: this._column.headerLabel }
); );
}; };
DataTableColumnHeaderUI.prototype._doApproveBestCandidates = function() { DataTableColumnHeaderUI.prototype._doApproveBestCandidates = function() {
this._dataTableView.doPostThenUpdate( this._dataTableView.doPostThenUpdate(
"approve-reconcile", "recon-approve-best-matches",
{ columnName: this._column.headerLabel } { columnName: this._column.headerLabel }
); );
}; };
DataTableColumnHeaderUI.prototype._doApproveNewTopics = function() { DataTableColumnHeaderUI.prototype._doApproveNewTopics = function() {
this._dataTableView.doPostThenUpdate( this._dataTableView.doPostThenUpdate(
"approve-new-reconcile", "recon-mark-new-topics",
{ columnName: this._column.headerLabel } { columnName: this._column.headerLabel }
); );
}; };
/*
 *  Shows a dialog with an empty, suggest-enabled text box. When a
 *  suggestion is selected, the chosen topic and this column's header label
 *  are posted to "recon-match-specific-topic-to-cells" and the dialog is
 *  dismissed. Cancel just dismisses the dialog.
 */
DataTableColumnHeaderUI.prototype._doSearchToMatch = function() {
    var headerUI = this;

    // Assigned once the dialog is shown; the handlers below only run after that.
    var dialogLevel;
    var closeDialog = function() {
        DialogSystem.dismissUntil(dialogLevel - 1);
    };

    var frame = DialogSystem.createDialog();
    frame.width("200px");

    $('<div></div>').addClass("dialog-header").text("Search for Match").appendTo(frame);
    var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
    var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);

    $('<p></p>').text("Search Freebase for a topic to match all filtered cells:").appendTo(body);

    var inputHolder = $('<p></p>').appendTo(body);
    var input = $('<input />').appendTo(inputHolder);

    var onTopicSelected = function(e, data) {
        var typeIDs = $.map(data.type, function(elmt) { return elmt.id; });
        var params = {
            columnName: headerUI._column.headerLabel,
            topicID: data.id,
            topicGUID: data.guid,
            topicName: data.name,
            types: typeIDs.join(",")
        };

        headerUI._dataTableView.doPostThenUpdate(
            "recon-match-specific-topic-to-cells",
            params
        );

        closeDialog();
    };
    input.suggest({}).bind("fb-select", onTopicSelected);

    $('<button></button>').text("Cancel").click(function() {
        closeDialog();
    }).appendTo(footer);

    dialogLevel = DialogSystem.showDialog(frame);
    input[0].focus();
};
DataTableColumnHeaderUI.prototype._doAddColumn = function(initialExpression) { DataTableColumnHeaderUI.prototype._doAddColumn = function(initialExpression) {
var self = this; var self = this;
DataTableView.promptExpressionOnVisibleRows( DataTableView.promptExpressionOnVisibleRows(

View File

@ -73,12 +73,12 @@ a.data-table-recon-topic:hover {
margin: 0 0.5em; margin: 0 0.5em;
} }
a.data-table-recon-action { a.data-table-recon-action, a.data-table-recon-search {
font-size: 80%; font-size: 80%;
text-decoration: none; text-decoration: none;
color: #aaf; color: #aaf;
} }
a.data-table-recon-action:hover { a.data-table-recon-action:hover, a.data-table-recon-search:hover {
text-decoration: underline; text-decoration: underline;
color: #008; color: #008;
} }