Support reusing newly created topics for cells with the same content.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@121 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
e4b01cb36c
commit
5e9be8c258
@ -6,7 +6,9 @@ import org.json.JSONObject;
|
||||
|
||||
import com.metaweb.gridworks.commands.EngineDependentCommand;
|
||||
import com.metaweb.gridworks.model.AbstractOperation;
|
||||
import com.metaweb.gridworks.model.Recon;
|
||||
import com.metaweb.gridworks.model.ReconCandidate;
|
||||
import com.metaweb.gridworks.model.Recon.Judgment;
|
||||
import com.metaweb.gridworks.operations.ReconJudgeSimilarCellsOperation;
|
||||
|
||||
public class ReconJudgeSimilarCellsCommand extends EngineDependentCommand {
|
||||
@ -17,7 +19,7 @@ public class ReconJudgeSimilarCellsCommand extends EngineDependentCommand {
|
||||
|
||||
String columnName = request.getParameter("columnName");
|
||||
String similarValue = request.getParameter("similarValue");
|
||||
String judgment = request.getParameter("judgment");
|
||||
Judgment judgment = Recon.stringToJudgment(request.getParameter("judgment"));
|
||||
|
||||
ReconCandidate match = null;
|
||||
String topicID = request.getParameter("topicID");
|
||||
@ -33,12 +35,15 @@ public class ReconJudgeSimilarCellsCommand extends EngineDependentCommand {
|
||||
);
|
||||
}
|
||||
|
||||
String shareNewTopics = request.getParameter("shareNewTopics");
|
||||
|
||||
return new ReconJudgeSimilarCellsOperation(
|
||||
engineConfig,
|
||||
columnName,
|
||||
similarValue,
|
||||
judgment,
|
||||
match
|
||||
match,
|
||||
"true".equals(shareNewTopics)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -14,8 +14,10 @@ public class ReconMarkNewTopicsCommand extends EngineDependentCommand {
|
||||
protected AbstractOperation createOperation(HttpServletRequest request,
|
||||
JSONObject engineConfig) throws Exception {
|
||||
|
||||
String columnName = request.getParameter("columnName");
|
||||
|
||||
return new ReconMarkNewTopicsOperation(engineConfig, columnName);
|
||||
return new ReconMarkNewTopicsOperation(
|
||||
engineConfig,
|
||||
request.getParameter("columnName"),
|
||||
"true".equals(request.getParameter("shareNewTopics"))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -58,11 +58,16 @@ public class Recon implements Serializable, HasFields, Jsonizable {
|
||||
s_featureMap.put("nameWordDistance", Feature_nameWordDistance);
|
||||
}
|
||||
|
||||
final public long id;
|
||||
public Object[] features = new Object[Feature_max];
|
||||
public List<ReconCandidate> candidates = new LinkedList<ReconCandidate>();
|
||||
public Judgment judgment = Judgment.None;
|
||||
public ReconCandidate match = null;
|
||||
|
||||
/**
 * Creates a recon and assigns its {@code id}.
 *
 * The id is the current time in milliseconds multiplied by 1,000,000,
 * plus a random offset between 0 and 1,000,000 (the upper bound can be
 * reached because the random value is rounded, not truncated).
 *
 * NOTE(review): presumably meant to make ids distinct across instances;
 * the scheme does not guarantee uniqueness - confirm that is acceptable
 * for code that keys on this id.
 */
public Recon() {
|
||||
id = System.currentTimeMillis() * 1000000 + Math.round(Math.random() * 1000000);
|
||||
}
|
||||
|
||||
public Recon dup() {
|
||||
Recon r = new Recon();
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
package com.metaweb.gridworks.operations;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONArray;
|
||||
@ -26,6 +28,7 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
final protected String _similarValue;
|
||||
final protected Judgment _judgment;
|
||||
final protected ReconCandidate _match;
|
||||
final protected boolean _shareNewTopics;
|
||||
|
||||
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
||||
JSONObject engineConfig = obj.getJSONObject("engineConfig");
|
||||
@ -59,7 +62,8 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
obj.getString("columnName"),
|
||||
obj.getString("similarValue"),
|
||||
judgment,
|
||||
match
|
||||
match,
|
||||
obj.has("shareNewTopics") ? obj.getBoolean("shareNewTopics") : false
|
||||
);
|
||||
}
|
||||
|
||||
@ -68,25 +72,14 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
String columnName,
|
||||
String similarValue,
|
||||
Judgment judgment,
|
||||
ReconCandidate match
|
||||
ReconCandidate match,
|
||||
boolean shareNewTopics
|
||||
) {
|
||||
super(engineConfig, columnName, false);
|
||||
this._similarValue = similarValue;
|
||||
this._judgment = judgment;
|
||||
this._match = match;
|
||||
}
|
||||
|
||||
public ReconJudgeSimilarCellsOperation(
|
||||
JSONObject engineConfig,
|
||||
String columnName,
|
||||
String similarValue,
|
||||
String judgmentString,
|
||||
ReconCandidate match
|
||||
) {
|
||||
super(engineConfig, columnName, false);
|
||||
this._similarValue = similarValue;
|
||||
this._judgment = Recon.stringToJudgment(judgmentString);
|
||||
this._match = match;
|
||||
this._shareNewTopics = shareNewTopics;
|
||||
}
|
||||
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
@ -102,6 +95,7 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
if (_match != null) {
|
||||
writer.key("match"); _match.write(writer, options);
|
||||
}
|
||||
writer.key("shareNewTopics"); writer.value(_shareNewTopics);
|
||||
|
||||
writer.endObject();
|
||||
}
|
||||
@ -111,8 +105,13 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
return "Discard recon judgments for cells containing \"" +
|
||||
_similarValue + "\" in column " + _columnName;
|
||||
} else if (_judgment == Judgment.New) {
|
||||
return "Mark to create new topics for cells containing \"" +
|
||||
_similarValue + "\" in column " + _columnName;
|
||||
if (_shareNewTopics) {
|
||||
return "Mark to create one single new topic for all cells containing \"" +
|
||||
_similarValue + "\" in column " + _columnName;
|
||||
} else {
|
||||
return "Mark to create one new topic for each cell containing \"" +
|
||||
_similarValue + "\" in column " + _columnName;
|
||||
}
|
||||
} else if (_judgment == Judgment.Matched) {
|
||||
return "Match topic " +
|
||||
_match.topicName + " (" +
|
||||
@ -129,8 +128,13 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
return "Discard recon judgments for " + cellChanges.size() + " cells containing \"" +
|
||||
_similarValue + "\" in column " + _columnName;
|
||||
} else if (_judgment == Judgment.New) {
|
||||
return "Mark to create new topics for " + cellChanges.size() + " cells containing \"" +
|
||||
_similarValue + "\" in column " + _columnName;
|
||||
if (_shareNewTopics) {
|
||||
return "Mark to create one single new topic for " + cellChanges.size() + " cells containing \"" +
|
||||
_similarValue + "\" in column " + _columnName;
|
||||
} else {
|
||||
return "Mark to create one new topic for each of " + cellChanges.size() + " cells containing \"" +
|
||||
_similarValue + "\" in column " + _columnName;
|
||||
}
|
||||
} else if (_judgment == Judgment.Matched) {
|
||||
return "Match topic " +
|
||||
_match.topicName + " (" +
|
||||
@ -145,8 +149,9 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
Column column = project.columnModel.getColumnByName(_columnName);
|
||||
|
||||
return new RowVisitor() {
|
||||
int _cellIndex;
|
||||
List<CellChange> _cellChanges;
|
||||
int _cellIndex;
|
||||
List<CellChange> _cellChanges;
|
||||
Map<String, Recon> _sharedRecons = new HashMap<String, Recon>();
|
||||
|
||||
public RowVisitor init(int cellIndex, List<CellChange> cellChanges) {
|
||||
_cellIndex = cellIndex;
|
||||
@ -160,20 +165,32 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
!ExpressionUtils.isBlank(cell.value) &&
|
||||
_similarValue.equals(cell.value)) {
|
||||
|
||||
Cell newCell = new Cell(
|
||||
cell.value,
|
||||
cell.recon == null ? new Recon() : cell.recon.dup()
|
||||
);
|
||||
Recon recon = null;
|
||||
if (_judgment == Judgment.New && _shareNewTopics) {
|
||||
String s = cell.value.toString();
|
||||
if (_sharedRecons.containsKey(s)) {
|
||||
recon = _sharedRecons.get(s);
|
||||
} else {
|
||||
recon = new Recon();
|
||||
recon.judgment = Judgment.New;
|
||||
|
||||
if (_judgment == Judgment.Matched) {
|
||||
newCell.recon.judgment = Recon.Judgment.Matched;
|
||||
newCell.recon.match = _match;
|
||||
} else if (_judgment == Judgment.New) {
|
||||
newCell.recon.judgment = Recon.Judgment.New;
|
||||
} else if (_judgment == Judgment.None) {
|
||||
newCell.recon.judgment = Recon.Judgment.None;
|
||||
newCell.recon.match = null;
|
||||
}
|
||||
_sharedRecons.put(s, recon);
|
||||
}
|
||||
} else {
|
||||
recon = cell.recon == null ? new Recon() : cell.recon.dup();
|
||||
if (_judgment == Judgment.Matched) {
|
||||
recon.judgment = Recon.Judgment.Matched;
|
||||
recon.match = _match;
|
||||
} else if (_judgment == Judgment.New) {
|
||||
recon.judgment = Recon.Judgment.New;
|
||||
recon.match = null;
|
||||
} else if (_judgment == Judgment.None) {
|
||||
recon.judgment = Recon.Judgment.None;
|
||||
recon.match = null;
|
||||
}
|
||||
}
|
||||
|
||||
Cell newCell = new Cell(cell.value, recon);
|
||||
|
||||
CellChange cellChange = new CellChange(rowIndex, _cellIndex, cell, newCell);
|
||||
_cellChanges.add(cellChange);
|
||||
|
@ -1,6 +1,8 @@
|
||||
package com.metaweb.gridworks.operations;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONException;
|
||||
@ -20,18 +22,21 @@ import com.metaweb.gridworks.model.changes.CellChange;
|
||||
public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperation {
|
||||
private static final long serialVersionUID = -5205694623711144436L;
|
||||
|
||||
final protected boolean _shareNewTopics;
|
||||
|
||||
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
||||
JSONObject engineConfig = obj.getJSONObject("engineConfig");
|
||||
String columnName = obj.getString("columnName");
|
||||
|
||||
return new ReconMarkNewTopicsOperation(
|
||||
engineConfig,
|
||||
columnName
|
||||
obj.getString("columnName"),
|
||||
obj.has("shareNewTopics") ? obj.getBoolean("shareNewTopics") : false
|
||||
);
|
||||
}
|
||||
|
||||
public ReconMarkNewTopicsOperation(JSONObject engineConfig, String columnName) {
|
||||
public ReconMarkNewTopicsOperation(JSONObject engineConfig, String columnName, boolean shareNewTopics) {
|
||||
super(engineConfig, columnName, false);
|
||||
_shareNewTopics = shareNewTopics;
|
||||
}
|
||||
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
@ -42,18 +47,25 @@ public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperatio
|
||||
writer.key("description"); writer.value(getBriefDescription());
|
||||
writer.key("engineConfig"); writer.value(getEngineConfig());
|
||||
writer.key("columnName"); writer.value(_columnName);
|
||||
writer.key("shareNewTopics"); writer.value(_shareNewTopics);
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
protected String getBriefDescription() {
|
||||
return "Mark to create new topics for cells in column " + _columnName;
|
||||
return "Mark to create new topics for cells in column " + _columnName +
|
||||
(_shareNewTopics ?
|
||||
", one topic for each group of similar cells" :
|
||||
", one topic for each cell");
|
||||
}
|
||||
|
||||
protected String createDescription(Column column,
|
||||
List<CellChange> cellChanges) {
|
||||
|
||||
return "Mark to create new topics for " + cellChanges.size() +
|
||||
" cells in column " + column.getHeaderLabel();
|
||||
" cells in column " + column.getHeaderLabel() +
|
||||
(_shareNewTopics ?
|
||||
", one topic for each group of similar cells" :
|
||||
", one topic for each cell");
|
||||
}
|
||||
|
||||
protected RowVisitor createRowVisitor(Project project, List<CellChange> cellChanges) throws Exception {
|
||||
@ -62,6 +74,7 @@ public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperatio
|
||||
return new RowVisitor() {
|
||||
int cellIndex;
|
||||
List<CellChange> cellChanges;
|
||||
Map<String, Recon> _sharedRecons = new HashMap<String, Recon>();
|
||||
|
||||
public RowVisitor init(int cellIndex, List<CellChange> cellChanges) {
|
||||
this.cellIndex = cellIndex;
|
||||
@ -70,15 +83,26 @@ public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperatio
|
||||
}
|
||||
|
||||
public boolean visit(Project project, int rowIndex, Row row, boolean contextual) {
|
||||
if (cellIndex < row.cells.size()) {
|
||||
Cell cell = row.cells.get(cellIndex);
|
||||
Cell cell = row.getCell(cellIndex);
|
||||
if (cell != null) {
|
||||
Recon recon = null;
|
||||
if (_shareNewTopics) {
|
||||
String s = cell.value == null ? "" : cell.value.toString();
|
||||
if (_sharedRecons.containsKey(s)) {
|
||||
recon = _sharedRecons.get(s);
|
||||
} else {
|
||||
recon = new Recon();
|
||||
recon.judgment = Judgment.New;
|
||||
|
||||
Cell newCell = new Cell(
|
||||
cell.value,
|
||||
cell.recon != null ? cell.recon.dup() : new Recon()
|
||||
);
|
||||
newCell.recon.match = null;
|
||||
newCell.recon.judgment = Judgment.New;
|
||||
_sharedRecons.put(s, recon);
|
||||
}
|
||||
} else {
|
||||
recon = cell.recon == null ? new Recon() : cell.recon.dup();
|
||||
recon.match = null;
|
||||
recon.judgment = Judgment.New;
|
||||
}
|
||||
|
||||
Cell newCell = new Cell(cell.value, recon);
|
||||
|
||||
CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell);
|
||||
cellChanges.add(cellChange);
|
||||
|
@ -8,8 +8,10 @@ import com.metaweb.gridworks.model.Cell;
|
||||
import com.metaweb.gridworks.model.Column;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
import com.metaweb.gridworks.model.Row;
|
||||
import com.metaweb.gridworks.model.Recon.Judgment;
|
||||
import com.metaweb.gridworks.protograph.AnonymousNode;
|
||||
import com.metaweb.gridworks.protograph.CellNode;
|
||||
import com.metaweb.gridworks.protograph.CellTopicNode;
|
||||
import com.metaweb.gridworks.protograph.FreebaseProperty;
|
||||
import com.metaweb.gridworks.protograph.FreebaseTopicNode;
|
||||
import com.metaweb.gridworks.protograph.Link;
|
||||
@ -25,7 +27,7 @@ public class Transposer {
|
||||
Node rootNode,
|
||||
TransposedNodeFactory nodeFactory
|
||||
) {
|
||||
Context rootContext = new Context(rootNode, null, null, 5);
|
||||
Context rootContext = new Context(rootNode, null, null, 20);
|
||||
|
||||
for (Row row : project.rows) {
|
||||
descend(project, protograph, nodeFactory, row, rootNode, rootContext);
|
||||
@ -53,6 +55,13 @@ public class Transposer {
|
||||
Column column = project.columnModel.getColumnByName(node2.columnName);
|
||||
Cell cell = row.getCell(column.getCellIndex());
|
||||
if (cell != null && !ExpressionUtils.isBlank(cell.value)) {
|
||||
if (node2 instanceof CellTopicNode) {
|
||||
if (!((CellTopicNode) node2).createForNoReconMatch &&
|
||||
(cell.recon == null || cell.recon.judgment == Judgment.None)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
context.count++;
|
||||
if (context.limit > 0 && context.count > context.limit) {
|
||||
return;
|
||||
|
@ -9,6 +9,7 @@ import org.json.JSONObject;
|
||||
|
||||
import com.metaweb.gridworks.model.Cell;
|
||||
import com.metaweb.gridworks.model.Recon;
|
||||
import com.metaweb.gridworks.model.Recon.Judgment;
|
||||
import com.metaweb.gridworks.protograph.AnonymousNode;
|
||||
import com.metaweb.gridworks.protograph.CellKeyNode;
|
||||
import com.metaweb.gridworks.protograph.CellNode;
|
||||
@ -22,6 +23,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
||||
protected List<WritingTransposedNode> rootNodes = new LinkedList<WritingTransposedNode>();
|
||||
protected StringBuffer stringBuffer;
|
||||
protected Map<String, Long> varPool = new HashMap<String, Long>();
|
||||
protected Map<Long, String> newTopicVars = new HashMap<Long, String>();
|
||||
|
||||
public String getLoad() {
|
||||
stringBuffer = new StringBuffer();
|
||||
@ -38,7 +40,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
||||
stringBuffer.append(line);
|
||||
}
|
||||
protected void writeLine(String subject, String predicate, String object) {
|
||||
if (subject != null) {
|
||||
if (subject != null && object != null) {
|
||||
writeLine("{ 's' : '" + subject + "', 'p' : '" + predicate + "', 'o' : " + object + " }");
|
||||
}
|
||||
}
|
||||
@ -113,18 +115,30 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
||||
if (cell.recon != null &&
|
||||
cell.recon.judgment == Recon.Judgment.Matched &&
|
||||
cell.recon.match != null) {
|
||||
|
||||
id = cell.recon.match.topicID;
|
||||
} else if (node.createForNoReconMatch ||
|
||||
(cell.recon != null && cell.recon.judgment == Judgment.New)) {
|
||||
if (cell.recon != null && newTopicVars.containsKey(cell.recon.id)) {
|
||||
id = newTopicVars.get(cell.recon.id);
|
||||
} else {
|
||||
long var = 0;
|
||||
if (varPool.containsKey(node.columnName)) {
|
||||
var = varPool.get(node.columnName);
|
||||
}
|
||||
varPool.put(node.columnName, var + 1);
|
||||
|
||||
id = "$" + node.columnName.replaceAll("\\W+", "_") + "_" + var;
|
||||
|
||||
writeLine("{ 's' : '" + id + "', 'p' : 'type', 'o' : '" + node.type.id + "' }");
|
||||
writeLine("{ 's' : '" + id + "', 'p' : 'name', 'o' : " + JSONObject.quote(cell.value.toString()) + " }");
|
||||
|
||||
if (cell.recon != null) {
|
||||
newTopicVars.put(cell.recon.id, id);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
long var = 0;
|
||||
if (varPool.containsKey(node.columnName)) {
|
||||
var = varPool.get(node.columnName);
|
||||
}
|
||||
varPool.put(node.columnName, var + 1);
|
||||
|
||||
id = "$" + node.columnName.replaceAll("\\W+", "_") + "_" + var;
|
||||
|
||||
writeLine("{ 's' : '" + id + "', 'p' : 'type', 'o' : '" + node.type.id + "' }");
|
||||
writeLine("{ 's' : '" + id + "', 'p' : 'name', 'o' : " + JSONObject.quote(cell.value.toString()) + " }");
|
||||
return null;
|
||||
}
|
||||
|
||||
if (subject != null) {
|
||||
|
@ -130,7 +130,7 @@ DataTableCellUI.prototype._doMatchNewTopicToOneCell = function() {
|
||||
};
|
||||
|
||||
DataTableCellUI.prototype._doMatchNewTopicToSimilarCells = function() {
|
||||
this._doJudgmentForSimilarCells("new");
|
||||
this._doJudgmentForSimilarCells("new", { shareNewTopics: true });
|
||||
};
|
||||
|
||||
DataTableCellUI.prototype._doMatchTopicToOneCell = function(candidate) {
|
||||
|
@ -209,8 +209,8 @@ DataTableColumnHeaderUI.prototype._createMenuForColumnHeader = function(elmt) {
|
||||
}
|
||||
},
|
||||
{
|
||||
label: "Create One New Topic for All Cells",
|
||||
tooltip: "Mark to create one new, common topic for all cells in this column for all current filtered rows",
|
||||
label: "Create One New Topic for Similar Cells",
|
||||
tooltip: "Mark to create one new topic for each group of similar cells in this column for all current filtered rows",
|
||||
click: function() {
|
||||
self._doReconMarkNewTopics(true);
|
||||
}
|
||||
@ -459,10 +459,10 @@ DataTableColumnHeaderUI.prototype._doReconMatchBestCandidates = function() {
|
||||
);
|
||||
};
|
||||
|
||||
DataTableColumnHeaderUI.prototype._doReconMarkNewTopics = function() {
|
||||
DataTableColumnHeaderUI.prototype._doReconMarkNewTopics = function(shareNewTopics) {
|
||||
this._dataTableView.doPostThenUpdate(
|
||||
"recon-mark-new-topics",
|
||||
{ columnName: this._column.headerLabel }
|
||||
{ columnName: this._column.headerLabel, shareNewTopics: shareNewTopics }
|
||||
);
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user