kNN clustering is now fully operational, though not yet very practical; it needs more work and testing.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@225 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Stefano Mazzocchi 2010-03-07 08:27:13 +00:00
parent e06d8fe130
commit f7b0caa1b8
4 changed files with 35 additions and 18 deletions

View File

@ -60,10 +60,10 @@ public class kNNClusterer extends Clusterer {
_config = o; _config = o;
_treeBuilder = new VPTreeBuilder(_distance); _treeBuilder = new VPTreeBuilder(_distance);
try { try {
_radius = (float) o.getDouble("radius"); _radius = (float) o.getJSONObject("params").getDouble("radius");
} catch (JSONException e) { } catch (JSONException e) {
Gridworks.warn("No radius found, using default"); Gridworks.warn("No radius found, using default");
_radius = 1.0f; _radius = 0.1f;
} }
} }

View File

@ -40,11 +40,19 @@ public class Node implements Serializable {
return obj.toString(); return obj.toString();
} }
public boolean equals(Object n) { @Override
if (n instanceof Node) { public boolean equals(Object o) {
return ((Node) n).get().equals(this.obj); if (this == o) {
} else { return true;
}
if (o instanceof Node) {
return ((Node) o).get().equals(this.obj);
}
return false; return false;
} }
@Override
public int hashCode() {
return this.obj.hashCode();
} }
} }

View File

@ -42,10 +42,9 @@ public class VPTreeBuilder {
public VPTree buildVPTree() { public VPTree buildVPTree() {
Node[] nodes_array = this.nodes.toArray(new Node[this.nodes.size()]); Node[] nodes_array = this.nodes.toArray(new Node[this.nodes.size()]);
Gridworks.log("building tree with nodes: " + nodes_array.length);
VPTree tree = new VPTree(); VPTree tree = new VPTree();
tree.setRoot(addNode(nodes_array, 0, nodes_array.length - 1)); tree.setRoot(addNode(nodes_array, 0, nodes_array.length - 1));
Gridworks.log("tree built"); Gridworks.log("Built vptree with " + nodes_array.length + " nodes");
return tree; return tree;
} }
@ -68,9 +67,7 @@ public class VPTreeBuilder {
Map<Serializable,List<? extends Serializable>> map = new HashMap<Serializable,List<? extends Serializable>>(); Map<Serializable,List<? extends Serializable>> map = new HashMap<Serializable,List<? extends Serializable>>();
for (Node n : nodes) { for (Node n : nodes) {
Serializable s = n.get(); Serializable s = n.get();
Gridworks.log(" find results for: " + s);
List<? extends Serializable> results = seeker.range(s, radius); List<? extends Serializable> results = seeker.range(s, radius);
Gridworks.log(" found: " + results.size());
map.put(s, results); map.put(s, results);
} }

View File

@ -14,7 +14,7 @@ FacetBasedEditDialog.prototype._createDialog = function() {
var frame = DialogSystem.createDialog(); var frame = DialogSystem.createDialog();
frame.width("900px"); frame.width("900px");
var header = $('<div></div>').addClass("dialog-header").text("Facet-based edit of column " + this._columnName).appendTo(frame); var header = $('<div></div>').addClass("dialog-header").text("Cluster & Edit column " + this._columnName).appendTo(frame);
var body = $('<div></div>').addClass("dialog-body").appendTo(frame); var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame); var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);
@ -28,14 +28,14 @@ FacetBasedEditDialog.prototype._createDialog = function() {
'</select>' + '</select>' +
'</td>' + '</td>' +
'<td>' + '<td>' +
'<div id="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' + '<div class="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
'<option selected="true">fingerprint</option>' + '<option selected="true">fingerprint</option>' +
'<option>ngram-fingerprint</option>' + '<option>ngram-fingerprint</option>' +
'<option>double-metaphone</option>' + '<option>double-metaphone</option>' +
'<option>metaphone</option>' + '<option>metaphone</option>' +
'<option>soundex</option>' + '<option>soundex</option>' +
'</select></div>' + '</select></div>' +
'<div id="knn-controls" class="hidden">Distance Function: <select bind="distanceFunctionSelector">' + '<div class="knn-controls hidden">Distance Function: <select bind="distanceFunctionSelector">' +
'<option selected="true">levenshtein</option>' + '<option selected="true">levenshtein</option>' +
'<option>jaro</option>' + '<option>jaro</option>' +
'<option>jaccard</option>' + '<option>jaccard</option>' +
@ -48,6 +48,9 @@ FacetBasedEditDialog.prototype._createDialog = function() {
'<div id="ngram-fingerprint-params" class="function-params hidden">' + '<div id="ngram-fingerprint-params" class="function-params hidden">' +
'Ngram Size: <input type="text" value="1" bind="ngramSize">' + 'Ngram Size: <input type="text" value="1" bind="ngramSize">' +
'</div>' + '</div>' +
'<div class="knn-controls hidden">' +
'Radius: <input type="text" value="0.1" bind="radius">' +
'</div>' +
'</td>' + '</td>' +
'<td bind="resultSummary" align="right">' + '<td bind="resultSummary" align="right">' +
'</td>' + '</td>' +
@ -61,13 +64,13 @@ FacetBasedEditDialog.prototype._createDialog = function() {
this._elmts.methodSelector.change(function() { this._elmts.methodSelector.change(function() {
var selection = $(this).find("option:selected").text(); var selection = $(this).find("option:selected").text();
if (selection == 'key collision') { if (selection == 'key collision') {
body.find("#binning-controls").show(); body.find(".binning-controls").show();
body.find("#knn-controls").hide(); body.find(".knn-controls").hide();
self._method = "binning"; self._method = "binning";
self._elmts.keyingFunctionSelector.change(); self._elmts.keyingFunctionSelector.change();
} else if (selection = 'nearest neightbor') { } else if (selection = 'nearest neightbor') {
body.find("#binning-controls").hide(); body.find(".binning-controls").hide();
body.find("#knn-controls").show(); body.find(".knn-controls").show();
self._method = "knn"; self._method = "knn";
self._elmts.distanceFunctionSelector.change(); self._elmts.distanceFunctionSelector.change();
} }
@ -92,6 +95,15 @@ FacetBasedEditDialog.prototype._createDialog = function() {
} }
}); });
// Re-run clustering whenever the user edits the kNN radius field.
// parseFloat() never throws: it returns NaN for non-numeric text, so the
// parsed value is validated explicitly rather than relying on try/catch
// (which could never fire for bad input and would mislabel any error
// raised inside _cluster() as a radius problem).
this._elmts.radius.change(function() {
    var radius = parseFloat($(this).val());
    // Rejects NaN (unparseable text) as well as +/-Infinity.
    if (!isFinite(radius)) {
        alert("radius must be a number");
        return;
    }
    self._params = { "radius" : radius };
    self._cluster();
});
//this._elmts.clusterButton.click(function() { self._cluster(); }); //this._elmts.clusterButton.click(function() { self._cluster(); });
//this._elmts.unclusterButton.click(function() { self._uncluster(); }); //this._elmts.unclusterButton.click(function() { self._uncluster(); });