kNN clustering is now fully operational, though not yet very practical; it needs more work and testing

git-svn-id: http://google-refine.googlecode.com/svn/trunk@225 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Stefano Mazzocchi 2010-03-07 08:27:13 +00:00
parent e06d8fe130
commit f7b0caa1b8
4 changed files with 35 additions and 18 deletions

View File

@ -60,10 +60,10 @@ public class kNNClusterer extends Clusterer {
_config = o;
_treeBuilder = new VPTreeBuilder(_distance);
try {
_radius = (float) o.getDouble("radius");
_radius = (float) o.getJSONObject("params").getDouble("radius");
} catch (JSONException e) {
Gridworks.warn("No radius found, using default");
_radius = 1.0f;
_radius = 0.1f;
}
}

View File

@ -40,11 +40,19 @@ public class Node implements Serializable {
return obj.toString();
}
public boolean equals(Object n) {
if (n instanceof Node) {
return ((Node) n).get().equals(this.obj);
} else {
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o instanceof Node) {
return ((Node) o).get().equals(this.obj);
}
return false;
}
/**
 * Returns the hash code of the wrapped object ({@code this.obj}).
 *
 * {@code equals(Object)} in this class compares the other node's
 * {@code get()} value against {@code this.obj}; presumably {@code get()}
 * returns that same field (TODO confirm), in which case nodes that compare
 * equal also produce the same hash code.
 *
 * @return the value of {@code this.obj.hashCode()}
 * @throws NullPointerException if {@code this.obj} is {@code null}
 */
@Override
public int hashCode() {
return this.obj.hashCode();
}
}

View File

@ -42,10 +42,9 @@ public class VPTreeBuilder {
public VPTree buildVPTree() {
Node[] nodes_array = this.nodes.toArray(new Node[this.nodes.size()]);
Gridworks.log("building tree with nodes: " + nodes_array.length);
VPTree tree = new VPTree();
tree.setRoot(addNode(nodes_array, 0, nodes_array.length - 1));
Gridworks.log("tree built");
Gridworks.log("Built vptree with " + nodes_array.length + " nodes");
return tree;
}
@ -68,9 +67,7 @@ public class VPTreeBuilder {
Map<Serializable,List<? extends Serializable>> map = new HashMap<Serializable,List<? extends Serializable>>();
for (Node n : nodes) {
Serializable s = n.get();
Gridworks.log(" find results for: " + s);
List<? extends Serializable> results = seeker.range(s, radius);
Gridworks.log(" found: " + results.size());
map.put(s, results);
}

View File

@ -14,7 +14,7 @@ FacetBasedEditDialog.prototype._createDialog = function() {
var frame = DialogSystem.createDialog();
frame.width("900px");
var header = $('<div></div>').addClass("dialog-header").text("Facet-based edit of column " + this._columnName).appendTo(frame);
var header = $('<div></div>').addClass("dialog-header").text("Cluster & Edit column " + this._columnName).appendTo(frame);
var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);
@ -28,14 +28,14 @@ FacetBasedEditDialog.prototype._createDialog = function() {
'</select>' +
'</td>' +
'<td>' +
'<div id="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
'<div class="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
'<option selected="true">fingerprint</option>' +
'<option>ngram-fingerprint</option>' +
'<option>double-metaphone</option>' +
'<option>metaphone</option>' +
'<option>soundex</option>' +
'</select></div>' +
'<div id="knn-controls" class="hidden">Distance Function: <select bind="distanceFunctionSelector">' +
'<div class="knn-controls hidden">Distance Function: <select bind="distanceFunctionSelector">' +
'<option selected="true">levenshtein</option>' +
'<option>jaro</option>' +
'<option>jaccard</option>' +
@ -48,6 +48,9 @@ FacetBasedEditDialog.prototype._createDialog = function() {
'<div id="ngram-fingerprint-params" class="function-params hidden">' +
'Ngram Size: <input type="text" value="1" bind="ngramSize">' +
'</div>' +
'<div class="knn-controls hidden">' +
'Radius: <input type="text" value="0.1" bind="radius">' +
'</div>' +
'</td>' +
'<td bind="resultSummary" align="right">' +
'</td>' +
@ -61,13 +64,13 @@ FacetBasedEditDialog.prototype._createDialog = function() {
this._elmts.methodSelector.change(function() {
var selection = $(this).find("option:selected").text();
if (selection == 'key collision') {
body.find("#binning-controls").show();
body.find("#knn-controls").hide();
body.find(".binning-controls").show();
body.find(".knn-controls").hide();
self._method = "binning";
self._elmts.keyingFunctionSelector.change();
} else if (selection = 'nearest neightbor') {
body.find("#binning-controls").hide();
body.find("#knn-controls").show();
body.find(".binning-controls").hide();
body.find(".knn-controls").show();
self._method = "knn";
self._elmts.distanceFunctionSelector.change();
}
@ -92,6 +95,15 @@ FacetBasedEditDialog.prototype._createDialog = function() {
}
});
// Re-run clustering whenever the user edits the radius field.
this._elmts.radius.change(function() {
    // parseFloat never throws: it returns NaN for non-numeric input, so the
    // value must be checked explicitly rather than relying on try/catch.
    var radius = parseFloat($(this).val());
    if (isNaN(radius)) {
        alert("radius must be a number");
        return;
    }
    // Only a valid number reaches the clusterer; errors raised by _cluster()
    // are no longer misreported as a bad radius.
    self._params = { "radius" : radius };
    self._cluster();
});
//this._elmts.clusterButton.click(function() { self._cluster(); });
//this._elmts.unclusterButton.click(function() { self._uncluster(); });