kNN clustering is now fully operational, but it is not yet very practical and needs more work and testing
git-svn-id: http://google-refine.googlecode.com/svn/trunk@225 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
e06d8fe130
commit
f7b0caa1b8
@ -60,10 +60,10 @@ public class kNNClusterer extends Clusterer {
|
||||
_config = o;
|
||||
_treeBuilder = new VPTreeBuilder(_distance);
|
||||
try {
|
||||
_radius = (float) o.getDouble("radius");
|
||||
_radius = (float) o.getJSONObject("params").getDouble("radius");
|
||||
} catch (JSONException e) {
|
||||
Gridworks.warn("No radius found, using default");
|
||||
_radius = 1.0f;
|
||||
_radius = 0.1f;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -40,11 +40,19 @@ public class Node implements Serializable {
|
||||
return obj.toString();
|
||||
}
|
||||
|
||||
public boolean equals(Object n) {
|
||||
if (n instanceof Node) {
|
||||
return ((Node) n).get().equals(this.obj);
|
||||
} else {
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o instanceof Node) {
|
||||
return ((Node) o).get().equals(this.obj);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.obj.hashCode();
|
||||
}
|
||||
}
|
||||
|
@ -42,10 +42,9 @@ public class VPTreeBuilder {
|
||||
|
||||
public VPTree buildVPTree() {
|
||||
Node[] nodes_array = this.nodes.toArray(new Node[this.nodes.size()]);
|
||||
Gridworks.log("building tree with nodes: " + nodes_array.length);
|
||||
VPTree tree = new VPTree();
|
||||
tree.setRoot(addNode(nodes_array, 0, nodes_array.length - 1));
|
||||
Gridworks.log("tree built");
|
||||
Gridworks.log("Built vptree with " + nodes_array.length + " nodes");
|
||||
return tree;
|
||||
}
|
||||
|
||||
@ -68,9 +67,7 @@ public class VPTreeBuilder {
|
||||
Map<Serializable,List<? extends Serializable>> map = new HashMap<Serializable,List<? extends Serializable>>();
|
||||
for (Node n : nodes) {
|
||||
Serializable s = n.get();
|
||||
Gridworks.log(" find results for: " + s);
|
||||
List<? extends Serializable> results = seeker.range(s, radius);
|
||||
Gridworks.log(" found: " + results.size());
|
||||
map.put(s, results);
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,7 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
||||
var frame = DialogSystem.createDialog();
|
||||
frame.width("900px");
|
||||
|
||||
var header = $('<div></div>').addClass("dialog-header").text("Facet-based edit of column " + this._columnName).appendTo(frame);
|
||||
var header = $('<div></div>').addClass("dialog-header").text("Cluster & Edit column " + this._columnName).appendTo(frame);
|
||||
var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
|
||||
var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);
|
||||
|
||||
@ -28,14 +28,14 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
||||
'</select>' +
|
||||
'</td>' +
|
||||
'<td>' +
|
||||
'<div id="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
|
||||
'<div class="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
|
||||
'<option selected="true">fingerprint</option>' +
|
||||
'<option>ngram-fingerprint</option>' +
|
||||
'<option>double-metaphone</option>' +
|
||||
'<option>metaphone</option>' +
|
||||
'<option>soundex</option>' +
|
||||
'</select></div>' +
|
||||
'<div id="knn-controls" class="hidden">Distance Function: <select bind="distanceFunctionSelector">' +
|
||||
'<div class="knn-controls hidden">Distance Function: <select bind="distanceFunctionSelector">' +
|
||||
'<option selected="true">levenshtein</option>' +
|
||||
'<option>jaro</option>' +
|
||||
'<option>jaccard</option>' +
|
||||
@ -48,6 +48,9 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
||||
'<div id="ngram-fingerprint-params" class="function-params hidden">' +
|
||||
'Ngram Size: <input type="text" value="1" bind="ngramSize">' +
|
||||
'</div>' +
|
||||
'<div class="knn-controls hidden">' +
|
||||
'Radius: <input type="text" value="0.1" bind="radius">' +
|
||||
'</div>' +
|
||||
'</td>' +
|
||||
'<td bind="resultSummary" align="right">' +
|
||||
'</td>' +
|
||||
@ -61,13 +64,13 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
||||
this._elmts.methodSelector.change(function() {
|
||||
var selection = $(this).find("option:selected").text();
|
||||
if (selection == 'key collision') {
|
||||
body.find("#binning-controls").show();
|
||||
body.find("#knn-controls").hide();
|
||||
body.find(".binning-controls").show();
|
||||
body.find(".knn-controls").hide();
|
||||
self._method = "binning";
|
||||
self._elmts.keyingFunctionSelector.change();
|
||||
} else if (selection = 'nearest neightbor') {
|
||||
body.find("#binning-controls").hide();
|
||||
body.find("#knn-controls").show();
|
||||
body.find(".binning-controls").hide();
|
||||
body.find(".knn-controls").show();
|
||||
self._method = "knn";
|
||||
self._elmts.distanceFunctionSelector.change();
|
||||
}
|
||||
@ -92,6 +95,15 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
||||
}
|
||||
});
|
||||
|
||||
this._elmts.radius.change(function() {
|
||||
try {
|
||||
self._params = { "radius" : parseFloat($(this).val()) };
|
||||
self._cluster();
|
||||
} catch (e) {
|
||||
alert("radius must be a number");
|
||||
}
|
||||
});
|
||||
|
||||
//this._elmts.clusterButton.click(function() { self._cluster(); });
|
||||
//this._elmts.unclusterButton.click(function() { self._uncluster(); });
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user