kNN clustering is now fully operational, though not yet very practical; it needs more work and testing.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@225 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
e06d8fe130
commit
f7b0caa1b8
@ -60,10 +60,10 @@ public class kNNClusterer extends Clusterer {
|
|||||||
_config = o;
|
_config = o;
|
||||||
_treeBuilder = new VPTreeBuilder(_distance);
|
_treeBuilder = new VPTreeBuilder(_distance);
|
||||||
try {
|
try {
|
||||||
_radius = (float) o.getDouble("radius");
|
_radius = (float) o.getJSONObject("params").getDouble("radius");
|
||||||
} catch (JSONException e) {
|
} catch (JSONException e) {
|
||||||
Gridworks.warn("No radius found, using default");
|
Gridworks.warn("No radius found, using default");
|
||||||
_radius = 1.0f;
|
_radius = 0.1f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -40,11 +40,19 @@ public class Node implements Serializable {
|
|||||||
return obj.toString();
|
return obj.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean equals(Object n) {
|
@Override
|
||||||
if (n instanceof Node) {
|
public boolean equals(Object o) {
|
||||||
return ((Node) n).get().equals(this.obj);
|
if (this == o) {
|
||||||
} else {
|
return true;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
if (o instanceof Node) {
|
||||||
|
return ((Node) o).get().equals(this.obj);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return this.obj.hashCode();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -42,10 +42,9 @@ public class VPTreeBuilder {
|
|||||||
|
|
||||||
public VPTree buildVPTree() {
|
public VPTree buildVPTree() {
|
||||||
Node[] nodes_array = this.nodes.toArray(new Node[this.nodes.size()]);
|
Node[] nodes_array = this.nodes.toArray(new Node[this.nodes.size()]);
|
||||||
Gridworks.log("building tree with nodes: " + nodes_array.length);
|
|
||||||
VPTree tree = new VPTree();
|
VPTree tree = new VPTree();
|
||||||
tree.setRoot(addNode(nodes_array, 0, nodes_array.length - 1));
|
tree.setRoot(addNode(nodes_array, 0, nodes_array.length - 1));
|
||||||
Gridworks.log("tree built");
|
Gridworks.log("Built vptree with " + nodes_array.length + " nodes");
|
||||||
return tree;
|
return tree;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,9 +67,7 @@ public class VPTreeBuilder {
|
|||||||
Map<Serializable,List<? extends Serializable>> map = new HashMap<Serializable,List<? extends Serializable>>();
|
Map<Serializable,List<? extends Serializable>> map = new HashMap<Serializable,List<? extends Serializable>>();
|
||||||
for (Node n : nodes) {
|
for (Node n : nodes) {
|
||||||
Serializable s = n.get();
|
Serializable s = n.get();
|
||||||
Gridworks.log(" find results for: " + s);
|
|
||||||
List<? extends Serializable> results = seeker.range(s, radius);
|
List<? extends Serializable> results = seeker.range(s, radius);
|
||||||
Gridworks.log(" found: " + results.size());
|
|
||||||
map.put(s, results);
|
map.put(s, results);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
|||||||
var frame = DialogSystem.createDialog();
|
var frame = DialogSystem.createDialog();
|
||||||
frame.width("900px");
|
frame.width("900px");
|
||||||
|
|
||||||
var header = $('<div></div>').addClass("dialog-header").text("Facet-based edit of column " + this._columnName).appendTo(frame);
|
var header = $('<div></div>').addClass("dialog-header").text("Cluster & Edit column " + this._columnName).appendTo(frame);
|
||||||
var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
|
var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
|
||||||
var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);
|
var footer = $('<div></div>').addClass("dialog-footer").appendTo(frame);
|
||||||
|
|
||||||
@ -28,14 +28,14 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
|||||||
'</select>' +
|
'</select>' +
|
||||||
'</td>' +
|
'</td>' +
|
||||||
'<td>' +
|
'<td>' +
|
||||||
'<div id="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
|
'<div class="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
|
||||||
'<option selected="true">fingerprint</option>' +
|
'<option selected="true">fingerprint</option>' +
|
||||||
'<option>ngram-fingerprint</option>' +
|
'<option>ngram-fingerprint</option>' +
|
||||||
'<option>double-metaphone</option>' +
|
'<option>double-metaphone</option>' +
|
||||||
'<option>metaphone</option>' +
|
'<option>metaphone</option>' +
|
||||||
'<option>soundex</option>' +
|
'<option>soundex</option>' +
|
||||||
'</select></div>' +
|
'</select></div>' +
|
||||||
'<div id="knn-controls" class="hidden">Distance Function: <select bind="distanceFunctionSelector">' +
|
'<div class="knn-controls hidden">Distance Function: <select bind="distanceFunctionSelector">' +
|
||||||
'<option selected="true">levenshtein</option>' +
|
'<option selected="true">levenshtein</option>' +
|
||||||
'<option>jaro</option>' +
|
'<option>jaro</option>' +
|
||||||
'<option>jaccard</option>' +
|
'<option>jaccard</option>' +
|
||||||
@ -48,6 +48,9 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
|||||||
'<div id="ngram-fingerprint-params" class="function-params hidden">' +
|
'<div id="ngram-fingerprint-params" class="function-params hidden">' +
|
||||||
'Ngram Size: <input type="text" value="1" bind="ngramSize">' +
|
'Ngram Size: <input type="text" value="1" bind="ngramSize">' +
|
||||||
'</div>' +
|
'</div>' +
|
||||||
|
'<div class="knn-controls hidden">' +
|
||||||
|
'Radius: <input type="text" value="0.1" bind="radius">' +
|
||||||
|
'</div>' +
|
||||||
'</td>' +
|
'</td>' +
|
||||||
'<td bind="resultSummary" align="right">' +
|
'<td bind="resultSummary" align="right">' +
|
||||||
'</td>' +
|
'</td>' +
|
||||||
@ -61,13 +64,13 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
|||||||
this._elmts.methodSelector.change(function() {
|
this._elmts.methodSelector.change(function() {
|
||||||
var selection = $(this).find("option:selected").text();
|
var selection = $(this).find("option:selected").text();
|
||||||
if (selection == 'key collision') {
|
if (selection == 'key collision') {
|
||||||
body.find("#binning-controls").show();
|
body.find(".binning-controls").show();
|
||||||
body.find("#knn-controls").hide();
|
body.find(".knn-controls").hide();
|
||||||
self._method = "binning";
|
self._method = "binning";
|
||||||
self._elmts.keyingFunctionSelector.change();
|
self._elmts.keyingFunctionSelector.change();
|
||||||
} else if (selection = 'nearest neightbor') {
|
} else if (selection = 'nearest neightbor') {
|
||||||
body.find("#binning-controls").hide();
|
body.find(".binning-controls").hide();
|
||||||
body.find("#knn-controls").show();
|
body.find(".knn-controls").show();
|
||||||
self._method = "knn";
|
self._method = "knn";
|
||||||
self._elmts.distanceFunctionSelector.change();
|
self._elmts.distanceFunctionSelector.change();
|
||||||
}
|
}
|
||||||
@ -92,6 +95,15 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
this._elmts.radius.change(function() {
|
||||||
|
try {
|
||||||
|
self._params = { "radius" : parseFloat($(this).val()) };
|
||||||
|
self._cluster();
|
||||||
|
} catch (e) {
|
||||||
|
alert("radius must be a number");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
//this._elmts.clusterButton.click(function() { self._cluster(); });
|
//this._elmts.clusterButton.click(function() { self._cluster(); });
|
||||||
//this._elmts.unclusterButton.click(function() { self._uncluster(); });
|
//this._elmts.unclusterButton.click(function() { self._uncluster(); });
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user