Implemented facets on cluster metrics in facet-based edit dialog.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@244 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
ff94de5900
commit
a3bcfc1576
@ -5,6 +5,8 @@ function FacetBasedEditDialog(columnName, expression) {
|
||||
this._function = "fingerprint";
|
||||
this._params = {};
|
||||
|
||||
this._facets = [];
|
||||
|
||||
this._createDialog();
|
||||
this._cluster();
|
||||
}
|
||||
@ -20,42 +22,49 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
||||
|
||||
var html = $(
|
||||
'<div>' +
|
||||
'<div class="facet-based-edit-dialog-controls"><table><tr>' +
|
||||
'<td>' +
|
||||
'Method: <select bind="methodSelector">' +
|
||||
'<option selected="true">key collision</option>' +
|
||||
'<option>nearest neightbor</option>' +
|
||||
'</select>' +
|
||||
'</td>' +
|
||||
'<td>' +
|
||||
'<div class="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
|
||||
'<option selected="true">fingerprint</option>' +
|
||||
'<option>ngram-fingerprint</option>' +
|
||||
'<option>double-metaphone</option>' +
|
||||
'<option>metaphone</option>' +
|
||||
'<option>soundex</option>' +
|
||||
'</select></div>' +
|
||||
'<div class="knn-controls hidden">Distance Function: <select bind="distanceFunctionSelector">' +
|
||||
'<option selected="true">levenshtein</option>' +
|
||||
'<option>jaro</option>' +
|
||||
'<option>jaccard</option>' +
|
||||
'<option>gzip</option>' +
|
||||
'<option>bzip2</option>' +
|
||||
'<option>PPM</option>' +
|
||||
'</select></div>' +
|
||||
'</td>' +
|
||||
'<td>' +
|
||||
'<div id="ngram-fingerprint-params" class="function-params hidden">' +
|
||||
'Ngram Size: <input type="text" value="1" bind="ngramSize">' +
|
||||
'</div>' +
|
||||
'<div class="knn-controls hidden">' +
|
||||
'Radius: <input type="text" value="0.1" bind="radius">' +
|
||||
'</div>' +
|
||||
'</td>' +
|
||||
'<td bind="resultSummary" align="right">' +
|
||||
'</td>' +
|
||||
'</tr></table></div>' +
|
||||
'<div bind="tableContainer" class="facet-based-edit-dialog-table-container"></div>' +
|
||||
'<div class="facet-based-edit-dialog-controls"><table width="100%">' +
|
||||
'<tr>' +
|
||||
'<td>' +
|
||||
'Method: <select bind="methodSelector">' +
|
||||
'<option selected="true">key collision</option>' +
|
||||
'<option>nearest neightbor</option>' +
|
||||
'</select>' +
|
||||
'</td>' +
|
||||
'<td>' +
|
||||
'<div class="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
|
||||
'<option selected="true">fingerprint</option>' +
|
||||
'<option>ngram-fingerprint</option>' +
|
||||
'<option>double-metaphone</option>' +
|
||||
'<option>metaphone</option>' +
|
||||
'<option>soundex</option>' +
|
||||
'</select></div>' +
|
||||
'<div class="knn-controls hidden">Distance Function: <select bind="distanceFunctionSelector">' +
|
||||
'<option selected="true">levenshtein</option>' +
|
||||
'<option>jaro</option>' +
|
||||
'<option>jaccard</option>' +
|
||||
'<option>gzip</option>' +
|
||||
'<option>bzip2</option>' +
|
||||
'<option>PPM</option>' +
|
||||
'</select></div>' +
|
||||
'</td>' +
|
||||
'<td>' +
|
||||
'<div id="ngram-fingerprint-params" class="function-params hidden">' +
|
||||
'Ngram Size: <input type="text" value="1" bind="ngramSize" size="3">' +
|
||||
'</div>' +
|
||||
'<div class="knn-controls hidden">' +
|
||||
'Radius: <input type="text" value="0.1" bind="radius" size="3">' +
|
||||
'</div>' +
|
||||
'</td>' +
|
||||
'<td bind="resultSummary" align="right">' +
|
||||
'</td>' +
|
||||
'</tr>' +
|
||||
'<tr>' +
|
||||
'<td colspan="3">' +
|
||||
'<div bind="tableContainer" class="facet-based-edit-dialog-table-container"></div>' +
|
||||
'</td>' +
|
||||
'<td bind="facetContainer" width="200"></td>' +
|
||||
'</tr>' +
|
||||
'</table></div>' +
|
||||
'</div>'
|
||||
).appendTo(body);
|
||||
|
||||
@ -117,7 +126,7 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
||||
$("#recon-dialog-tabs-strict").css("display", "");
|
||||
};
|
||||
|
||||
FacetBasedEditDialog.prototype._renderTable = function() {
|
||||
FacetBasedEditDialog.prototype._renderTable = function(clusters) {
|
||||
var self = this;
|
||||
|
||||
var container = this._elmts.tableContainer;
|
||||
@ -125,10 +134,11 @@ FacetBasedEditDialog.prototype._renderTable = function() {
|
||||
|
||||
var trHead = table.insertRow(table.rows.length);
|
||||
trHead.className = "header";
|
||||
$(trHead.insertCell(0)).text("Cluster size");
|
||||
$(trHead.insertCell(1)).text("Facet choices in Cluster");
|
||||
$(trHead.insertCell(2)).text("Edit?");
|
||||
$(trHead.insertCell(3)).text("New cell value");
|
||||
$(trHead.insertCell(0)).text("Cluster Size");
|
||||
$(trHead.insertCell(1)).text("Row Count");
|
||||
$(trHead.insertCell(2)).text("Values in Cluster");
|
||||
$(trHead.insertCell(3)).text("Edit?");
|
||||
$(trHead.insertCell(4)).text("New Cell Value");
|
||||
|
||||
var renderCluster = function(cluster) {
|
||||
var tr = table.insertRow(table.rows.length);
|
||||
@ -136,17 +146,23 @@ FacetBasedEditDialog.prototype._renderTable = function() {
|
||||
|
||||
$(tr.insertCell(0)).text(cluster.choices.length);
|
||||
|
||||
var ul = $(tr.insertCell(1));
|
||||
$(tr.insertCell(1)).text(cluster.rowCount);
|
||||
|
||||
var ul = $('<ul>');
|
||||
var choices = cluster.choices;
|
||||
var rowCount = 0;
|
||||
for (var c = 0; c < choices.length; c++) {
|
||||
var choice = choices[c];
|
||||
var li = $('<li>').appendTo(ul);
|
||||
$('<span>').text(choice.v).appendTo(li);
|
||||
$('<span>').text(" (" + choice.c + ")").appendTo(li);
|
||||
$('<span>').text("(" + choice.c + " rows)").addClass("facet-based-edit-dialog-entry-count").appendTo(li);
|
||||
|
||||
rowCount += choice.c;
|
||||
}
|
||||
$(tr.insertCell(2)).append(ul);
|
||||
|
||||
var editCheck = $('<input type="checkbox" />')
|
||||
.appendTo(tr.insertCell(2))
|
||||
.appendTo(tr.insertCell(3))
|
||||
.click(function() {
|
||||
cluster.edit = !cluster.edit;
|
||||
});
|
||||
@ -154,20 +170,24 @@ FacetBasedEditDialog.prototype._renderTable = function() {
|
||||
editCheck.attr("checked", "true");
|
||||
}
|
||||
|
||||
var input = $('<input size="55" />')
|
||||
var input = $('<input size="25" />')
|
||||
.attr("value", cluster.value)
|
||||
.appendTo(tr.insertCell(3))
|
||||
.appendTo(tr.insertCell(4))
|
||||
.keyup(function() {
|
||||
cluster.value = this.value;
|
||||
});
|
||||
};
|
||||
for (var i = 0; i < this._clusters.length; i++) {
|
||||
renderCluster(this._clusters[i]);
|
||||
for (var i = 0; i < clusters.length; i++) {
|
||||
renderCluster(clusters[i]);
|
||||
}
|
||||
|
||||
container.empty().append(table);
|
||||
|
||||
this._elmts.resultSummary.text(this._clusters.length + " clusters found.");
|
||||
this._elmts.resultSummary.text(
|
||||
(clusters.length === this._clusters.length) ?
|
||||
(this._clusters.length + " clusters found.") :
|
||||
(clusters.length + " clusters filtered from " + this._clusters.length + " total.")
|
||||
);
|
||||
};
|
||||
|
||||
FacetBasedEditDialog.prototype._cluster = function() {
|
||||
@ -191,21 +211,45 @@ FacetBasedEditDialog.prototype._cluster = function() {
|
||||
})
|
||||
},
|
||||
function(data) {
|
||||
var clusters = [];
|
||||
$.each(data, function() {
|
||||
clusters.push({
|
||||
edit: true,
|
||||
choices: this,
|
||||
value: this[0].v
|
||||
});
|
||||
});
|
||||
self._clusters = clusters;
|
||||
self._renderTable();
|
||||
self._updateData(data);
|
||||
},
|
||||
"json"
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Rebuilds this._clusters from a raw clustering response, attaches the
 * per-cluster metrics that the facets filter on, then recreates the facets
 * and refreshes everything via _resetFacets() and _updateAll().
 *
 * Each entry of `data` is treated as an array-like list of choices; a choice
 * is read through `v` (the value, whose string length feeds the length
 * statistics) and `c` (summed into the cluster's row count).
 *
 * Fields written on every cluster:
 *   edit      - true
 *   choices   - the entry itself
 *   value     - `v` of the first choice
 *   size      - number of choices
 *   rowCount  - sum of `c` over the choices
 *   avg       - mean of the value lengths
 *   variance  - square root of (mean of squared lengths - avg squared), i.e.
 *               the standard deviation of the value lengths. The field name
 *               is kept because the "Value Length Variance" facet looks it
 *               up under "variance".
 *
 * @param data collection of choice lists, iterated with $.each
 */
FacetBasedEditDialog.prototype._updateData = function(data) {
    var clusters = [];
    $.each(data, function() {
        var choices = this;
        if (!choices.length) {
            // An empty list has no first value to propose and would make the
            // averages below divide by zero, so it cannot form a cluster.
            return;
        }

        var cluster = {
            edit: true,
            choices: choices,
            value: choices[0].v,
            size: choices.length
        };

        var sum = 0;
        var sumSquared = 0;
        var rowCount = 0;
        for (var i = 0; i < choices.length; i++) {
            var choice = choices[i];
            rowCount += choice.c;

            var l = choice.v.length;
            sum += l;
            sumSquared += l * l;
        }

        cluster.rowCount = rowCount;
        cluster.avg = sum / choices.length;

        // Clamp at zero: floating-point rounding could push the difference
        // slightly negative, and Math.sqrt would then yield NaN, which in
        // turn would turn the facet's min/max into NaN.
        var spread = sumSquared / choices.length - cluster.avg * cluster.avg;
        cluster.variance = Math.sqrt(Math.max(0, spread));

        clusters.push(cluster);
    });
    this._clusters = clusters;

    this._resetFacets();
    this._updateAll();
};
|
||||
|
||||
FacetBasedEditDialog.prototype._onApplyClose = function() {
|
||||
var self = this;
|
||||
this._apply(function() {
|
||||
@ -221,9 +265,10 @@ FacetBasedEditDialog.prototype._onApplyReCluster = function() {
|
||||
};
|
||||
|
||||
FacetBasedEditDialog.prototype._apply = function(onDone) {
|
||||
var clusters = this._getRestrictedClusters();
|
||||
var edits = [];
|
||||
for (var i = 0; i < this._clusters.length; i++) {
|
||||
var cluster = this._clusters[i];
|
||||
for (var i = 0; i < clusters.length; i++) {
|
||||
var cluster = clusters[i];
|
||||
if (cluster.edit) {
|
||||
var values = [];
|
||||
for (var j = 0; j < cluster.choices.length; j++) {
|
||||
@ -263,3 +308,189 @@ FacetBasedEditDialog.prototype._dismiss = function() {
|
||||
DialogSystem.dismissUntil(this._level - 1);
|
||||
};
|
||||
|
||||
/**
 * Returns a new array built by concatenating this._clusters onto an empty
 * array, so callers can filter or reorder the result without touching the
 * dialog's own list. The cluster objects themselves are shared, not cloned.
 */
FacetBasedEditDialog.prototype._getBaseClusters = function() {
    var source = this._clusters;
    return Array.prototype.concat.call([], source);
};
|
||||
|
||||
/**
 * Narrows the base cluster list by passing it through restrict() of every
 * registered facet, in registration order.
 *
 * @param except optional facet to leave out of the chain (compared by
 *               identity); when omitted, every facet is applied
 * @return the clusters that survive all applied facets
 */
FacetBasedEditDialog.prototype._getRestrictedClusters = function(except) {
    var remaining = this._getBaseClusters();
    for (var k = 0; k < this._facets.length; k++) {
        var candidate = this._facets[k].facet;
        if (candidate === except) {
            continue;
        }
        remaining = candidate.restrict(remaining);
    }
    return remaining;
};
|
||||
|
||||
/**
 * Refreshes every facet and then the cluster table.
 *
 * Each facet is updated with the clusters restricted by all the *other*
 * facets (the facet itself is passed as the exception), while the table is
 * rendered from the clusters restricted by every facet.
 */
FacetBasedEditDialog.prototype._updateAll = function() {
    for (var n = 0; n < this._facets.length; n++) {
        var current = this._facets[n].facet;
        current.update(this._getRestrictedClusters(current));
    }

    var visible = this._getRestrictedClusters();
    this._renderTable(visible);
};
|
||||
|
||||
/**
 * Tears down the current facets (dispose() on the facet, remove() on its
 * element), empties this._facets, and creates the four standard facets
 * again in a fixed order: cluster size, row count, value length average and
 * value length variance.
 */
FacetBasedEditDialog.prototype._resetFacets = function() {
    for (var k = 0; k < this._facets.length; k++) {
        var entry = this._facets[k];
        entry.facet.dispose();
        entry.elmt.remove();
    }
    this._facets = [];

    // [ header title, cluster property ] for each facet, in display order
    var specs = [
        [ "Cluster Size", "size" ],
        [ "Row Count", "rowCount" ],
        [ "Value Length Average", "avg" ],
        [ "Value Length Variance", "variance" ]
    ];
    for (var s = 0; s < specs.length; s++) {
        this._createFacet(specs[s][0], specs[s][1]);
    }
};
|
||||
|
||||
/**
 * Appends a new <div> to the facet container, builds a facet inside it from
 * the current base clusters, and records the element/facet pair at the end
 * of this._facets.
 *
 * @param title    header text handed to the facet
 * @param property name of the cluster field the facet works on
 */
FacetBasedEditDialog.prototype._createFacet = function(title, property) {
    var holder = $('<div>').appendTo(this._elmts.facetContainer);
    var baseClusters = this._getBaseClusters();
    var created = new FacetBasedEditDialog.Facet(this, title, property, holder, baseClusters);

    this._facets.push({
        elmt: holder,
        facet: created
    });
};
|
||||
|
||||
/**
 * Numeric range facet over one field (`property`) of the dialog's clusters.
 *
 * The constructor scans `clusters` for the smallest and largest value of
 * `property`. When the values span a real range it derives a bin width,
 * records the unfiltered histogram in this._baseBins, and renders a header,
 * a histogram container, a two-handle slider and a range read-out into
 * `elmt`. When they do not (no clusters, one distinct value, or a NaN
 * value) the facet stays inert: nothing is rendered, this._baseBins is
 * empty, restrict() passes everything through and update() does nothing.
 *
 * @param dialog   owner; its _updateAll() is called on every slider move
 * @param title    text for the facet header (concatenated into markup as-is)
 * @param property name of the cluster field to read
 * @param elmt     container the facet renders into; must offer addClass()
 *                 and be a valid appendTo() target
 * @param clusters clusters that establish the range and baseline histogram
 */
FacetBasedEditDialog.Facet = function(dialog, title, property, elmt, clusters) {
    this._dialog = dialog;
    this._property = property;

    var self = this;

    var max = Number.NEGATIVE_INFINITY;
    var min = Number.POSITIVE_INFINITY;
    for (var i = 0; i < clusters.length; i++) {
        var val = clusters[i][property];
        max = Math.max(max, val);
        min = Math.min(min, val);
    }

    this._min = min;
    this._max = max;

    // Written as !(min < max) rather than (min >= max) so that a NaN range
    // is treated as "nothing to facet on" too.
    if (!(min < max)) {
        this._step = 0;
        this._binCount = 0;
        // restrict() and update() both test _baseBins.length, so it has to
        // exist even when there is no histogram.
        this._baseBins = [];
        return;
    }

    var diff = max - min;

    // Pick a power-of-ten bin width: grow it for wide ranges until 100 bins
    // cover the range, shrink it for narrow ranges while 100 bins overshoot.
    this._step = 1;
    if (diff > 10) {
        while (this._step * 100 < diff) {
            this._step *= 10;
        }
    } else {
        while (this._step * 100 > diff) {
            this._step /= 10;
        }
    }

    // Snap the range outward onto multiples of the bin width.
    this._min = (Math.floor(this._min / this._step) * this._step);
    this._max = (Math.ceil(this._max / this._step) * this._step);
    this._binCount = 1 + Math.ceil((this._max - this._min) / this._step);
    if (this._binCount > 100) {
        // Too many bins: merge neighbours once by doubling the width.
        this._step *= 2;
        this._binCount = Math.round((1 + this._binCount) / 2);
    }
    this._baseBins = this._computeDistribution(clusters);

    // The whole range is selected until the slider is moved.
    this._from = this._min;
    this._to = this._max;

    elmt.addClass("facet-based-edit-dialog-facet");
    var html = $(
        '<div class="facet-based-edit-dialog-facet-header">' + title + '</div>' +
        '<div class="facet-based-edit-dialog-facet-histogram" bind="histogramContainer"></div>' +
        '<div class="facet-based-edit-dialog-facet-slider" bind="slider"></div>' +
        '<div class="facet-based-edit-dialog-facet-selection" bind="selectionContainer"></div>'
    ).appendTo(elmt);

    this._elmts = DOM.bind(html);
    this._elmts.slider.slider({
        min: this._min,
        max: this._max,
        values: [ this._from, this._to ],
        slide: function(evt, ui) {
            self._from = ui.values[0];
            self._to = ui.values[1];
            self._setRangeIndicators();
            self._dialog._updateAll();
        }
    });
    this._setRangeIndicators();
};

/**
 * Called by the dialog before the facet's element is removed. The facet
 * currently holds nothing that needs explicit release.
 */
FacetBasedEditDialog.Facet.prototype.dispose = function() {
};

/**
 * Filters `clusters` down to those whose property value lies within the
 * selected range, both ends inclusive.
 *
 * @param clusters clusters to filter
 * @return the very same array when the facet is inert or the whole range is
 *         selected; otherwise a new array of the matching clusters
 */
FacetBasedEditDialog.Facet.prototype.restrict = function(clusters) {
    if (this._baseBins.length === 0 || (this._from == this._min && this._to == this._max)) {
        return clusters;
    }

    var clusters2 = [];
    for (var i = 0; i < clusters.length; i++) {
        var cluster = clusters[i];
        var val = cluster[this._property];
        if (val >= this._from && val <= this._to) {
            clusters2.push(cluster);
        }
    }
    return clusters2;
};

/**
 * Redraws the histogram as a stacked bar chart image: per bin, the share
 * held by `clusters` in the first series and the remainder of the baseline
 * (the clusters given to the constructor) in the second. Both are scaled so
 * the tallest baseline bin is 100. Does nothing for an inert facet.
 *
 * @param clusters clusters to plot against the baseline
 */
FacetBasedEditDialog.Facet.prototype.update = function(clusters) {
    if (this._baseBins.length === 0) {
        return;
    }

    var bins = this._computeDistribution(clusters);

    var max = 0;
    for (var i = 0; i < this._baseBins.length; i++) {
        max = Math.max(max, this._baseBins[i]);
    }

    var values = [];
    var diffs = [];
    for (var j = 0; j < this._baseBins.length; j++) {
        var v = Math.ceil(100 * bins[j] / max);
        var diff = Math.ceil(100 * this._baseBins[j] / max) - v;

        values.push(v == 0 ? 0 : Math.max(2, v)); // use min 2 to make sure something shows up
        diffs.push(diff == 0 ? 0 : Math.max(2, diff));
    }

    this._elmts.histogramContainer.empty();
    // NOTE(review): the chart is an image fetched from a remote Google chart
    // URL - confirm that endpoint is still served before relying on it.
    $('<img />').attr("src",
        "http://chart.apis.google.com/chart?" + [
            "chs=" + this._elmts.histogramContainer[0].offsetWidth + "x50",
            "cht=bvs&chbh=r,0&chco=000088,aaaaff",
            "chd=t:" + values.join(",") + "|" + diffs.join(",")
        ].join("&")
    ).appendTo(this._elmts.histogramContainer);
};

/**
 * Writes the selected range as "<from> to <to>" into the read-out element.
 */
FacetBasedEditDialog.Facet.prototype._setRangeIndicators = function() {
    this._elmts.selectionContainer.text(this._from + " to " + this._to);
};

/**
 * Counts how many of `clusters` fall into each bin. A value's bin index is
 * its offset from this._min divided by this._step, rounded to the nearest
 * integer.
 *
 * @param clusters clusters to count
 * @return array of this._binCount counts
 */
FacetBasedEditDialog.Facet.prototype._computeDistribution = function(clusters) {
    var bins = [];
    for (var b = 0; b < this._binCount; b++) {
        bins.push(0);
    }

    for (var i = 0; i < clusters.length; i++) {
        var cluster = clusters[i];
        var val = cluster[this._property];
        var bin = Math.round((val - this._min) / this._step);
        // Ignore values that land outside the bins (or yield NaN) rather
        // than growing the array or adding stray properties to it.
        if (bin >= 0 && bin < bins.length) {
            bins[bin]++;
        }
    }

    return bins;
};
|
||||
|
@ -46,10 +46,34 @@ table.facet-based-edit-dialog-entry-table input {
|
||||
padding: 0 0.1em;
|
||||
}
|
||||
|
||||
/* Row-count annotation appended after each value inside a cluster. */
.facet-based-edit-dialog-entry-count {
    margin-left: 0.5em;
    color: #aaa;
}

/* Control area of the dialog and the cells of its layout table. */
.facet-based-edit-dialog-controls {
    margin-bottom: 0.5em;
}
.facet-based-edit-dialog-controls td {
    padding-right: 0.5em;
}

/* One facet: header, histogram, slider and range read-out. */
.facet-based-edit-dialog-facet {
    margin-bottom: 1em;
}
.facet-based-edit-dialog-facet-header {
    font-weight: bold;
}
.facet-based-edit-dialog-facet-histogram {
    height: 52px;
    margin: 10px 5px 5px;
}
.facet-based-edit-dialog-facet-slider {
    margin: 5px;
}
.facet-based-edit-dialog-facet-selection {
    color: #888;
    text-align: center;
}
|
Loading…
Reference in New Issue
Block a user