// 2010-03-13 10:32:06 +01:00
/**
 * Dialog that clusters the values of one column and lets the user merge
 * the members of each cluster into a single value.
 *
 * The constructor records the target column and expression, initialises the
 * clustering configuration ("binning" method, "fingerprint" function, no
 * parameters, no facets), builds the dialog and immediately starts a first
 * clustering run.
 *
 * @param columnName name of the column to cluster; shown in the dialog
 *        header and sent with every request
 * @param expression expression sent along with the mass-edit request
 */
function ClusteringDialog(columnName, expression) {
    this._columnName = columnName;
    this._expression = expression;

    // Initial clustering configuration; changed later by the dialog controls.
    this._method = "binning";
    this._function = "fingerprint";
    this._params = {};

    // Entries of the form { elmt, facet }, filled in by _createFacet().
    this._facets = [];

    this._createDialog();
    this._cluster();
}
// 2010-03-13 10:32:06 +01:00
/**
 * Builds the dialog DOM (header, body with the clustering controls, footer
 * with the action buttons), wires up all control handlers and shows the
 * dialog. The bound elements are stored in this._elmts and the dialog level
 * returned by DialogSystem.showDialog() in this._level.
 */
ClusteringDialog.prototype._createDialog = function() {
    var self = this;
    var frame = DialogSystem.createDialog();

    frame.width("900px");

    $('<div></div>').addClass("dialog-header").text('Cluster & Edit column "' + this._columnName + '"').appendTo(frame);
    var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
    var footer = $(
        '<div class="dialog-footer">' +
            '<table width="100%"><tr>' +
                '<td class="left" style="text-align: left"></td>' +
                '<td class="right" style="text-align: right"></td>' +
            '</tr></table>' +
        '</div>'
    ).appendTo(frame);

    var html = $(
        '<div class="grid-layout layout-normal layout-full"><table>' +
            '<tr>' +
                '<td>' +
                    'Method: <select bind="methodSelector">' +
                        '<option selected="true">key collision</option>' +
                        '<option>nearest neightbor</option>' +
                    '</select>' +
                '</td>' +
                '<td>' +
                    '<div class="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
                        '<option selected="true">fingerprint</option>' +
                        '<option>ngram-fingerprint</option>' +
                        '<option>double-metaphone</option>' +
                    '</select></div>' +
                    '<div class="knn-controls hidden">Distance Function: <select bind="distanceFunctionSelector">' +
                        '<option selected="true">levenshtein</option>' +
                        '<option>PPM</option>' +
                    '</select></div>' +
                '</td>' +
                '<td>' +
                    '<div id="ngram-fingerprint-params" class="function-params hidden">' +
                        'Ngram Size: <input type="text" value="2" bind="ngramSize" name="ngram-size" size="3" class="param" datatype="int">' +
                    '</div>' +
                    '<div class="knn-controls hidden">' +
                        '<span style="margin-right: 1em">Radius: <input type="text" value="1.0" bind="radius" name="radius" size="3" class="param" datatype="float"></span>' +
                        '<span>Block Chars: <input type="text" value="6" bind="ngramBlock" name="blocking-ngram-size" size="3" class="param" datatype="int"></span>' +
                    '</div>' +
                '</td>' +
                '<td bind="resultSummary" align="right">' +
                '</td>' +
            '</tr>' +
            '<tr>' +
                '<td colspan="3">' +
                    '<div bind="tableContainer" class="clustering-dialog-table-container"></div>' +
                '</td>' +
                '<td bind="facetContainer" width="200"></td>' +
            '</tr>' +
        '</table></div>'
    ).appendTo(body);

    this._elmts = DOM.bind(html);

    // Re-reads every visible parameter input into this._params (converted
    // according to its "datatype" attribute) and starts a new clustering run.
    var paramsChanger = function() {
        self._params = {};
        $(".dialog-body input.param:visible").each(function() {
            var e = $(this);
            var name = e.attr('name');
            var datatype = e.attr('datatype') || 'string';
            var value = e.val();
            if (datatype == 'int') {
                // Explicit radix so input such as "08" is always read as decimal.
                value = parseInt(value, 10);
            } else if (datatype == 'float') {
                value = parseFloat(value);
            }
            self._params[name] = value;
        });
        self._cluster();
    };

    // Shared by both function selectors: remembers the selected function,
    // shows its parameter panel (element id "<function>-params", if any)
    // and re-clusters through paramsChanger.
    var functionChanger = function() {
        self._function = $(this).find("option:selected").text();
        $(".function-params").hide();
        $("#" + self._function + "-params").show();
        paramsChanger();
    };

    this._elmts.methodSelector.change(function() {
        var selection = $(this).find("option:selected").text();
        if (selection === 'key collision') {
            body.find(".binning-controls").show();
            body.find(".knn-controls").hide();
            self._method = "binning";
            self._elmts.keyingFunctionSelector.change();
        } else if (selection === 'nearest neightbor') {
            // Must be a comparison: an assignment here is always truthy and
            // would switch to knn for any unrecognised selection. The label
            // spelling matches the <option> text above.
            body.find(".binning-controls").hide();
            body.find(".knn-controls").show();
            self._method = "knn";
            self._elmts.distanceFunctionSelector.change();
        }
    });

    this._elmts.keyingFunctionSelector.change(functionChanger);
    this._elmts.distanceFunctionSelector.change(functionChanger);

    this._elmts.ngramSize.change(paramsChanger);
    this._elmts.radius.change(paramsChanger);
    this._elmts.ngramBlock.change(paramsChanger);

    var leftFooter = footer.find(".left");
    $('<button></button>').text("Select All").click(function() { self._selectAll(); }).appendTo(leftFooter);
    $('<button></button>').text("Deselect All").click(function() { self._deselectAll(); }).appendTo(leftFooter);

    var rightFooter = footer.find(".right");
    $('<button></button>').text("Apply & Re-Cluster").click(function() { self._onApplyReCluster(); }).appendTo(rightFooter);
    $('<button></button>').text("Apply & Close").click(function() { self._onApplyClose(); }).appendTo(rightFooter);
    $('<button></button>').text("Close").click(function() { self._dismiss(); }).appendTo(rightFooter);

    this._level = DialogSystem.showDialog(frame);

    // NOTE(review): these two ids are not created by this dialog; the calls
    // are kept unchanged from the original code — confirm whether they are
    // still needed.
    $("#recon-dialog-tabs").tabs();
    $("#recon-dialog-tabs-strict").css("display", "");
};
// 2010-03-13 10:32:06 +01:00
/**
 * Renders the given clusters as a table inside the dialog and updates the
 * result summary. With no clusters, a "no clusters found" hint replaces the
 * table and the summary is left untouched.
 *
 * Each row shows the number of choices, the row count, the list of choices
 * (each a link that adopts that choice as the new value), a "Merge?"
 * checkbox bound to cluster.edit and a text box bound to cluster.value.
 *
 * @param clusters clusters to show; each needs choices ({v, c} items),
 *        rowCount, edit and value. this._clusters is used for the total.
 */
ClusteringDialog.prototype._renderTable = function(clusters) {
    var container = this._elmts.tableContainer;

    if (clusters.length === 0) {
        container.html(
            '<div style="margin: 2em;"><div style="font-size: 130%; color: #333;">No clusters were found with the selected method</div><div style="padding-top: 1em; font-size: 110%; color: #888;">Try selecting another method above or changing its parameters</div></div>'
        );
        return;
    }

    var table = $('<table></table>').addClass("clustering-dialog-entry-table")[0];

    var trHead = table.insertRow(table.rows.length);
    trHead.className = "header";
    $(trHead.insertCell(0)).text("Cluster Size");
    $(trHead.insertCell(1)).text("Row Count");
    $(trHead.insertCell(2)).text("Values in Cluster");
    $(trHead.insertCell(3)).text("Merge?");
    $(trHead.insertCell(4)).text("New Cell Value");

    var renderCluster = function(cluster) {
        var tr = table.insertRow(table.rows.length);
        tr.className = table.rows.length % 2 === 0 ? "odd" : "even";

        $(tr.insertCell(0)).text(cluster.choices.length);
        $(tr.insertCell(1)).text(cluster.rowCount);

        // Click handler for a choice link: adopt its text as the new value
        // and tick the row's merge checkbox.
        var useChoice = function() {
            var link = $(this);
            var value = link.text();
            var row = link.closest("tr");

            // Setting the text box programmatically fires no keyup event,
            // so the model has to be updated explicitly here.
            cluster.value = value;
            row.find("input[type='text']").val(value);
            row.find("input[type='checkbox']:not(:checked)").attr('checked', true).change();
            return false;
        };

        var ul = $('<ul></ul>');
        var choices = cluster.choices;
        for (var c = 0; c < choices.length; c++) {
            var choice = choices[c];
            var li = $('<li></li>');
            $('<a href="javascript:{}" title="Use this value"></a>').text(choice.v).click(useChoice).appendTo(li);
            $('<span></span>').text("(" + choice.c + " rows)").addClass("clustering-dialog-entry-count").appendTo(li);
            li.appendTo(ul);
        }
        $(tr.insertCell(2)).append(ul);

        var editCheck = $('<input type="checkbox" />')
            .change(function() {
                cluster.edit = !cluster.edit;
            })
            .appendTo(tr.insertCell(3));
        if (cluster.edit) {
            editCheck.attr("checked", "true");
        }

        // Explicit type attribute so the "input[type='text']" selector in
        // useChoice matches this box.
        $('<input type="text" size="25" />')
            .attr("value", cluster.value)
            .keyup(function() {
                cluster.value = this.value;
            })
            .appendTo(tr.insertCell(4));
    };

    for (var i = 0; i < clusters.length; i++) {
        renderCluster(clusters[i]);
    }

    container.empty().append(table);

    var total = this._clusters.length;
    this._elmts.resultSummary.html(
        (clusters.length === total) ?
            ("<b>" + total + "</b> cluster" + ((total != 1) ? "s" : "") + " found") :
            ("<b>" + clusters.length + "</b> cluster" + ((clusters.length != 1) ? "s" : "") + " filtered from <b>" + total + "</b> total")
    );
};
// 2010-03-13 10:32:06 +01:00
/**
 * Starts a clustering run on the server with the current method, function,
 * column and parameters. A spinner replaces the table and the result summary
 * is cleared while the request is pending; the JSON response is handed to
 * _updateData().
 *
 * Every run is numbered in this._latestClusterRequest so that a response
 * arriving after a newer run has been started is ignored instead of
 * overwriting the newer results.
 */
ClusteringDialog.prototype._cluster = function() {
    var self = this;

    this._elmts.tableContainer.html(
        '<div style="margin: 1em; font-size: 130%; color: #888;">Clustering... <img src="/images/small-spinner.gif"></div>'
    );
    this._elmts.resultSummary.empty();

    var requestId = (this._latestClusterRequest || 0) + 1;
    this._latestClusterRequest = requestId;

    $.post(
        "/command/compute-clusters?" + $.param({ project: theProject.id }),
        {
            engine: JSON.stringify(ui.browsingEngine.getJSON()),
            clusterer: JSON.stringify({
                'type': this._method,
                'function': this._function,
                'column': this._columnName,
                'params': this._params
            })
        },
        function(data) {
            // Drop responses that belong to a superseded run.
            if (requestId === self._latestClusterRequest) {
                self._updateData(data);
            }
        },
        "json"
    );
};
// 2010-03-02 21:33:11 +01:00
// 2010-03-13 10:32:06 +01:00
/**
 * Converts the server response into cluster records, stores them in
 * this._clusters, rebuilds the facets and re-renders everything.
 *
 * Each record holds: edit (false), choices (the response group itself),
 * value (the first choice's v), size (number of choices), rowCount (sum of
 * the choices' c), avg (mean length of the choices' v) and variance. Note
 * that "variance" actually stores the square root of the length variance,
 * i.e. the standard deviation; the property name is kept because the facets
 * refer to it.
 *
 * @param data response of the compute-clusters command; presumably an array
 *        of groups, each an array of {v, c} choices — TODO confirm against
 *        the server. A missing response or empty groups are skipped.
 */
ClusteringDialog.prototype._updateData = function(data) {
    var clusters = [];
    var groups = data || [];

    for (var i = 0; i < groups.length; i++) {
        var choices = groups[i];
        if (!choices || choices.length === 0) {
            // Nothing to take a default value from.
            continue;
        }

        var rowCount = 0;
        var sum = 0;
        var sumSquared = 0;
        for (var j = 0; j < choices.length; j++) {
            var choice = choices[j];
            var length = choice.v.length;
            rowCount += choice.c;
            sum += length;
            sumSquared += length * length;
        }

        var avg = sum / choices.length;
        clusters.push({
            edit: false,
            choices: choices,
            value: choices[0].v,
            size: choices.length,
            rowCount: rowCount,
            avg: avg,
            // Clamp at 0 so rounding error can never produce sqrt(negative) = NaN.
            variance: Math.sqrt(Math.max(0, sumSquared / choices.length - avg * avg))
        });
    }

    this._clusters = clusters;
    this._resetFacets();
    this._updateAll();
};
// 2010-03-13 10:32:06 +01:00
/**
 * Ticks every unchecked merge checkbox in the cluster table and fires its
 * change handler so the underlying cluster records follow. Only checkboxes
 * are targeted; the text inputs of the table are left alone.
 */
ClusteringDialog.prototype._selectAll = function() {
    $(".clustering-dialog-entry-table input[type='checkbox']:not(:checked)").attr('checked', true).change();
};
// 2010-03-13 10:32:06 +01:00
/**
 * Unticks every checked merge checkbox in the cluster table and fires its
 * change handler so the underlying cluster records follow.
 */
ClusteringDialog.prototype._deselectAll = function() {
    $(".clustering-dialog-entry-table input[type='checkbox']:checked").attr('checked', false).change();
};
// 2010-03-13 10:32:06 +01:00
/**
 * Handler of the "Apply & Close" button: applies the selected edits and
 * dismisses the dialog once _apply() reports completion.
 */
ClusteringDialog.prototype._onApplyClose = function() {
    var self = this;
    this._apply(function() {
        self._dismiss();
    });
};
// 2010-03-13 10:32:06 +01:00
/**
 * Handler of the "Apply & Re-Cluster" button: applies the selected edits and
 * starts a fresh clustering run once _apply() reports completion.
 */
ClusteringDialog.prototype._onApplyReCluster = function() {
    var self = this;
    this._apply(function() {
        self._cluster();
    });
};
// 2010-03-13 10:32:06 +01:00
/**
 * Sends one "mass-edit" operation covering every currently visible (facet
 * restricted) cluster whose merge checkbox is ticked. Each edit maps all
 * choice values of a cluster ("from") to the cluster's new value ("to").
 * If no cluster is ticked, the user is told so and nothing is sent.
 *
 * @param onDone callback passed to Gridworks.postProcess() as its onDone
 *        handler
 */
ClusteringDialog.prototype._apply = function(onDone) {
    var clusters = this._getRestrictedClusters();

    var edits = [];
    for (var i = 0; i < clusters.length; i++) {
        var cluster = clusters[i];
        if (!cluster.edit) {
            continue;
        }

        var values = [];
        for (var j = 0; j < cluster.choices.length; j++) {
            values.push(cluster.choices[j].v);
        }
        edits.push({
            from: values,
            to: cluster.value
        });
    }

    if (edits.length === 0) {
        // Wording matches the "Merge?" column header of the cluster table.
        alert("You must check some Merge? checkboxes for your edits to be applied.");
        return;
    }

    Gridworks.postProcess(
        "mass-edit",
        {},
        {
            columnName: this._columnName,
            expression: this._expression,
            edits: JSON.stringify(edits)
        },
        { cellsChanged: true },
        {
            onError: function(o) {
                alert("Error: " + o.message);
            },
            onDone: onDone
        }
    );
};
// 2010-03-13 10:32:06 +01:00
/**
 * Closes this dialog by asking DialogSystem to dismiss everything down to
 * the level just below the one returned when the dialog was shown.
 */
ClusteringDialog.prototype._dismiss = function() {
    DialogSystem.dismissUntil(this._level - 1);
};
// 2010-03-13 10:32:06 +01:00
/**
 * Returns a shallow copy of all clusters from the latest clustering run.
 *
 * Before the first response has arrived this._clusters is not set yet; an
 * empty array is returned then (a bare concat would yield [undefined] and
 * break callers that iterate over the result).
 *
 * @returns a new array holding the same cluster objects
 */
ClusteringDialog.prototype._getBaseClusters = function() {
    return [].concat(this._clusters || []);
};
// 2010-03-13 10:32:06 +01:00
/**
 * Returns the clusters that pass every facet's current restriction.
 *
 * @param except optional facet to leave out of the filtering; used when
 *        updating that facet itself so it shows what the other facets let
 *        through
 * @returns the filtered list of clusters
 */
ClusteringDialog.prototype._getRestrictedClusters = function(except) {
    var remaining = this._getBaseClusters();
    for (var i = 0; i < this._facets.length; i++) {
        var facet = this._facets[i].facet;
        if (facet === except) {
            continue;
        }
        remaining = facet.restrict(remaining);
    }
    return remaining;
};
// 2010-03-13 10:32:06 +01:00
/**
 * Refreshes all facets and the cluster table after the data or a facet
 * selection has changed. Each facet is updated with the clusters allowed by
 * all the other facets; the table shows the clusters allowed by all facets.
 */
ClusteringDialog.prototype._updateAll = function() {
    for (var i = 0; i < this._facets.length; i++) {
        var facet = this._facets[i].facet;
        facet.update(this._getRestrictedClusters(facet));
    }
    this._renderTable(this._getRestrictedClusters());
};
// 2010-03-13 10:32:06 +01:00
/**
 * Disposes of the existing facets, removes their elements and recreates the
 * four standard facets (cluster size, row count, value length average and
 * value length variance) from the current clusters.
 */
ClusteringDialog.prototype._resetFacets = function() {
    for (var i = 0; i < this._facets.length; i++) {
        var entry = this._facets[i];
        entry.facet.dispose();
        entry.elmt.remove();
    }
    this._facets = [];

    this._createFacet("Cluster Size", "size");
    this._createFacet("Row Count", "rowCount");
    this._createFacet("Value Length Average", "avg");
    this._createFacet("Value Length Variance", "variance");
};
// 2010-03-13 10:32:06 +01:00
/**
 * Creates one facet over the given cluster property, places its element in
 * the facet container and registers both in this._facets.
 *
 * @param title heading shown above the facet
 * @param property name of the cluster property the facet works on
 */
ClusteringDialog.prototype._createFacet = function(title, property) {
    var elmt = $('<div>').appendTo(this._elmts.facetContainer);
    var facet = new ClusteringDialog.Facet(this, title, property, elmt, this._getBaseClusters());

    this._facets.push({
        elmt: elmt,
        facet: facet
    });
};
// 2010-03-13 10:32:06 +01:00
/**
 * Range facet over one numeric property of the clusters: a histogram of the
 * property's distribution plus a two-handle slider that restricts the
 * clusters to a [from, to] range.
 *
 * The value range of the given clusters is divided into bins of width
 * this._step. When all values are equal (or there are no clusters) the facet
 * is degenerate: no UI is built, this._baseBins is empty and the facet never
 * restricts anything.
 *
 * @param dialog owning dialog; its _updateAll() is called when the slider
 *        selection changes
 * @param title heading shown above the histogram
 * @param property name of the cluster property this facet works on
 * @param elmt element (jQuery-wrapped) that receives the facet's markup
 * @param clusters the full, unrestricted list of clusters
 */
ClusteringDialog.Facet = function(dialog, title, property, elmt, clusters) {
    this._dialog = dialog;
    this._property = property;

    var self = this;

    var max = Number.NEGATIVE_INFINITY;
    var min = Number.POSITIVE_INFINITY;
    for (var i = 0; i < clusters.length; i++) {
        var val = clusters[i][property];
        max = Math.max(max, val);
        min = Math.min(min, val);
    }

    this._min = min;
    this._max = max;

    if (min >= max) {
        // Nothing to distinguish: stay inert.
        this._step = 0;
        this._baseBins = [];
        return;
    }

    // Choose a power-of-ten step so that the range spans at most about 100
    // steps, growing the step for wide ranges and shrinking it for narrow ones.
    var diff = max - min;
    this._step = 1;
    if (diff > 10) {
        while (this._step * 100 < diff) {
            this._step *= 10;
        }
    } else {
        while (this._step * 100 > diff) {
            this._step /= 10;
        }
    }

    // Snap the range outwards to multiples of the step.
    this._min = (Math.floor(this._min / this._step) * this._step);
    this._max = (Math.ceil(this._max / this._step) * this._step);
    this._binCount = 1 + Math.ceil((this._max - this._min) / this._step);
    if (this._binCount > 100) {
        this._step *= 2;
        this._binCount = Math.round((1 + this._binCount) / 2);
    }
    this._baseBins = this._computeDistribution(clusters);

    this._from = this._min;
    this._to = this._max;

    elmt.addClass("clustering-dialog-facet");
    var html = $(
        '<div class="clustering-dialog-facet-header">' + title + '</div>' +
        '<div class="clustering-dialog-facet-histogram" bind="histogramContainer"></div>' +
        '<div class="clustering-dialog-facet-slider" bind="slider"></div>' +
        '<div class="clustering-dialog-facet-selection" bind="selectionContainer"></div>'
    ).appendTo(elmt);

    this._elmts = DOM.bind(html);

    this._elmts.slider.slider({
        min: this._min,
        max: this._max,
        values: [ this._from, this._to ],
        stop: function(evt, ui) {
            self._from = ui.values[0];
            self._to = ui.values[1];
            self._setRangeIndicators();
            self._dialog._updateAll();
        }
    });
    this._setRangeIndicators();
};

/**
 * Releases resources held by the facet. Currently there is nothing to do.
 */
ClusteringDialog.Facet.prototype.dispose = function() {
};

/**
 * Filters clusters down to those whose property value lies inside the
 * selected [from, to] range (inclusive).
 *
 * @param clusters clusters to filter
 * @returns the same array when the facet is degenerate or the whole range is
 *          selected, otherwise a new array with the matching clusters
 */
ClusteringDialog.Facet.prototype.restrict = function(clusters) {
    if (this._baseBins.length === 0 || (this._from == this._min && this._to == this._max)) {
        return clusters;
    }

    var kept = [];
    for (var i = 0; i < clusters.length; i++) {
        var cluster = clusters[i];
        var val = cluster[this._property];
        if (val >= this._from && val <= this._to) {
            kept.push(cluster);
        }
    }
    return kept;
};

/**
 * Redraws the histogram as a stacked bar chart: the distribution of the
 * given clusters, with the remainder of the full distribution stacked on
 * top of each bar. Does nothing for a degenerate facet.
 *
 * @param clusters clusters that pass all the other facets
 */
ClusteringDialog.Facet.prototype.update = function(clusters) {
    if (this._baseBins.length === 0) {
        return;
    }

    var bins = this._computeDistribution(clusters);
    var i;

    var max = 0;
    for (i = 0; i < this._baseBins.length; i++) {
        max = Math.max(max, this._baseBins[i]);
    }

    // Scale both series to 0..100 relative to the tallest base bin.
    var values = [];
    var diffs = [];
    for (i = 0; i < this._baseBins.length; i++) {
        var v = Math.ceil(100 * bins[i] / max);
        var diff = Math.ceil(100 * this._baseBins[i] / max) - v;

        values.push(v == 0 ? 0 : Math.max(2, v)); // use min 2 to make sure something shows up
        diffs.push(diff == 0 ? 0 : Math.max(2, diff));
    }

    this._elmts.histogramContainer.empty();

    // NOTE(review): chart.apis.google.com is the Google Image Charts
    // endpoint, which Google has deprecated — confirm it is still reachable
    // or replace the histogram rendering.
    $('<img />').attr("src",
        "http://chart.apis.google.com/chart?" + [
            "chs=" + this._elmts.histogramContainer[0].offsetWidth + "x50",
            "cht=bvs&chbh=r,0&chco=000088,aaaaff",
            "chd=t:" + values.join(",") + "|" + diffs.join(",")
        ].join("&")
    ).appendTo(this._elmts.histogramContainer);
};

/**
 * Shows the currently selected range as "<from> to <to>" below the slider.
 */
ClusteringDialog.Facet.prototype._setRangeIndicators = function() {
    this._elmts.selectionContainer.text(this._from + " to " + this._to);
};

/**
 * Counts how many of the given clusters fall into each bin.
 *
 * Values whose bin index is not inside 0..binCount-1 (including NaN) are
 * ignored; incrementing a missing slot would otherwise put NaN entries into
 * the result and the chart data.
 *
 * @param clusters clusters to count
 * @returns array of this._binCount counts
 */
ClusteringDialog.Facet.prototype._computeDistribution = function(clusters) {
    var bins = [];
    for (var b = 0; b < this._binCount; b++) {
        bins.push(0);
    }

    for (var i = 0; i < clusters.length; i++) {
        var val = clusters[i][this._property];
        var bin = Math.round((val - this._min) / this._step);
        if (bin >= 0 && bin < bins.length) {
            bins[bin]++;
        }
    }

    return bins;
};