diff --git a/main/src/com/google/refine/clustering/knn/kNNClusterer.java b/main/src/com/google/refine/clustering/knn/kNNClusterer.java index e2c455b85..9c4499c57 100644 --- a/main/src/com/google/refine/clustering/knn/kNNClusterer.java +++ b/main/src/com/google/refine/clustering/knn/kNNClusterer.java @@ -71,8 +71,6 @@ import edu.mit.simile.vicino.distances.JaroWinklerTFIDFDistance; import edu.mit.simile.vicino.distances.LevenshteinDistance; import edu.mit.simile.vicino.distances.PPMDistance; -import edu.tsinghua.dbgroup.EditDistanceClusterer; - public class kNNClusterer extends Clusterer { private Distance _distance; @@ -150,6 +148,7 @@ public class kNNClusterer extends Clusterer { int _blockingNgramSize = 6; HashSet _data; NGramClusterer _clusterer; + public BlockingClusteringRowVisitor(Distance d, JSONObject o) { _distance = d; _config = o; @@ -192,48 +191,6 @@ public class kNNClusterer extends Clusterer { return _clusterer.getClusters(_radius); } } - - class EditDistanceClusteringRowVisitor implements RowVisitor { - - int _radius = 2; - EditDistanceClusterer _clusterer; - public EditDistanceClusteringRowVisitor(JSONObject o) { - try { - JSONObject params = o.getJSONObject("params"); - _radius = params.getInt("radius"); - logger.debug("Use radius: {}", _radius); - } catch (JSONException e) { - logger.debug("No parameters found, using defaults"); - } - _clusterer = new EditDistanceClusterer(_radius); - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(_colindex); - if (cell != null && cell.value != null) { - Object v = cell.value; - String s = (v instanceof String) ? ((String) v) : v.toString().intern(); - _clusterer.populate(s); - count(s); - } - return false; - } - - public List> getClusters() { - return _clusterer.getClusters(); - } - } @Override public void initializeFromJSON(Project project, JSONObject o) throws Exception { @@ -243,21 +200,12 @@ public class kNNClusterer extends Clusterer { @Override public void computeClusters(Engine engine) { - if(_distance != _distances.get("levenshtein")) { - //VPTreeClusteringRowVisitor visitor = new VPTreeClusteringRowVisitor(_distance,_config); - BlockingClusteringRowVisitor visitor = new BlockingClusteringRowVisitor(_distance,_config); - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(_project, visitor); - - _clusters = visitor.getClusters(); - } else { - EditDistanceClusteringRowVisitor visitor = - new EditDistanceClusteringRowVisitor(_config); - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(_project, visitor); - - _clusters = visitor.getClusters(); - } + //VPTreeClusteringRowVisitor visitor = new VPTreeClusteringRowVisitor(_distance,_config); + BlockingClusteringRowVisitor visitor = new BlockingClusteringRowVisitor(_distance,_config); + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(_project, visitor); + + _clusters = visitor.getClusters(); } public static class ValuesComparator implements Comparator>, Serializable { diff --git a/main/webapp/WEB-INF/lib/EditDistanceJoiner.jar b/main/webapp/WEB-INF/lib/EditDistanceJoiner.jar deleted file mode 100644 index 275ba2be5..000000000 Binary files a/main/webapp/WEB-INF/lib/EditDistanceJoiner.jar and /dev/null differ diff --git a/main/webapp/modules/core/scripts/dialogs/clustering-dialog.html b/main/webapp/modules/core/scripts/dialogs/clustering-dialog.html index 19a2755d4..3e23fbf43 100644 --- a/main/webapp/modules/core/scripts/dialogs/clustering-dialog.html +++ b/main/webapp/modules/core/scripts/dialogs/clustering-dialog.html @@ -33,10 +33,7 @@ - -