RandomSec/src/main/java/edu/mit/simile/vicino/clustering/VPTreeClusterer.java
Stefano Mazzocchi f7ab7c9cf6 - incorporated Paolo Ciccarese's fixes for VPTrees in Vicino
- moved all clustering stuff in the vicino package space to simplify external collaboration on that code
- added "type" function to the GEL


git-svn-id: http://google-refine.googlecode.com/svn/trunk@292 7d457c2a-affb-35e4-300a-418c747d4874
2010-03-13 09:34:17 +00:00

63 lines
1.8 KiB
Java

package edu.mit.simile.vicino.clustering;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import edu.mit.simile.vicino.distances.Distance;
import edu.mit.simile.vicino.vptree.Node;
import edu.mit.simile.vicino.vptree.VPTree;
import edu.mit.simile.vicino.vptree.VPTreeBuilder;
import edu.mit.simile.vicino.vptree.VPTreeSeeker;
/**
 * A {@link Clusterer} that groups the values fed to it by running range
 * queries against a {@link VPTree} built from those values.
 */
public class VPTreeClusterer extends Clusterer {

    VPTreeBuilder _treeBuilder;
    Distance _distance;

    /**
     * Creates a clusterer backed by a new {@link VPTreeBuilder}.
     *
     * @param d the distance passed to the tree builder and, later, to the
     *          {@link VPTreeSeeker} used for range queries
     */
    public VPTreeClusterer(Distance d) {
        this._distance = d;
        this._treeBuilder = new VPTreeBuilder(d);
    }

    /**
     * Forwards the given string to the underlying tree builder.
     *
     * @param s the value to hand to {@link VPTreeBuilder#populate}
     */
    public void populate(String s) {
        this._treeBuilder.populate(s);
    }

    /**
     * Builds the tree from the builder's current content and collects, for
     * each node payload not already returned by an earlier range query, the
     * set of payloads the seeker reports for the given radius. Only result
     * sets with more than one element are kept.
     *
     * @param radius the radius passed to each range query
     * @return the retained result sets, sorted with {@code SizeComparator}
     */
    public List<Set<Serializable>> getClusters(double radius) {
        VPTree vpTree = _treeBuilder.buildVPTree();
        Set<Node> allNodes = _treeBuilder.getNodes();
        VPTreeSeeker rangeSeeker = new VPTreeSeeker(_distance, vpTree);

        // Every payload starts out eligible to seed a range query.
        Map<Serializable,Boolean> eligible = new HashMap<Serializable,Boolean>();
        for (Node node : allNodes) {
            eligible.put(node.get(), Boolean.TRUE);
        }

        Map<Serializable,Set<Serializable>> clustersBySeed =
                new HashMap<Serializable,Set<Serializable>>();
        for (Node node : allNodes) {
            Serializable seed = node.get();
            if (!eligible.get(seed)) {
                // Already returned by a previous query; do not seed another one.
                continue;
            }

            Set<Serializable> neighbours = rangeSeeker.range(seed, radius);

            // Anything returned by this query can no longer act as a seed.
            for (Serializable member : neighbours) {
                eligible.put(member, Boolean.FALSE);
            }

            // Result sets with a single element are not reported as clusters.
            if (neighbours.size() > 1) {
                clustersBySeed.put(seed, neighbours);
            }
        }

        List<Set<Serializable>> sortedClusters =
                new ArrayList<Set<Serializable>>(clustersBySeed.values());
        Collections.sort(sortedClusters, new SizeComparator());
        return sortedClusters;
    }
}