From 00a81c5fc4ba195ead0239eb4501721f9b80b9a5 Mon Sep 17 00:00:00 2001 From: Stefano Mazzocchi Date: Fri, 12 Mar 2010 19:10:22 +0000 Subject: [PATCH] make the kNN clustering report the right counts for the facet values (and order them in the clusters by counts) git-svn-id: http://google-refine.googlecode.com/svn/trunk@286 7d457c2a-affb-35e4-300a-418c747d4874 --- .../clustering/knn/kNNClusterer.java | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/metaweb/gridworks/clustering/knn/kNNClusterer.java b/src/main/java/com/metaweb/gridworks/clustering/knn/kNNClusterer.java index 686f4efb4..3a0f0b713 100644 --- a/src/main/java/com/metaweb/gridworks/clustering/knn/kNNClusterer.java +++ b/src/main/java/com/metaweb/gridworks/clustering/knn/kNNClusterer.java @@ -11,6 +11,7 @@ import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.TreeSet; +import java.util.Map.Entry; import org.json.JSONException; import org.json.JSONObject; @@ -44,8 +45,10 @@ public class kNNClusterer extends Clusterer { static protected Map _distances = new HashMap(); - ArrayList> _clusters; - + List> _clusters; + + Map _counts = new HashMap(); + static { _distances.put("levenshtein", new LevenshteinDistance()); _distances.put("jaccard", new JaccardDistance()); @@ -82,6 +85,7 @@ public class kNNClusterer extends Clusterer { Object v = cell.value; String s = (v instanceof String) ? ((String) v) : v.toString(); _treeBuilder.populate(s); + count(s); } return false; } @@ -120,6 +124,7 @@ public class kNNClusterer extends Clusterer { Object v = cell.value; String s = (v instanceof String) ? ((String) v) : v.toString().intern(); _data.add(s); + count(s); } return false; } @@ -183,6 +188,12 @@ public class kNNClusterer extends Clusterer { return o2.size() - o1.size(); } } + + public class ValuesComparator implements Comparator> { + public int compare(Entry o1, Entry o2) { + return o2.getValue() - o1.getValue(); + } + } public void initializeFromJSON(Project project, JSONObject o) throws Exception { super.initializeFromJSON(project, o); @@ -204,11 +215,17 @@ public class kNNClusterer extends Clusterer { writer.array(); for (Set m : _clusters) { if (m.size() > 1) { - writer.array(); + Map internal_counts = new HashMap(); for (Serializable s : m) { + internal_counts.put(s,_counts.get(s)); + } + List> values = new ArrayList>(internal_counts.entrySet()); + Collections.sort(values, new ValuesComparator()); + writer.array(); + for (Entry e : values) { writer.object(); - writer.key("v"); writer.value(s); - writer.key("c"); writer.value(1); + writer.key("v"); writer.value(e.getKey()); + writer.key("c"); writer.value(e.getValue()); writer.endObject(); } writer.endArray(); @@ -216,4 +233,12 @@ public class kNNClusterer extends Clusterer { } writer.endArray(); } + + private void count(Serializable s) { + if (_counts.containsKey(s)) { + _counts.put(s, _counts.get(s) + 1); + } else { + _counts.put(s, 1); + } + } }