Much improved facet clustering dialog and functionality.
NOTE: the kNN clustering code runs, but it does not yet produce the expected results.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@219 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
db824bffeb
commit
976c1da5c7
@ -69,6 +69,14 @@ public class Gridworks extends Server {
|
|||||||
public static void log(String message) {
|
public static void log(String message) {
|
||||||
logger.info(message);
|
logger.info(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Forwards {@code message} and the throwable {@code t} to {@code logger.error}. */
public static void error(String message, Throwable t) {
|
||||||
|
logger.error(message, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Forwards {@code message} to {@code logger.warn}. */
public static void warn(String message) {
|
||||||
|
logger.warn(message);
|
||||||
|
}
|
||||||
|
|
||||||
/* -------------- Gridworks HTTP server ----------------- */
|
/* -------------- Gridworks HTTP server ----------------- */
|
||||||
|
|
||||||
|
@ -15,15 +15,16 @@ import com.metaweb.gridworks.commands.edit.AnnotateOneRowCommand;
|
|||||||
import com.metaweb.gridworks.commands.edit.AnnotateRowsCommand;
|
import com.metaweb.gridworks.commands.edit.AnnotateRowsCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.ApplyOperationsCommand;
|
import com.metaweb.gridworks.commands.edit.ApplyOperationsCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.CreateProjectCommand;
|
import com.metaweb.gridworks.commands.edit.CreateProjectCommand;
|
||||||
|
import com.metaweb.gridworks.commands.edit.DeleteProjectCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.DoTextTransformCommand;
|
import com.metaweb.gridworks.commands.edit.DoTextTransformCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.EditOneCellCommand;
|
import com.metaweb.gridworks.commands.edit.EditOneCellCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.FacetBasedEditCommand;
|
import com.metaweb.gridworks.commands.edit.FacetBasedEditCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.JoinMultiValueCellsCommand;
|
import com.metaweb.gridworks.commands.edit.JoinMultiValueCellsCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.RemoveColumnCommand;
|
import com.metaweb.gridworks.commands.edit.RemoveColumnCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.DeleteProjectCommand;
|
|
||||||
import com.metaweb.gridworks.commands.edit.SaveProtographCommand;
|
import com.metaweb.gridworks.commands.edit.SaveProtographCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.SplitMultiValueCellsCommand;
|
import com.metaweb.gridworks.commands.edit.SplitMultiValueCellsCommand;
|
||||||
import com.metaweb.gridworks.commands.edit.UndoRedoCommand;
|
import com.metaweb.gridworks.commands.edit.UndoRedoCommand;
|
||||||
|
import com.metaweb.gridworks.commands.info.ComputeClustersCommand;
|
||||||
import com.metaweb.gridworks.commands.info.ComputeFacetsCommand;
|
import com.metaweb.gridworks.commands.info.ComputeFacetsCommand;
|
||||||
import com.metaweb.gridworks.commands.info.ExportRowsCommand;
|
import com.metaweb.gridworks.commands.info.ExportRowsCommand;
|
||||||
import com.metaweb.gridworks.commands.info.GetAllProjectMetadataCommand;
|
import com.metaweb.gridworks.commands.info.GetAllProjectMetadataCommand;
|
||||||
@ -74,6 +75,7 @@ public class GridworksServlet extends HttpServlet {
|
|||||||
_commands.put("cancel-processes", new CancelProcessesCommand());
|
_commands.put("cancel-processes", new CancelProcessesCommand());
|
||||||
|
|
||||||
_commands.put("compute-facets", new ComputeFacetsCommand());
|
_commands.put("compute-facets", new ComputeFacetsCommand());
|
||||||
|
_commands.put("compute-clusters", new ComputeClustersCommand());
|
||||||
_commands.put("do-text-transform", new DoTextTransformCommand());
|
_commands.put("do-text-transform", new DoTextTransformCommand());
|
||||||
_commands.put("facet-based-edit", new FacetBasedEditCommand());
|
_commands.put("facet-based-edit", new FacetBasedEditCommand());
|
||||||
_commands.put("edit-one-cell", new EditOneCellCommand());
|
_commands.put("edit-one-cell", new EditOneCellCommand());
|
||||||
|
@ -0,0 +1,29 @@
|
|||||||
|
package com.metaweb.gridworks.clustering;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.Jsonizable;
|
||||||
|
import com.metaweb.gridworks.browsing.Engine;
|
||||||
|
import com.metaweb.gridworks.model.Column;
|
||||||
|
import com.metaweb.gridworks.model.Project;
|
||||||
|
|
||||||
|
public abstract class Clusterer implements Jsonizable {
|
||||||
|
|
||||||
|
protected Project _project;
|
||||||
|
protected int _colindex;
|
||||||
|
protected JSONObject _config;
|
||||||
|
|
||||||
|
public abstract void computeClusters(Engine engine);
|
||||||
|
|
||||||
|
public void initializeFromJSON(Project project, JSONObject o) throws Exception {
|
||||||
|
_project = project;
|
||||||
|
_config = o;
|
||||||
|
|
||||||
|
String colname = o.getString("column");
|
||||||
|
for (Column column : project.columnModel.columns) {
|
||||||
|
if (column.getHeaderLabel().equals(colname)) {
|
||||||
|
_colindex = column.getCellIndex();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,151 @@
|
|||||||
|
package com.metaweb.gridworks.clustering.binning;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.browsing.Engine;
|
||||||
|
import com.metaweb.gridworks.browsing.FilteredRows;
|
||||||
|
import com.metaweb.gridworks.browsing.RowVisitor;
|
||||||
|
import com.metaweb.gridworks.clustering.Clusterer;
|
||||||
|
import com.metaweb.gridworks.model.Cell;
|
||||||
|
import com.metaweb.gridworks.model.Project;
|
||||||
|
import com.metaweb.gridworks.model.Row;
|
||||||
|
|
||||||
|
public class BinningClusterer extends Clusterer {
|
||||||
|
|
||||||
|
private Keyer _keyer;
|
||||||
|
|
||||||
|
static protected Map<String, Keyer> _keyers = new HashMap<String, Keyer>();
|
||||||
|
|
||||||
|
List<Map<Object,Integer>> _clusters;
|
||||||
|
|
||||||
|
static {
|
||||||
|
_keyers.put("fingerprint", new FingerprintKeyer());
|
||||||
|
_keyers.put("ngram-fingerprint", new NGramFingerprintKeyer());
|
||||||
|
_keyers.put("metaphone", new MetaphoneKeyer());
|
||||||
|
_keyers.put("double-metaphone", new DoubleMetaphoneKeyer());
|
||||||
|
_keyers.put("soundex", new SoundexKeyer());
|
||||||
|
}
|
||||||
|
|
||||||
|
class BinningRowVisitor implements RowVisitor {
|
||||||
|
|
||||||
|
Keyer _keyer;
|
||||||
|
Object[] _params;
|
||||||
|
JSONObject _config;
|
||||||
|
|
||||||
|
Map<String,Map<Object,Integer>> _map = new HashMap<String,Map<Object,Integer>>();
|
||||||
|
|
||||||
|
public BinningRowVisitor(Keyer k, JSONObject o) {
|
||||||
|
_keyer = k;
|
||||||
|
_config = o;
|
||||||
|
if (k instanceof NGramFingerprintKeyer) {
|
||||||
|
try {
|
||||||
|
int size = _config.getJSONObject("params").getInt("ngram-size");
|
||||||
|
_params = new Object[1];
|
||||||
|
_params[0] = size;
|
||||||
|
} catch (JSONException e) {
|
||||||
|
//Gridworks.warn("no ngram size specified, using default");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean visit(Project project, int rowIndex, Row row, boolean contextual) {
|
||||||
|
Cell cell = row.cells.get(_colindex);
|
||||||
|
if (cell != null && cell.value != null) {
|
||||||
|
Object v = cell.value;
|
||||||
|
String s = (v instanceof String) ? ((String) v) : v.toString();
|
||||||
|
String key = _keyer.key(s,_params);
|
||||||
|
if (_map.containsKey(key)) {
|
||||||
|
Map<Object,Integer> m = _map.get(key);
|
||||||
|
if (m.containsKey(v)) {
|
||||||
|
m.put(v, m.get(v) + 1);
|
||||||
|
} else {
|
||||||
|
m.put(v,1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Map<Object,Integer> m = new TreeMap<Object,Integer>();
|
||||||
|
m.put(v,0);
|
||||||
|
_map.put(key, m);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String,Map<Object,Integer>> getMap() {
|
||||||
|
return _map;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class SizeComparator implements Comparator<Map<Object,Integer>> {
|
||||||
|
public int compare(Map<Object,Integer> o1, Map<Object,Integer> o2) {
|
||||||
|
int s1 = o1.size();
|
||||||
|
int s2 = o2.size();
|
||||||
|
if (o1 == o2) {
|
||||||
|
int total1 = 0;
|
||||||
|
for (int i : o1.values()) {
|
||||||
|
total1 += i;
|
||||||
|
}
|
||||||
|
int total2 = 0;
|
||||||
|
for (int i : o2.values()) {
|
||||||
|
total2 += i;
|
||||||
|
}
|
||||||
|
return total2 - total1;
|
||||||
|
} else {
|
||||||
|
return s2 - s1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class EntriesComparator implements Comparator<Entry<Object,Integer>> {
|
||||||
|
public int compare(Entry<Object,Integer> o1, Entry<Object,Integer> o2) {
|
||||||
|
return o2.getValue() - o1.getValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initializeFromJSON(Project project, JSONObject o) throws Exception {
|
||||||
|
super.initializeFromJSON(project, o);
|
||||||
|
_keyer = _keyers.get(o.getString("function").toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void computeClusters(Engine engine) {
|
||||||
|
BinningRowVisitor visitor = new BinningRowVisitor(_keyer,_config);
|
||||||
|
FilteredRows filteredRows = engine.getAllFilteredRows(true);
|
||||||
|
filteredRows.accept(_project, visitor);
|
||||||
|
|
||||||
|
Map<String,Map<Object,Integer>> map = visitor.getMap();
|
||||||
|
_clusters = new ArrayList<Map<Object,Integer>>(map.values());
|
||||||
|
Collections.sort(_clusters, new SizeComparator());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void write(JSONWriter writer, Properties options) throws JSONException {
|
||||||
|
EntriesComparator c = new EntriesComparator();
|
||||||
|
|
||||||
|
writer.array();
|
||||||
|
for (Map<Object,Integer> m : _clusters) {
|
||||||
|
if (m.size() > 1) {
|
||||||
|
writer.array();
|
||||||
|
List<Entry<Object,Integer>> entries = new ArrayList<Entry<Object,Integer>>(m.entrySet());
|
||||||
|
Collections.sort(entries,c);
|
||||||
|
for (Entry<Object,Integer> e : entries) {
|
||||||
|
writer.object();
|
||||||
|
writer.key("v"); writer.value(e.getKey());
|
||||||
|
writer.key("c"); writer.value(e.getValue());
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
writer.endArray();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writer.endArray();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,13 @@
|
|||||||
|
package com.metaweb.gridworks.clustering.binning;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.language.DoubleMetaphone;
|
||||||
|
|
||||||
|
public class DoubleMetaphoneKeyer extends Keyer {
|
||||||
|
|
||||||
|
private DoubleMetaphone _metaphone2 = new DoubleMetaphone();
|
||||||
|
|
||||||
|
public String key(String s, Object... o) {
|
||||||
|
return _metaphone2.doubleMetaphone(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,31 @@
|
|||||||
|
package com.metaweb.gridworks.clustering.binning;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
|
public class FingerprintKeyer extends Keyer {
|
||||||
|
|
||||||
|
static final Pattern alphanum = Pattern.compile("\\p{Punct}|\\p{Cntrl}");
|
||||||
|
|
||||||
|
public String key(String s, Object... o) {
|
||||||
|
s = s.trim(); // first off, remove whitespace around the string
|
||||||
|
s = s.toLowerCase(); // then lowercase it
|
||||||
|
s = alphanum.matcher(s).replaceAll(""); // then remove all punctuation and control chars
|
||||||
|
String[] frags = StringUtils.split(s); // split by whitespace
|
||||||
|
TreeSet<String> set = new TreeSet<String>();
|
||||||
|
for (String ss : frags) {
|
||||||
|
set.add(ss); // order fragments and dedupe
|
||||||
|
}
|
||||||
|
StringBuffer b = new StringBuffer();
|
||||||
|
Iterator<String> i = set.iterator();
|
||||||
|
while (i.hasNext()) {
|
||||||
|
b.append(i.next());
|
||||||
|
b.append(' ');
|
||||||
|
}
|
||||||
|
return b.toString(); // join ordered fragments back together
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,12 @@
|
|||||||
|
package com.metaweb.gridworks.clustering.binning;
|
||||||
|
|
||||||
|
|
||||||
|
/** Turns a string into a key; strings mapped to the same key are treated as belonging together. */
public abstract class Keyer {

    /**
     * Computes the key of {@code s} without extra parameters.
     * Delegates to {@link #key(String, Object...)} with a null parameter array.
     */
    public String key(String s) {
        final Object[] noParams = null;
        return key(s, noParams);
    }

    /**
     * Computes the key of {@code string}.
     *
     * @param string input to key
     * @param params optional keyer-specific parameters; may be null
     * @return the key
     */
    public abstract String key(String string, Object... params);
}
|
@ -0,0 +1,13 @@
|
|||||||
|
package com.metaweb.gridworks.clustering.binning;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.language.Metaphone;
|
||||||
|
|
||||||
|
public class MetaphoneKeyer extends Keyer {
|
||||||
|
|
||||||
|
private Metaphone _metaphone = new Metaphone();
|
||||||
|
|
||||||
|
public String key(String s, Object... o) {
|
||||||
|
return _metaphone.metaphone(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,35 @@
|
|||||||
|
package com.metaweb.gridworks.clustering.binning;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public class NGramFingerprintKeyer extends Keyer {
|
||||||
|
|
||||||
|
static final Pattern alphanum = Pattern.compile("\\p{Punct}|\\p{Cntrl}|\\p{Space}");
|
||||||
|
|
||||||
|
public String key(String s, Object... o) {
|
||||||
|
int ngram_size = 1;
|
||||||
|
if (o != null && o.length > 0 && o[0] instanceof Number) {
|
||||||
|
ngram_size = (Integer) o[0];
|
||||||
|
}
|
||||||
|
s = s.toLowerCase(); // then lowercase it
|
||||||
|
s = alphanum.matcher(s).replaceAll(""); // then remove all punctuation and control chars
|
||||||
|
TreeSet<String> set = ngram_split(s,ngram_size);
|
||||||
|
StringBuffer b = new StringBuffer();
|
||||||
|
Iterator<String> i = set.iterator();
|
||||||
|
while (i.hasNext()) {
|
||||||
|
b.append(i.next());
|
||||||
|
}
|
||||||
|
return b.toString(); // join ordered fragments back together
|
||||||
|
}
|
||||||
|
|
||||||
|
protected TreeSet<String> ngram_split(String s, int size) {
|
||||||
|
TreeSet<String> set = new TreeSet<String>();
|
||||||
|
char[] chars = s.toCharArray();
|
||||||
|
for (int i = 0; i + size <= chars.length; i++) {
|
||||||
|
set.add(new String(chars,i,size));
|
||||||
|
}
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,13 @@
|
|||||||
|
package com.metaweb.gridworks.clustering.binning;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.language.Soundex;
|
||||||
|
|
||||||
|
public class SoundexKeyer extends Keyer {
|
||||||
|
|
||||||
|
private Soundex _soundex = new Soundex();
|
||||||
|
|
||||||
|
public String key(String s, Object... o) {
|
||||||
|
return _soundex.soundex(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,122 @@
|
|||||||
|
package com.metaweb.gridworks.clustering.knn;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.Gridworks;
|
||||||
|
import com.metaweb.gridworks.browsing.Engine;
|
||||||
|
import com.metaweb.gridworks.browsing.FilteredRows;
|
||||||
|
import com.metaweb.gridworks.browsing.RowVisitor;
|
||||||
|
import com.metaweb.gridworks.clustering.Clusterer;
|
||||||
|
import com.metaweb.gridworks.model.Cell;
|
||||||
|
import com.metaweb.gridworks.model.Project;
|
||||||
|
import com.metaweb.gridworks.model.Row;
|
||||||
|
|
||||||
|
import edu.mit.simile.vicino.Distance;
|
||||||
|
import edu.mit.simile.vicino.distances.BZip2Distance;
|
||||||
|
import edu.mit.simile.vicino.distances.GZipDistance;
|
||||||
|
import edu.mit.simile.vicino.distances.JaccardDistance;
|
||||||
|
import edu.mit.simile.vicino.distances.JaroDistance;
|
||||||
|
import edu.mit.simile.vicino.distances.LevenshteinDistance;
|
||||||
|
import edu.mit.simile.vicino.distances.PPMDistance;
|
||||||
|
import edu.mit.simile.vicino.vptree.VPTreeBuilder;
|
||||||
|
|
||||||
|
public class kNNClusterer extends Clusterer {
|
||||||
|
|
||||||
|
private Distance _distance;
|
||||||
|
|
||||||
|
static protected Map<String, Distance> _distances = new HashMap<String, Distance>();
|
||||||
|
|
||||||
|
List<List<? extends Serializable>> _clusters;
|
||||||
|
|
||||||
|
static {
|
||||||
|
_distances.put("levenshtein", new LevenshteinDistance());
|
||||||
|
_distances.put("jaro", new JaroDistance());
|
||||||
|
_distances.put("jaccard", new JaccardDistance());
|
||||||
|
_distances.put("gzip", new GZipDistance());
|
||||||
|
_distances.put("bzip2", new BZip2Distance());
|
||||||
|
_distances.put("ppm", new PPMDistance());
|
||||||
|
}
|
||||||
|
|
||||||
|
class kNNClusteringRowVisitor implements RowVisitor {
|
||||||
|
|
||||||
|
Distance _distance;
|
||||||
|
JSONObject _config;
|
||||||
|
VPTreeBuilder _treeBuilder;
|
||||||
|
float _radius;
|
||||||
|
|
||||||
|
public kNNClusteringRowVisitor(Distance d, JSONObject o) {
|
||||||
|
_distance = d;
|
||||||
|
_config = o;
|
||||||
|
_treeBuilder = new VPTreeBuilder(_distance);
|
||||||
|
try {
|
||||||
|
_radius = (float) o.getDouble("radius");
|
||||||
|
} catch (JSONException e) {
|
||||||
|
Gridworks.warn("No radius found, using default");
|
||||||
|
_radius = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean visit(Project project, int rowIndex, Row row, boolean contextual) {
|
||||||
|
Cell cell = row.cells.get(_colindex);
|
||||||
|
if (cell != null && cell.value != null) {
|
||||||
|
Object v = cell.value;
|
||||||
|
String s = (v instanceof String) ? ((String) v) : v.toString();
|
||||||
|
_treeBuilder.populate(s);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Serializable,List<? extends Serializable>> getClusters() {
|
||||||
|
return _treeBuilder.getClusters(_radius);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class SizeComparator implements Comparator<List<? extends Serializable>> {
|
||||||
|
public int compare(List<? extends Serializable> o1, List<? extends Serializable> o2) {
|
||||||
|
return o2.size() - o1.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initializeFromJSON(Project project, JSONObject o) throws Exception {
|
||||||
|
super.initializeFromJSON(project, o);
|
||||||
|
_distance = _distances.get(o.getString("function").toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void computeClusters(Engine engine) {
|
||||||
|
kNNClusteringRowVisitor visitor = new kNNClusteringRowVisitor(_distance,_config);
|
||||||
|
FilteredRows filteredRows = engine.getAllFilteredRows(true);
|
||||||
|
filteredRows.accept(_project, visitor);
|
||||||
|
|
||||||
|
Map<Serializable,List<? extends Serializable>> clusters = visitor.getClusters();
|
||||||
|
_clusters = new ArrayList<List<? extends Serializable>>(clusters.values());
|
||||||
|
Collections.sort(_clusters, new SizeComparator());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void write(JSONWriter writer, Properties options) throws JSONException {
|
||||||
|
writer.array();
|
||||||
|
for (List<? extends Serializable> m : _clusters) {
|
||||||
|
if (m.size() > 1) {
|
||||||
|
writer.array();
|
||||||
|
for (Serializable s : m) {
|
||||||
|
writer.object();
|
||||||
|
writer.key("v"); writer.value(s);
|
||||||
|
writer.key("c"); writer.value(1);
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
writer.endArray();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writer.endArray();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,50 @@
|
|||||||
|
package com.metaweb.gridworks.commands.info;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import javax.servlet.ServletException;
|
||||||
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.Gridworks;
|
||||||
|
import com.metaweb.gridworks.browsing.Engine;
|
||||||
|
import com.metaweb.gridworks.clustering.Clusterer;
|
||||||
|
import com.metaweb.gridworks.clustering.binning.BinningClusterer;
|
||||||
|
import com.metaweb.gridworks.clustering.knn.kNNClusterer;
|
||||||
|
import com.metaweb.gridworks.commands.Command;
|
||||||
|
import com.metaweb.gridworks.model.Project;
|
||||||
|
|
||||||
|
public class ComputeClustersCommand extends Command {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
try {
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
Project project = getProject(request);
|
||||||
|
Engine engine = getEngine(request, project);
|
||||||
|
JSONObject clusterer_conf = getJsonParameter(request,"clusterer");
|
||||||
|
|
||||||
|
Clusterer clusterer = null;
|
||||||
|
String type = clusterer_conf.has("type") ? clusterer_conf.getString("type") : "binning";
|
||||||
|
|
||||||
|
if ("knn".equals(type)) {
|
||||||
|
clusterer = new kNNClusterer();
|
||||||
|
} else {
|
||||||
|
clusterer = new BinningClusterer();
|
||||||
|
}
|
||||||
|
|
||||||
|
clusterer.initializeFromJSON(project, clusterer_conf);
|
||||||
|
|
||||||
|
clusterer.computeClusters(engine);
|
||||||
|
|
||||||
|
respondJSON(response, clusterer);
|
||||||
|
Gridworks.log("computed clusters [" + type + "," + clusterer_conf.getString("function") + "] in " + (System.currentTimeMillis() - start) + "ms");
|
||||||
|
} catch (Exception e) {
|
||||||
|
respondException(response, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,39 +1,23 @@
|
|||||||
package com.metaweb.gridworks.expr.functions.strings;
|
package com.metaweb.gridworks.expr.functions.strings;
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
import java.util.TreeSet;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.json.JSONWriter;
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.clustering.binning.FingerprintKeyer;
|
||||||
|
import com.metaweb.gridworks.clustering.binning.Keyer;
|
||||||
import com.metaweb.gridworks.gel.Function;
|
import com.metaweb.gridworks.gel.Function;
|
||||||
|
|
||||||
public class Fingerprint implements Function {
|
public class Fingerprint implements Function {
|
||||||
|
|
||||||
static final Pattern alphanum = Pattern.compile("\\p{Punct}|\\p{Cntrl}");
|
static Keyer fingerprint = new FingerprintKeyer();
|
||||||
|
|
||||||
public Object call(Properties bindings, Object[] args) {
|
public Object call(Properties bindings, Object[] args) {
|
||||||
if (args.length == 1 && args[0] != null) {
|
if (args.length == 1 && args[0] != null) {
|
||||||
Object o = args[0];
|
Object o = args[0];
|
||||||
String s = (o instanceof String) ? (String) o : o.toString();
|
String s = (o instanceof String) ? (String) o : o.toString();
|
||||||
s = s.trim(); // first off, remove whitespace around the string
|
return fingerprint.key(s);
|
||||||
s = s.toLowerCase(); // then lowercase it
|
|
||||||
s = alphanum.matcher(s).replaceAll(""); // then remove all punctuation and control chars
|
|
||||||
String[] frags = StringUtils.split(s); // split by whitespace
|
|
||||||
TreeSet<String> set = new TreeSet<String>();
|
|
||||||
for (String ss : frags) {
|
|
||||||
set.add(ss); // order fragments and dedupe
|
|
||||||
}
|
|
||||||
StringBuffer b = new StringBuffer();
|
|
||||||
Iterator<String> i = set.iterator();
|
|
||||||
while (i.hasNext()) {
|
|
||||||
b.append(i.next());
|
|
||||||
b.append(' ');
|
|
||||||
}
|
|
||||||
return b.toString(); // join ordered fragments back together
|
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -1,20 +1,20 @@
|
|||||||
package com.metaweb.gridworks.expr.functions.strings;
|
package com.metaweb.gridworks.expr.functions.strings;
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.json.JSONWriter;
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.clustering.binning.Keyer;
|
||||||
|
import com.metaweb.gridworks.clustering.binning.NGramFingerprintKeyer;
|
||||||
import com.metaweb.gridworks.expr.EvalError;
|
import com.metaweb.gridworks.expr.EvalError;
|
||||||
import com.metaweb.gridworks.gel.ControlFunctionRegistry;
|
import com.metaweb.gridworks.gel.ControlFunctionRegistry;
|
||||||
import com.metaweb.gridworks.gel.Function;
|
import com.metaweb.gridworks.gel.Function;
|
||||||
|
|
||||||
public class NGramFingerprint implements Function {
|
public class NGramFingerprint implements Function {
|
||||||
|
|
||||||
static final Pattern alphanum = Pattern.compile("\\p{Punct}|\\p{Cntrl}|\\p{Space}");
|
static Keyer ngram_fingerprint = new NGramFingerprintKeyer();
|
||||||
|
|
||||||
public Object call(Properties bindings, Object[] args) {
|
public Object call(Properties bindings, Object[] args) {
|
||||||
if (args.length == 1 || args.length == 2) {
|
if (args.length == 1 || args.length == 2) {
|
||||||
@ -24,16 +24,8 @@ public class NGramFingerprint implements Function {
|
|||||||
ngram_size = (args[1] instanceof Number) ? ((Number) args[1]).intValue() : Integer.parseInt(args[1].toString());
|
ngram_size = (args[1] instanceof Number) ? ((Number) args[1]).intValue() : Integer.parseInt(args[1].toString());
|
||||||
}
|
}
|
||||||
Object o = args[0];
|
Object o = args[0];
|
||||||
String s = (o instanceof String) ? (String) o : o.toString();
|
String s = (o instanceof String) ? (String) o : o.toString();
|
||||||
s = s.toLowerCase(); // then lowercase it
|
return ngram_fingerprint.key(s,ngram_size);
|
||||||
s = alphanum.matcher(s).replaceAll(""); // then remove all punctuation and control chars
|
|
||||||
TreeSet<String> set = ngram_split(s,ngram_size);
|
|
||||||
StringBuffer b = new StringBuffer();
|
|
||||||
Iterator<String> i = set.iterator();
|
|
||||||
while (i.hasNext()) {
|
|
||||||
b.append(i.next());
|
|
||||||
}
|
|
||||||
return b.toString(); // join ordered fragments back together
|
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -2,21 +2,21 @@ package com.metaweb.gridworks.expr.functions.strings;
|
|||||||
|
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
import org.apache.commons.codec.language.DoubleMetaphone;
|
|
||||||
import org.apache.commons.codec.language.Metaphone;
|
|
||||||
import org.apache.commons.codec.language.Soundex;
|
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.json.JSONWriter;
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.clustering.binning.DoubleMetaphoneKeyer;
|
||||||
|
import com.metaweb.gridworks.clustering.binning.MetaphoneKeyer;
|
||||||
|
import com.metaweb.gridworks.clustering.binning.SoundexKeyer;
|
||||||
import com.metaweb.gridworks.expr.EvalError;
|
import com.metaweb.gridworks.expr.EvalError;
|
||||||
import com.metaweb.gridworks.gel.ControlFunctionRegistry;
|
import com.metaweb.gridworks.gel.ControlFunctionRegistry;
|
||||||
import com.metaweb.gridworks.gel.Function;
|
import com.metaweb.gridworks.gel.Function;
|
||||||
|
|
||||||
public class Phonetic implements Function {
|
public class Phonetic implements Function {
|
||||||
|
|
||||||
private DoubleMetaphone metaphone2 = new DoubleMetaphone();
|
static private DoubleMetaphoneKeyer metaphone2 = new DoubleMetaphoneKeyer();
|
||||||
private Metaphone metaphone = new Metaphone();
|
static private MetaphoneKeyer metaphone = new MetaphoneKeyer();
|
||||||
private Soundex soundex = new Soundex();
|
static private SoundexKeyer soundex = new SoundexKeyer();
|
||||||
|
|
||||||
public Object call(Properties bindings, Object[] args) {
|
public Object call(Properties bindings, Object[] args) {
|
||||||
if (args.length == 2) {
|
if (args.length == 2) {
|
||||||
@ -26,11 +26,11 @@ public class Phonetic implements Function {
|
|||||||
String str = (o1 instanceof String) ? (String) o1 : o1.toString();
|
String str = (o1 instanceof String) ? (String) o1 : o1.toString();
|
||||||
String encoding = ((String) o2).toLowerCase();
|
String encoding = ((String) o2).toLowerCase();
|
||||||
if ("doublemetaphone".equals(encoding)) {
|
if ("doublemetaphone".equals(encoding)) {
|
||||||
return metaphone2.doubleMetaphone(str);
|
return metaphone2.key(str);
|
||||||
} else if ("metaphone".equals(encoding)) {
|
} else if ("metaphone".equals(encoding)) {
|
||||||
return metaphone.metaphone(str);
|
return metaphone.key(str);
|
||||||
} else if ("soundex".equals(encoding)) {
|
} else if ("soundex".equals(encoding)) {
|
||||||
return soundex.soundex(str);
|
return soundex.key(str);
|
||||||
} else {
|
} else {
|
||||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " doesn't know how to handle the '" + encoding + "' encoding.");
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " doesn't know how to handle the '" + encoding + "' encoding.");
|
||||||
}
|
}
|
||||||
|
@ -5,11 +5,11 @@ import com.wcohen.ss.api.StringDistance;
|
|||||||
|
|
||||||
import edu.mit.simile.vicino.Distance;
|
import edu.mit.simile.vicino.Distance;
|
||||||
|
|
||||||
public class LevensteinDistance implements Distance {
|
public class LevenshteinDistance implements Distance {
|
||||||
|
|
||||||
StringDistance distance;
|
StringDistance distance;
|
||||||
|
|
||||||
public LevensteinDistance() {
|
public LevenshteinDistance() {
|
||||||
this.distance = new Levenstein();
|
this.distance = new Levenstein();
|
||||||
}
|
}
|
||||||
|
|
@ -39,4 +39,12 @@ public class Node implements Serializable {
|
|||||||
public String toString() {
|
public String toString() {
|
||||||
return obj.toString();
|
return obj.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Two nodes are equal when the other node's {@code get()} value equals this node's {@code obj}.
 * Any non-Node argument (including null) compares as not equal.
 *
 * NOTE(review): no matching hashCode() is visible in this hunk; hash-based
 * collections of Node need one consistent with this method — confirm it exists.
 */
public boolean equals(Object n) {
|
||||||
|
if (n instanceof Node) {
|
||||||
|
return ((Node) n).get().equals(this.obj);
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,8 +2,14 @@ package edu.mit.simile.vicino.vptree;
|
|||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Iterator;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.Gridworks;
|
||||||
|
|
||||||
import edu.mit.simile.vicino.Distance;
|
import edu.mit.simile.vicino.Distance;
|
||||||
|
|
||||||
@ -17,9 +23,10 @@ public class VPTreeBuilder {
|
|||||||
|
|
||||||
private Random generator = new Random(System.currentTimeMillis());
|
private Random generator = new Random(System.currentTimeMillis());
|
||||||
|
|
||||||
private VPTree tree;
|
|
||||||
private final Distance distance;
|
private final Distance distance;
|
||||||
|
|
||||||
|
private Set<Node> nodes = new HashSet<Node>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines a VPTree Builder for a specific distance.
|
* Defines a VPTree Builder for a specific distance.
|
||||||
*
|
*
|
||||||
@ -29,20 +36,47 @@ public class VPTreeBuilder {
|
|||||||
this.distance = distance;
|
this.distance = distance;
|
||||||
}
|
}
|
||||||
|
|
||||||
public VPTree buildVPTree(Collection<? extends Serializable> col) {
|
public void populate(Serializable s) {
|
||||||
Node nodes[] = new Node[col.size()];
|
nodes.add(new Node(s));
|
||||||
Iterator<? extends Serializable> i = col.iterator();
|
}
|
||||||
int counter = 0;
|
|
||||||
while (i.hasNext()) {
|
|
||||||
Serializable s = (Serializable) i.next();
|
|
||||||
nodes[counter++] = new Node(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
tree = new VPTree();
|
public VPTree buildVPTree() {
|
||||||
tree.setRoot(addNode(nodes, 0, nodes.length - 1));
|
Node[] nodes_array = this.nodes.toArray(new Node[this.nodes.size()]);
|
||||||
|
Gridworks.log("building tree with nodes: " + nodes_array.length);
|
||||||
|
VPTree tree = new VPTree();
|
||||||
|
tree.setRoot(addNode(nodes_array, 0, nodes_array.length - 1));
|
||||||
|
Gridworks.log("tree built");
|
||||||
return tree;
|
return tree;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public VPTree buildVPTree(Collection<? extends Serializable> values) {
|
||||||
|
reset();
|
||||||
|
for (Serializable s : values) {
|
||||||
|
populate(s);
|
||||||
|
}
|
||||||
|
return buildVPTree();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
this.nodes.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Serializable,List<? extends Serializable>> getClusters(float radius) {
|
||||||
|
VPTree tree = buildVPTree();
|
||||||
|
VPTreeSeeker seeker = new VPTreeSeeker(distance,tree);
|
||||||
|
|
||||||
|
Map<Serializable,List<? extends Serializable>> map = new HashMap<Serializable,List<? extends Serializable>>();
|
||||||
|
for (Node n : nodes) {
|
||||||
|
Serializable s = n.get();
|
||||||
|
Gridworks.log(" find results for: " + s);
|
||||||
|
List<? extends Serializable> results = seeker.range(s, radius);
|
||||||
|
Gridworks.log(" found: " + results.size());
|
||||||
|
map.put(s, results);
|
||||||
|
}
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
private TNode addNode(Node nodes[], int begin, int end) {
|
private TNode addNode(Node nodes[], int begin, int end) {
|
||||||
|
|
||||||
int delta = end - begin;
|
int delta = end - begin;
|
||||||
|
@ -2,6 +2,9 @@ function FacetBasedEditDialog(columnName, expression, entries) {
|
|||||||
this._columnName = columnName;
|
this._columnName = columnName;
|
||||||
this._expression = expression;
|
this._expression = expression;
|
||||||
this._entries = entries;
|
this._entries = entries;
|
||||||
|
this._method = "binning";
|
||||||
|
this._function = "fingerprint";
|
||||||
|
this._params = {};
|
||||||
|
|
||||||
this._createDialog();
|
this._createDialog();
|
||||||
this._cluster();
|
this._cluster();
|
||||||
@ -10,7 +13,7 @@ function FacetBasedEditDialog(columnName, expression, entries) {
|
|||||||
FacetBasedEditDialog.prototype._createDialog = function() {
|
FacetBasedEditDialog.prototype._createDialog = function() {
|
||||||
var self = this;
|
var self = this;
|
||||||
var frame = DialogSystem.createDialog();
|
var frame = DialogSystem.createDialog();
|
||||||
frame.width("800px");
|
frame.width("900px");
|
||||||
|
|
||||||
var header = $('<div></div>').addClass("dialog-header").text("Facet-based edit of column " + this._columnName).appendTo(frame);
|
var header = $('<div></div>').addClass("dialog-header").text("Facet-based edit of column " + this._columnName).appendTo(frame);
|
||||||
var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
|
var body = $('<div></div>').addClass("dialog-body").appendTo(frame);
|
||||||
@ -18,17 +21,78 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
|||||||
|
|
||||||
var html = $(
|
var html = $(
|
||||||
'<div>' +
|
'<div>' +
|
||||||
|
'<div class="facet-based-edit-dialog-controls"><table><tr>' +
|
||||||
|
'<td>' +
|
||||||
|
'Method: <select bind="methodSelector">' +
|
||||||
|
'<option selected="true">key collision</option>' +
|
||||||
|
'<option>nearest neightbor</option>' +
|
||||||
|
'</select>' +
|
||||||
|
'</td>' +
|
||||||
|
'<td>' +
|
||||||
|
'<div id="binning-controls">Keying Function: <select bind="keyingFunctionSelector">' +
|
||||||
|
'<option selected="true">fingerprint</option>' +
|
||||||
|
'<option>ngram-fingerprint</option>' +
|
||||||
|
'<option>double-metaphone</option>' +
|
||||||
|
'<option>metaphone</option>' +
|
||||||
|
'<option>soundex</option>' +
|
||||||
|
'</select></div>' +
|
||||||
|
'<div id="knn-controls" class="hidden">Distance Function: <select bind="distanceFunctionSelector">' +
|
||||||
|
'<option selected="true">levenshtein</option>' +
|
||||||
|
'<option>jaro</option>' +
|
||||||
|
'<option>jaccard</option>' +
|
||||||
|
'<option>gzip</option>' +
|
||||||
|
'<option>bzip2</option>' +
|
||||||
|
'<option>PPM</option>' +
|
||||||
|
'</select></div>' +
|
||||||
|
'</td>' +
|
||||||
|
'<td>' +
|
||||||
|
'<div id="ngram-fingerprint-params" class="function-params hidden">' +
|
||||||
|
'Ngram Size: <input type="text" value="1" bind="ngramSize">' +
|
||||||
|
'</div>' +
|
||||||
|
'</td>' +
|
||||||
|
'</tr></table></div>' +
|
||||||
'<div bind="tableContainer" class="facet-based-edit-dialog-table-container"></div>' +
|
'<div bind="tableContainer" class="facet-based-edit-dialog-table-container"></div>' +
|
||||||
'<div class="facet-based-edit-dialog-controls">' +
|
|
||||||
'<button bind="clusterButton">Cluster</button> ' +
|
|
||||||
'<button bind="unclusterButton">Un-cluster</button> ' +
|
|
||||||
'</div>' +
|
|
||||||
'</div>'
|
'</div>'
|
||||||
).appendTo(body);
|
).appendTo(body);
|
||||||
|
|
||||||
this._elmts = DOM.bind(html);
|
this._elmts = DOM.bind(html);
|
||||||
this._elmts.clusterButton.click(function() { self._cluster(); });
|
|
||||||
this._elmts.unclusterButton.click(function() { self._uncluster(); });
|
// Switch the dialog between the two clustering methods: show the controls
// that belong to the chosen method, hide the others, record the method and
// re-trigger the matching function selector so clustering is recomputed.
this._elmts.methodSelector.change(function() {
    var selection = $(this).find("option:selected").text();
    if (selection == 'key collision') {
        body.find("#binning-controls").show();
        body.find("#knn-controls").hide();
        self._method = "binning";
        self._elmts.keyingFunctionSelector.change();
    } else if (selection == 'nearest neightbor') {
        // Compare, don't assign: the original used '=' here, which made this
        // branch unconditionally true. The label (including its spelling)
        // must match the <option> text in the dialog markup.
        body.find("#binning-controls").hide();
        body.find("#knn-controls").show();
        self._method = "knn";
        self._elmts.distanceFunctionSelector.change();
    }
});
|
||||||
|
|
||||||
|
// Shared change handler for the keying/distance function selectors:
// remembers the chosen function, shows only that function's parameter
// panel (if one exists) and recomputes the clusters.
var changer = function() {
    var chosen = $(this).find("option:selected").text();
    self._function = chosen;

    $(".function-params").hide();
    $("#" + chosen + "-params").show();

    self._cluster();
};
|
||||||
|
|
||||||
|
this._elmts.keyingFunctionSelector.change(changer);
|
||||||
|
this._elmts.distanceFunctionSelector.change(changer);
|
||||||
|
|
||||||
|
// Re-cluster whenever the ngram size changes, but only with a valid number.
this._elmts.ngramSize.change(function() {
    // parseInt never throws: it returns NaN for non-numeric input, so the
    // value has to be validated explicitly (a try/catch cannot detect it).
    // The radix is given so the input is always read as decimal.
    var size = parseInt($(this).val(), 10);
    if (isNaN(size)) {
        alert("ngram size must be a number");
        return;
    }
    self._params = { "ngram-size" : size };
    self._cluster();
});
|
||||||
|
|
||||||
|
//this._elmts.clusterButton.click(function() { self._cluster(); });
|
||||||
|
//this._elmts.unclusterButton.click(function() { self._uncluster(); });
|
||||||
|
|
||||||
$('<button></button>').text("OK").click(function() { self._onOK(); }).appendTo(footer);
|
$('<button></button>').text("OK").click(function() { self._onOK(); }).appendTo(footer);
|
||||||
$('<button></button>').text("Cancel").click(function() { self._dismiss(); }).appendTo(footer);
|
$('<button></button>').text("Cancel").click(function() { self._dismiss(); }).appendTo(footer);
|
||||||
@ -41,9 +105,9 @@ FacetBasedEditDialog.prototype._createDialog = function() {
|
|||||||
|
|
||||||
FacetBasedEditDialog.prototype._renderTable = function() {
|
FacetBasedEditDialog.prototype._renderTable = function() {
|
||||||
var self = this;
|
var self = this;
|
||||||
var container = this._elmts.tableContainer.empty();
|
var container = this._elmts.tableContainer;
|
||||||
|
|
||||||
var table = $('<table></table>').addClass("facet-based-edit-dialog-entry-table").appendTo(container)[0];
|
var table = $('<table></table>').addClass("facet-based-edit-dialog-entry-table")[0];
|
||||||
|
|
||||||
var trHead = table.insertRow(table.rows.length);
|
var trHead = table.insertRow(table.rows.length);
|
||||||
trHead.className = "header";
|
trHead.className = "header";
|
||||||
@ -60,7 +124,7 @@ FacetBasedEditDialog.prototype._renderTable = function() {
|
|||||||
for (var c = 0; c < choices.length; c++) {
|
for (var c = 0; c < choices.length; c++) {
|
||||||
var choice = choices[c];
|
var choice = choices[c];
|
||||||
var li = $('<li>').appendTo(ul);
|
var li = $('<li>').appendTo(ul);
|
||||||
$('<span>').text(choice.v.l).appendTo(li);
|
$('<span>').text(choice.v).appendTo(li);
|
||||||
$('<span>').text(" (" + choice.c + ")").appendTo(li);
|
$('<span>').text(" (" + choice.c + ")").appendTo(li);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -73,7 +137,7 @@ FacetBasedEditDialog.prototype._renderTable = function() {
|
|||||||
editCheck.attr("checked", "true");
|
editCheck.attr("checked", "true");
|
||||||
}
|
}
|
||||||
|
|
||||||
var input = $('<input size="35" />')
|
var input = $('<input size="55" />')
|
||||||
.attr("value", cluster.value)
|
.attr("value", cluster.value)
|
||||||
.appendTo(tr.insertCell(2))
|
.appendTo(tr.insertCell(2))
|
||||||
.keyup(function() {
|
.keyup(function() {
|
||||||
@ -83,65 +147,43 @@ FacetBasedEditDialog.prototype._renderTable = function() {
|
|||||||
for (var i = 0; i < this._clusters.length; i++) {
|
for (var i = 0; i < this._clusters.length; i++) {
|
||||||
renderCluster(this._clusters[i]);
|
renderCluster(this._clusters[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
container.empty().append(table);
|
||||||
};
|
};
|
||||||
|
|
||||||
FacetBasedEditDialog.prototype._cluster = function() {
|
FacetBasedEditDialog.prototype._cluster = function() {
|
||||||
var clusters = [];
|
var self = this;
|
||||||
var map = {};
|
|
||||||
$.each(this._entries, function() {
|
|
||||||
var choice = {
|
|
||||||
v: this.v,
|
|
||||||
c: this.c
|
|
||||||
};
|
|
||||||
|
|
||||||
var s = this.v.l.toLowerCase().replace(/\W/g, ' ').replace(/\s+/g, ' ').split(" ").sort().join(" ");
|
|
||||||
if (s in map) {
|
|
||||||
map[s].choices.push(choice);
|
|
||||||
} else {
|
|
||||||
map[s] = {
|
|
||||||
edit: false,
|
|
||||||
choices: [ choice ]
|
|
||||||
};
|
|
||||||
clusters.push(map[s]);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
$.each(clusters, function() {
|
var container = this._elmts.tableContainer.html(
|
||||||
if (this.choices.length > 1) {
|
'<div style="margin: 1em; font-size: 130%; color: #888;">Loading... <img src="/images/small-spinner.gif"></div>'
|
||||||
this.choices.sort(function(a, b) {
|
);
|
||||||
var c = b.c - a.c;
|
|
||||||
return c != 0 ? c : a.v.l.localeCompare(b.v.l);
|
|
||||||
});
|
|
||||||
this.edit = true;
|
|
||||||
}
|
|
||||||
this.value = this.choices[0].v.l;
|
|
||||||
});
|
|
||||||
clusters.sort(function(a, b) {
|
|
||||||
var c = b.choices.length - a.choices.length;
|
|
||||||
return c != 0 ? c : a.value.localeCompare(b.value);
|
|
||||||
});
|
|
||||||
|
|
||||||
this._clusters = clusters;
|
|
||||||
this._renderTable();
|
|
||||||
};
|
|
||||||
|
|
||||||
FacetBasedEditDialog.prototype._uncluster = function() {
|
$.post(
|
||||||
var clusters = [];
|
"/command/compute-clusters?" + $.param({ project: theProject.id }),
|
||||||
$.each(this._entries, function() {
|
{
|
||||||
var cluster = {
|
engine: JSON.stringify(ui.browsingEngine.getJSON()),
|
||||||
edit: false,
|
clusterer: JSON.stringify({
|
||||||
choices: [{
|
'type' : this._method,
|
||||||
v: this.v,
|
'function' : this._function,
|
||||||
c: this.c
|
'column' : this._columnName,
|
||||||
}],
|
'params' : this._params
|
||||||
value: this.v.l
|
})
|
||||||
};
|
},
|
||||||
clusters.push(cluster);
|
function(data) {
|
||||||
});
|
var clusters = [];
|
||||||
|
$.each(data, function() {
|
||||||
this._clusters = clusters;
|
clusters.push({
|
||||||
this._renderTable();
|
edit: true,
|
||||||
};
|
choices: this,
|
||||||
|
value: this[0].v
|
||||||
|
});
|
||||||
|
});
|
||||||
|
self._clusters = clusters;
|
||||||
|
self._renderTable();
|
||||||
|
},
|
||||||
|
"json"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
FacetBasedEditDialog.prototype._onOK = function() {
|
FacetBasedEditDialog.prototype._onOK = function() {
|
||||||
var edits = [];
|
var edits = [];
|
||||||
@ -150,7 +192,7 @@ FacetBasedEditDialog.prototype._onOK = function() {
|
|||||||
if (cluster.edit) {
|
if (cluster.edit) {
|
||||||
var values = [];
|
var values = [];
|
||||||
for (var j = 0; j < cluster.choices.length; j++) {
|
for (var j = 0; j < cluster.choices.length; j++) {
|
||||||
values.push(cluster.choices[j].v.v);
|
values.push(cluster.choices[j].v);
|
||||||
}
|
}
|
||||||
|
|
||||||
edits.push({
|
edits.push({
|
||||||
|
@ -74,4 +74,8 @@ img {
|
|||||||
|
|
||||||
.fbs-pane, .fbs-flyout-pane {
|
.fbs-pane, .fbs-flyout-pane {
|
||||||
z-index: 2000;
|
z-index: 2000;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hidden {
|
||||||
|
display: none;
|
||||||
}
|
}
|
@ -14,7 +14,7 @@ table.facet-based-edit-dialog-main-layout > tbody > tr:last-child > td {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.facet-based-edit-dialog-table-container {
|
.facet-based-edit-dialog-table-container {
|
||||||
height: 450px;
|
height: 500px;
|
||||||
overflow: auto;
|
overflow: auto;
|
||||||
border: 1px solid #aaa;
|
border: 1px solid #aaa;
|
||||||
}
|
}
|
||||||
@ -42,5 +42,14 @@ table.facet-based-edit-dialog-entry-table > tbody > tr.even > td {
|
|||||||
}
|
}
|
||||||
|
|
||||||
table.facet-based-edit-dialog-entry-table input {
|
table.facet-based-edit-dialog-entry-table input {
|
||||||
border: none;
|
border: 1px solid #ccc;
|
||||||
|
padding: 0 0.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.facet-based-edit-dialog-controls {
|
||||||
|
margin-bottom: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.facet-based-edit-dialog-controls td {
|
||||||
|
padding-right: 0.5em;
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user