Separate distance and keyer registration in dedicated factories
This commit is contained in:
parent
de86a162fa
commit
72d76ee22a
@ -81,7 +81,7 @@ public class BinningClusterer extends Clusterer {
|
||||
@JsonProperty("function")
|
||||
public void setKeyer(String keyerName) {
|
||||
_keyerName = keyerName;
|
||||
_keyer = _keyers.get(_keyerName.toLowerCase());
|
||||
_keyer = KeyerFactory.get(_keyerName.toLowerCase());
|
||||
}
|
||||
|
||||
@JsonProperty("function")
|
||||
@ -123,22 +123,10 @@ public class BinningClusterer extends Clusterer {
|
||||
protected Keyer _keyer;
|
||||
protected BinningParameters _parameters;
|
||||
|
||||
static final protected Map<String, Keyer> _keyers = new HashMap<String, Keyer>();
|
||||
|
||||
final static Logger logger = LoggerFactory.getLogger("binning_clusterer");
|
||||
|
||||
List<Map<String,Integer>> _clusters;
|
||||
|
||||
static {
|
||||
_keyers.put("fingerprint", new FingerprintKeyer());
|
||||
_keyers.put("ngram-fingerprint", new NGramFingerprintKeyer());
|
||||
_keyers.put("metaphone", new MetaphoneKeyer());
|
||||
_keyers.put("double-metaphone", new DoubleMetaphoneKeyer());
|
||||
_keyers.put("metaphone3", new Metaphone3Keyer());
|
||||
_keyers.put("soundex", new SoundexKeyer());
|
||||
_keyers.put("cologne-phonetic", new ColognePhoneticKeyer());
|
||||
}
|
||||
|
||||
class BinningRowVisitor implements RowVisitor {
|
||||
|
||||
Keyer _keyer;
|
||||
|
@ -0,0 +1,47 @@
|
||||
package com.google.refine.clustering.binning;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Registry of keyers for clustering.
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
public class KeyerFactory {
|
||||
|
||||
static final protected Map<String, Keyer> _keyers = new HashMap<String, Keyer>();
|
||||
|
||||
static {
|
||||
_keyers.put("fingerprint", new FingerprintKeyer());
|
||||
_keyers.put("ngram-fingerprint", new NGramFingerprintKeyer());
|
||||
_keyers.put("metaphone", new MetaphoneKeyer());
|
||||
_keyers.put("double-metaphone", new DoubleMetaphoneKeyer());
|
||||
_keyers.put("metaphone3", new Metaphone3Keyer());
|
||||
_keyers.put("soundex", new SoundexKeyer());
|
||||
_keyers.put("cologne-phonetic", new ColognePhoneticKeyer());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the keyer registered under a given name, or null if it does not exist.
|
||||
*/
|
||||
public static Keyer get(String name) {
|
||||
return _keyers.get(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a keyer under a code name.
|
||||
*/
|
||||
public static void put(String name, Keyer keyer) {
|
||||
_keyers.put(name, keyer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set of available keyer, by names.
|
||||
*/
|
||||
public static Set<String> getKeyerNames() {
|
||||
return _keyers.keySet();
|
||||
}
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
package com.google.refine.clustering.knn;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import edu.mit.simile.vicino.distances.BZip2Distance;
|
||||
import edu.mit.simile.vicino.distances.Distance;
|
||||
import edu.mit.simile.vicino.distances.GZipDistance;
|
||||
import edu.mit.simile.vicino.distances.JaccardDistance;
|
||||
import edu.mit.simile.vicino.distances.JaroDistance;
|
||||
import edu.mit.simile.vicino.distances.JaroWinklerDistance;
|
||||
import edu.mit.simile.vicino.distances.JaroWinklerTFIDFDistance;
|
||||
import edu.mit.simile.vicino.distances.LevenshteinDistance;
|
||||
import edu.mit.simile.vicino.distances.PPMDistance;
|
||||
|
||||
/**
|
||||
* Registry of distances for kNN clustering.
|
||||
*
|
||||
* @author Antonin Delpeuch
|
||||
*/
|
||||
public class DistanceFactory {
|
||||
|
||||
static final protected Map<String, Distance> _distances = new HashMap<String, Distance>();
|
||||
|
||||
static {
|
||||
_distances.put("levenshtein", new LevenshteinDistance());
|
||||
_distances.put("jaccard", new JaccardDistance());
|
||||
_distances.put("jaro", new JaroDistance());
|
||||
_distances.put("jaro-winkler", new JaroWinklerDistance());
|
||||
_distances.put("jaro-winkler-tfidf", new JaroWinklerTFIDFDistance());
|
||||
_distances.put("gzip", new GZipDistance());
|
||||
_distances.put("bzip2", new BZip2Distance());
|
||||
_distances.put("ppm", new PPMDistance());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the distance registered under this name, or null if it does not exist.
|
||||
*/
|
||||
public static Distance get(String name) {
|
||||
return _distances.get(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a new distance under a name.
|
||||
*/
|
||||
public static void put(String name, Distance distance) {
|
||||
_distances.put(name, distance);
|
||||
}
|
||||
|
||||
/**
|
||||
* Lists the available distances, by name.
|
||||
*/
|
||||
public static Set<String> getDistanceNames() {
|
||||
return _distances.keySet();
|
||||
}
|
||||
}
|
@ -89,7 +89,7 @@ public class kNNClusterer extends Clusterer {
|
||||
@JsonProperty("function")
|
||||
public void setDistance(String distanceStr) {
|
||||
_distanceStr = distanceStr;
|
||||
_distance = _distances.get(_distanceStr.toLowerCase());
|
||||
_distance = DistanceFactory.get(_distanceStr.toLowerCase());
|
||||
}
|
||||
|
||||
@JsonProperty("function")
|
||||
@ -133,25 +133,12 @@ public class kNNClusterer extends Clusterer {
|
||||
private Distance _distance;
|
||||
private kNNClustererConfigParameters _params;
|
||||
|
||||
static final protected Map<String, Distance> _distances = new HashMap<String, Distance>();
|
||||
|
||||
List<Set<Serializable>> _clusters;
|
||||
|
||||
Map<Serializable, Integer> _counts = new HashMap<Serializable, Integer>();
|
||||
|
||||
final static Logger logger = LoggerFactory.getLogger("kNN_clusterer");
|
||||
|
||||
static {
|
||||
_distances.put("levenshtein", new LevenshteinDistance());
|
||||
_distances.put("jaccard", new JaccardDistance());
|
||||
_distances.put("jaro", new JaroDistance());
|
||||
_distances.put("jaro-winkler", new JaroWinklerDistance());
|
||||
_distances.put("jaro-winkler-tfidf", new JaroWinklerTFIDFDistance());
|
||||
_distances.put("gzip", new GZipDistance());
|
||||
_distances.put("bzip2", new BZip2Distance());
|
||||
_distances.put("ppm", new PPMDistance());
|
||||
}
|
||||
|
||||
class VPTreeClusteringRowVisitor implements RowVisitor {
|
||||
|
||||
Distance _distance;
|
||||
|
Loading…
Reference in New Issue
Block a user