Jackson serialization for clustering classes
This commit is contained in:
parent
cf45f23e1d
commit
49f1367adc
27
main/src/com/google/refine/clustering/ClusteredEntry.java
Normal file
27
main/src/com/google/refine/clustering/ClusteredEntry.java
Normal file
@ -0,0 +1,27 @@
|
||||
package com.google.refine.clustering;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Comparator;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
public class ClusteredEntry {
|
||||
@JsonProperty("v")
|
||||
protected final Serializable value;
|
||||
@JsonProperty("c")
|
||||
protected final int count;
|
||||
|
||||
public ClusteredEntry(
|
||||
Serializable value,
|
||||
int count) {
|
||||
this.value = value;
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
public static Comparator<ClusteredEntry> comparator = new Comparator<ClusteredEntry>() {
|
||||
@Override
|
||||
public int compare(ClusteredEntry o1, ClusteredEntry o2) {
|
||||
return o2.count - o1.count;
|
||||
}
|
||||
};
|
||||
}
|
@ -2,6 +2,8 @@ package com.google.refine.clustering;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import com.google.refine.Jsonizable;
|
||||
import com.google.refine.model.Project;
|
||||
|
||||
@ -22,6 +24,7 @@ public abstract class ClustererConfig implements Jsonizable {
|
||||
columnName = o.getString("column");
|
||||
}
|
||||
|
||||
/**
 * Returns the column name held by this configuration.
 *
 * Jackson exposes it as the "column" property, which is the same key
 * this class reads {@code columnName} from.
 *
 * @return the value of {@code columnName}
 */
@JsonProperty("column")
|
||||
public String getColumnName() {
|
||||
return columnName;
|
||||
}
|
||||
@ -32,4 +35,10 @@ public abstract class ClustererConfig implements Jsonizable {
|
||||
* @return
|
||||
*/
|
||||
public abstract Clusterer apply(Project project);
|
||||
|
||||
/**
 * Type string used in the JSON serialization of this configuration,
 * where Jackson writes it as the "type" property.
 *
 * @return the type string supplied by the concrete configuration class
 */
|
||||
@JsonProperty("type")
|
||||
public abstract String getType();
|
||||
}
|
||||
|
@ -43,6 +43,7 @@ import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Properties;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
@ -50,10 +51,17 @@ import org.json.JSONWriter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import com.fasterxml.jackson.annotation.JsonInclude.Include;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonValue;
|
||||
|
||||
import com.google.refine.Jsonizable;
|
||||
import com.google.refine.browsing.Engine;
|
||||
import com.google.refine.browsing.FilteredRows;
|
||||
import com.google.refine.browsing.RowVisitor;
|
||||
import com.google.refine.clustering.ClusteredEntry;
|
||||
import com.google.refine.clustering.Clusterer;
|
||||
import com.google.refine.clustering.ClustererConfig;
|
||||
import com.google.refine.model.Cell;
|
||||
@ -63,7 +71,8 @@ import com.google.refine.model.Row;
|
||||
public class BinningClusterer extends Clusterer {
|
||||
|
||||
public static class BinningClustererConfig extends ClustererConfig {
|
||||
|
||||
|
||||
@JsonProperty("function")
|
||||
private String _keyerName;
|
||||
private Keyer _keyer;
|
||||
private BinningParameters _parameters;
|
||||
@ -80,10 +89,13 @@ public class BinningClusterer extends Clusterer {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the keyer object held by this configuration.
 *
 * Marked {@code @JsonIgnore} so Jackson leaves it out of the serialized
 * form; the {@code _keyerName} field is what gets written, under the
 * "function" property.
 *
 * @return the value of {@code _keyer}
 */
@JsonIgnore
|
||||
public Keyer getKeyer() {
|
||||
return _keyer;
|
||||
}
|
||||
|
||||
/**
 * Returns the binning parameters held by this configuration.
 *
 * Jackson writes them as the "params" property; because of
 * {@code Include.NON_NULL} the property is omitted when the value is null.
 *
 * @return the value of {@code _parameters}
 */
@JsonProperty("params")
|
||||
@JsonInclude(Include.NON_NULL)
|
||||
public BinningParameters getParameters() {
|
||||
return _parameters;
|
||||
}
|
||||
@ -108,10 +120,17 @@ public class BinningClusterer extends Clusterer {
|
||||
clusterer.initializeFromConfig(project, this);
|
||||
return clusterer;
|
||||
}
|
||||
|
||||
/**
 * Type string of this configuration in its JSON serialization.
 *
 * @return the constant "binning"
 */
@Override
|
||||
public String getType() {
|
||||
return "binning";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class BinningParameters implements Jsonizable {
|
||||
@JsonProperty("ngram-size")
|
||||
@JsonInclude(Include.NON_DEFAULT)
|
||||
public int ngramSize;
|
||||
|
||||
@Override
|
||||
@ -276,4 +295,22 @@ public class BinningClusterer extends Clusterer {
|
||||
}
|
||||
writer.endArray();
|
||||
}
|
||||
|
||||
protected static Map<String,Object> entryToMap(Entry<String,Integer> entry) {
|
||||
Map<String,Object> map = new HashMap<>();
|
||||
map.put("v", entry.getKey());
|
||||
map.put("c", entry.getValue());
|
||||
return map;
|
||||
}
|
||||
|
||||
@JsonValue
|
||||
public List<List<ClusteredEntry>> getJsonRepresentation() {
|
||||
EntriesComparator c = new EntriesComparator();
|
||||
return _clusters.stream()
|
||||
.map(m -> m.entrySet().stream()
|
||||
.sorted(c)
|
||||
.map(e -> new ClusteredEntry(e.getKey(), e.getValue()))
|
||||
.collect(Collectors.toList()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
@ -51,10 +52,15 @@ import org.json.JSONWriter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonValue;
|
||||
|
||||
import com.google.refine.Jsonizable;
|
||||
import com.google.refine.browsing.Engine;
|
||||
import com.google.refine.browsing.FilteredRows;
|
||||
import com.google.refine.browsing.RowVisitor;
|
||||
import com.google.refine.clustering.ClusteredEntry;
|
||||
import com.google.refine.clustering.Clusterer;
|
||||
import com.google.refine.clustering.ClustererConfig;
|
||||
import com.google.refine.model.Cell;
|
||||
@ -76,6 +82,7 @@ import edu.mit.simile.vicino.distances.PPMDistance;
|
||||
public class kNNClusterer extends Clusterer {
|
||||
|
||||
public static class kNNClustererConfig extends ClustererConfig {
|
||||
@JsonProperty("function")
|
||||
private String _distanceStr;
|
||||
private Distance _distance;
|
||||
private kNNClustererConfigParameters _parameters;
|
||||
@ -105,10 +112,12 @@ public class kNNClusterer extends Clusterer {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the distance object held by this configuration.
 *
 * Marked {@code @JsonIgnore} so Jackson leaves it out of the serialized
 * form; the {@code _distanceStr} field is what gets written, under the
 * "function" property.
 *
 * @return the value of {@code _distance}
 */
@JsonIgnore
|
||||
public Distance getDistance() {
|
||||
return _distance;
|
||||
}
|
||||
|
||||
/**
 * Returns the kNN parameters held by this configuration.
 *
 * Jackson writes them as the "params" property.
 *
 * @return the value of {@code _parameters}
 */
@JsonProperty("params")
|
||||
public kNNClustererConfigParameters getParameters() {
|
||||
return _parameters;
|
||||
}
|
||||
@ -119,13 +128,20 @@ public class kNNClusterer extends Clusterer {
|
||||
clusterer.initializeFromConfig(project, this);
|
||||
return clusterer;
|
||||
}
|
||||
|
||||
/**
 * Type string of this configuration in its JSON serialization.
 *
 * @return the constant "knn"
 */
@Override
|
||||
public String getType() {
|
||||
return "knn";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class kNNClustererConfigParameters implements Jsonizable {
|
||||
public static final double defaultRadius = 1.0d;
|
||||
public static final int defaultBlockingNgramSize = 6;
|
||||
@JsonProperty("radius")
|
||||
public double radius = defaultRadius;
|
||||
@JsonProperty("blocking-ngram-size")
|
||||
public int blockingNgramSize = defaultBlockingNgramSize;
|
||||
|
||||
@Override
|
||||
@ -302,6 +318,21 @@ public class kNNClusterer extends Clusterer {
|
||||
writer.endArray();
|
||||
}
|
||||
|
||||
protected List<ClusteredEntry> getClusteredEntries(Set<Serializable> s) {
|
||||
return s.stream()
|
||||
.map(e -> new ClusteredEntry(e, _counts.get(e)))
|
||||
.sorted(ClusteredEntry.comparator)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@JsonValue
|
||||
public List<List<ClusteredEntry>> getJsonRepresentation() {
|
||||
return _clusters.stream()
|
||||
.filter(m -> m.size() > 1)
|
||||
.map(m -> getClusteredEntries(m))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private void count(Serializable s) {
|
||||
if (_counts.containsKey(s)) {
|
||||
_counts.put(s, _counts.get(s) + 1);
|
||||
|
Loading…
Reference in New Issue
Block a user