Jackson serialization for clustering classes

This commit is contained in:
Antonin Delpeuch 2018-09-28 10:26:43 +01:00
parent cf45f23e1d
commit 49f1367adc
4 changed files with 105 additions and 1 deletion

View File

@ -0,0 +1,27 @@
package com.google.refine.clustering;
import java.io.Serializable;
import java.util.Comparator;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * A single value of a cluster together with its associated count.
 *
 * Jackson serializes an instance as an object with two properties:
 * {@code "v"} for the value and {@code "c"} for the count.
 */
public class ClusteredEntry {

    /** The clustered value, serialized under the {@code "v"} key. */
    @JsonProperty("v")
    protected final Serializable value;

    /** The count attached to the value, serialized under the {@code "c"} key. */
    @JsonProperty("c")
    protected final int count;

    /**
     * Creates an entry.
     *
     * @param value the clustered value
     * @param count the count associated with that value
     */
    public ClusteredEntry(
            Serializable value,
            int count) {
        this.value = value;
        this.count = count;
    }

    /**
     * Orders entries by decreasing count (largest count first).
     */
    public static Comparator<ClusteredEntry> comparator = new Comparator<ClusteredEntry>() {
        @Override
        public int compare(ClusteredEntry o1, ClusteredEntry o2) {
            // Integer.compare instead of subtraction: "o2.count - o1.count"
            // overflows when the operands have opposite signs and large
            // magnitude, which would flip the sign of the result.
            return Integer.compare(o2.count, o1.count);
        }
    };
}

View File

@ -2,6 +2,8 @@ package com.google.refine.clustering;
import org.json.JSONObject;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.refine.Jsonizable;
import com.google.refine.model.Project;
@ -22,6 +24,7 @@ public abstract class ClustererConfig implements Jsonizable {
columnName = o.getString("column");
}
/**
 * Gives access to the column name held by this configuration.
 * Jackson serializes it under the {@code "column"} key.
 *
 * @return the stored column name
 */
@JsonProperty("column")
public String getColumnName() {
    return this.columnName;
}
@ -32,4 +35,10 @@ public abstract class ClustererConfig implements Jsonizable {
* @return
*/
public abstract Clusterer apply(Project project);
/**
 * Type string used in JSON serialization. Jackson writes the returned
 * value under the {@code "type"} property of the serialized configuration.
 *
 * @return the type identifier supplied by the concrete configuration class
 */
@JsonProperty("type")
public abstract String getType();
}

View File

@ -43,6 +43,7 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.json.JSONException;
import org.json.JSONObject;
@ -50,10 +51,17 @@ import org.json.JSONWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.refine.Jsonizable;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.clustering.ClusteredEntry;
import com.google.refine.clustering.Clusterer;
import com.google.refine.clustering.ClustererConfig;
import com.google.refine.model.Cell;
@ -63,7 +71,8 @@ import com.google.refine.model.Row;
public class BinningClusterer extends Clusterer {
public static class BinningClustererConfig extends ClustererConfig {
@JsonProperty("function")
private String _keyerName;
private Keyer _keyer;
private BinningParameters _parameters;
@ -80,10 +89,13 @@ public class BinningClusterer extends Clusterer {
}
}
/**
 * Gives access to the keyer object held by this configuration.
 * The method is marked {@code @JsonIgnore}, so Jackson does not
 * serialize its result.
 *
 * @return the keyer stored in this configuration
 */
@JsonIgnore
public Keyer getKeyer() {
    return this._keyer;
}
/**
 * Gives access to the parameters held by this configuration.
 * Jackson serializes them under the {@code "params"} key and leaves the
 * key out entirely when the value is {@code null}.
 *
 * @return the stored parameters, possibly {@code null}
 */
@JsonProperty("params")
@JsonInclude(Include.NON_NULL)
public BinningParameters getParameters() {
    return this._parameters;
}
@ -108,10 +120,17 @@ public class BinningClusterer extends Clusterer {
clusterer.initializeFromConfig(project, this);
return clusterer;
}
/**
 * Supplies the type identifier of this configuration.
 *
 * @return the constant string {@code "binning"}
 */
@Override
public String getType() {
return "binning";
}
}
public static class BinningParameters implements Jsonizable {
@JsonProperty("ngram-size")
@JsonInclude(Include.NON_DEFAULT)
public int ngramSize;
@Override
@ -276,4 +295,22 @@ public class BinningClusterer extends Clusterer {
}
writer.endArray();
}
/**
 * Converts a (string, count) map entry into a two-key map:
 * the entry's key is stored under {@code "v"} and its value under {@code "c"}.
 *
 * @param entry the entry to convert
 * @return a new mutable map holding the two properties
 */
protected static Map<String,Object> entryToMap(Entry<String,Integer> entry) {
    final String value = entry.getKey();
    final Integer count = entry.getValue();

    Map<String,Object> result = new HashMap<>();
    result.put("v", value);
    result.put("c", count);
    return result;
}
/**
 * Builds the value Jackson uses to serialize this clusterer
 * (the method is marked {@code @JsonValue}).
 *
 * Each cluster becomes a list of {@link ClusteredEntry}, ordered with an
 * {@code EntriesComparator}; the entry key is used as the value and the
 * entry value as the count.
 *
 * Bins that contain a single distinct value are skipped, in line with
 * {@code kNNClusterer#getJsonRepresentation}, since a lone value has
 * nothing to be merged with.
 *
 * @return the list of clusters, each one a list of entries
 */
@JsonValue
public List<List<ClusteredEntry>> getJsonRepresentation() {
    EntriesComparator c = new EntriesComparator();
    return _clusters.stream()
            // a bin with only one distinct value is not a cluster
            .filter(m -> m.size() > 1)
            .map(m -> m.entrySet().stream()
                    .sorted(c)
                    .map(e -> new ClusteredEntry(e.getKey(), e.getValue()))
                    .collect(Collectors.toList()))
            .collect(Collectors.toList());
}
}

View File

@ -44,6 +44,7 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;
import org.json.JSONException;
import org.json.JSONObject;
@ -51,10 +52,15 @@ import org.json.JSONWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.refine.Jsonizable;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.clustering.ClusteredEntry;
import com.google.refine.clustering.Clusterer;
import com.google.refine.clustering.ClustererConfig;
import com.google.refine.model.Cell;
@ -76,6 +82,7 @@ import edu.mit.simile.vicino.distances.PPMDistance;
public class kNNClusterer extends Clusterer {
public static class kNNClustererConfig extends ClustererConfig {
@JsonProperty("function")
private String _distanceStr;
private Distance _distance;
private kNNClustererConfigParameters _parameters;
@ -105,10 +112,12 @@ public class kNNClusterer extends Clusterer {
}
}
/**
 * Gives access to the distance object held by this configuration.
 * The method is marked {@code @JsonIgnore}, so Jackson does not
 * serialize its result.
 *
 * @return the distance stored in this configuration
 */
@JsonIgnore
public Distance getDistance() {
    return this._distance;
}
/**
 * Gives access to the parameters held by this configuration.
 * Jackson serializes them under the {@code "params"} key.
 *
 * @return the stored parameters
 */
@JsonProperty("params")
public kNNClustererConfigParameters getParameters() {
    return this._parameters;
}
@ -119,13 +128,20 @@ public class kNNClusterer extends Clusterer {
clusterer.initializeFromConfig(project, this);
return clusterer;
}
/**
 * Supplies the type identifier of this configuration.
 *
 * @return the constant string {@code "knn"}
 */
@Override
public String getType() {
return "knn";
}
}
public static class kNNClustererConfigParameters implements Jsonizable {
public static final double defaultRadius = 1.0d;
public static final int defaultBlockingNgramSize = 6;
@JsonProperty("radius")
public double radius = defaultRadius;
@JsonProperty("blocking-ngram-size")
public int blockingNgramSize = defaultBlockingNgramSize;
@Override
@ -302,6 +318,21 @@ public class kNNClusterer extends Clusterer {
writer.endArray();
}
/**
 * Turns one cluster into a list of {@link ClusteredEntry}.
 *
 * Every member of the set is paired with the count recorded for it in
 * {@code _counts}; the resulting entries are ordered with
 * {@code ClusteredEntry.comparator}.
 *
 * @param s the members of the cluster
 * @return the entries of the cluster, sorted
 */
protected List<ClusteredEntry> getClusteredEntries(Set<Serializable> s) {
    return s.stream()
            .map(member -> {
                int occurrences = _counts.get(member);
                return new ClusteredEntry(member, occurrences);
            })
            .sorted(ClusteredEntry.comparator)
            .collect(Collectors.toList());
}
/**
 * Builds the value Jackson uses to serialize this clusterer
 * (the method is marked {@code @JsonValue}).
 *
 * Clusters with at most one member are left out; each remaining cluster
 * is converted through {@code getClusteredEntries}.
 *
 * @return the list of clusters, each one a list of entries
 */
@JsonValue
public List<List<ClusteredEntry>> getJsonRepresentation() {
    return _clusters.stream()
            .filter(cluster -> cluster.size() > 1)
            .map(this::getClusteredEntries)
            .collect(Collectors.toList());
}
private void count(Serializable s) {
if (_counts.containsKey(s)) {
_counts.put(s, _counts.get(s) + 1);