Jackson serialization for clustering classes

This commit is contained in:
Antonin Delpeuch 2018-09-28 10:26:43 +01:00
parent cf45f23e1d
commit 49f1367adc
4 changed files with 105 additions and 1 deletion

View File

@ -0,0 +1,27 @@
package com.google.refine.clustering;
import java.io.Serializable;
import java.util.Comparator;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * A value that belongs to a cluster, paired with a count.
 * Jackson serializes an instance as an object with the keys {@code "v"}
 * (the value) and {@code "c"} (the count).
 */
public class ClusteredEntry {

    /** The clustered value; serialized under the JSON key {@code "v"}. */
    @JsonProperty("v")
    protected final Serializable value;

    /** The count associated with the value; serialized under the JSON key {@code "c"}. */
    @JsonProperty("c")
    protected final int count;

    /**
     * Creates an entry.
     *
     * @param value the clustered value
     * @param count the count associated with {@code value}
     */
    public ClusteredEntry(
            Serializable value,
            int count) {
        this.value = value;
        this.count = count;
    }

    /**
     * Orders entries by decreasing count.
     * <p>
     * The comparison uses {@link Integer#compare(int, int)} rather than a
     * subtraction: {@code o2.count - o1.count} overflows when the two counts
     * are far apart, which flips the sign and breaks the ordering.
     */
    public static Comparator<ClusteredEntry> comparator = new Comparator<ClusteredEntry>() {
        @Override
        public int compare(ClusteredEntry o1, ClusteredEntry o2) {
            // Arguments are swapped on purpose: larger counts sort first.
            return Integer.compare(o2.count, o1.count);
        }
    };
}

View File

@ -2,6 +2,8 @@ package com.google.refine.clustering;
import org.json.JSONObject; import org.json.JSONObject;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.refine.Jsonizable; import com.google.refine.Jsonizable;
import com.google.refine.model.Project; import com.google.refine.model.Project;
@ -22,6 +24,7 @@ public abstract class ClustererConfig implements Jsonizable {
columnName = o.getString("column"); columnName = o.getString("column");
} }
@JsonProperty("column")
public String getColumnName() { public String getColumnName() {
return columnName; return columnName;
} }
@ -32,4 +35,10 @@ public abstract class ClustererConfig implements Jsonizable {
* @return * @return
*/ */
public abstract Clusterer apply(Project project); public abstract Clusterer apply(Project project);
/**
 * Returns the type string written under the {@code "type"} key when this
 * configuration is serialized to JSON. Each concrete configuration
 * supplies its own value.
 *
 * @return the type string of this clusterer configuration
 */
@JsonProperty("type")
public abstract String getType();
} }

View File

@ -43,6 +43,7 @@ import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Properties; import java.util.Properties;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.stream.Collectors;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
@ -50,10 +51,17 @@ import org.json.JSONWriter;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.refine.Jsonizable; import com.google.refine.Jsonizable;
import com.google.refine.browsing.Engine; import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows; import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor; import com.google.refine.browsing.RowVisitor;
import com.google.refine.clustering.ClusteredEntry;
import com.google.refine.clustering.Clusterer; import com.google.refine.clustering.Clusterer;
import com.google.refine.clustering.ClustererConfig; import com.google.refine.clustering.ClustererConfig;
import com.google.refine.model.Cell; import com.google.refine.model.Cell;
@ -64,6 +72,7 @@ public class BinningClusterer extends Clusterer {
public static class BinningClustererConfig extends ClustererConfig { public static class BinningClustererConfig extends ClustererConfig {
@JsonProperty("function")
private String _keyerName; private String _keyerName;
private Keyer _keyer; private Keyer _keyer;
private BinningParameters _parameters; private BinningParameters _parameters;
@ -80,10 +89,13 @@ public class BinningClusterer extends Clusterer {
} }
} }
@JsonIgnore
public Keyer getKeyer() { public Keyer getKeyer() {
return _keyer; return _keyer;
} }
@JsonProperty("params")
@JsonInclude(Include.NON_NULL)
public BinningParameters getParameters() { public BinningParameters getParameters() {
return _parameters; return _parameters;
} }
@ -109,9 +121,16 @@ public class BinningClusterer extends Clusterer {
return clusterer; return clusterer;
} }
/**
 * Returns the type string of this configuration.
 *
 * @return the constant {@code "binning"}
 */
@Override
public String getType() {
return "binning";
}
} }
public static class BinningParameters implements Jsonizable { public static class BinningParameters implements Jsonizable {
@JsonProperty("ngram-size")
@JsonInclude(Include.NON_DEFAULT)
public int ngramSize; public int ngramSize;
@Override @Override
@ -276,4 +295,22 @@ public class BinningClusterer extends Clusterer {
} }
writer.endArray(); writer.endArray();
} }
/**
 * Converts a map entry into a fresh two-key map: the entry's key is stored
 * under {@code "v"} and the entry's value under {@code "c"}.
 *
 * @param entry the entry to convert
 * @return a new mutable map holding the {@code "v"} and {@code "c"} keys
 */
protected static Map<String,Object> entryToMap(Entry<String,Integer> entry) {
    final String value = entry.getKey();
    final Integer occurrences = entry.getValue();

    Map<String,Object> json = new HashMap<String,Object>();
    json.put("v", value);
    json.put("c", occurrences);
    return json;
}
/**
 * Builds the value Jackson serializes for this clusterer.
 * Each cluster becomes a list of {@link ClusteredEntry} objects, ordered
 * by an {@code EntriesComparator}; the clusters keep the order they have
 * in {@code _clusters}.
 *
 * @return one list of entries per cluster
 */
@JsonValue
public List<List<ClusteredEntry>> getJsonRepresentation() {
    // A single comparator instance is shared by all clusters.
    EntriesComparator entryOrder = new EntriesComparator();
    return _clusters.stream()
            .map(cluster -> cluster.entrySet().stream()
                    .sorted(entryOrder)
                    .map(entry -> new ClusteredEntry(entry.getKey(), entry.getValue()))
                    .collect(Collectors.toList()))
            .collect(Collectors.toList());
}
} }

View File

@ -44,6 +44,7 @@ import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Properties; import java.util.Properties;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
@ -51,10 +52,15 @@ import org.json.JSONWriter;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.refine.Jsonizable; import com.google.refine.Jsonizable;
import com.google.refine.browsing.Engine; import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows; import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor; import com.google.refine.browsing.RowVisitor;
import com.google.refine.clustering.ClusteredEntry;
import com.google.refine.clustering.Clusterer; import com.google.refine.clustering.Clusterer;
import com.google.refine.clustering.ClustererConfig; import com.google.refine.clustering.ClustererConfig;
import com.google.refine.model.Cell; import com.google.refine.model.Cell;
@ -76,6 +82,7 @@ import edu.mit.simile.vicino.distances.PPMDistance;
public class kNNClusterer extends Clusterer { public class kNNClusterer extends Clusterer {
public static class kNNClustererConfig extends ClustererConfig { public static class kNNClustererConfig extends ClustererConfig {
@JsonProperty("function")
private String _distanceStr; private String _distanceStr;
private Distance _distance; private Distance _distance;
private kNNClustererConfigParameters _parameters; private kNNClustererConfigParameters _parameters;
@ -105,10 +112,12 @@ public class kNNClusterer extends Clusterer {
} }
} }
@JsonIgnore
public Distance getDistance() { public Distance getDistance() {
return _distance; return _distance;
} }
@JsonProperty("params")
public kNNClustererConfigParameters getParameters() { public kNNClustererConfigParameters getParameters() {
return _parameters; return _parameters;
} }
@ -120,12 +129,19 @@ public class kNNClusterer extends Clusterer {
return clusterer; return clusterer;
} }
/**
 * Returns the type string of this configuration.
 *
 * @return the constant {@code "knn"}
 */
@Override
public String getType() {
return "knn";
}
} }
public static class kNNClustererConfigParameters implements Jsonizable { public static class kNNClustererConfigParameters implements Jsonizable {
public static final double defaultRadius = 1.0d; public static final double defaultRadius = 1.0d;
public static final int defaultBlockingNgramSize = 6; public static final int defaultBlockingNgramSize = 6;
@JsonProperty("radius")
public double radius = defaultRadius; public double radius = defaultRadius;
@JsonProperty("blocking-ngram-size")
public int blockingNgramSize = defaultBlockingNgramSize; public int blockingNgramSize = defaultBlockingNgramSize;
@Override @Override
@ -302,6 +318,21 @@ public class kNNClusterer extends Clusterer {
writer.endArray(); writer.endArray();
} }
/**
 * Turns a set of values into a list of {@link ClusteredEntry} objects.
 * Each value is paired with the count recorded for it in {@code _counts},
 * and the list is sorted with {@code ClusteredEntry.comparator}.
 *
 * @param s the values of one cluster
 * @return the sorted entries for these values
 */
protected List<ClusteredEntry> getClusteredEntries(Set<Serializable> s) {
    return s.stream()
            .map(value -> new ClusteredEntry(value, _counts.get(value)))
            .sorted(ClusteredEntry.comparator)
            .collect(Collectors.toList());
}
/**
 * Builds the value Jackson serializes for this clusterer.
 * Clusters with a single element (or none) are left out; each remaining
 * cluster is converted by {@code getClusteredEntries}.
 *
 * @return one list of entries per cluster that has more than one element
 */
@JsonValue
public List<List<ClusteredEntry>> getJsonRepresentation() {
    return _clusters.stream()
            .filter(cluster -> cluster.size() > 1)
            .map(this::getClusteredEntries)
            .collect(Collectors.toList());
}
private void count(Serializable s) { private void count(Serializable s) {
if (_counts.containsKey(s)) { if (_counts.containsKey(s)) {
_counts.put(s, _counts.get(s) + 1); _counts.put(s, _counts.get(s) + 1);