Clustering config Jackson deserialization
This commit is contained in:
parent
d26d7b4a42
commit
ba8e406a97
@ -1,8 +1,11 @@
|
||||
package com.google.refine.clustering;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonSubTypes;
|
||||
import com.fasterxml.jackson.annotation.JsonSubTypes.Type;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
|
||||
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
|
||||
import com.google.refine.model.Project;
|
||||
|
||||
/**
|
||||
@ -10,23 +13,27 @@ import com.google.refine.model.Project;
|
||||
* @author Antonin Delpeuch
|
||||
*
|
||||
*/
|
||||
@JsonTypeInfo(
|
||||
use=JsonTypeInfo.Id.NAME,
|
||||
include=JsonTypeInfo.As.PROPERTY,
|
||||
property="type")
|
||||
@JsonSubTypes({
|
||||
@Type(value = kNNClustererConfig.class, name = "knn"),
|
||||
@Type(value = BinningClustererConfig.class, name = "binning") })
|
||||
public abstract class ClustererConfig {
|
||||
|
||||
protected String columnName;
|
||||
|
||||
/**
|
||||
* Reads the configuration from a JSON payload (TODO: delete).
|
||||
* At this level only the "column" entry is read, via getString, into columnName.
|
||||
* @param o the JSON object to read the "column" entry from
|
||||
*/
|
||||
public void initializeFromJSON(JSONObject o) {
|
||||
columnName = o.getString("column");
|
||||
}
|
||||
|
||||
/** Returns the column name stored in this configuration; annotated as the JSON property "column". */
@JsonProperty("column")
|
||||
public String getColumnName() {
|
||||
return columnName;
|
||||
}
|
||||
|
||||
/** Stores the given column name in this configuration; annotated as the JSON property "column". */
@JsonProperty("column")
|
||||
public void setColumnName(String name) {
|
||||
columnName = name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiate the configuration on a particular project.
|
||||
* @param project
|
||||
|
@ -44,7 +44,6 @@ import java.util.Map.Entry;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -67,33 +66,39 @@ public class BinningClusterer extends Clusterer {
|
||||
|
||||
public static class BinningClustererConfig extends ClustererConfig {
|
||||
|
||||
@JsonProperty("function")
|
||||
@JsonIgnore
|
||||
private String _keyerName;
|
||||
@JsonIgnore
|
||||
private Keyer _keyer;
|
||||
private BinningParameters _parameters;
|
||||
|
||||
/**
 * Initializes this binning configuration from a JSONObject.
 * Calls the superclass initializer first, then reads "function" as the keyer name,
 * looks the keyer up in _keyers under the lower-cased name, and reads the optional
 * "params" object through BinningParameters.reconstruct. When "params" is absent,
 * _parameters is set to null.
 * @param o the JSON object to read from
 */
@Override
|
||||
public void initializeFromJSON(JSONObject o) {
|
||||
super.initializeFromJSON(o);
|
||||
_keyerName = o.getString("function");
|
||||
// The lookup key is the lower-cased function name.
_keyer = _keyers.get(_keyerName.toLowerCase());
|
||||
if(o.has("params")) {
|
||||
_parameters = BinningParameters.reconstruct(o.getJSONObject("params"));
|
||||
} else {
|
||||
_parameters = null;
|
||||
}
|
||||
}
|
||||
@JsonIgnore
|
||||
private BinningParameters _parameters = null;
|
||||
|
||||
/** Returns the Keyer held in _keyer; the accessor is marked @JsonIgnore. */
@JsonIgnore
|
||||
public Keyer getKeyer() {
|
||||
return _keyer;
|
||||
}
|
||||
|
||||
/**
 * Stores the keyer name and looks up the matching Keyer in _keyers under the
 * lower-cased name. Annotated as the JSON property "function".
 * NOTE(review): whatever _keyers.get returns for an unrecognized name is stored
 * as-is, and keyerName is dereferenced without a null check - confirm callers
 * always pass a known, non-null name.
 * @param keyerName the name of the keying function
 */
@JsonProperty("function")
|
||||
public void setKeyer(String keyerName) {
|
||||
_keyerName = keyerName;
|
||||
_keyer = _keyers.get(_keyerName.toLowerCase());
|
||||
}
|
||||
|
||||
/** Returns the keyer name as it was stored (not lower-cased); annotated as the JSON property "function". */
@JsonProperty("function")
|
||||
public String getKeyerName() {
|
||||
return _keyerName;
|
||||
}
|
||||
|
||||
/**
 * Returns the binning parameters, which may be null.
 * Annotated as the JSON property "params" with Include.NON_NULL.
 */
@JsonProperty("params")
|
||||
@JsonInclude(Include.NON_NULL)
|
||||
public BinningParameters getParameters() {
|
||||
return _parameters;
|
||||
}
|
||||
|
||||
/** Stores the given binning parameters; annotated as the JSON property "params". */
@JsonProperty("params")
|
||||
public void setParameters(BinningParameters params) {
|
||||
_parameters = params;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BinningClusterer apply(Project project) {
|
||||
@ -112,13 +117,7 @@ public class BinningClusterer extends Clusterer {
|
||||
public static class BinningParameters {
|
||||
@JsonProperty("ngram-size")
|
||||
@JsonInclude(Include.NON_DEFAULT)
|
||||
public int ngramSize;
|
||||
|
||||
/**
 * Builds a BinningParameters instance from a JSONObject.
 * ngramSize is taken from the "ngram-size" entry when present, and is 0 otherwise.
 * @param o the JSON object holding the parameters
 * @return a new BinningParameters with ngramSize set
 */
public static BinningParameters reconstruct(JSONObject o) {
|
||||
BinningParameters parameters = new BinningParameters();
|
||||
parameters.ngramSize = o.has("ngram-size") ? o.getInt("ngram-size") : 0;
|
||||
return parameters;
|
||||
}
|
||||
public int ngramSize = 0;
|
||||
}
|
||||
|
||||
protected Keyer _keyer;
|
||||
|
@ -43,7 +43,6 @@ import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -75,31 +74,38 @@ import edu.mit.simile.vicino.distances.PPMDistance;
|
||||
public class kNNClusterer extends Clusterer {
|
||||
|
||||
public static class kNNClustererConfig extends ClustererConfig {
|
||||
@JsonProperty("function")
|
||||
@JsonIgnore
|
||||
private String _distanceStr;
|
||||
@JsonIgnore
|
||||
private Distance _distance;
|
||||
private kNNClustererConfigParameters _parameters;
|
||||
|
||||
/**
 * Initializes this kNN configuration from a JSONObject.
 * Calls the superclass initializer first, then reads "function" as the distance name,
 * looks the distance up in _distances under the lower-cased name, and reads the optional
 * "params" object through kNNClustererConfigParameters.reconstruct. When "params" is
 * absent, _parameters is set to null.
 * @param o the JSON object to read from
 */
public void initializeFromJSON(JSONObject o) {
|
||||
super.initializeFromJSON(o);
|
||||
_distanceStr = o.getString("function");
|
||||
// The lookup key is the lower-cased function name.
_distance = _distances.get(_distanceStr.toLowerCase());
|
||||
if(o.has("params")) {
|
||||
_parameters = kNNClustererConfigParameters.reconstruct(o.getJSONObject("params"));
|
||||
} else {
|
||||
_parameters = null;
|
||||
}
|
||||
}
|
||||
@JsonIgnore
|
||||
private kNNClustererConfigParameters _parameters = null;
|
||||
|
||||
/** Returns the Distance held in _distance; the accessor is marked @JsonIgnore. */
@JsonIgnore
|
||||
public Distance getDistance() {
|
||||
return _distance;
|
||||
}
|
||||
|
||||
/**
 * Stores the distance name and looks up the matching Distance in _distances under
 * the lower-cased name. Annotated as the JSON property "function".
 * NOTE(review): whatever _distances.get returns for an unrecognized name is stored
 * as-is, and distanceStr is dereferenced without a null check - confirm callers
 * always pass a known, non-null name.
 * @param distanceStr the name of the distance function
 */
@JsonProperty("function")
|
||||
public void setDistance(String distanceStr) {
|
||||
_distanceStr = distanceStr;
|
||||
_distance = _distances.get(_distanceStr.toLowerCase());
|
||||
}
|
||||
|
||||
/** Returns the distance name as it was stored (not lower-cased); annotated as the JSON property "function". */
@JsonProperty("function")
|
||||
public String getDistanceStr() {
|
||||
return _distanceStr;
|
||||
}
|
||||
|
||||
/** Returns the kNN parameters, which may be null; annotated as the JSON property "params". */
@JsonProperty("params")
|
||||
public kNNClustererConfigParameters getParameters() {
|
||||
return _parameters;
|
||||
}
|
||||
|
||||
/** Stores the given kNN parameters; annotated as the JSON property "params". */
@JsonProperty("params")
|
||||
public void setParameters(kNNClustererConfigParameters params) {
|
||||
_parameters = params;
|
||||
}
|
||||
|
||||
@Override
|
||||
public kNNClusterer apply(Project project) {
|
||||
@ -122,17 +128,6 @@ public class kNNClusterer extends Clusterer {
|
||||
public double radius = defaultRadius;
|
||||
@JsonProperty("blocking-ngram-size")
|
||||
public int blockingNgramSize = defaultBlockingNgramSize;
|
||||
|
||||
/**
 * Builds a kNNClustererConfigParameters instance from a JSONObject.
 * "radius" and "blocking-ngram-size" are copied only when present; an absent key
 * leaves the corresponding field at the value its field initializer gave it.
 * @param o the JSON object holding the parameters
 * @return a new parameters object with the present entries applied
 */
public static kNNClustererConfigParameters reconstruct(JSONObject o) {
|
||||
kNNClustererConfigParameters params = new kNNClustererConfigParameters();
|
||||
if(o.has("radius")) {
|
||||
params.radius = o.getDouble("radius");
|
||||
}
|
||||
if(o.has("blocking-ngram-size")) {
|
||||
params.blockingNgramSize = o.getInt("blocking-ngram-size");
|
||||
}
|
||||
return params;
|
||||
}
|
||||
}
|
||||
|
||||
private Distance _distance;
|
||||
|
@ -39,17 +39,15 @@ import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.browsing.Engine;
|
||||
import com.google.refine.clustering.Clusterer;
|
||||
import com.google.refine.clustering.ClustererConfig;
|
||||
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
|
||||
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class ComputeClustersCommand extends Command {
|
||||
|
||||
@ -63,24 +61,15 @@ public class ComputeClustersCommand extends Command {
|
||||
long start = System.currentTimeMillis();
|
||||
Project project = getProject(request);
|
||||
Engine engine = getEngine(request, project);
|
||||
JSONObject clusterer_conf = getJsonParameter(request,"clusterer");
|
||||
String clusterer_conf = request.getParameter("clusterer");
|
||||
ClustererConfig clustererConfig = ParsingUtilities.mapper.readValue(clusterer_conf, ClustererConfig.class);
|
||||
|
||||
String type = clusterer_conf.has("type") ? clusterer_conf.getString("type") : "binning";
|
||||
|
||||
ClustererConfig clustererConfig = null;
|
||||
if ("knn".equals(type)) {
|
||||
clustererConfig = new kNNClustererConfig();
|
||||
} else {
|
||||
clustererConfig = new BinningClustererConfig();
|
||||
}
|
||||
|
||||
clustererConfig.initializeFromJSON(clusterer_conf);
|
||||
Clusterer clusterer = clustererConfig.apply(project);
|
||||
|
||||
clusterer.computeClusters(engine);
|
||||
|
||||
respondJSON(response, clusterer);
|
||||
logger.info("computed clusters [{},{}] in {}ms", new Object[] { type, clusterer_conf.getString("function"), Long.toString(System.currentTimeMillis() - start) });
|
||||
logger.info("computed clusters [{}] in {}ms", new Object[] { clustererConfig.getType(), Long.toString(System.currentTimeMillis() - start) });
|
||||
} catch (Exception e) {
|
||||
respondException(response, e);
|
||||
}
|
||||
|
@ -1,14 +1,18 @@
|
||||
package com.google.refine.tests.clustering;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonParseException;
|
||||
import com.fasterxml.jackson.databind.JsonMappingException;
|
||||
import com.google.refine.browsing.Engine;
|
||||
import com.google.refine.clustering.binning.BinningClusterer;
|
||||
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class BinningClustererTests extends RefineTest {
|
||||
|
||||
@ -30,28 +34,25 @@ public class BinningClustererTests extends RefineTest {
|
||||
+ "]";
|
||||
|
||||
// Builds a BinningClustererConfig from configJson and passes it, together with
// configJson, to TestUtils.isSerializedTo.
// NOTE(review): this span shows two signatures and two ways of building the config
// (initializeFromJSON and ParsingUtilities.mapper.readValue); it appears to interleave
// the pre- and post-change versions of the same test - confirm against the real file.
@Test
|
||||
public void testSerializeBinningClustererConfig() {
|
||||
BinningClustererConfig config = new BinningClustererConfig();
|
||||
config.initializeFromJSON(new JSONObject(configJson));
|
||||
public void testSerializeBinningClustererConfig() throws JsonParseException, JsonMappingException, IOException {
|
||||
BinningClustererConfig config = ParsingUtilities.mapper.readValue(configJson, BinningClustererConfig.class);
|
||||
TestUtils.isSerializedTo(config, configJson);
|
||||
}
|
||||
|
||||
// Builds a BinningClustererConfig from configNgramJson and passes it, together with
// configNgramJson, to TestUtils.isSerializedTo.
// NOTE(review): this span shows two signatures and two ways of building the config
// (initializeFromJSON and ParsingUtilities.mapper.readValue); it appears to interleave
// the pre- and post-change versions of the same test - confirm against the real file.
@Test
|
||||
public void testSerializeBinningClustererConfigWithNgrams() {
|
||||
BinningClustererConfig config = new BinningClustererConfig();
|
||||
config.initializeFromJSON(new JSONObject(configNgramJson));
|
||||
public void testSerializeBinningClustererConfigWithNgrams() throws JsonParseException, JsonMappingException, IOException {
|
||||
BinningClustererConfig config = ParsingUtilities.mapper.readValue(configNgramJson, BinningClustererConfig.class);
|
||||
TestUtils.isSerializedTo(config, configNgramJson);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeBinningClusterer() {
|
||||
public void testSerializeBinningClusterer() throws JsonParseException, JsonMappingException, IOException {
|
||||
Project project = createCSVProject("column\n"
|
||||
+ "a\n"
|
||||
+ "à\n"
|
||||
+ "c\n"
|
||||
+ "ĉ\n");
|
||||
BinningClustererConfig config = new BinningClustererConfig();
|
||||
config.initializeFromJSON(new JSONObject(configJson));
|
||||
BinningClustererConfig config = ParsingUtilities.mapper.readValue(configJson, BinningClustererConfig.class);
|
||||
BinningClusterer clusterer = config.apply(project);
|
||||
clusterer.computeClusters(new Engine(project));
|
||||
TestUtils.isSerializedTo(clusterer, clustererJson);
|
||||
|
@ -1,14 +1,18 @@
|
||||
package com.google.refine.tests.clustering;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonParseException;
|
||||
import com.fasterxml.jackson.databind.JsonMappingException;
|
||||
import com.google.refine.browsing.Engine;
|
||||
import com.google.refine.clustering.knn.kNNClusterer;
|
||||
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
import com.google.refine.tests.util.TestUtils;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class kNNClustererTests extends RefineTest {
|
||||
|
||||
@ -23,22 +27,20 @@ public class kNNClustererTests extends RefineTest {
|
||||
+ "]";
|
||||
|
||||
// Builds a kNNClustererConfig from configJson and passes it, together with
// configJson, to TestUtils.isSerializedTo.
// NOTE(review): this span shows two signatures and two ways of building the config
// (initializeFromJSON and ParsingUtilities.mapper.readValue); it appears to interleave
// the pre- and post-change versions of the same test - confirm against the real file.
@Test
|
||||
public void serializekNNClustererConfig() {
|
||||
kNNClustererConfig config = new kNNClustererConfig();
|
||||
config.initializeFromJSON(new JSONObject(configJson));
|
||||
public void serializekNNClustererConfig() throws JsonParseException, JsonMappingException, IOException {
|
||||
kNNClustererConfig config = ParsingUtilities.mapper.readValue(configJson, kNNClustererConfig.class);
|
||||
TestUtils.isSerializedTo(config, configJson);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void serializekNNClusterer() {
|
||||
public void serializekNNClusterer() throws JsonParseException, JsonMappingException, IOException {
|
||||
Project project = createCSVProject("column\n"
|
||||
+ "ab\n"
|
||||
+ "abc\n"
|
||||
+ "c\n"
|
||||
+ "ĉ\n");
|
||||
|
||||
kNNClustererConfig config = new kNNClustererConfig();
|
||||
config.initializeFromJSON(new JSONObject(configJson));
|
||||
kNNClustererConfig config = ParsingUtilities.mapper.readValue(configJson, kNNClustererConfig.class);
|
||||
kNNClusterer clusterer = config.apply(project);
|
||||
clusterer.computeClusters(new Engine(project));
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user