Clustering config Jackson deserialization

This commit is contained in:
Antonin Delpeuch 2018-11-20 13:58:21 +00:00
parent d26d7b4a42
commit ba8e406a97
6 changed files with 82 additions and 89 deletions

View File

@ -1,8 +1,11 @@
package com.google.refine.clustering;
import org.json.JSONObject;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonSubTypes.Type;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
import com.google.refine.model.Project;
/**
@ -10,23 +13,27 @@ import com.google.refine.model.Project;
* @author Antonin Delpeuch
*
*/
@JsonTypeInfo(
use=JsonTypeInfo.Id.NAME,
include=JsonTypeInfo.As.PROPERTY,
property="type")
@JsonSubTypes({
@Type(value = kNNClustererConfig.class, name = "knn"),
@Type(value = BinningClustererConfig.class, name = "binning") })
public abstract class ClustererConfig {
protected String columnName;
/**
 * Populates this configuration from an org.json payload by copying its
 * "column" string into {@code columnName}. (TODO: delete)
 *
 * @param o JSON object from which the "column" entry is read
 */
public void initializeFromJSON(JSONObject o) {
    final String column = o.getString("column");
    this.columnName = column;
}
/**
 * @return the configured column name, exposed to Jackson as the "column" property
 */
@JsonProperty("column")
public String getColumnName() {
    return this.columnName;
}
/**
 * Sets the column name; bound by Jackson to the "column" property.
 *
 * @param name value stored as the column name
 */
@JsonProperty("column")
public void setColumnName(String name) {
    this.columnName = name;
}
/**
* Instantiate the configuration on a particular project.
* @param project

View File

@ -44,7 +44,6 @@ import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -67,33 +66,39 @@ public class BinningClusterer extends Clusterer {
public static class BinningClustererConfig extends ClustererConfig {
@JsonProperty("function")
@JsonIgnore
private String _keyerName;
@JsonIgnore
private Keyer _keyer;
private BinningParameters _parameters;
/**
 * Reads this configuration from an org.json payload: the superclass fields
 * first, then the keyer name from "function", then the optional "params"
 * object.
 *
 * @param o JSON object holding "function" and, optionally, "params"
 */
@Override
public void initializeFromJSON(JSONObject o) {
    super.initializeFromJSON(o);
    _keyerName = o.getString("function");
    // Lower-case with Locale.ROOT so the lookup key does not depend on the
    // JVM default locale (e.g. Turkish maps 'I' to a dotless 'ı').
    _keyer = _keyers.get(_keyerName.toLowerCase(java.util.Locale.ROOT));
    if (o.has("params")) {
        _parameters = BinningParameters.reconstruct(o.getJSONObject("params"));
    } else {
        _parameters = null;
    }
}
@JsonIgnore
private BinningParameters _parameters = null;
/**
 * @return the keyer stored in this configuration; ignored by Jackson
 */
@JsonIgnore
public Keyer getKeyer() {
    return this._keyer;
}
/**
 * Records the keyer name and looks up the matching keyer in {@code _keyers};
 * bound by Jackson to the "function" property.
 *
 * @param keyerName name of the keying function; matched case-insensitively
 */
@JsonProperty("function")
public void setKeyer(String keyerName) {
    _keyerName = keyerName;
    // Lower-case with Locale.ROOT so the lookup key does not depend on the
    // JVM default locale (e.g. Turkish maps 'I' to a dotless 'ı').
    // NOTE(review): presumably _keyers is keyed by lower-case ASCII names - confirm.
    _keyer = _keyers.get(_keyerName.toLowerCase(java.util.Locale.ROOT));
}
/**
 * @return the keyer name as it was supplied, exposed to Jackson as "function"
 */
@JsonProperty("function")
public String getKeyerName() {
    return this._keyerName;
}
/**
 * @return the binning parameters, exposed to Jackson as "params" and
 *         left out of the output when null
 */
@JsonProperty("params")
@JsonInclude(Include.NON_NULL)
public BinningParameters getParameters() {
    return this._parameters;
}
/**
 * Stores the binning parameters; bound by Jackson to the "params" property.
 *
 * @param params parameters to keep on this configuration
 */
@JsonProperty("params")
public void setParameters(BinningParameters params) {
    this._parameters = params;
}
@Override
public BinningClusterer apply(Project project) {
@ -112,13 +117,7 @@ public class BinningClusterer extends Clusterer {
public static class BinningParameters {
@JsonProperty("ngram-size")
@JsonInclude(Include.NON_DEFAULT)
public int ngramSize;
/**
 * Builds binning parameters from an org.json object, taking "ngram-size"
 * when present and 0 otherwise.
 *
 * @param o JSON object that may contain "ngram-size"
 * @return a new parameters instance
 */
public static BinningParameters reconstruct(JSONObject o) {
    BinningParameters result = new BinningParameters();
    if (o.has("ngram-size")) {
        result.ngramSize = o.getInt("ngram-size");
    } else {
        result.ngramSize = 0;
    }
    return result;
}
public int ngramSize = 0;
}
protected Keyer _keyer;

View File

@ -43,7 +43,6 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -75,31 +74,38 @@ import edu.mit.simile.vicino.distances.PPMDistance;
public class kNNClusterer extends Clusterer {
public static class kNNClustererConfig extends ClustererConfig {
@JsonProperty("function")
@JsonIgnore
private String _distanceStr;
@JsonIgnore
private Distance _distance;
private kNNClustererConfigParameters _parameters;
/**
 * Reads this configuration from an org.json payload: the superclass fields
 * first, then the distance name from "function", then the optional
 * "params" object.
 *
 * @param o JSON object holding "function" and, optionally, "params"
 */
@Override
public void initializeFromJSON(JSONObject o) {
    super.initializeFromJSON(o);
    _distanceStr = o.getString("function");
    // Lower-case with Locale.ROOT so the lookup key does not depend on the
    // JVM default locale (e.g. Turkish maps 'I' to a dotless 'ı').
    _distance = _distances.get(_distanceStr.toLowerCase(java.util.Locale.ROOT));
    if (o.has("params")) {
        _parameters = kNNClustererConfigParameters.reconstruct(o.getJSONObject("params"));
    } else {
        _parameters = null;
    }
}
@JsonIgnore
private kNNClustererConfigParameters _parameters = null;
/**
 * @return the distance stored in this configuration; ignored by Jackson
 */
@JsonIgnore
public Distance getDistance() {
    return this._distance;
}
/**
 * Records the distance name and looks up the matching distance in
 * {@code _distances}; bound by Jackson to the "function" property.
 *
 * @param distanceStr name of the distance function; matched case-insensitively
 */
@JsonProperty("function")
public void setDistance(String distanceStr) {
    _distanceStr = distanceStr;
    // Lower-case with Locale.ROOT so the lookup key does not depend on the
    // JVM default locale (e.g. Turkish maps 'I' to a dotless 'ı').
    // NOTE(review): presumably _distances is keyed by lower-case ASCII names - confirm.
    _distance = _distances.get(_distanceStr.toLowerCase(java.util.Locale.ROOT));
}
/**
 * @return the distance name as it was supplied, exposed to Jackson as "function"
 */
@JsonProperty("function")
public String getDistanceStr() {
    return this._distanceStr;
}
/**
 * @return the kNN parameters, exposed to Jackson as "params"
 */
@JsonProperty("params")
public kNNClustererConfigParameters getParameters() {
    return this._parameters;
}
/**
 * Stores the kNN parameters; bound by Jackson to the "params" property.
 *
 * @param params parameters to keep on this configuration
 */
@JsonProperty("params")
public void setParameters(kNNClustererConfigParameters params) {
    this._parameters = params;
}
@Override
public kNNClusterer apply(Project project) {
@ -122,17 +128,6 @@ public class kNNClusterer extends Clusterer {
public double radius = defaultRadius;
@JsonProperty("blocking-ngram-size")
public int blockingNgramSize = defaultBlockingNgramSize;
/**
 * Builds kNN parameters from an org.json object. "radius" and
 * "blocking-ngram-size" override the field defaults only when present.
 *
 * @param o JSON object that may contain "radius" and "blocking-ngram-size"
 * @return a new parameters instance
 */
public static kNNClustererConfigParameters reconstruct(JSONObject o) {
    final kNNClustererConfigParameters result = new kNNClustererConfigParameters();
    // An absent key re-assigns the current value, i.e. keeps the default.
    result.radius = o.has("radius")
            ? o.getDouble("radius")
            : result.radius;
    result.blockingNgramSize = o.has("blocking-ngram-size")
            ? o.getInt("blocking-ngram-size")
            : result.blockingNgramSize;
    return result;
}
}
private Distance _distance;

View File

@ -39,17 +39,15 @@ import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.browsing.Engine;
import com.google.refine.clustering.Clusterer;
import com.google.refine.clustering.ClustererConfig;
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
import com.google.refine.commands.Command;
import com.google.refine.model.Project;
import com.google.refine.util.ParsingUtilities;
public class ComputeClustersCommand extends Command {
@ -63,24 +61,15 @@ public class ComputeClustersCommand extends Command {
long start = System.currentTimeMillis();
Project project = getProject(request);
Engine engine = getEngine(request, project);
JSONObject clusterer_conf = getJsonParameter(request,"clusterer");
String clusterer_conf = request.getParameter("clusterer");
ClustererConfig clustererConfig = ParsingUtilities.mapper.readValue(clusterer_conf, ClustererConfig.class);
String type = clusterer_conf.has("type") ? clusterer_conf.getString("type") : "binning";
ClustererConfig clustererConfig = null;
if ("knn".equals(type)) {
clustererConfig = new kNNClustererConfig();
} else {
clustererConfig = new BinningClustererConfig();
}
clustererConfig.initializeFromJSON(clusterer_conf);
Clusterer clusterer = clustererConfig.apply(project);
clusterer.computeClusters(engine);
respondJSON(response, clusterer);
logger.info("computed clusters [{},{}] in {}ms", new Object[] { type, clusterer_conf.getString("function"), Long.toString(System.currentTimeMillis() - start) });
logger.info("computed clusters [{}] in {}ms", new Object[] { clustererConfig.getType(), Long.toString(System.currentTimeMillis() - start) });
} catch (Exception e) {
respondException(response, e);
}

View File

@ -1,14 +1,18 @@
package com.google.refine.tests.clustering;
import org.json.JSONObject;
import java.io.IOException;
import org.testng.annotations.Test;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.google.refine.browsing.Engine;
import com.google.refine.clustering.binning.BinningClusterer;
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
import com.google.refine.model.Project;
import com.google.refine.tests.RefineTest;
import com.google.refine.tests.util.TestUtils;
import com.google.refine.util.ParsingUtilities;
public class BinningClustererTests extends RefineTest {
@ -30,28 +34,25 @@ public class BinningClustererTests extends RefineTest {
+ "]";
@Test
public void testSerializeBinningClustererConfig() {
BinningClustererConfig config = new BinningClustererConfig();
config.initializeFromJSON(new JSONObject(configJson));
public void testSerializeBinningClustererConfig() throws JsonParseException, JsonMappingException, IOException {
BinningClustererConfig config = ParsingUtilities.mapper.readValue(configJson, BinningClustererConfig.class);
TestUtils.isSerializedTo(config, configJson);
}
@Test
public void testSerializeBinningClustererConfigWithNgrams() {
BinningClustererConfig config = new BinningClustererConfig();
config.initializeFromJSON(new JSONObject(configNgramJson));
public void testSerializeBinningClustererConfigWithNgrams() throws JsonParseException, JsonMappingException, IOException {
BinningClustererConfig config = ParsingUtilities.mapper.readValue(configNgramJson, BinningClustererConfig.class);
TestUtils.isSerializedTo(config, configNgramJson);
}
@Test
public void testSerializeBinningClusterer() {
public void testSerializeBinningClusterer() throws JsonParseException, JsonMappingException, IOException {
Project project = createCSVProject("column\n"
+ "a\n"
+ "à\n"
+ "c\n"
+ "ĉ\n");
BinningClustererConfig config = new BinningClustererConfig();
config.initializeFromJSON(new JSONObject(configJson));
BinningClustererConfig config = ParsingUtilities.mapper.readValue(configJson, BinningClustererConfig.class);
BinningClusterer clusterer = config.apply(project);
clusterer.computeClusters(new Engine(project));
TestUtils.isSerializedTo(clusterer, clustererJson);

View File

@ -1,14 +1,18 @@
package com.google.refine.tests.clustering;
import org.json.JSONObject;
import java.io.IOException;
import org.testng.annotations.Test;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.google.refine.browsing.Engine;
import com.google.refine.clustering.knn.kNNClusterer;
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
import com.google.refine.model.Project;
import com.google.refine.tests.RefineTest;
import com.google.refine.tests.util.TestUtils;
import com.google.refine.util.ParsingUtilities;
public class kNNClustererTests extends RefineTest {
@ -23,22 +27,20 @@ public class kNNClustererTests extends RefineTest {
+ "]";
@Test
public void serializekNNClustererConfig() {
kNNClustererConfig config = new kNNClustererConfig();
config.initializeFromJSON(new JSONObject(configJson));
public void serializekNNClustererConfig() throws JsonParseException, JsonMappingException, IOException {
kNNClustererConfig config = ParsingUtilities.mapper.readValue(configJson, kNNClustererConfig.class);
TestUtils.isSerializedTo(config, configJson);
}
@Test
public void serializekNNClusterer() {
public void serializekNNClusterer() throws JsonParseException, JsonMappingException, IOException {
Project project = createCSVProject("column\n"
+ "ab\n"
+ "abc\n"
+ "c\n"
+ "ĉ\n");
kNNClustererConfig config = new kNNClustererConfig();
config.initializeFromJSON(new JSONObject(configJson));
kNNClustererConfig config = ParsingUtilities.mapper.readValue(configJson, kNNClustererConfig.class);
kNNClusterer clusterer = config.apply(project);
clusterer.computeClusters(new Engine(project));