Clustering config Jackson deserialization

This commit is contained in:
Antonin Delpeuch 2018-11-20 13:58:21 +00:00
parent d26d7b4a42
commit ba8e406a97
6 changed files with 82 additions and 89 deletions

View File

@ -1,8 +1,11 @@
package com.google.refine.clustering; package com.google.refine.clustering;
import org.json.JSONObject;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonSubTypes.Type;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
import com.google.refine.model.Project; import com.google.refine.model.Project;
/** /**
@ -10,23 +13,27 @@ import com.google.refine.model.Project;
* @author Antonin Delpeuch * @author Antonin Delpeuch
* *
*/ */
@JsonTypeInfo(
use=JsonTypeInfo.Id.NAME,
include=JsonTypeInfo.As.PROPERTY,
property="type")
@JsonSubTypes({
@Type(value = kNNClustererConfig.class, name = "knn"),
@Type(value = BinningClustererConfig.class, name = "binning") })
public abstract class ClustererConfig { public abstract class ClustererConfig {
protected String columnName; protected String columnName;
/**
* Reads the configuration from a JSON payload (TODO: delete)
* @param o
*/
public void initializeFromJSON(JSONObject o) {
columnName = o.getString("column");
}
@JsonProperty("column") @JsonProperty("column")
public String getColumnName() { public String getColumnName() {
return columnName; return columnName;
} }
/**
 * Stores the column name for this configuration. Jackson invokes this
 * setter for the JSON property {@code "column"}.
 *
 * @param name the value to record as the column name
 */
@JsonProperty("column")
public void setColumnName(String name) {
    this.columnName = name;
}
/** /**
* Instantiate the configuration on a particular project. * Instantiate the configuration on a particular project.
* @param project * @param project

View File

@ -44,7 +44,6 @@ import java.util.Map.Entry;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.json.JSONObject;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -67,33 +66,39 @@ public class BinningClusterer extends Clusterer {
public static class BinningClustererConfig extends ClustererConfig { public static class BinningClustererConfig extends ClustererConfig {
@JsonProperty("function") @JsonIgnore
private String _keyerName; private String _keyerName;
@JsonIgnore
private Keyer _keyer; private Keyer _keyer;
private BinningParameters _parameters; @JsonIgnore
private BinningParameters _parameters = null;
@Override
public void initializeFromJSON(JSONObject o) {
super.initializeFromJSON(o);
_keyerName = o.getString("function");
_keyer = _keyers.get(_keyerName.toLowerCase());
if(o.has("params")) {
_parameters = BinningParameters.reconstruct(o.getJSONObject("params"));
} else {
_parameters = null;
}
}
@JsonIgnore @JsonIgnore
public Keyer getKeyer() { public Keyer getKeyer() {
return _keyer; return _keyer;
} }
/**
 * Sets the keying function by name. Jackson invokes this setter for the
 * JSON property {@code "function"}. The name is stored as given, and the
 * matching {@code Keyer} is looked up in {@code _keyers} under its
 * lower-cased form.
 *
 * @param keyerName the name of the keying function
 * @throws NullPointerException if {@code keyerName} is {@code null}
 */
@JsonProperty("function")
public void setKeyer(String keyerName) {
    _keyerName = keyerName;
    // Lower-case with a fixed locale: the default-locale variant maps 'I' to a
    // dotless 'ı' under e.g. a Turkish locale, which would make the lookup miss.
    // NOTE(review): assumes _keyers is keyed by lower-case ASCII names, and that
    // an unknown name simply yields null here - confirm against its definition.
    _keyer = _keyers.get(keyerName.toLowerCase(java.util.Locale.ROOT));
}
/**
 * Returns the keying function name exactly as it was supplied to
 * {@code setKeyer}. Serialized as the JSON property {@code "function"}.
 *
 * @return the stored keyer name
 */
@JsonProperty("function")
public String getKeyerName() {
    return this._keyerName;
}
@JsonProperty("params") @JsonProperty("params")
@JsonInclude(Include.NON_NULL) @JsonInclude(Include.NON_NULL)
public BinningParameters getParameters() { public BinningParameters getParameters() {
return _parameters; return _parameters;
} }
/**
 * Stores the binning parameters. Jackson invokes this setter for the JSON
 * property {@code "params"}.
 *
 * @param params the parameters object to keep
 */
@JsonProperty("params")
public void setParameters(BinningParameters params) {
    this._parameters = params;
}
@Override @Override
public BinningClusterer apply(Project project) { public BinningClusterer apply(Project project) {
@ -112,13 +117,7 @@ public class BinningClusterer extends Clusterer {
public static class BinningParameters { public static class BinningParameters {
@JsonProperty("ngram-size") @JsonProperty("ngram-size")
@JsonInclude(Include.NON_DEFAULT) @JsonInclude(Include.NON_DEFAULT)
public int ngramSize; public int ngramSize = 0;
public static BinningParameters reconstruct(JSONObject o) {
BinningParameters parameters = new BinningParameters();
parameters.ngramSize = o.has("ngram-size") ? o.getInt("ngram-size") : 0;
return parameters;
}
} }
protected Keyer _keyer; protected Keyer _keyer;

View File

@ -43,7 +43,6 @@ import java.util.Map.Entry;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.json.JSONObject;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -75,31 +74,38 @@ import edu.mit.simile.vicino.distances.PPMDistance;
public class kNNClusterer extends Clusterer { public class kNNClusterer extends Clusterer {
public static class kNNClustererConfig extends ClustererConfig { public static class kNNClustererConfig extends ClustererConfig {
@JsonProperty("function") @JsonIgnore
private String _distanceStr; private String _distanceStr;
@JsonIgnore
private Distance _distance; private Distance _distance;
private kNNClustererConfigParameters _parameters; @JsonIgnore
private kNNClustererConfigParameters _parameters = null;
public void initializeFromJSON(JSONObject o) {
super.initializeFromJSON(o);
_distanceStr = o.getString("function");
_distance = _distances.get(_distanceStr.toLowerCase());
if(o.has("params")) {
_parameters = kNNClustererConfigParameters.reconstruct(o.getJSONObject("params"));
} else {
_parameters = null;
}
}
@JsonIgnore @JsonIgnore
public Distance getDistance() { public Distance getDistance() {
return _distance; return _distance;
} }
/**
 * Sets the distance function by name. Jackson invokes this setter for the
 * JSON property {@code "function"}. The name is stored as given, and the
 * matching {@code Distance} is looked up in {@code _distances} under its
 * lower-cased form.
 *
 * @param distanceStr the name of the distance function
 * @throws NullPointerException if {@code distanceStr} is {@code null}
 */
@JsonProperty("function")
public void setDistance(String distanceStr) {
    _distanceStr = distanceStr;
    // Lower-case with a fixed locale: the default-locale variant maps 'I' to a
    // dotless 'ı' under e.g. a Turkish locale, which would make the lookup miss.
    // NOTE(review): assumes _distances is keyed by lower-case ASCII names, and
    // that an unknown name simply yields null here - confirm against its definition.
    _distance = _distances.get(distanceStr.toLowerCase(java.util.Locale.ROOT));
}
/**
 * Returns the distance function name exactly as it was supplied to
 * {@code setDistance}. Serialized as the JSON property {@code "function"}.
 *
 * @return the stored distance name
 */
@JsonProperty("function")
public String getDistanceStr() {
    return this._distanceStr;
}
@JsonProperty("params") @JsonProperty("params")
public kNNClustererConfigParameters getParameters() { public kNNClustererConfigParameters getParameters() {
return _parameters; return _parameters;
} }
/**
 * Stores the kNN clusterer parameters. Jackson invokes this setter for the
 * JSON property {@code "params"}.
 *
 * @param params the parameters object to keep
 */
@JsonProperty("params")
public void setParameters(kNNClustererConfigParameters params) {
    this._parameters = params;
}
@Override @Override
public kNNClusterer apply(Project project) { public kNNClusterer apply(Project project) {
@ -122,17 +128,6 @@ public class kNNClusterer extends Clusterer {
public double radius = defaultRadius; public double radius = defaultRadius;
@JsonProperty("blocking-ngram-size") @JsonProperty("blocking-ngram-size")
public int blockingNgramSize = defaultBlockingNgramSize; public int blockingNgramSize = defaultBlockingNgramSize;
public static kNNClustererConfigParameters reconstruct(JSONObject o) {
kNNClustererConfigParameters params = new kNNClustererConfigParameters();
if(o.has("radius")) {
params.radius = o.getDouble("radius");
}
if(o.has("blocking-ngram-size")) {
params.blockingNgramSize = o.getInt("blocking-ngram-size");
}
return params;
}
} }
private Distance _distance; private Distance _distance;

View File

@ -39,17 +39,15 @@ import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import org.json.JSONObject;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.refine.browsing.Engine; import com.google.refine.browsing.Engine;
import com.google.refine.clustering.Clusterer; import com.google.refine.clustering.Clusterer;
import com.google.refine.clustering.ClustererConfig; import com.google.refine.clustering.ClustererConfig;
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
import com.google.refine.commands.Command; import com.google.refine.commands.Command;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.util.ParsingUtilities;
public class ComputeClustersCommand extends Command { public class ComputeClustersCommand extends Command {
@ -63,24 +61,15 @@ public class ComputeClustersCommand extends Command {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
Project project = getProject(request); Project project = getProject(request);
Engine engine = getEngine(request, project); Engine engine = getEngine(request, project);
JSONObject clusterer_conf = getJsonParameter(request,"clusterer"); String clusterer_conf = request.getParameter("clusterer");
ClustererConfig clustererConfig = ParsingUtilities.mapper.readValue(clusterer_conf, ClustererConfig.class);
String type = clusterer_conf.has("type") ? clusterer_conf.getString("type") : "binning";
ClustererConfig clustererConfig = null;
if ("knn".equals(type)) {
clustererConfig = new kNNClustererConfig();
} else {
clustererConfig = new BinningClustererConfig();
}
clustererConfig.initializeFromJSON(clusterer_conf);
Clusterer clusterer = clustererConfig.apply(project); Clusterer clusterer = clustererConfig.apply(project);
clusterer.computeClusters(engine); clusterer.computeClusters(engine);
respondJSON(response, clusterer); respondJSON(response, clusterer);
logger.info("computed clusters [{},{}] in {}ms", new Object[] { type, clusterer_conf.getString("function"), Long.toString(System.currentTimeMillis() - start) }); logger.info("computed clusters [{}] in {}ms", new Object[] { clustererConfig.getType(), Long.toString(System.currentTimeMillis() - start) });
} catch (Exception e) { } catch (Exception e) {
respondException(response, e); respondException(response, e);
} }

View File

@ -1,14 +1,18 @@
package com.google.refine.tests.clustering; package com.google.refine.tests.clustering;
import org.json.JSONObject; import java.io.IOException;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.google.refine.browsing.Engine; import com.google.refine.browsing.Engine;
import com.google.refine.clustering.binning.BinningClusterer; import com.google.refine.clustering.binning.BinningClusterer;
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig; import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.tests.RefineTest; import com.google.refine.tests.RefineTest;
import com.google.refine.tests.util.TestUtils; import com.google.refine.tests.util.TestUtils;
import com.google.refine.util.ParsingUtilities;
public class BinningClustererTests extends RefineTest { public class BinningClustererTests extends RefineTest {
@ -30,28 +34,25 @@ public class BinningClustererTests extends RefineTest {
+ "]"; + "]";
@Test @Test
public void testSerializeBinningClustererConfig() { public void testSerializeBinningClustererConfig() throws JsonParseException, JsonMappingException, IOException {
BinningClustererConfig config = new BinningClustererConfig(); BinningClustererConfig config = ParsingUtilities.mapper.readValue(configJson, BinningClustererConfig.class);
config.initializeFromJSON(new JSONObject(configJson));
TestUtils.isSerializedTo(config, configJson); TestUtils.isSerializedTo(config, configJson);
} }
@Test @Test
public void testSerializeBinningClustererConfigWithNgrams() { public void testSerializeBinningClustererConfigWithNgrams() throws JsonParseException, JsonMappingException, IOException {
BinningClustererConfig config = new BinningClustererConfig(); BinningClustererConfig config = ParsingUtilities.mapper.readValue(configNgramJson, BinningClustererConfig.class);
config.initializeFromJSON(new JSONObject(configNgramJson));
TestUtils.isSerializedTo(config, configNgramJson); TestUtils.isSerializedTo(config, configNgramJson);
} }
@Test @Test
public void testSerializeBinningClusterer() { public void testSerializeBinningClusterer() throws JsonParseException, JsonMappingException, IOException {
Project project = createCSVProject("column\n" Project project = createCSVProject("column\n"
+ "a\n" + "a\n"
+ "à\n" + "à\n"
+ "c\n" + "c\n"
+ "ĉ\n"); + "ĉ\n");
BinningClustererConfig config = new BinningClustererConfig(); BinningClustererConfig config = ParsingUtilities.mapper.readValue(configJson, BinningClustererConfig.class);
config.initializeFromJSON(new JSONObject(configJson));
BinningClusterer clusterer = config.apply(project); BinningClusterer clusterer = config.apply(project);
clusterer.computeClusters(new Engine(project)); clusterer.computeClusters(new Engine(project));
TestUtils.isSerializedTo(clusterer, clustererJson); TestUtils.isSerializedTo(clusterer, clustererJson);

View File

@ -1,14 +1,18 @@
package com.google.refine.tests.clustering; package com.google.refine.tests.clustering;
import org.json.JSONObject; import java.io.IOException;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.google.refine.browsing.Engine; import com.google.refine.browsing.Engine;
import com.google.refine.clustering.knn.kNNClusterer; import com.google.refine.clustering.knn.kNNClusterer;
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig; import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
import com.google.refine.model.Project; import com.google.refine.model.Project;
import com.google.refine.tests.RefineTest; import com.google.refine.tests.RefineTest;
import com.google.refine.tests.util.TestUtils; import com.google.refine.tests.util.TestUtils;
import com.google.refine.util.ParsingUtilities;
public class kNNClustererTests extends RefineTest { public class kNNClustererTests extends RefineTest {
@ -23,22 +27,20 @@ public class kNNClustererTests extends RefineTest {
+ "]"; + "]";
@Test @Test
public void serializekNNClustererConfig() { public void serializekNNClustererConfig() throws JsonParseException, JsonMappingException, IOException {
kNNClustererConfig config = new kNNClustererConfig(); kNNClustererConfig config = ParsingUtilities.mapper.readValue(configJson, kNNClustererConfig.class);
config.initializeFromJSON(new JSONObject(configJson));
TestUtils.isSerializedTo(config, configJson); TestUtils.isSerializedTo(config, configJson);
} }
@Test @Test
public void serializekNNClusterer() { public void serializekNNClusterer() throws JsonParseException, JsonMappingException, IOException {
Project project = createCSVProject("column\n" Project project = createCSVProject("column\n"
+ "ab\n" + "ab\n"
+ "abc\n" + "abc\n"
+ "c\n" + "c\n"
+ "ĉ\n"); + "ĉ\n");
kNNClustererConfig config = new kNNClustererConfig(); kNNClustererConfig config = ParsingUtilities.mapper.readValue(configJson, kNNClustererConfig.class);
config.initializeFromJSON(new JSONObject(configJson));
kNNClusterer clusterer = config.apply(project); kNNClusterer clusterer = config.apply(project);
clusterer.computeClusters(new Engine(project)); clusterer.computeClusters(new Engine(project));