Clustering config Jackson deserialization
This commit is contained in:
parent
d26d7b4a42
commit
ba8e406a97
@ -1,8 +1,11 @@
|
|||||||
package com.google.refine.clustering;
|
package com.google.refine.clustering;
|
||||||
|
|
||||||
import org.json.JSONObject;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonSubTypes;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonSubTypes.Type;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||||
|
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
|
||||||
|
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -10,23 +13,27 @@ import com.google.refine.model.Project;
|
|||||||
* @author Antonin Delpeuch
|
* @author Antonin Delpeuch
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@JsonTypeInfo(
|
||||||
|
use=JsonTypeInfo.Id.NAME,
|
||||||
|
include=JsonTypeInfo.As.PROPERTY,
|
||||||
|
property="type")
|
||||||
|
@JsonSubTypes({
|
||||||
|
@Type(value = kNNClustererConfig.class, name = "knn"),
|
||||||
|
@Type(value = BinningClustererConfig.class, name = "binning") })
|
||||||
public abstract class ClustererConfig {
|
public abstract class ClustererConfig {
|
||||||
|
|
||||||
protected String columnName;
|
protected String columnName;
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads the configuration from a JSON payload (TODO: delete)
|
|
||||||
* @param o
|
|
||||||
*/
|
|
||||||
public void initializeFromJSON(JSONObject o) {
|
|
||||||
columnName = o.getString("column");
|
|
||||||
}
|
|
||||||
|
|
||||||
@JsonProperty("column")
|
@JsonProperty("column")
|
||||||
public String getColumnName() {
|
public String getColumnName() {
|
||||||
return columnName;
|
return columnName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonProperty("column")
|
||||||
|
public void setColumnName(String name) {
|
||||||
|
columnName = name;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiate the configuration on a particular project.
|
* Instantiate the configuration on a particular project.
|
||||||
* @param project
|
* @param project
|
||||||
|
@ -44,7 +44,6 @@ import java.util.Map.Entry;
|
|||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.json.JSONObject;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -67,33 +66,39 @@ public class BinningClusterer extends Clusterer {
|
|||||||
|
|
||||||
public static class BinningClustererConfig extends ClustererConfig {
|
public static class BinningClustererConfig extends ClustererConfig {
|
||||||
|
|
||||||
@JsonProperty("function")
|
@JsonIgnore
|
||||||
private String _keyerName;
|
private String _keyerName;
|
||||||
|
@JsonIgnore
|
||||||
private Keyer _keyer;
|
private Keyer _keyer;
|
||||||
private BinningParameters _parameters;
|
@JsonIgnore
|
||||||
|
private BinningParameters _parameters = null;
|
||||||
@Override
|
|
||||||
public void initializeFromJSON(JSONObject o) {
|
|
||||||
super.initializeFromJSON(o);
|
|
||||||
_keyerName = o.getString("function");
|
|
||||||
_keyer = _keyers.get(_keyerName.toLowerCase());
|
|
||||||
if(o.has("params")) {
|
|
||||||
_parameters = BinningParameters.reconstruct(o.getJSONObject("params"));
|
|
||||||
} else {
|
|
||||||
_parameters = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@JsonIgnore
|
@JsonIgnore
|
||||||
public Keyer getKeyer() {
|
public Keyer getKeyer() {
|
||||||
return _keyer;
|
return _keyer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonProperty("function")
|
||||||
|
public void setKeyer(String keyerName) {
|
||||||
|
_keyerName = keyerName;
|
||||||
|
_keyer = _keyers.get(_keyerName.toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty("function")
|
||||||
|
public String getKeyerName() {
|
||||||
|
return _keyerName;
|
||||||
|
}
|
||||||
|
|
||||||
@JsonProperty("params")
|
@JsonProperty("params")
|
||||||
@JsonInclude(Include.NON_NULL)
|
@JsonInclude(Include.NON_NULL)
|
||||||
public BinningParameters getParameters() {
|
public BinningParameters getParameters() {
|
||||||
return _parameters;
|
return _parameters;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonProperty("params")
|
||||||
|
public void setParameters(BinningParameters params) {
|
||||||
|
_parameters = params;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BinningClusterer apply(Project project) {
|
public BinningClusterer apply(Project project) {
|
||||||
@ -112,13 +117,7 @@ public class BinningClusterer extends Clusterer {
|
|||||||
public static class BinningParameters {
|
public static class BinningParameters {
|
||||||
@JsonProperty("ngram-size")
|
@JsonProperty("ngram-size")
|
||||||
@JsonInclude(Include.NON_DEFAULT)
|
@JsonInclude(Include.NON_DEFAULT)
|
||||||
public int ngramSize;
|
public int ngramSize = 0;
|
||||||
|
|
||||||
public static BinningParameters reconstruct(JSONObject o) {
|
|
||||||
BinningParameters parameters = new BinningParameters();
|
|
||||||
parameters.ngramSize = o.has("ngram-size") ? o.getInt("ngram-size") : 0;
|
|
||||||
return parameters;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Keyer _keyer;
|
protected Keyer _keyer;
|
||||||
|
@ -43,7 +43,6 @@ import java.util.Map.Entry;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.json.JSONObject;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -75,31 +74,38 @@ import edu.mit.simile.vicino.distances.PPMDistance;
|
|||||||
public class kNNClusterer extends Clusterer {
|
public class kNNClusterer extends Clusterer {
|
||||||
|
|
||||||
public static class kNNClustererConfig extends ClustererConfig {
|
public static class kNNClustererConfig extends ClustererConfig {
|
||||||
@JsonProperty("function")
|
@JsonIgnore
|
||||||
private String _distanceStr;
|
private String _distanceStr;
|
||||||
|
@JsonIgnore
|
||||||
private Distance _distance;
|
private Distance _distance;
|
||||||
private kNNClustererConfigParameters _parameters;
|
@JsonIgnore
|
||||||
|
private kNNClustererConfigParameters _parameters = null;
|
||||||
public void initializeFromJSON(JSONObject o) {
|
|
||||||
super.initializeFromJSON(o);
|
|
||||||
_distanceStr = o.getString("function");
|
|
||||||
_distance = _distances.get(_distanceStr.toLowerCase());
|
|
||||||
if(o.has("params")) {
|
|
||||||
_parameters = kNNClustererConfigParameters.reconstruct(o.getJSONObject("params"));
|
|
||||||
} else {
|
|
||||||
_parameters = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@JsonIgnore
|
@JsonIgnore
|
||||||
public Distance getDistance() {
|
public Distance getDistance() {
|
||||||
return _distance;
|
return _distance;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonProperty("function")
|
||||||
|
public void setDistance(String distanceStr) {
|
||||||
|
_distanceStr = distanceStr;
|
||||||
|
_distance = _distances.get(_distanceStr.toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty("function")
|
||||||
|
public String getDistanceStr() {
|
||||||
|
return _distanceStr;
|
||||||
|
}
|
||||||
|
|
||||||
@JsonProperty("params")
|
@JsonProperty("params")
|
||||||
public kNNClustererConfigParameters getParameters() {
|
public kNNClustererConfigParameters getParameters() {
|
||||||
return _parameters;
|
return _parameters;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonProperty("params")
|
||||||
|
public void setParameters(kNNClustererConfigParameters params) {
|
||||||
|
_parameters = params;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public kNNClusterer apply(Project project) {
|
public kNNClusterer apply(Project project) {
|
||||||
@ -122,17 +128,6 @@ public class kNNClusterer extends Clusterer {
|
|||||||
public double radius = defaultRadius;
|
public double radius = defaultRadius;
|
||||||
@JsonProperty("blocking-ngram-size")
|
@JsonProperty("blocking-ngram-size")
|
||||||
public int blockingNgramSize = defaultBlockingNgramSize;
|
public int blockingNgramSize = defaultBlockingNgramSize;
|
||||||
|
|
||||||
public static kNNClustererConfigParameters reconstruct(JSONObject o) {
|
|
||||||
kNNClustererConfigParameters params = new kNNClustererConfigParameters();
|
|
||||||
if(o.has("radius")) {
|
|
||||||
params.radius = o.getDouble("radius");
|
|
||||||
}
|
|
||||||
if(o.has("blocking-ngram-size")) {
|
|
||||||
params.blockingNgramSize = o.getInt("blocking-ngram-size");
|
|
||||||
}
|
|
||||||
return params;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private Distance _distance;
|
private Distance _distance;
|
||||||
|
@ -39,17 +39,15 @@ import javax.servlet.ServletException;
|
|||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
import javax.servlet.http.HttpServletResponse;
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
import org.json.JSONObject;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.refine.browsing.Engine;
|
import com.google.refine.browsing.Engine;
|
||||||
import com.google.refine.clustering.Clusterer;
|
import com.google.refine.clustering.Clusterer;
|
||||||
import com.google.refine.clustering.ClustererConfig;
|
import com.google.refine.clustering.ClustererConfig;
|
||||||
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
|
|
||||||
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
|
|
||||||
import com.google.refine.commands.Command;
|
import com.google.refine.commands.Command;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
public class ComputeClustersCommand extends Command {
|
public class ComputeClustersCommand extends Command {
|
||||||
|
|
||||||
@ -63,24 +61,15 @@ public class ComputeClustersCommand extends Command {
|
|||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
Project project = getProject(request);
|
Project project = getProject(request);
|
||||||
Engine engine = getEngine(request, project);
|
Engine engine = getEngine(request, project);
|
||||||
JSONObject clusterer_conf = getJsonParameter(request,"clusterer");
|
String clusterer_conf = request.getParameter("clusterer");
|
||||||
|
ClustererConfig clustererConfig = ParsingUtilities.mapper.readValue(clusterer_conf, ClustererConfig.class);
|
||||||
|
|
||||||
String type = clusterer_conf.has("type") ? clusterer_conf.getString("type") : "binning";
|
|
||||||
|
|
||||||
ClustererConfig clustererConfig = null;
|
|
||||||
if ("knn".equals(type)) {
|
|
||||||
clustererConfig = new kNNClustererConfig();
|
|
||||||
} else {
|
|
||||||
clustererConfig = new BinningClustererConfig();
|
|
||||||
}
|
|
||||||
|
|
||||||
clustererConfig.initializeFromJSON(clusterer_conf);
|
|
||||||
Clusterer clusterer = clustererConfig.apply(project);
|
Clusterer clusterer = clustererConfig.apply(project);
|
||||||
|
|
||||||
clusterer.computeClusters(engine);
|
clusterer.computeClusters(engine);
|
||||||
|
|
||||||
respondJSON(response, clusterer);
|
respondJSON(response, clusterer);
|
||||||
logger.info("computed clusters [{},{}] in {}ms", new Object[] { type, clusterer_conf.getString("function"), Long.toString(System.currentTimeMillis() - start) });
|
logger.info("computed clusters [{}] in {}ms", new Object[] { clustererConfig.getType(), Long.toString(System.currentTimeMillis() - start) });
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
respondException(response, e);
|
respondException(response, e);
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,18 @@
|
|||||||
package com.google.refine.tests.clustering;
|
package com.google.refine.tests.clustering;
|
||||||
|
|
||||||
import org.json.JSONObject;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonParseException;
|
||||||
|
import com.fasterxml.jackson.databind.JsonMappingException;
|
||||||
import com.google.refine.browsing.Engine;
|
import com.google.refine.browsing.Engine;
|
||||||
import com.google.refine.clustering.binning.BinningClusterer;
|
import com.google.refine.clustering.binning.BinningClusterer;
|
||||||
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
|
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.tests.RefineTest;
|
import com.google.refine.tests.RefineTest;
|
||||||
import com.google.refine.tests.util.TestUtils;
|
import com.google.refine.tests.util.TestUtils;
|
||||||
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
public class BinningClustererTests extends RefineTest {
|
public class BinningClustererTests extends RefineTest {
|
||||||
|
|
||||||
@ -30,28 +34,25 @@ public class BinningClustererTests extends RefineTest {
|
|||||||
+ "]";
|
+ "]";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSerializeBinningClustererConfig() {
|
public void testSerializeBinningClustererConfig() throws JsonParseException, JsonMappingException, IOException {
|
||||||
BinningClustererConfig config = new BinningClustererConfig();
|
BinningClustererConfig config = ParsingUtilities.mapper.readValue(configJson, BinningClustererConfig.class);
|
||||||
config.initializeFromJSON(new JSONObject(configJson));
|
|
||||||
TestUtils.isSerializedTo(config, configJson);
|
TestUtils.isSerializedTo(config, configJson);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSerializeBinningClustererConfigWithNgrams() {
|
public void testSerializeBinningClustererConfigWithNgrams() throws JsonParseException, JsonMappingException, IOException {
|
||||||
BinningClustererConfig config = new BinningClustererConfig();
|
BinningClustererConfig config = ParsingUtilities.mapper.readValue(configNgramJson, BinningClustererConfig.class);
|
||||||
config.initializeFromJSON(new JSONObject(configNgramJson));
|
|
||||||
TestUtils.isSerializedTo(config, configNgramJson);
|
TestUtils.isSerializedTo(config, configNgramJson);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSerializeBinningClusterer() {
|
public void testSerializeBinningClusterer() throws JsonParseException, JsonMappingException, IOException {
|
||||||
Project project = createCSVProject("column\n"
|
Project project = createCSVProject("column\n"
|
||||||
+ "a\n"
|
+ "a\n"
|
||||||
+ "à\n"
|
+ "à\n"
|
||||||
+ "c\n"
|
+ "c\n"
|
||||||
+ "ĉ\n");
|
+ "ĉ\n");
|
||||||
BinningClustererConfig config = new BinningClustererConfig();
|
BinningClustererConfig config = ParsingUtilities.mapper.readValue(configJson, BinningClustererConfig.class);
|
||||||
config.initializeFromJSON(new JSONObject(configJson));
|
|
||||||
BinningClusterer clusterer = config.apply(project);
|
BinningClusterer clusterer = config.apply(project);
|
||||||
clusterer.computeClusters(new Engine(project));
|
clusterer.computeClusters(new Engine(project));
|
||||||
TestUtils.isSerializedTo(clusterer, clustererJson);
|
TestUtils.isSerializedTo(clusterer, clustererJson);
|
||||||
|
@ -1,14 +1,18 @@
|
|||||||
package com.google.refine.tests.clustering;
|
package com.google.refine.tests.clustering;
|
||||||
|
|
||||||
import org.json.JSONObject;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonParseException;
|
||||||
|
import com.fasterxml.jackson.databind.JsonMappingException;
|
||||||
import com.google.refine.browsing.Engine;
|
import com.google.refine.browsing.Engine;
|
||||||
import com.google.refine.clustering.knn.kNNClusterer;
|
import com.google.refine.clustering.knn.kNNClusterer;
|
||||||
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
|
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.tests.RefineTest;
|
import com.google.refine.tests.RefineTest;
|
||||||
import com.google.refine.tests.util.TestUtils;
|
import com.google.refine.tests.util.TestUtils;
|
||||||
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
public class kNNClustererTests extends RefineTest {
|
public class kNNClustererTests extends RefineTest {
|
||||||
|
|
||||||
@ -23,22 +27,20 @@ public class kNNClustererTests extends RefineTest {
|
|||||||
+ "]";
|
+ "]";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void serializekNNClustererConfig() {
|
public void serializekNNClustererConfig() throws JsonParseException, JsonMappingException, IOException {
|
||||||
kNNClustererConfig config = new kNNClustererConfig();
|
kNNClustererConfig config = ParsingUtilities.mapper.readValue(configJson, kNNClustererConfig.class);
|
||||||
config.initializeFromJSON(new JSONObject(configJson));
|
|
||||||
TestUtils.isSerializedTo(config, configJson);
|
TestUtils.isSerializedTo(config, configJson);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void serializekNNClusterer() {
|
public void serializekNNClusterer() throws JsonParseException, JsonMappingException, IOException {
|
||||||
Project project = createCSVProject("column\n"
|
Project project = createCSVProject("column\n"
|
||||||
+ "ab\n"
|
+ "ab\n"
|
||||||
+ "abc\n"
|
+ "abc\n"
|
||||||
+ "c\n"
|
+ "c\n"
|
||||||
+ "ĉ\n");
|
+ "ĉ\n");
|
||||||
|
|
||||||
kNNClustererConfig config = new kNNClustererConfig();
|
kNNClustererConfig config = ParsingUtilities.mapper.readValue(configJson, kNNClustererConfig.class);
|
||||||
config.initializeFromJSON(new JSONObject(configJson));
|
|
||||||
kNNClusterer clusterer = config.apply(project);
|
kNNClusterer clusterer = config.apply(project);
|
||||||
clusterer.computeClusters(new Engine(project));
|
clusterer.computeClusters(new Engine(project));
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user