Merge pull request #1730 from OpenRefine/json-clustering
Refactor clustering serialization in JSON
This commit is contained in:
commit
ec3e9ab1e9
@ -33,8 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.clustering;
|
package com.google.refine.clustering;
|
||||||
|
|
||||||
import org.json.JSONObject;
|
|
||||||
|
|
||||||
import com.google.refine.Jsonizable;
|
import com.google.refine.Jsonizable;
|
||||||
import com.google.refine.browsing.Engine;
|
import com.google.refine.browsing.Engine;
|
||||||
import com.google.refine.model.Column;
|
import com.google.refine.model.Column;
|
||||||
@ -44,15 +42,13 @@ public abstract class Clusterer implements Jsonizable {
|
|||||||
|
|
||||||
protected Project _project;
|
protected Project _project;
|
||||||
protected int _colindex;
|
protected int _colindex;
|
||||||
protected JSONObject _config;
|
|
||||||
|
|
||||||
public abstract void computeClusters(Engine engine);
|
public abstract void computeClusters(Engine engine);
|
||||||
|
|
||||||
public void initializeFromJSON(Project project, JSONObject o) throws Exception {
|
protected void initializeFromConfig(Project project, ClustererConfig c) {
|
||||||
_project = project;
|
_project = project;
|
||||||
_config = o;
|
|
||||||
|
|
||||||
String colname = o.getString("column");
|
String colname = c.getColumnName();
|
||||||
for (Column column : project.columnModel.columns) {
|
for (Column column : project.columnModel.columns) {
|
||||||
if (column.getName().equals(colname)) {
|
if (column.getName().equals(colname)) {
|
||||||
_colindex = column.getCellIndex();
|
_colindex = column.getCellIndex();
|
||||||
|
35
main/src/com/google/refine/clustering/ClustererConfig.java
Normal file
35
main/src/com/google/refine/clustering/ClustererConfig.java
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
package com.google.refine.clustering;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.refine.Jsonizable;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents the configuration data for a clusterer.
|
||||||
|
* @author Antonin Delpeuch
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public abstract class ClustererConfig implements Jsonizable {
|
||||||
|
|
||||||
|
protected String columnName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads the configuration from a JSON payload (TODO: delete)
|
||||||
|
* @param o
|
||||||
|
*/
|
||||||
|
public void initializeFromJSON(JSONObject o) {
|
||||||
|
columnName = o.getString("column");
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getColumnName() {
|
||||||
|
return columnName;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instantiate the configuration on a particular project.
|
||||||
|
* @param project
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public abstract Clusterer apply(Project project);
|
||||||
|
}
|
@ -50,17 +50,90 @@ import org.json.JSONWriter;
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.refine.Jsonizable;
|
||||||
import com.google.refine.browsing.Engine;
|
import com.google.refine.browsing.Engine;
|
||||||
import com.google.refine.browsing.FilteredRows;
|
import com.google.refine.browsing.FilteredRows;
|
||||||
import com.google.refine.browsing.RowVisitor;
|
import com.google.refine.browsing.RowVisitor;
|
||||||
import com.google.refine.clustering.Clusterer;
|
import com.google.refine.clustering.Clusterer;
|
||||||
|
import com.google.refine.clustering.ClustererConfig;
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
|
|
||||||
public class BinningClusterer extends Clusterer {
|
public class BinningClusterer extends Clusterer {
|
||||||
|
|
||||||
|
public static class BinningClustererConfig extends ClustererConfig {
|
||||||
|
|
||||||
|
private String _keyerName;
|
||||||
|
private Keyer _keyer;
|
||||||
|
private BinningParameters _parameters;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initializeFromJSON(JSONObject o) {
|
||||||
|
super.initializeFromJSON(o);
|
||||||
|
_keyerName = o.getString("function");
|
||||||
|
_keyer = _keyers.get(_keyerName.toLowerCase());
|
||||||
|
if(o.has("params")) {
|
||||||
|
_parameters = BinningParameters.reconstruct(o.getJSONObject("params"));
|
||||||
|
} else {
|
||||||
|
_parameters = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Keyer getKeyer() {
|
||||||
|
return _keyer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BinningParameters getParameters() {
|
||||||
|
return _parameters;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
writer.object();
|
||||||
|
writer.key("function"); writer.value(_keyerName);
|
||||||
|
writer.key("type"); writer.value("binning");
|
||||||
|
writer.key("column"); writer.value(getColumnName());
|
||||||
|
if(_parameters != null) {
|
||||||
|
writer.key("params");
|
||||||
|
_parameters.write(writer, options);
|
||||||
|
}
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
|
||||||
private Keyer _keyer;
|
@Override
|
||||||
|
public BinningClusterer apply(Project project) {
|
||||||
|
BinningClusterer clusterer = new BinningClusterer();
|
||||||
|
clusterer.initializeFromConfig(project, this);
|
||||||
|
return clusterer;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class BinningParameters implements Jsonizable {
|
||||||
|
public int ngramSize;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
writer.object();
|
||||||
|
if(ngramSize > 0) {
|
||||||
|
writer.key("ngram-size");
|
||||||
|
writer.value(ngramSize);
|
||||||
|
}
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static BinningParameters reconstruct(JSONObject o) {
|
||||||
|
BinningParameters parameters = new BinningParameters();
|
||||||
|
parameters.ngramSize = o.has("ngram-size") ? o.getInt("ngram-size") : 0;
|
||||||
|
return parameters;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Keyer _keyer;
|
||||||
|
protected BinningParameters _parameters;
|
||||||
|
|
||||||
static final protected Map<String, Keyer> _keyers = new HashMap<String, Keyer>();
|
static final protected Map<String, Keyer> _keyers = new HashMap<String, Keyer>();
|
||||||
|
|
||||||
@ -82,21 +155,17 @@ public class BinningClusterer extends Clusterer {
|
|||||||
|
|
||||||
Keyer _keyer;
|
Keyer _keyer;
|
||||||
Object[] _params;
|
Object[] _params;
|
||||||
JSONObject _config;
|
BinningParameters _parameters;
|
||||||
|
|
||||||
Map<String,Map<String,Integer>> _map = new HashMap<String,Map<String,Integer>>();
|
Map<String,Map<String,Integer>> _map = new HashMap<String,Map<String,Integer>>();
|
||||||
|
|
||||||
/**
 * Creates a visitor that uses the given keyer.
 *
 * @param k          the keyer
 * @param parameters the binning parameters; may be null
 */
public BinningRowVisitor(Keyer k, BinningParameters parameters) {
    _keyer = k;
    _parameters = parameters;
    if (k instanceof NGramFingerprintKeyer) {
        // Only pass an n-gram size that was actually specified. A missing
        // "ngram-size" is reconstructed as 0, and forwarding that would
        // replace the keyer's default; leave _params unset instead, as the
        // code did before the JSON refactoring.
        if (_parameters != null && _parameters.ngramSize > 0) {
            _params = new Object[1];
            _params[0] = _parameters.ngramSize;
        }
    }
}
|
||||||
@ -169,15 +238,15 @@ public class BinningClusterer extends Clusterer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
public void initializeFromConfig(Project project, BinningClustererConfig config) {
|
||||||
public void initializeFromJSON(Project project, JSONObject o) throws Exception {
|
super.initializeFromConfig(project, config);
|
||||||
super.initializeFromJSON(project, o);
|
_keyer = config.getKeyer();
|
||||||
_keyer = _keyers.get(o.getString("function").toLowerCase());
|
_parameters = config.getParameters();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void computeClusters(Engine engine) {
|
public void computeClusters(Engine engine) {
|
||||||
BinningRowVisitor visitor = new BinningRowVisitor(_keyer,_config);
|
BinningRowVisitor visitor = new BinningRowVisitor(_keyer,_parameters);
|
||||||
FilteredRows filteredRows = engine.getAllFilteredRows();
|
FilteredRows filteredRows = engine.getAllFilteredRows();
|
||||||
filteredRows.accept(_project, visitor);
|
filteredRows.accept(_project, visitor);
|
||||||
|
|
||||||
|
@ -51,10 +51,12 @@ import org.json.JSONWriter;
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.refine.Jsonizable;
|
||||||
import com.google.refine.browsing.Engine;
|
import com.google.refine.browsing.Engine;
|
||||||
import com.google.refine.browsing.FilteredRows;
|
import com.google.refine.browsing.FilteredRows;
|
||||||
import com.google.refine.browsing.RowVisitor;
|
import com.google.refine.browsing.RowVisitor;
|
||||||
import com.google.refine.clustering.Clusterer;
|
import com.google.refine.clustering.Clusterer;
|
||||||
|
import com.google.refine.clustering.ClustererConfig;
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
@ -72,8 +74,84 @@ import edu.mit.simile.vicino.distances.LevenshteinDistance;
|
|||||||
import edu.mit.simile.vicino.distances.PPMDistance;
|
import edu.mit.simile.vicino.distances.PPMDistance;
|
||||||
|
|
||||||
public class kNNClusterer extends Clusterer {
|
public class kNNClusterer extends Clusterer {
|
||||||
|
|
||||||
|
public static class kNNClustererConfig extends ClustererConfig {
|
||||||
|
private String _distanceStr;
|
||||||
|
private Distance _distance;
|
||||||
|
private kNNClustererConfigParameters _parameters;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
writer.object();
|
||||||
|
writer.key("function"); writer.value(_distanceStr);
|
||||||
|
writer.key("type"); writer.value("knn");
|
||||||
|
writer.key("column"); writer.value(getColumnName());
|
||||||
|
if(_parameters != null) {
|
||||||
|
writer.key("params");
|
||||||
|
_parameters.write(writer, options);
|
||||||
|
}
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initializeFromJSON(JSONObject o) {
|
||||||
|
super.initializeFromJSON(o);
|
||||||
|
_distanceStr = o.getString("function");
|
||||||
|
_distance = _distances.get(_distanceStr.toLowerCase());
|
||||||
|
if(o.has("params")) {
|
||||||
|
_parameters = kNNClustererConfigParameters.reconstruct(o.getJSONObject("params"));
|
||||||
|
} else {
|
||||||
|
_parameters = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Distance getDistance() {
|
||||||
|
return _distance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public kNNClustererConfigParameters getParameters() {
|
||||||
|
return _parameters;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public kNNClusterer apply(Project project) {
|
||||||
|
kNNClusterer clusterer = new kNNClusterer();
|
||||||
|
clusterer.initializeFromConfig(project, this);
|
||||||
|
return clusterer;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class kNNClustererConfigParameters implements Jsonizable {
|
||||||
|
public static final double defaultRadius = 1.0d;
|
||||||
|
public static final int defaultBlockingNgramSize = 6;
|
||||||
|
public double radius = defaultRadius;
|
||||||
|
public int blockingNgramSize = defaultBlockingNgramSize;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
writer.object();
|
||||||
|
writer.key("radius"); writer.value(radius);
|
||||||
|
writer.key("blocking-ngram-size");
|
||||||
|
writer.value(blockingNgramSize);
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static kNNClustererConfigParameters reconstruct(JSONObject o) {
|
||||||
|
kNNClustererConfigParameters params = new kNNClustererConfigParameters();
|
||||||
|
if(o.has("radius")) {
|
||||||
|
params.radius = o.getDouble("radius");
|
||||||
|
}
|
||||||
|
if(o.has("blocking-ngram-size")) {
|
||||||
|
params.blockingNgramSize = o.getInt("blocking-ngram-size");
|
||||||
|
}
|
||||||
|
return params;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private Distance _distance;
|
private Distance _distance;
|
||||||
|
private kNNClustererConfigParameters _params;
|
||||||
|
|
||||||
static final protected Map<String, Distance> _distances = new HashMap<String, Distance>();
|
static final protected Map<String, Distance> _distances = new HashMap<String, Distance>();
|
||||||
|
|
||||||
@ -97,20 +175,13 @@ public class kNNClusterer extends Clusterer {
|
|||||||
class VPTreeClusteringRowVisitor implements RowVisitor {
|
class VPTreeClusteringRowVisitor implements RowVisitor {
|
||||||
|
|
||||||
Distance _distance;
|
Distance _distance;
|
||||||
JSONObject _config;
|
kNNClustererConfigParameters _params;
|
||||||
VPTreeClusterer _clusterer;
|
VPTreeClusterer _clusterer;
|
||||||
double _radius = 1.0f;
|
|
||||||
|
|
||||||
/**
 * Creates a visitor that clusters with a VP tree over the given distance.
 *
 * @param d      the distance handed to the VP-tree clusterer
 * @param params the clustering parameters; null selects the defaults
 */
public VPTreeClusteringRowVisitor(Distance d, kNNClustererConfigParameters params) {
    _distance = d;
    _clusterer = new VPTreeClusterer(_distance);
    // getClusters() reads _params.radius, and a configuration without a
    // "params" entry carries no parameters; fall back to the defaults
    // rather than failing later with a NullPointerException.
    _params = params != null ? params : new kNNClustererConfigParameters();
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -136,32 +207,23 @@ public class kNNClusterer extends Clusterer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public List<Set<Serializable>> getClusters() {
|
public List<Set<Serializable>> getClusters() {
|
||||||
return _clusterer.getClusters(_radius);
|
return _clusterer.getClusters(_params.radius);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class BlockingClusteringRowVisitor implements RowVisitor {
|
class BlockingClusteringRowVisitor implements RowVisitor {
|
||||||
|
|
||||||
Distance _distance;
|
Distance _distance;
|
||||||
JSONObject _config;
|
|
||||||
double _radius = 1.0d;
|
double _radius = 1.0d;
|
||||||
int _blockingNgramSize = 6;
|
int _blockingNgramSize = 6;
|
||||||
HashSet<String> _data;
|
HashSet<String> _data;
|
||||||
NGramClusterer _clusterer;
|
NGramClusterer _clusterer;
|
||||||
|
|
||||||
/**
 * Creates a visitor that clusters with n-gram blocking over the given
 * distance.
 *
 * @param d      the distance handed to the n-gram clusterer
 * @param params the clustering parameters; when null, the values from the
 *               field initializers of {@code _radius} and
 *               {@code _blockingNgramSize} are kept
 */
public BlockingClusteringRowVisitor(Distance d, kNNClustererConfigParameters params) {
    _distance = d;
    _data = new HashSet<String>();
    // A configuration without a "params" entry carries no parameters.
    // Dereferencing unconditionally would throw where the pre-refactor
    // code used the defaults.
    if (params != null) {
        _blockingNgramSize = params.blockingNgramSize;
        _radius = params.radius;
    }
    _clusterer = new NGramClusterer(_distance, _blockingNgramSize);
}
|
||||||
|
|
||||||
@ -191,17 +253,17 @@ public class kNNClusterer extends Clusterer {
|
|||||||
return _clusterer.getClusters(_radius);
|
return _clusterer.getClusters(_radius);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
public void initializeFromConfig(Project project, kNNClustererConfig config) {
|
||||||
public void initializeFromJSON(Project project, JSONObject o) throws Exception {
|
super.initializeFromConfig(project, config);
|
||||||
super.initializeFromJSON(project, o);
|
_distance = config.getDistance();
|
||||||
_distance = _distances.get(o.getString("function").toLowerCase());
|
_params = config.getParameters();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void computeClusters(Engine engine) {
|
public void computeClusters(Engine engine) {
|
||||||
//VPTreeClusteringRowVisitor visitor = new VPTreeClusteringRowVisitor(_distance,_config);
|
//VPTreeClusteringRowVisitor visitor = new VPTreeClusteringRowVisitor(_distance,_config);
|
||||||
BlockingClusteringRowVisitor visitor = new BlockingClusteringRowVisitor(_distance,_config);
|
BlockingClusteringRowVisitor visitor = new BlockingClusteringRowVisitor(_distance,_params);
|
||||||
FilteredRows filteredRows = engine.getAllFilteredRows();
|
FilteredRows filteredRows = engine.getAllFilteredRows();
|
||||||
filteredRows.accept(_project, visitor);
|
filteredRows.accept(_project, visitor);
|
||||||
|
|
||||||
|
@ -45,8 +45,9 @@ import org.slf4j.LoggerFactory;
|
|||||||
|
|
||||||
import com.google.refine.browsing.Engine;
|
import com.google.refine.browsing.Engine;
|
||||||
import com.google.refine.clustering.Clusterer;
|
import com.google.refine.clustering.Clusterer;
|
||||||
import com.google.refine.clustering.binning.BinningClusterer;
|
import com.google.refine.clustering.ClustererConfig;
|
||||||
import com.google.refine.clustering.knn.kNNClusterer;
|
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
|
||||||
|
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
|
||||||
import com.google.refine.commands.Command;
|
import com.google.refine.commands.Command;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
|
||||||
@ -64,16 +65,17 @@ public class ComputeClustersCommand extends Command {
|
|||||||
Engine engine = getEngine(request, project);
|
Engine engine = getEngine(request, project);
|
||||||
JSONObject clusterer_conf = getJsonParameter(request,"clusterer");
|
JSONObject clusterer_conf = getJsonParameter(request,"clusterer");
|
||||||
|
|
||||||
Clusterer clusterer = null;
|
|
||||||
String type = clusterer_conf.has("type") ? clusterer_conf.getString("type") : "binning";
|
String type = clusterer_conf.has("type") ? clusterer_conf.getString("type") : "binning";
|
||||||
|
|
||||||
|
ClustererConfig clustererConfig = null;
|
||||||
if ("knn".equals(type)) {
|
if ("knn".equals(type)) {
|
||||||
clusterer = new kNNClusterer();
|
clustererConfig = new kNNClustererConfig();
|
||||||
} else {
|
} else {
|
||||||
clusterer = new BinningClusterer();
|
clustererConfig = new BinningClustererConfig();
|
||||||
}
|
}
|
||||||
|
|
||||||
clusterer.initializeFromJSON(project, clusterer_conf);
|
clustererConfig.initializeFromJSON(clusterer_conf);
|
||||||
|
Clusterer clusterer = clustererConfig.apply(project);
|
||||||
|
|
||||||
clusterer.computeClusters(engine);
|
clusterer.computeClusters(engine);
|
||||||
|
|
||||||
|
@ -0,0 +1,59 @@
|
|||||||
|
package com.google.refine.tests.clustering;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.refine.browsing.Engine;
|
||||||
|
import com.google.refine.clustering.binning.BinningClusterer;
|
||||||
|
import com.google.refine.clustering.binning.BinningClusterer.BinningClustererConfig;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.tests.RefineTest;
|
||||||
|
import com.google.refine.tests.util.TestUtils;
|
||||||
|
|
||||||
|
public class BinningClustererTests extends RefineTest {
|
||||||
|
|
||||||
|
String configJson = "{"
|
||||||
|
+ "\"type\":\"binning\","
|
||||||
|
+ "\"function\":\"fingerprint\","
|
||||||
|
+ "\"column\":\"values\","
|
||||||
|
+ "\"params\":{}}";
|
||||||
|
|
||||||
|
String configNgramJson = "{"
|
||||||
|
+ "\"type\":\"binning\","
|
||||||
|
+ "\"function\":\"ngram-fingerprint\","
|
||||||
|
+ "\"column\":\"values\","
|
||||||
|
+ "\"params\":{\"ngram-size\":2}}";
|
||||||
|
|
||||||
|
String clustererJson = "["
|
||||||
|
+ " [{\"v\":\"a\",\"c\":1},{\"v\":\"à\",\"c\":1}],"
|
||||||
|
+ " [{\"v\":\"c\",\"c\":1},{\"v\":\"ĉ\",\"c\":1}]"
|
||||||
|
+ "]";
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSerializeBinningClustererConfig() {
|
||||||
|
BinningClustererConfig config = new BinningClustererConfig();
|
||||||
|
config.initializeFromJSON(new JSONObject(configJson));
|
||||||
|
TestUtils.isSerializedTo(config, configJson);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSerializeBinningClustererConfigWithNgrams() {
|
||||||
|
BinningClustererConfig config = new BinningClustererConfig();
|
||||||
|
config.initializeFromJSON(new JSONObject(configNgramJson));
|
||||||
|
TestUtils.isSerializedTo(config, configNgramJson);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSerializeBinningClusterer() {
|
||||||
|
Project project = createCSVProject("column\n"
|
||||||
|
+ "a\n"
|
||||||
|
+ "à\n"
|
||||||
|
+ "c\n"
|
||||||
|
+ "ĉ\n");
|
||||||
|
BinningClustererConfig config = new BinningClustererConfig();
|
||||||
|
config.initializeFromJSON(new JSONObject(configJson));
|
||||||
|
BinningClusterer clusterer = config.apply(project);
|
||||||
|
clusterer.computeClusters(new Engine(project));
|
||||||
|
TestUtils.isSerializedTo(clusterer, clustererJson);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,47 @@
|
|||||||
|
package com.google.refine.tests.clustering;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.refine.browsing.Engine;
|
||||||
|
import com.google.refine.clustering.knn.kNNClusterer;
|
||||||
|
import com.google.refine.clustering.knn.kNNClusterer.kNNClustererConfig;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.tests.RefineTest;
|
||||||
|
import com.google.refine.tests.util.TestUtils;
|
||||||
|
|
||||||
|
public class kNNClustererTests extends RefineTest {
|
||||||
|
|
||||||
|
public static String configJson = "{"
|
||||||
|
+ "\"type\":\"knn\","
|
||||||
|
+ "\"function\":\"PPM\","
|
||||||
|
+ "\"column\":\"values\","
|
||||||
|
+ "\"params\":{\"radius\":1,\"blocking-ngram-size\":2}"
|
||||||
|
+ "}";
|
||||||
|
public static String clustererJson = "["
|
||||||
|
+ " [{\"v\":\"ab\",\"c\":1},{\"v\":\"abc\",\"c\":1}]"
|
||||||
|
+ "]";
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void serializekNNClustererConfig() {
|
||||||
|
kNNClustererConfig config = new kNNClustererConfig();
|
||||||
|
config.initializeFromJSON(new JSONObject(configJson));
|
||||||
|
TestUtils.isSerializedTo(config, configJson);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void serializekNNClusterer() {
|
||||||
|
Project project = createCSVProject("column\n"
|
||||||
|
+ "ab\n"
|
||||||
|
+ "abc\n"
|
||||||
|
+ "c\n"
|
||||||
|
+ "ĉ\n");
|
||||||
|
|
||||||
|
kNNClustererConfig config = new kNNClustererConfig();
|
||||||
|
config.initializeFromJSON(new JSONObject(configJson));
|
||||||
|
kNNClusterer clusterer = config.apply(project);
|
||||||
|
clusterer.computeClusters(new Engine(project));
|
||||||
|
|
||||||
|
TestUtils.isSerializedTo(clusterer, clustererJson);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user