Merge pull request #2155 from OpenRefine/issue-2152-lonely-clusters

Fix clusters with single candidates.
This commit is contained in:
Antonin Delpeuch 2019-09-18 19:08:18 +01:00 committed by GitHub
commit bbb5766a33
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 30 additions and 0 deletions

View File

@@ -242,6 +242,7 @@ public class BinningClusterer extends Clusterer {
public List<List<ClusteredEntry>> getJsonRepresentation() {
EntriesComparator c = new EntriesComparator();
return _clusters.stream()
.filter(m -> m.size() > 1)
.map(m -> m.entrySet().stream()
.sorted(c)
.map(e -> new ClusteredEntry(e.getKey(), e.getValue()))

View File

@@ -26,7 +26,10 @@
******************************************************************************/
package com.google.refine.clustering.binning;
import static org.testng.Assert.assertEquals;
import java.io.IOException;
import java.util.Arrays;
import org.testng.annotations.Test;
@@ -83,4 +86,16 @@ public class BinningClustererTests extends RefineTest {
clusterer.computeClusters(new Engine(project));
TestUtils.isSerializedTo(clusterer, clustererJson);
}
/**
 * Builds a one-column CSV project holding the values "c", "ĉ" and "d",
 * runs a binning clusterer deserialized from {@code configJson} over it,
 * and asserts that the clusterer's JSON representation contains exactly
 * one cluster.
 *
 * NOTE(review): presumably "c" and "ĉ" collapse to the same key under the
 * keyer configured in configJson while "d" remains on its own and must not
 * be reported — confirm against configJson, which is defined elsewhere.
 */
@Test
public void testNoLonelyClusters() throws JsonParseException, JsonMappingException, IOException {
    // Header row followed by three single-character values.
    String csv = "column\n"
            + "c\n"
            + "ĉ\n"
            + "d\n";
    Project project = createCSVProject(csv);

    BinningClustererConfig clustererConfig =
            ParsingUtilities.mapper.readValue(configJson, BinningClustererConfig.class);
    BinningClusterer binningClusterer = clustererConfig.apply(project);

    Engine engine = new Engine(project);
    binningClusterer.computeClusters(engine);

    int reportedClusters = binningClusterer.getJsonRepresentation().size();
    // TestNG argument order: actual first, expected second.
    assertEquals(reportedClusters, 1);
}
}

View File

@@ -26,6 +26,8 @@
******************************************************************************/
package com.google.refine.clustering.knn;
import static org.testng.Assert.assertTrue;
import java.io.IOException;
import org.testng.annotations.Test;
@@ -72,4 +74,16 @@ public class kNNClustererTests extends RefineTest {
TestUtils.isSerializedTo(clusterer, clustererJson);
}
/**
 * Builds a one-column CSV project holding the values "foo" and "bar",
 * runs a kNN clusterer deserialized from {@code configJson} over it, and
 * asserts that the clusterer's JSON representation is empty.
 *
 * NOTE(review): presumably "foo" and "bar" are too far apart under the
 * distance configured in configJson to be grouped, so no cluster should be
 * reported — confirm against configJson, which is defined elsewhere.
 */
@Test
public void testNoLonelyclusters() throws JsonParseException, JsonMappingException, IOException {
    // Header row followed by two unrelated values.
    String csv = "column\n"
            + "foo\n"
            + "bar\n";
    Project project = createCSVProject(csv);

    kNNClustererConfig clustererConfig =
            ParsingUtilities.mapper.readValue(configJson, kNNClustererConfig.class);
    kNNClusterer knnClusterer = clustererConfig.apply(project);

    Engine engine = new Engine(project);
    knnClusterer.computeClusters(engine);

    boolean nothingReported = knnClusterer.getJsonRepresentation().isEmpty();
    assertTrue(nothingReported);
}
}