Fix clusters with single candidates. Closes #2152.

This commit is contained in:
Antonin Delpeuch 2019-09-11 12:12:32 +01:00
parent d4ba7e2791
commit 95b063162d
3 changed files with 30 additions and 0 deletions

View File

@ -242,6 +242,7 @@ public class BinningClusterer extends Clusterer {
public List<List<ClusteredEntry>> getJsonRepresentation() { public List<List<ClusteredEntry>> getJsonRepresentation() {
EntriesComparator c = new EntriesComparator(); EntriesComparator c = new EntriesComparator();
return _clusters.stream() return _clusters.stream()
.filter(m -> m.size() > 1)
.map(m -> m.entrySet().stream() .map(m -> m.entrySet().stream()
.sorted(c) .sorted(c)
.map(e -> new ClusteredEntry(e.getKey(), e.getValue())) .map(e -> new ClusteredEntry(e.getKey(), e.getValue()))

View File

@ -26,7 +26,10 @@
******************************************************************************/ ******************************************************************************/
package com.google.refine.clustering.binning; package com.google.refine.clustering.binning;
import static org.testng.Assert.assertEquals;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@ -83,4 +86,16 @@ public class BinningClustererTests extends RefineTest {
clusterer.computeClusters(new Engine(project)); clusterer.computeClusters(new Engine(project));
TestUtils.isSerializedTo(clusterer, clustererJson); TestUtils.isSerializedTo(clusterer, clustererJson);
} }
/**
 * Verifies that the binning clusterer does not report clusters made of a
 * single value: a three-row project must serialize to exactly one cluster.
 */
@Test
public void testNoLonelyClusters() throws JsonParseException, JsonMappingException, IOException {
    // Presumably "c" and "ĉ" share a bin under the keyer selected by configJson,
    // while "d" ends up alone -- confirm against the config declared in this class.
    String csv = "column\nc\nĉ\nd\n";
    Project project = createCSVProject(csv);

    BinningClusterer clusterer = ParsingUtilities.mapper
            .readValue(configJson, BinningClustererConfig.class)
            .apply(project);
    clusterer.computeClusters(new Engine(project));

    int clusterCount = clusterer.getJsonRepresentation().size();
    assertEquals(clusterCount, 1);
}
} }

View File

@ -26,6 +26,8 @@
******************************************************************************/ ******************************************************************************/
package com.google.refine.clustering.knn; package com.google.refine.clustering.knn;
import static org.testng.Assert.assertTrue;
import java.io.IOException; import java.io.IOException;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@ -72,4 +74,16 @@ public class kNNClustererTests extends RefineTest {
TestUtils.isSerializedTo(clusterer, clustererJson); TestUtils.isSerializedTo(clusterer, clustererJson);
} }
/**
 * Verifies that the kNN clusterer reports no clusters at all for a project
 * whose two values ("foo" and "bar") are expected not to be grouped.
 */
@Test
public void testNoLonelyclusters() throws JsonParseException, JsonMappingException, IOException {
    Project project = createCSVProject("column\nfoo\nbar\n");

    kNNClusterer clusterer = ParsingUtilities.mapper
            .readValue(configJson, kNNClustererConfig.class)
            .apply(project);
    clusterer.computeClusters(new Engine(project));

    // The serialized representation must contain no clusters.
    boolean noClusters = clusterer.getJsonRepresentation().isEmpty();
    assertTrue(noClusters);
}
} }