From 75a0fac71fcf9203af96e930a6cb5caef40ad0a0 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 31 Dec 2018 14:24:35 +0100 Subject: [PATCH] Split BM encoding, better tests. --- .../phonetic/keyers/BeiderMorseKeyer.java | 13 ++++++++++++- .../phonetic/keyers/BeiderMorseKeyerTest.java | 14 +++++++++----- .../phonetic/keyers/DaitchMokotoffKeyerTest.java | 6 ++++++ 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/extensions/phonetic/src/org/openrefine/phonetic/keyers/BeiderMorseKeyer.java b/extensions/phonetic/src/org/openrefine/phonetic/keyers/BeiderMorseKeyer.java index 710b89fb4..9f4912c24 100644 --- a/extensions/phonetic/src/org/openrefine/phonetic/keyers/BeiderMorseKeyer.java +++ b/extensions/phonetic/src/org/openrefine/phonetic/keyers/BeiderMorseKeyer.java @@ -12,7 +12,18 @@ public class BeiderMorseKeyer extends Keyer { @Override public String key(String string, Object... params) { try { - return encoder.encode(string); + /* + * Beider Morse encoding can return multiple phonetic + * encodings, separated by |. + * Ideally the Keyer interface should be changed to allow + * for multiple values to be returned (and the clustering code + * should be adapted accordingly). + * + * As a simple workaround we only return the first value. + * We could also return the entire list but it would make + * matching harder. 
+ */ + return encoder.encode(string).split("\\|")[0]; } catch (EncoderException e) { return string; } diff --git a/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/BeiderMorseKeyerTest.java b/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/BeiderMorseKeyerTest.java index 5830cfe6e..a0e03633e 100644 --- a/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/BeiderMorseKeyerTest.java +++ b/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/BeiderMorseKeyerTest.java @@ -1,24 +1,28 @@ package org.openrefine.phonetic.keyers; -import com.google.refine.clustering.binning.Keyer; - import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; import org.testng.annotations.Test; +import com.google.refine.clustering.binning.Keyer; + public class BeiderMorseKeyerTest { Keyer keyer = new BeiderMorseKeyer(); @Test public void testKey() { - assertTrue(keyer.key("Alphonse").contains("alponzi")); + assertEquals(keyer.key("Alphonse"), "YlfYnzi"); } @Test public void testAccents() { - assertEquals(keyer.key("Éléonore"), "ilionor|ilionori"); + assertEquals(keyer.key("Éléonore"), "ilionor"); + } + + @Test + public void testEmpty() { + assertEquals(keyer.key(""), ""); } } diff --git a/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/DaitchMokotoffKeyerTest.java b/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/DaitchMokotoffKeyerTest.java index d0251ab95..d949692cc 100644 --- a/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/DaitchMokotoffKeyerTest.java +++ b/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/DaitchMokotoffKeyerTest.java @@ -18,4 +18,10 @@ public class DaitchMokotoffKeyerTest { public void testAccents() { assertEquals(keyer.key("Éléonore"), "086900"); } + + + @Test + public void testEmpty() { + assertEquals(keyer.key(""), "000000"); + } }