Split BM encoding, better tests.

This commit is contained in:
Antonin Delpeuch 2018-12-31 14:24:35 +01:00
parent 1c90129829
commit 75a0fac71f
3 changed files with 27 additions and 6 deletions

View File

@ -12,7 +12,18 @@ public class BeiderMorseKeyer extends Keyer {
@Override @Override
public String key(String string, Object... params) { public String key(String string, Object... params) {
try { try {
return encoder.encode(string); /*
* Beider Morse encoding can return multiple phonetic
* encodings, separated by |.
* Ideally the Keyer interface should be changed to allow
* for multiple values to be returned (and the clustering code
* should be adapted accordingly).
*
* As a simple workaround we only return the first value.
* We could also return the entire list but it would make
* matching harder.
*/
return encoder.encode(string).split("\\|")[0];
} catch (EncoderException e) { } catch (EncoderException e) {
return string; return string;
} }

View File

@ -1,24 +1,28 @@
package org.openrefine.phonetic.keyers; package org.openrefine.phonetic.keyers;
import com.google.refine.clustering.binning.Keyer;
import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import com.google.refine.clustering.binning.Keyer;
public class BeiderMorseKeyerTest { public class BeiderMorseKeyerTest {
Keyer keyer = new BeiderMorseKeyer(); Keyer keyer = new BeiderMorseKeyer();
@Test @Test
public void testKey() { public void testKey() {
assertTrue(keyer.key("Alphonse").contains("alponzi")); assertEquals(keyer.key("Alphonse"), "YlfYnzi");
} }
@Test @Test
public void testAccents() { public void testAccents() {
assertEquals(keyer.key("Éléonore"), "ilionor|ilionori"); assertEquals(keyer.key("Éléonore"), "ilionor");
}
@Test
public void testEmpty() {
assertEquals(keyer.key(""), "");
} }
} }

View File

@ -18,4 +18,10 @@ public class DaitchMokotoffKeyerTest {
public void testAccents() { public void testAccents() {
assertEquals(keyer.key("Éléonore"), "086900"); assertEquals(keyer.key("Éléonore"), "086900");
} }
@Test
public void testEmpty() {
assertEquals(keyer.key(""), "000000");
}
} }