Split BM encoding, better tests.
This commit is contained in:
parent
1c90129829
commit
75a0fac71f
@ -12,7 +12,18 @@ public class BeiderMorseKeyer extends Keyer {
|
||||
@Override
|
||||
public String key(String string, Object... params) {
|
||||
try {
|
||||
return encoder.encode(string);
|
||||
/*
|
||||
* Beider Morse encoding can return multiple phonetic
|
||||
* encodings, separated by |.
|
||||
* Ideally the Keyer interface should be changed to allow
|
||||
* for multiple values to be returned (and the clustering code
|
||||
* should be adapted accordingly).
|
||||
*
|
||||
* As a simple workaround we only return the first value.
|
||||
* We could also return the entire list but it would make
|
||||
* matching harder.
|
||||
*/
|
||||
return encoder.encode(string).split("\\|")[0];
|
||||
} catch (EncoderException e) {
|
||||
return string;
|
||||
}
|
||||
|
@ -1,24 +1,28 @@
|
||||
package org.openrefine.phonetic.keyers;
|
||||
|
||||
import com.google.refine.clustering.binning.Keyer;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.clustering.binning.Keyer;
|
||||
|
||||
public class BeiderMorseKeyerTest {
|
||||
|
||||
Keyer keyer = new BeiderMorseKeyer();
|
||||
|
||||
@Test
|
||||
public void testKey() {
|
||||
assertTrue(keyer.key("Alphonse").contains("alponzi"));
|
||||
assertEquals(keyer.key("Alphonse"), "YlfYnzi");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAccents() {
|
||||
assertEquals(keyer.key("Éléonore"), "ilionor|ilionori");
|
||||
assertEquals(keyer.key("Éléonore"), "ilionor");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmpty() {
|
||||
assertEquals(keyer.key(""), "");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -18,4 +18,10 @@ public class DaitchMokotoffKeyerTest {
|
||||
public void testAccents() {
|
||||
assertEquals(keyer.key("Éléonore"), "086900");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testEmpty() {
|
||||
assertEquals(keyer.key(""), "000000");
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user