diff --git a/extensions/phonetic/module/MOD-INF/controller.js b/extensions/phonetic/module/MOD-INF/controller.js
new file mode 100644
index 000000000..5328e5d41
--- /dev/null
+++ b/extensions/phonetic/module/MOD-INF/controller.js
@@ -0,0 +1,50 @@
+/*
+
+Copyright 2010, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ */
+
+var html = "text/html"; // NOTE(review): not referenced anywhere in the visible part of this file - confirm it is needed
+var encoding = "UTF-8"; // NOTE(review): also unreferenced in the visible part of this file
+var ClientSideResourceManager = Packages.com.google.refine.ClientSideResourceManager; // alias of a core class; init() does not use it
+
+/*
+ * Function invoked to initialize the extension: it registers the phonetic keyers with the clustering KeyerFactory.
+ */
+function init() {
+ var keyerFactory = Packages.com.google.refine.clustering.binning.KeyerFactory;
+ var phoneticKeyers = Packages.org.openrefine.phonetic.keyers;
+ // Each keyer is put into the factory under the string given as first argument.
+ keyerFactory.put("daitch-mokotoff", new phoneticKeyers.DaitchMokotoffKeyer());
+ keyerFactory.put("beider-morse", new phoneticKeyers.BeiderMorseKeyer());
+ // New distances could be registered the same way, e.g. Packages.com.google.refine.clustering.knn.DistanceFactory.put("my-distance", new Packages.org.openrefine.mydistances.MyDistance());
+}
+
+
diff --git a/extensions/phonetic/module/MOD-INF/module.properties b/extensions/phonetic/module/MOD-INF/module.properties
new file mode 100644
index 000000000..e207201d3
--- /dev/null
+++ b/extensions/phonetic/module/MOD-INF/module.properties
@@ -0,0 +1,4 @@
+name = phonetic
+description = OpenRefine Phonetic Clustering extension
+templating.macros = macros.vm
+requires = core
diff --git a/extensions/phonetic/pom.xml b/extensions/phonetic/pom.xml
new file mode 100644
index 000000000..e173f5e10
--- /dev/null
+++ b/extensions/phonetic/pom.xml
@@ -0,0 +1,123 @@
+
+ 4.0.0
+
+ org.openrefine
+ phonetic
+ jar
+ 3.2-SNAPSHOT
+
+ OpenRefine - Phonetic clustering extension
+ Adds a few advanced phonetic clustering methods
+ http://openrefine.org/
+
+ org.openrefine
+ extensions
+ 3.2-SNAPSHOT
+
+
+
 openrefine-phonetic
+
+
+ src
+
+
+ tests/src
+ module/MOD-INF/classes
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+ 1.8
+
+
+ generate-sources
+
+ add-source
+
+
+
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.1
+
+
+ 1.8
+ UTF-8
+ false
+
+
+
+ org.apache.maven.plugins
+ maven-resources-plugin
+ 2.6
+
+ UTF-8
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+ 3.1.1
+
+
+ compile
+
+ copy-dependencies
+
+
+ module/MOD-INF/lib
+ runtime
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-clean-plugin
+ 3.1.0
+
+
+
+ module/MOD-INF/lib
+
+
+
+
+
+
+
+
+
+ ${project.groupId}
+ main
+ ${project.version}
+ provided
+
+
+ javax.servlet
+ servlet-api
+ 2.5
+ provided
+
+
+
+
+
+ org.testng
+ testng
+ 6.9.10
+ test
+
+
+
+
+
diff --git a/extensions/phonetic/src/org/openrefine/phonetic/keyers/BeiderMorseKeyer.java b/extensions/phonetic/src/org/openrefine/phonetic/keyers/BeiderMorseKeyer.java
new file mode 100644
index 000000000..710b89fb4
--- /dev/null
+++ b/extensions/phonetic/src/org/openrefine/phonetic/keyers/BeiderMorseKeyer.java
@@ -0,0 +1,21 @@
+package org.openrefine.phonetic.keyers;
+
+import com.google.refine.clustering.binning.Keyer;
+
+import org.apache.commons.codec.EncoderException;
+import org.apache.commons.codec.language.bm.BeiderMorseEncoder;
+
+public class BeiderMorseKeyer extends Keyer {
+ // Keyer that delegates to a BeiderMorseEncoder instance.
+ protected BeiderMorseEncoder encoder = new BeiderMorseEncoder();
+ /** Returns encoder.encode(string), or the unchanged input when an EncoderException is thrown; params is not used. */
+ @Override
+ public String key(String string, Object... params) {
+ String result = string; // fallback value kept if encoding fails
+ try {
+ result = encoder.encode(string);
+ } catch (EncoderException e) { // ignored: the raw input then serves as the key
+ }
+ return result;
+ }
+}
diff --git a/extensions/phonetic/src/org/openrefine/phonetic/keyers/DaitchMokotoffKeyer.java b/extensions/phonetic/src/org/openrefine/phonetic/keyers/DaitchMokotoffKeyer.java
new file mode 100644
index 000000000..9a4af5216
--- /dev/null
+++ b/extensions/phonetic/src/org/openrefine/phonetic/keyers/DaitchMokotoffKeyer.java
@@ -0,0 +1,15 @@
+package org.openrefine.phonetic.keyers;
+
+import com.google.refine.clustering.binning.Keyer;
+import org.apache.commons.codec.language.DaitchMokotoffSoundex;
+
+public class DaitchMokotoffKeyer extends Keyer {
+ // Keyer that delegates to a DaitchMokotoffSoundex instance.
+ protected DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();
+ /** Returns encoder.encode(string); params is not used. */
+ @Override
+ public String key(String string, Object... params) {
+ final String soundexCode = encoder.encode(string);
+ return soundexCode;
+ }
+}
diff --git a/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/BeiderMorseKeyerTest.java b/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/BeiderMorseKeyerTest.java
new file mode 100644
index 000000000..5830cfe6e
--- /dev/null
+++ b/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/BeiderMorseKeyerTest.java
@@ -0,0 +1,24 @@
+package org.openrefine.phonetic.keyers;
+
+import com.google.refine.clustering.binning.Keyer;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import org.testng.annotations.Test;
+
+public class BeiderMorseKeyerTest {
+
+ Keyer keyer = new BeiderMorseKeyer();
+
+ @Test // the key computed for "Alphonse" must contain "alponzi"
+ public void testKey() {
+ String alphonseKey = keyer.key("Alphonse");
+ assertTrue(alphonseKey.contains("alponzi"));
+ }
+ @Test // an accented input must yield exactly the expected key
+ public void testAccents() {
+ String eleonoreKey = keyer.key("Éléonore");
+ assertEquals(eleonoreKey, "ilionor|ilionori");
+ }
+}
diff --git a/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/DaitchMokotoffKeyerTest.java b/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/DaitchMokotoffKeyerTest.java
new file mode 100644
index 000000000..d0251ab95
--- /dev/null
+++ b/extensions/phonetic/tests/src/org/openrefine/phonetic/keyers/DaitchMokotoffKeyerTest.java
@@ -0,0 +1,21 @@
+package org.openrefine.phonetic.keyers;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.Test;
+
+import com.google.refine.clustering.binning.Keyer;
+
+public class DaitchMokotoffKeyerTest {
+ protected Keyer keyer = new DaitchMokotoffKeyer();
+ @Test // the key for an unaccented name must equal the expected code
+ public void testDaitchMokotoff() {
+ final String expectedCode = "087640";
+ assertEquals(keyer.key("Alphonse"), expectedCode);
+ }
+ @Test // the key for an accented name must equal the expected code
+ public void testAccents() {
+ final String expectedCode = "086900";
+ assertEquals(keyer.key("Éléonore"), expectedCode);
+ }
+}
diff --git a/extensions/pom.xml b/extensions/pom.xml
index 98574dd8e..ca1fb7de3 100644
--- a/extensions/pom.xml
+++ b/extensions/pom.xml
@@ -23,6 +23,7 @@
database
gdata
pc-axis
+ phonetic