parent
4984837c9f
commit
1c90129829
50
extensions/phonetic/module/MOD-INF/controller.js
Normal file
50
extensions/phonetic/module/MOD-INF/controller.js
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Content type and character encoding constants. Nothing in the visible
// part of this controller reads them.
var html = "text/html";
var encoding = "UTF-8";

// Handle on the Java class, obtained through the `Packages` global.
var ClientSideResourceManager = Packages.com.google.refine.ClientSideResourceManager;
|
||||||
|
|
||||||
|
/**
 * Function invoked to initialize the extension.
 *
 * Registers one instance of each phonetic keyer with KeyerFactory, under
 * the names "daitch-mokotoff" and "beider-morse".
 */
function init() {
  var KeyerFactory = Packages.com.google.refine.clustering.binning.KeyerFactory;
  var keyers = Packages.org.openrefine.phonetic.keyers;

  // Register new keyers
  KeyerFactory.put("daitch-mokotoff", new keyers.DaitchMokotoffKeyer());
  KeyerFactory.put("beider-morse", new keyers.BeiderMorseKeyer());

  // Similarly, we could register new distances like this:
  // Packages.com.google.refine.clustering.knn.DistanceFactory.put("my-distance", new Packages.org.openrefine.mydistances.MyDistance());
}
|
||||||
|
|
||||||
|
|
4
extensions/phonetic/module/MOD-INF/module.properties
Normal file
4
extensions/phonetic/module/MOD-INF/module.properties
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Module descriptor for the phonetic clustering extension.
name = phonetic
description = OpenRefine Phonetic Clustering extension
# NOTE(review): no macros.vm file is visible alongside this module - confirm it exists.
templating.macros = macros.vm
requires = core
|
123
extensions/phonetic/pom.xml
Normal file
123
extensions/phonetic/pom.xml
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.openrefine</groupId>
  <artifactId>phonetic</artifactId>
  <packaging>jar</packaging>
  <version>3.2-SNAPSHOT</version>

  <name>OpenRefine - Phonetic clustering extension</name>
  <description>Adds a few advanced phonetic clustering methods</description>
  <url>http://openrefine.org/</url>
  <parent>
    <groupId>org.openrefine</groupId>
    <artifactId>extensions</artifactId>
    <version>3.2-SNAPSHOT</version>
  </parent>

  <build>
    <!-- Named after this extension (was a leftover "openrefine-sample"). -->
    <finalName>openrefine-phonetic</finalName>
    <resources>
      <resource>
        <directory>src</directory>
      </resource>
    </resources>
    <testSourceDirectory>tests/src</testSourceDirectory>
    <!-- Compiled classes go straight into the module directory. -->
    <outputDirectory>module/MOD-INF/classes</outputDirectory>
    <plugins>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>build-helper-maven-plugin</artifactId>
        <version>1.8</version>
        <executions>
          <execution>
            <phase>generate-sources</phase>
            <goals>
              <goal>add-source</goal>
            </goals>
            <configuration>
              <sources>
                <source>src</source>
              </sources>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
          <encoding>UTF-8</encoding>
          <showDeprecation>false</showDeprecation>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-resources-plugin</artifactId>
        <version>2.6</version>
        <configuration>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
      <plugin>
        <!-- Copies runtime-scope dependencies into the module's lib directory. -->
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-dependency-plugin</artifactId>
        <version>3.1.1</version>
        <executions>
          <execution>
            <phase>compile</phase>
            <goals>
              <goal>copy-dependencies</goal>
            </goals>
            <configuration>
              <outputDirectory>module/MOD-INF/lib</outputDirectory>
              <includeScope>runtime</includeScope>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <!-- Also removes the copied dependencies on clean. -->
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-clean-plugin</artifactId>
        <version>3.1.0</version>
        <configuration>
          <filesets>
            <fileset>
              <directory>module/MOD-INF/lib</directory>
            </fileset>
          </filesets>
        </configuration>
      </plugin>
    </plugins>
  </build>

  <dependencies>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>main</artifactId>
      <version>${project.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>javax.servlet</groupId>
      <artifactId>servlet-api</artifactId>
      <version>2.5</version>
      <scope>provided</scope>
    </dependency>

    <!-- add here the dependencies of your extension -->

    <dependency>
      <groupId>org.testng</groupId>
      <artifactId>testng</artifactId>
      <version>6.9.10</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

</project>
|
||||||
|
|
@ -0,0 +1,21 @@
|
|||||||
|
package org.openrefine.phonetic.keyers;
|
||||||
|
|
||||||
|
import com.google.refine.clustering.binning.Keyer;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.EncoderException;
|
||||||
|
import org.apache.commons.codec.language.bm.BeiderMorseEncoder;
|
||||||
|
|
||||||
|
public class BeiderMorseKeyer extends Keyer {
|
||||||
|
|
||||||
|
protected BeiderMorseEncoder encoder = new BeiderMorseEncoder();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String key(String string, Object... params) {
|
||||||
|
try {
|
||||||
|
return encoder.encode(string);
|
||||||
|
} catch (EncoderException e) {
|
||||||
|
return string;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,15 @@
|
|||||||
|
package org.openrefine.phonetic.keyers;
|
||||||
|
|
||||||
|
import com.google.refine.clustering.binning.Keyer;
|
||||||
|
import org.apache.commons.codec.language.DaitchMokotoffSoundex;
|
||||||
|
|
||||||
|
public class DaitchMokotoffKeyer extends Keyer {
|
||||||
|
|
||||||
|
protected DaitchMokotoffSoundex encoder = new DaitchMokotoffSoundex();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String key(String string, Object... params) {
|
||||||
|
return encoder.encode(string);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,24 @@
|
|||||||
|
package org.openrefine.phonetic.keyers;
|
||||||
|
|
||||||
|
import com.google.refine.clustering.binning.Keyer;
|
||||||
|
|
||||||
|
import static org.testng.Assert.assertEquals;
|
||||||
|
import static org.testng.Assert.assertTrue;
|
||||||
|
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
public class BeiderMorseKeyerTest {
|
||||||
|
|
||||||
|
Keyer keyer = new BeiderMorseKeyer();
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testKey() {
|
||||||
|
assertTrue(keyer.key("Alphonse").contains("alponzi"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAccents() {
|
||||||
|
assertEquals(keyer.key("Éléonore"), "ilionor|ilionori");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,21 @@
|
|||||||
|
package org.openrefine.phonetic.keyers;
|
||||||
|
|
||||||
|
import static org.testng.Assert.assertEquals;
|
||||||
|
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.refine.clustering.binning.Keyer;
|
||||||
|
|
||||||
|
public class DaitchMokotoffKeyerTest {
|
||||||
|
protected Keyer keyer = new DaitchMokotoffKeyer();
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDaitchMokotoff() {
|
||||||
|
assertEquals(keyer.key("Alphonse"), "087640");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAccents() {
|
||||||
|
assertEquals(keyer.key("Éléonore"), "086900");
|
||||||
|
}
|
||||||
|
}
|
@ -23,6 +23,7 @@
|
|||||||
<module>database</module>
|
<module>database</module>
|
||||||
<module>gdata</module>
|
<module>gdata</module>
|
||||||
<module>pc-axis</module>
|
<module>pc-axis</module>
|
||||||
|
<module>phonetic</module>
|
||||||
<!-- Add new extensions here -->
|
<!-- Add new extensions here -->
|
||||||
</modules>
|
</modules>
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user