Issue 399 - Add Cologne Phonetic Keyer and allow it to be used for clustering
git-svn-id: http://google-refine.googlecode.com/svn/trunk@2102 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
81045d91e1
commit
8da1291650
@ -75,6 +75,7 @@ public class BinningClusterer extends Clusterer {
|
||||
_keyers.put("double-metaphone", new DoubleMetaphoneKeyer());
|
||||
_keyers.put("metaphone3", new Metaphone3Keyer());
|
||||
_keyers.put("soundex", new SoundexKeyer());
|
||||
_keyers.put("cologne-phonetic", new ColognePhoneticKeyer());
|
||||
}
|
||||
|
||||
class BinningRowVisitor implements RowVisitor {
|
||||
|
@ -0,0 +1,51 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Thomas F. Morris.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.clustering.binning;
|
||||
|
||||
import org.apache.commons.codec.language.ColognePhonetic;
|
||||
|
||||
/**
 * Keyer whose key is produced by the Apache Commons Codec
 * {@link ColognePhonetic} encoder.
 */
public class ColognePhoneticKeyer extends Keyer {
|
||||
|
||||
// Encoder that key(...) delegates to; assigned once in the constructor.
private ColognePhonetic _codec;
|
||||
|
||||
/**
 * Creates a keyer with its own {@link ColognePhonetic} encoder instance.
 */
public ColognePhoneticKeyer() {
|
||||
_codec = new ColognePhonetic();
|
||||
}
|
||||
|
||||
/**
 * Encodes a string with {@link ColognePhonetic#colognePhonetic(String)}.
 *
 * @param s the string to encode
 * @param o additional parameters; not used by this keyer
 * @return whatever the encoder returns for {@code s}
 */
@Override
|
||||
public String key(String s, Object... o) {
|
||||
// NOTE(review): null/empty handling is left entirely to the codec — confirm its behavior.
return _codec.colognePhonetic(s);
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -38,6 +38,7 @@ import java.util.Properties;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.clustering.binning.ColognePhoneticKeyer;
|
||||
import com.google.refine.clustering.binning.DoubleMetaphoneKeyer;
|
||||
import com.google.refine.clustering.binning.Metaphone3Keyer;
|
||||
import com.google.refine.clustering.binning.MetaphoneKeyer;
|
||||
@ -48,11 +49,14 @@ import com.google.refine.grel.Function;
|
||||
|
||||
public class Phonetic implements Function {
|
||||
|
||||
// TODO: We could probably lazily initialize these when needed for efficiency
|
||||
static private Metaphone3Keyer metaphone3 = new Metaphone3Keyer();
|
||||
static private DoubleMetaphoneKeyer metaphone2 = new DoubleMetaphoneKeyer();
|
||||
static private MetaphoneKeyer metaphone = new MetaphoneKeyer();
|
||||
static private SoundexKeyer soundex = new SoundexKeyer();
|
||||
static private ColognePhoneticKeyer cologne = new ColognePhoneticKeyer();
|
||||
|
||||
@Override
|
||||
public Object call(Properties bindings, Object[] args) {
|
||||
if (args.length == 2) {
|
||||
Object o1 = args[0];
|
||||
@ -69,6 +73,8 @@ public class Phonetic implements Function {
|
||||
return metaphone.key(str);
|
||||
} else if ("soundex".equalsIgnoreCase(encoding)) {
|
||||
return soundex.key(str);
|
||||
} else if ("cologne".equalsIgnoreCase(encoding)) {
|
||||
return cologne.key(str);
|
||||
} else {
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " doesn't know how to handle the '" + encoding + "' encoding.");
|
||||
}
|
||||
@ -77,6 +83,7 @@ public class Phonetic implements Function {
|
||||
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 3 strings");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
<option selected="true">fingerprint</option>
|
||||
<option>ngram-fingerprint</option>
|
||||
<option>metaphone3</option>
|
||||
<option>cologne-phonetic</option>
|
||||
</select></div>
|
||||
<div class="knn-controls hidden">Distance Function <select bind="distanceFunctionSelector">
|
||||
<option selected="true">levenshtein</option>
|
||||
|
Loading…
Reference in New Issue
Block a user