forgot to add the ngram class itself
git-svn-id: http://google-refine.googlecode.com/svn/trunk@931 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
b3173211e3
commit
5e0acf28d0
@ -0,0 +1,56 @@
|
|||||||
|
package com.metaweb.gridworks.expr.functions.strings;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.expr.EvalError;
|
||||||
|
import com.metaweb.gridworks.gel.ControlFunctionRegistry;
|
||||||
|
import com.metaweb.gridworks.gel.Function;
|
||||||
|
|
||||||
|
public class NGram implements Function {
|
||||||
|
|
||||||
|
public Object call(Properties bindings, Object[] args) {
|
||||||
|
if (args.length == 2) {
|
||||||
|
Object s = args[0];
|
||||||
|
Object n = args[1];
|
||||||
|
|
||||||
|
if (s != null && s instanceof String && n != null && n instanceof Number) {
|
||||||
|
|
||||||
|
String[] tokens = StringUtils.split((String) s);
|
||||||
|
|
||||||
|
int count = ((Number) n).intValue();
|
||||||
|
if (count >= tokens.length) {
|
||||||
|
return new String[] { (String) s };
|
||||||
|
}
|
||||||
|
|
||||||
|
int len = tokens.length - count + 1;
|
||||||
|
String[] ngrams = new String[len];
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
String[] ss = new String[count];
|
||||||
|
for (int j = 0; j < count; j++) {
|
||||||
|
ss[j] = tokens[i + j];
|
||||||
|
}
|
||||||
|
ngrams[i] = StringUtils.join(ss,' ');
|
||||||
|
}
|
||||||
|
|
||||||
|
return ngrams;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string and a number");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
|
||||||
|
writer.object();
|
||||||
|
writer.key("description"); writer.value("Returns an array of the word ngrams of s");
|
||||||
|
writer.key("params"); writer.value("string s, number n");
|
||||||
|
writer.key("returns"); writer.value("array of strings");
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user