Hooked up the recon service at data.labs.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@163 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
f16727c20c
commit
e57aae888b
@ -17,6 +17,7 @@ import org.json.JSONException;
|
|||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
import org.json.JSONWriter;
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.metaweb.gridworks.expr.ExpressionUtils;
|
||||||
import com.metaweb.gridworks.model.Cell;
|
import com.metaweb.gridworks.model.Cell;
|
||||||
import com.metaweb.gridworks.model.Project;
|
import com.metaweb.gridworks.model.Project;
|
||||||
import com.metaweb.gridworks.model.Recon;
|
import com.metaweb.gridworks.model.Recon;
|
||||||
@ -77,9 +78,10 @@ public class HeuristicReconConfig extends ReconConfig {
|
|||||||
|
|
||||||
static protected class HeuristicReconJob extends ReconJob {
|
static protected class HeuristicReconJob extends ReconJob {
|
||||||
String text;
|
String text;
|
||||||
|
String code;
|
||||||
|
|
||||||
public int getKey() {
|
public int getKey() {
|
||||||
return text.hashCode();
|
return code.hashCode();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -142,9 +144,42 @@ public class HeuristicReconConfig extends ReconConfig {
|
|||||||
String columnName, Cell cell) {
|
String columnName, Cell cell) {
|
||||||
|
|
||||||
HeuristicReconJob job = new HeuristicReconJob();
|
HeuristicReconJob job = new HeuristicReconJob();
|
||||||
|
if ("relevance".equals(service)) {
|
||||||
|
job.code = job.text = cell.value.toString();
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
StringWriter stringWriter = new StringWriter();
|
||||||
|
JSONWriter jsonWriter = new JSONWriter(stringWriter);
|
||||||
|
|
||||||
job.text = cell.value.toString();
|
jsonWriter.object();
|
||||||
|
jsonWriter.key("/type/object/name"); jsonWriter.value(cell.value.toString());
|
||||||
|
jsonWriter.key("/type/object/type"); jsonWriter.value(typeID);
|
||||||
|
|
||||||
|
for (ColumnDetail c : columnDetails) {
|
||||||
|
int cellIndex = project.columnModel.getColumnByName(c.columnName).getCellIndex();
|
||||||
|
|
||||||
|
Cell cell2 = row.getCell(cellIndex);
|
||||||
|
if (cell2 != null && ExpressionUtils.isNonBlankData(cell2.value)) {
|
||||||
|
jsonWriter.key(c.property.id);
|
||||||
|
|
||||||
|
if (cell2.recon != null && cell2.recon.match != null) {
|
||||||
|
jsonWriter.object();
|
||||||
|
jsonWriter.key("id"); jsonWriter.value(cell2.recon.match.topicID);
|
||||||
|
jsonWriter.key("name"); jsonWriter.value(cell2.recon.match.topicName);
|
||||||
|
jsonWriter.endObject();
|
||||||
|
} else {
|
||||||
|
jsonWriter.value(cell2.value.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
jsonWriter.endObject();
|
||||||
|
|
||||||
|
job.text = cell.value.toString();
|
||||||
|
job.code = stringWriter.toString();
|
||||||
|
} catch (JSONException e) {
|
||||||
|
//
|
||||||
|
}
|
||||||
|
}
|
||||||
return job;
|
return job;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,71 +318,41 @@ public class HeuristicReconConfig extends ReconConfig {
|
|||||||
return recon;
|
return recon;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static final String s_reconService = "http://data.labs.freebase.com/recon/query";
|
||||||
|
|
||||||
protected List<Recon> batchReconUsingReconService(List<ReconJob> jobs) {
|
protected List<Recon> batchReconUsingReconService(List<ReconJob> jobs) {
|
||||||
List<Recon> recons = new ArrayList<Recon>(jobs.size());
|
List<Recon> recons = new ArrayList<Recon>(jobs.size());
|
||||||
|
|
||||||
try {
|
for (int i = 0; i < jobs.size(); i++) {
|
||||||
StringWriter stringWriter = new StringWriter();
|
HeuristicReconJob job = (HeuristicReconJob) jobs.get(i);
|
||||||
JSONWriter jsonWriter = new JSONWriter(stringWriter);
|
Recon recon = null;
|
||||||
|
|
||||||
jsonWriter.object();
|
|
||||||
for (int i = 0; i < jobs.size(); i++) {
|
|
||||||
HeuristicReconJob job = (HeuristicReconJob) jobs.get(i);
|
|
||||||
|
|
||||||
jsonWriter.key("q" + i + ":search");
|
|
||||||
|
|
||||||
jsonWriter.object();
|
|
||||||
jsonWriter.key("query"); jsonWriter.value(job.text);
|
|
||||||
jsonWriter.key("limit"); jsonWriter.value(3);
|
|
||||||
jsonWriter.key("type"); jsonWriter.value(typeID);
|
|
||||||
jsonWriter.key("type_strict"); jsonWriter.value("should");
|
|
||||||
jsonWriter.key("type_exclude"); jsonWriter.value("/common/image");
|
|
||||||
jsonWriter.key("domain_exclude"); jsonWriter.value("/freebase");
|
|
||||||
jsonWriter.endObject();
|
|
||||||
}
|
|
||||||
jsonWriter.endObject();
|
|
||||||
|
|
||||||
StringBuffer sb = new StringBuffer();
|
|
||||||
sb.append("http://api.freebase.com/api/service/search?indent=1&queries=");
|
|
||||||
sb.append(ParsingUtilities.encode(stringWriter.toString()));
|
|
||||||
|
|
||||||
URL url = new URL(sb.toString());
|
|
||||||
URLConnection connection = url.openConnection();
|
|
||||||
connection.setConnectTimeout(5000);
|
|
||||||
connection.connect();
|
|
||||||
|
|
||||||
InputStream is = connection.getInputStream();
|
|
||||||
try {
|
try {
|
||||||
String s = ParsingUtilities.inputStreamToString(is);
|
StringBuffer sb = new StringBuffer();
|
||||||
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
|
sb.append(s_reconService + "?limit=5&q=");
|
||||||
|
sb.append(ParsingUtilities.encode(job.code));
|
||||||
|
|
||||||
for (int i = 0; i < jobs.size(); i++) {
|
URL url = new URL(sb.toString());
|
||||||
HeuristicReconJob job = (HeuristicReconJob) jobs.get(i);
|
URLConnection connection = url.openConnection();
|
||||||
|
connection.setConnectTimeout(5000);
|
||||||
|
connection.connect();
|
||||||
|
|
||||||
String text = job.text;
|
InputStream is = connection.getInputStream();
|
||||||
String key = "q" + i + ":search";
|
try {
|
||||||
if (!o.has(key)) {
|
String s = ParsingUtilities.inputStreamToString(is);
|
||||||
continue;
|
JSONArray a = ParsingUtilities.evaluateJsonStringToArray(s);
|
||||||
}
|
|
||||||
|
|
||||||
Recon recon = null;
|
recon = createReconFromReconResults(job.text, a);
|
||||||
|
} finally {
|
||||||
JSONObject o2 = o.getJSONObject(key);
|
is.close();
|
||||||
if (o2.has("result")) {
|
|
||||||
JSONArray results = o2.getJSONArray("result");
|
|
||||||
|
|
||||||
recon = createReconFromRelevanceResults(text, results);
|
|
||||||
} else {
|
|
||||||
recon = new Recon();
|
|
||||||
}
|
|
||||||
|
|
||||||
recons.add(recon);
|
|
||||||
}
|
}
|
||||||
} finally {
|
} catch (Exception e) {
|
||||||
is.close();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
|
||||||
e.printStackTrace();
|
if (recon == null) {
|
||||||
|
recon = new Recon();
|
||||||
|
}
|
||||||
|
recons.add(recon);
|
||||||
}
|
}
|
||||||
|
|
||||||
System.gc();
|
System.gc();
|
||||||
@ -355,6 +360,64 @@ public class HeuristicReconConfig extends ReconConfig {
|
|||||||
return recons;
|
return recons;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected Recon createReconFromReconResults(String text, JSONArray results) {
|
||||||
|
Recon recon = new Recon();
|
||||||
|
try {
|
||||||
|
int length = results.length();
|
||||||
|
int count = 0;
|
||||||
|
for (int i = 0; i < length && count < 3; i++) {
|
||||||
|
JSONObject result = results.getJSONObject(i);
|
||||||
|
if (!result.has("name")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
String id = result.getString("id");
|
||||||
|
JSONArray names = result.getJSONArray("name");
|
||||||
|
double score = result.getDouble("score");
|
||||||
|
|
||||||
|
JSONArray types = result.getJSONArray("type");
|
||||||
|
String[] typeIDs = new String[types.length()];
|
||||||
|
for (int j = 0; j < typeIDs.length; j++) {
|
||||||
|
typeIDs[j] = types.getString(j);
|
||||||
|
}
|
||||||
|
|
||||||
|
ReconCandidate candidate = new ReconCandidate(
|
||||||
|
id,
|
||||||
|
"#" + id.substring(6),
|
||||||
|
names.getString(0),
|
||||||
|
typeIDs,
|
||||||
|
score
|
||||||
|
);
|
||||||
|
|
||||||
|
// best match
|
||||||
|
if (i == 0) {
|
||||||
|
recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.topicName));
|
||||||
|
recon.setFeature(Recon.Feature_nameLevenshtein, StringUtils.getLevenshteinDistance(text, candidate.topicName));
|
||||||
|
recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.topicName));
|
||||||
|
|
||||||
|
recon.setFeature(Recon.Feature_typeMatch, false);
|
||||||
|
for (String typeID : candidate.typeIDs) {
|
||||||
|
if (typeID.equals(typeID)) {
|
||||||
|
recon.setFeature(Recon.Feature_typeMatch, true);
|
||||||
|
if (autoMatch &&
|
||||||
|
(score > 0.6 ||
|
||||||
|
(result.has("match") && result.getBoolean("match")))) {
|
||||||
|
recon.match = candidate;
|
||||||
|
recon.judgment = Judgment.Matched;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
recon.addCandidate(candidate);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
} catch (JSONException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return recon;
|
||||||
|
}
|
||||||
|
|
||||||
static protected double wordDistance(String s1, String s2) {
|
static protected double wordDistance(String s1, String s2) {
|
||||||
Set<String> words1 = breakWords(s1);
|
Set<String> words1 = breakWords(s1);
|
||||||
|
@ -61,7 +61,7 @@ ReconDialog.prototype._createDialog = function() {
|
|||||||
'</tr>' +
|
'</tr>' +
|
||||||
'<tr>' +
|
'<tr>' +
|
||||||
'<td>' +
|
'<td>' +
|
||||||
'<input type="checkbox" checked bind="heuristicAutomatchCheck" /> Auto-match correctly-typed candidates scoring' +
|
'<input type="checkbox" checked bind="heuristicAutomatchCheck" /> Auto-match candidates with high confidence' +
|
||||||
'</td>' +
|
'</td>' +
|
||||||
'<td>' +
|
'<td>' +
|
||||||
'Use ' +
|
'Use ' +
|
||||||
@ -158,7 +158,8 @@ ReconDialog.prototype._populateDialog = function() {
|
|||||||
var td1 = tr.insertCell(1);
|
var td1 = tr.insertCell(1);
|
||||||
|
|
||||||
$(td0).html(column.headerLabel);
|
$(td0).html(column.headerLabel);
|
||||||
$('<input size="15" />')
|
$('<input size="15" name="recon-dialog-heuristic-property" />')
|
||||||
|
.attr("columnName", column.headerLabel)
|
||||||
.appendTo(td1)
|
.appendTo(td1)
|
||||||
.suggest({ type: '/type/property' });
|
.suggest({ type: '/type/property' });
|
||||||
}
|
}
|
||||||
@ -209,6 +210,21 @@ ReconDialog.prototype._onDoHeuristic = function() {
|
|||||||
if (type == null) {
|
if (type == null) {
|
||||||
alert("Please specify a type.");
|
alert("Please specify a type.");
|
||||||
} else {
|
} else {
|
||||||
|
var columnDetails = [];
|
||||||
|
var propertyInputs = $('input[name="recon-dialog-heuristic-property"]');
|
||||||
|
$.each(propertyInputs, function() {
|
||||||
|
var property = $(this).data("data.suggest");
|
||||||
|
if (property != null) {
|
||||||
|
columnDetails.push({
|
||||||
|
column: this.getAttribute("columnName"),
|
||||||
|
property: {
|
||||||
|
id: property.id,
|
||||||
|
name: property.name
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
Gridworks.postProcess(
|
Gridworks.postProcess(
|
||||||
"reconcile",
|
"reconcile",
|
||||||
{},
|
{},
|
||||||
@ -221,7 +237,8 @@ ReconDialog.prototype._onDoHeuristic = function() {
|
|||||||
id: type.id,
|
id: type.id,
|
||||||
name: type.name
|
name: type.name
|
||||||
},
|
},
|
||||||
autoMatch: this._elmts.heuristicAutomatchCheck[0].checked
|
autoMatch: this._elmts.heuristicAutomatchCheck[0].checked,
|
||||||
|
columnDetails: columnDetails
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
{ cellsChanged: true, columnStatsChanged: true }
|
{ cellsChanged: true, columnStatsChanged: true }
|
||||||
|
Loading…
Reference in New Issue
Block a user