First pass at generalizing the standard reconciliation service UI. Many pieces are still Freebase-centric.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1032 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-06-24 20:13:51 +00:00
parent f0ed50e468
commit 058e86b4c8
44 changed files with 1155 additions and 824 deletions

View File

@ -110,6 +110,9 @@ public class GridworksServlet extends Butterfly {
{"upload-data", "com.metaweb.gridworks.commands.freebase.UploadDataCommand"},
{"mqlread", "com.metaweb.gridworks.commands.freebase.MQLReadCommand"},
{"mqlwrite", "com.metaweb.gridworks.commands.freebase.MQLWriteCommand"},
{"get-preference", "com.metaweb.gridworks.commands.GetPreferenceCommand"},
{"set-preference", "com.metaweb.gridworks.commands.SetPreferenceCommand"}
};
public static String getVersion() {

View File

@ -0,0 +1,54 @@
package com.metaweb.gridworks.commands;
import java.io.IOException;
import java.util.Properties;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.json.JSONException;
import org.json.JSONWriter;
import com.metaweb.gridworks.ProjectManager;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.preference.PreferenceStore;
import com.metaweb.gridworks.preference.TopList;
/**
 * Command that looks up a single preference by name and returns it to the
 * client as a JSON object of the form <code>{ "value" : ... }</code>.
 *
 * When the request carries a "project" parameter the preference is read from
 * that project's metadata; otherwise it is read from the store held by
 * {@link ProjectManager#singleton}.
 */
public class GetPreferenceCommand extends Command {

    /**
     * Handles the GET request: resolves the preference store, reads the
     * preference named by the "name" parameter and writes it out as JSON.
     * A JSONException raised while writing is reported via respondException.
     */
    @Override
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {

        PreferenceStore store = selectStore(request);
        Object value = store.get(request.getParameter("name"));

        try {
            response.setCharacterEncoding("UTF-8");
            response.setHeader("Content-Type", "application/json");

            JSONWriter out = new JSONWriter(response.getWriter());
            out.object();
            out.key("value");
            writePreference(out, value);
            out.endObject();
        } catch (JSONException e) {
            respondException(response, e);
        }
    }

    /**
     * Picks the preference store for this request: the project's own store
     * when a "project" parameter is present and the project resolves,
     * the ProjectManager singleton's store otherwise.
     */
    private PreferenceStore selectStore(HttpServletRequest request) throws ServletException {
        Project project = null;
        if (request.getParameter("project") != null) {
            project = getProject(request);
        }
        if (project == null) {
            return ProjectManager.singleton.getPreferenceStore();
        }
        return project.getMetadata().getPreferenceStore();
    }

    /**
     * Writes one preference value: TopList instances serialise themselves,
     * null / String / Number / Boolean are written as-is, and anything else
     * is written as its toString() form.
     */
    private static void writePreference(JSONWriter out, Object value) throws JSONException {
        if (value instanceof TopList) {
            TopList topList = (TopList) value;
            topList.write(out, new Properties());
            return;
        }

        boolean plain = value == null
            || value instanceof String
            || value instanceof Number
            || value instanceof Boolean;

        if (plain) {
            out.value(value);
        } else {
            out.value(value.toString());
        }
    }
}

View File

@ -0,0 +1,41 @@
package com.metaweb.gridworks.commands;
import java.io.IOException;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.json.JSONException;
import org.json.JSONTokener;
import com.metaweb.gridworks.ProjectManager;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.preference.PreferenceStore;
/**
 * Command that stores a single preference value.
 *
 * The preference name comes from the "name" request parameter and its value
 * from the "value" request parameter, which must contain JSON text. When the
 * request carries a "project" parameter the preference is written to that
 * project's metadata; otherwise it is written to the store held by
 * {@link ProjectManager#singleton}.
 */
public class SetPreferenceCommand extends Command {

    /**
     * Handles the POST request: parses the "value" parameter as JSON, converts
     * it with {@link PreferenceStore#loadObject} and stores it under "name".
     * Responds with <code>{ "code" : "ok" }</code> on success; a missing or
     * malformed "value" is reported through respondException.
     */
    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {

        Project project = request.getParameter("project") != null ? getProject(request) : null;
        PreferenceStore ps = project != null ?
                project.getMetadata().getPreferenceStore() :
                ProjectManager.singleton.getPreferenceStore();

        String prefName = request.getParameter("name");
        String valueString = request.getParameter("value");

        try {
            // getParameter returns null for an absent parameter, and handing null
            // to JSONTokener fails with a NullPointerException that would escape
            // this method; report it through the regular error path instead.
            if (valueString == null) {
                throw new JSONException("Missing required parameter: value");
            }

            JSONTokener t = new JSONTokener(valueString);
            Object o = t.nextValue();

            ps.put(prefName, PreferenceStore.loadObject(o));

            respond(response, "{ \"code\" : \"ok\" }");
        } catch (JSONException e) {
            respondException(response, e);
        }
    }
}

View File

@ -50,28 +50,28 @@ public class PreviewExtendDataCommand extends Command {
int cellIndex = project.columnModel.getColumnByName(columnName).getCellIndex();
List<String> topicNames = new ArrayList<String>();
List<String> topicGuids = new ArrayList<String>();
Set<String> guids = new HashSet<String>();
List<String> topicIds = new ArrayList<String>();
Set<String> ids = new HashSet<String>();
for (int i = 0; i < length; i++) {
int rowIndex = rowIndices.getInt(i);
if (rowIndex >= 0 && rowIndex < project.rows.size()) {
Row row = project.rows.get(rowIndex);
Cell cell = row.getCell(cellIndex);
if (cell != null && cell.recon != null && cell.recon.match != null) {
topicNames.add(cell.recon.match.topicName);
topicGuids.add(cell.recon.match.topicGUID);
guids.add(cell.recon.match.topicGUID);
topicNames.add(cell.recon.match.name);
topicIds.add(cell.recon.match.id);
ids.add(cell.recon.match.id);
} else {
topicNames.add(null);
topicGuids.add(null);
guids.add(null);
topicIds.add(null);
ids.add(null);
}
}
}
Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
FreebaseDataExtensionJob job = new FreebaseDataExtensionJob(json);
Map<String, DataExtension> map = job.extend(guids, reconCandidateMap);
Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);
response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", "application/json");
@ -102,11 +102,11 @@ public class PreviewExtendDataCommand extends Command {
writer.key("rows");
writer.array();
for (int r = 0; r < topicNames.size(); r++) {
String guid = topicGuids.get(r);
String id = topicIds.get(r);
String topicName = topicNames.get(r);
if (guid != null && map.containsKey(guid)) {
DataExtension ext = map.get(guid);
if (id != null && map.containsKey(id)) {
DataExtension ext = map.get(id);
boolean first = true;
if (ext.data.length > 0) {
@ -123,8 +123,8 @@ public class PreviewExtendDataCommand extends Command {
if (cell != null && cell instanceof ReconCandidate) {
ReconCandidate rc = (ReconCandidate) cell;
writer.object();
writer.key("id"); writer.value(rc.topicID);
writer.key("name"); writer.value(rc.topicName);
writer.key("id"); writer.value(rc.id);
writer.key("name"); writer.value(rc.name);
writer.endObject();
} else {
writer.value(cell);
@ -138,9 +138,9 @@ public class PreviewExtendDataCommand extends Command {
}
writer.array();
if (guid != null) {
if (id != null) {
writer.object();
writer.key("id"); writer.value("/guid/" + guid.substring(1));
writer.key("id"); writer.value(id);
writer.key("name"); writer.value(topicName);
writer.endObject();
} else {

View File

@ -1,5 +1,6 @@
package com.metaweb.gridworks.commands.freebase;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
@ -38,6 +39,7 @@ public class GuessTypesOfColumnCommand extends Command {
try {
Project project = getProject(request);
String columnName = request.getParameter("columnName");
String serviceUrl = request.getParameter("service");
response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", "application/json");
@ -54,7 +56,7 @@ public class GuessTypesOfColumnCommand extends Command {
writer.key("code"); writer.value("ok");
writer.key("types"); writer.array();
List<TypeGroup> typeGroups = guessTypes(project, column);
List<TypeGroup> typeGroups = guessTypes(project, column, serviceUrl);
for (TypeGroup tg : typeGroups) {
writer.object();
writer.key("id"); writer.value(tg.id);
@ -87,7 +89,7 @@ public class GuessTypesOfColumnCommand extends Command {
* @param column
* @return
*/
protected List<TypeGroup> guessTypes(Project project, Column column) {
protected List<TypeGroup> guessTypes(Project project, Column column, String serviceUrl) {
Map<String, TypeGroup> map = new HashMap<String, TypeGroup>();
int cellIndex = column.getCellIndex();
@ -115,7 +117,7 @@ public class GuessTypesOfColumnCommand extends Command {
jsonWriter.object();
for (int i = 0; i < samples.size(); i++) {
jsonWriter.key("q" + i + ":search");
jsonWriter.key("q" + i);
jsonWriter.object();
jsonWriter.key("query"); jsonWriter.value(samples.get(i));
@ -125,14 +127,26 @@ public class GuessTypesOfColumnCommand extends Command {
}
jsonWriter.endObject();
StringBuffer sb = new StringBuffer(1024);
sb.append("http://api.freebase.com/api/service/search?queries=");
sb.append(ParsingUtilities.encode(stringWriter.toString()));
URL url = new URL(sb.toString());
String queriesString = stringWriter.toString();
URL url = new URL(serviceUrl);
URLConnection connection = url.openConnection();
connection.setConnectTimeout(5000);
connection.connect();
{
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
connection.setConnectTimeout(30000);
connection.setDoOutput(true);
DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
try {
String body = "queries=" + ParsingUtilities.encode(queriesString);
dos.writeBytes(body);
} finally {
dos.flush();
dos.close();
}
connection.connect();
}
InputStream is = connection.getInputStream();
try {
@ -140,7 +154,7 @@ public class GuessTypesOfColumnCommand extends Command {
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
for (int i = 0; i < samples.size(); i++) {
String key = "q" + i + ":search";
String key = "q" + i;
if (!o.has(key)) {
continue;
}
@ -161,23 +175,24 @@ public class GuessTypesOfColumnCommand extends Command {
int typeCount = types.length();
for (int t = 0; t < typeCount; t++) {
JSONObject type = types.getJSONObject(t);
String id = type.getString("id");
if (id.equals("/common/topic") ||
id.equals("/base/ontologies/ontology_instance") ||
(id.startsWith("/base/") && id.endsWith("/topic")) ||
id.startsWith("/user/") ||
id.startsWith("/freebase/")
) {
continue;
Object type = types.get(t);
String typeID;
String typeName;
if (type instanceof String) {
typeID = typeName = (String) type;
} else {
typeID = ((JSONObject) type).getString("id");
typeName = ((JSONObject) type).getString("name");
}
if (map.containsKey(id)) {
TypeGroup tg = map.get(id);
tg.score += score;
double score2 = score * (typeCount - t) / (double) typeCount;
if (map.containsKey(typeID)) {
TypeGroup tg = map.get(typeID);
tg.score += score2;
tg.count++;
} else {
map.put(id, new TypeGroup(id, type.getString("name"), score));
map.put(typeID, new TypeGroup(typeID, typeName, score2));
}
}
}

View File

@ -44,7 +44,6 @@ public class ReconJudgeOneCellCommand extends Command {
match = new ReconCandidate(
topicID,
request.getParameter("topicGUID"),
request.getParameter("topicName"),
request.getParameter("types").split(","),
scoreString != null ? Double.parseDouble(scoreString) : 100
@ -57,7 +56,9 @@ public class ReconJudgeOneCellCommand extends Command {
judgment,
rowIndex,
cellIndex,
match
match,
request.getParameter("identifierSpace"),
request.getParameter("schemaSpace")
);
HistoryEntry historyEntry = project.processManager.queueProcess(process);
@ -88,10 +89,13 @@ public class ReconJudgeOneCellCommand extends Command {
protected static class JudgeOneCellProcess extends QuickHistoryEntryProcess {
final int rowIndex;
final int cellIndex;
final Judgment judgment;
final ReconCandidate match;
final int rowIndex;
final int cellIndex;
final Judgment judgment;
final ReconCandidate match;
final String identifierSpace;
final String schemaSpace;
Cell newCell;
JudgeOneCellProcess(
@ -100,7 +104,9 @@ public class ReconJudgeOneCellCommand extends Command {
Judgment judgment,
int rowIndex,
int cellIndex,
ReconCandidate match
ReconCandidate match,
String identifierSpace,
String schemaSpace
) {
super(project, briefDescription);
@ -108,6 +114,8 @@ public class ReconJudgeOneCellCommand extends Command {
this.rowIndex = rowIndex;
this.cellIndex = cellIndex;
this.match = match;
this.identifierSpace = identifierSpace;
this.schemaSpace = schemaSpace;
}
protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception {
@ -125,7 +133,7 @@ public class ReconJudgeOneCellCommand extends Command {
newCell = new Cell(
cell.value,
cell.recon == null ? new Recon(historyEntryID) : cell.recon.dup(historyEntryID)
cell.recon == null ? new Recon(historyEntryID, identifierSpace, schemaSpace) : cell.recon.dup(historyEntryID)
);
String cellDescription =
@ -152,16 +160,17 @@ public class ReconJudgeOneCellCommand extends Command {
} else {
newCell.recon.judgment = Recon.Judgment.Matched;
newCell.recon.match = this.match;
for (int m = 0; m < newCell.recon.candidates.size(); m++) {
if (newCell.recon.candidates.get(m).topicGUID.equals(this.match.topicGUID)) {
newCell.recon.matchRank = m;
break;
if (newCell.recon.candidates != null) {
for (int m = 0; m < newCell.recon.candidates.size(); m++) {
if (newCell.recon.candidates.get(m).id.equals(this.match.id)) {
newCell.recon.matchRank = m;
break;
}
}
}
description = "Match " + this.match.topicName +
" (" + match.topicID + ") to " +
description = "Match " + this.match.name +
" (" + match.id + ") to " +
cellDescription;
}

View File

@ -29,7 +29,6 @@ public class ReconJudgeSimilarCellsCommand extends EngineDependentCommand {
match = new ReconCandidate(
topicID,
request.getParameter("topicGUID"),
request.getParameter("topicName"),
request.getParameter("types").split(","),
scoreString != null ? Double.parseDouble(scoreString) : 100

View File

@ -19,12 +19,17 @@ public class ReconMatchSpecificTopicCommand extends EngineDependentCommand {
String columnName = request.getParameter("columnName");
ReconCandidate match = new ReconCandidate(
request.getParameter("topicID"),
request.getParameter("topicGUID"),
request.getParameter("topicName"),
request.getParameter("types").split(","),
100
);
return new ReconMatchSpecificTopicOperation(engineConfig, columnName, match);
return new ReconMatchSpecificTopicOperation(
engineConfig,
columnName,
match,
request.getParameter("identifierSpace"),
request.getParameter("schemaSpace")
);
}
}

View File

@ -86,11 +86,11 @@ public class XlsExporter implements Exporter {
Cell cell = row.cells.get(cellIndex);
if (cell != null) {
if (cell.recon != null && cell.recon.match != null) {
c.setCellValue(cell.recon.match.topicName);
c.setCellValue(cell.recon.match.name);
HSSFHyperlink hl = new HSSFHyperlink(HSSFHyperlink.LINK_URL);
hl.setLabel(cell.recon.match.topicName);
hl.setAddress("http://www.freebase.com/view" + cell.recon.match.topicID);
hl.setLabel(cell.recon.match.name);
hl.setAddress("http://www.freebase.com/view" + cell.recon.match.id);
c.setHyperlink(hl);
} else if (cell.value != null) {

View File

@ -207,9 +207,9 @@ public class ExcelImporter implements Importer {
recon = reconMap.get(id);
recon.judgmentBatchSize++;
} else {
recon = new Recon(0);
recon = new Recon(0, null, null);
recon.service = "import";
recon.match = new ReconCandidate(id, "", value.toString(), new String[0], 100);
recon.match = new ReconCandidate(id, value.toString(), new String[0], 100);
recon.matchRank = 0;
recon.judgment = Judgment.Matched;
recon.judgmentAction = "auto";

View File

@ -59,8 +59,11 @@ public class Recon implements HasFields, Jsonizable {
}
final public long id;
public Object[] features = new Object[Feature_max];
public String service = "unknown";
public String identifierSpace = null;
public String schemaSpace = null;
public Object[] features = new Object[Feature_max];
public List<ReconCandidate> candidates;
public Judgment judgment = Judgment.None;
@ -71,9 +74,18 @@ public class Recon implements HasFields, Jsonizable {
public ReconCandidate match = null;
public int matchRank = -1;
public Recon(long judgmentHistoryEntry) {
/**
 * Convenience factory for a Recon tied to Freebase: both the identifier
 * space and the schema space are set to the same Freebase
 * "type.object.id" namespace URI.
 *
 * @param judgmentHistoryEntry value forwarded unchanged to the Recon constructor
 * @return a newly constructed Recon
 */
static public Recon makeFreebaseRecon(long judgmentHistoryEntry) {
return new Recon(
judgmentHistoryEntry,
"http://rdf.freebase.com/ns/type.object.id",
"http://rdf.freebase.com/ns/type.object.id");
}
/**
 * Creates a Recon with a freshly generated id and the given identifier
 * and schema spaces.
 *
 * @param judgmentHistoryEntry stored in the judgmentHistoryEntry field
 * @param identifierSpace stored in the identifierSpace field; callers in this change also pass null
 * @param schemaSpace stored in the schemaSpace field; callers in this change also pass null
 */
public Recon(long judgmentHistoryEntry, String identifierSpace, String schemaSpace) {
// id = current time in milliseconds scaled by 1,000,000, plus a random offset in [0, 1,000,000].
// NOTE(review): uniqueness is therefore probabilistic, not guaranteed — confirm this is acceptable.
id = System.currentTimeMillis() * 1000000 + Math.round(Math.random() * 1000000);
this.judgmentHistoryEntry = judgmentHistoryEntry;
this.identifierSpace = identifierSpace;
this.schemaSpace = schemaSpace;
}
protected Recon(long id, long judgmentHistoryEntry) {
@ -82,7 +94,7 @@ public class Recon implements HasFields, Jsonizable {
}
public Recon dup(long judgmentHistoryEntry) {
Recon r = new Recon(judgmentHistoryEntry);
Recon r = new Recon(judgmentHistoryEntry, identifierSpace, schemaSpace);
System.arraycopy(features, 0, r.features, 0, features.length);
@ -165,6 +177,12 @@ public class Recon implements HasFields, Jsonizable {
return matchRank;
} else if ("features".equals(name)) {
return new Features();
} else if ("service".equals(name)) {
return service;
} else if ("identifierSpace".equals(name)) {
return identifierSpace;
} else if ("schemaSpace".equals(name)) {
return schemaSpace;
}
return null;
}
@ -195,6 +213,10 @@ public class Recon implements HasFields, Jsonizable {
writer.object();
writer.key("id"); writer.value(id);
writer.key("service"); writer.value(service);
writer.key("identifierSpace"); writer.value(identifierSpace);
writer.key("schemaSpace"); writer.value(schemaSpace);
if (saveMode) {
writer.key("judgmentHistoryEntry"); writer.value(judgmentHistoryEntry);
}
@ -202,13 +224,13 @@ public class Recon implements HasFields, Jsonizable {
writer.key("j"); writer.value(judgmentToString());
if (match != null) {
writer.key("m");
writer.value(match.topicID);
writer.value(match.id);
}
if (match == null || saveMode) {
writer.key("c"); writer.array();
if (candidates != null) {
for (ReconCandidate c : candidates) {
writer.value(c.topicID);
writer.value(c.id);
}
}
writer.endArray();
@ -222,7 +244,6 @@ public class Recon implements HasFields, Jsonizable {
}
writer.endArray();
writer.key("service"); writer.value(service);
writer.key("judgmentAction"); writer.value(judgmentAction);
writer.key("judgmentBatchSize"); writer.value(judgmentBatchSize);
@ -317,6 +338,10 @@ public class Recon implements HasFields, Jsonizable {
}
} else if ("service".equals(fieldName)) {
recon.service = jp.getText();
} else if ("identifierSpace".equals(fieldName)) {
recon.identifierSpace = jp.getText();
} else if ("schemaSpace".equals(fieldName)) {
recon.schemaSpace = jp.getText();
} else if ("judgmentAction".equals(fieldName)) {
recon.judgmentAction = jp.getText();
} else if ("judgmentBatchSize".equals(fieldName)) {

View File

@ -14,29 +14,25 @@ import com.metaweb.gridworks.Jsonizable;
import com.metaweb.gridworks.expr.HasFields;
public class ReconCandidate implements HasFields, Jsonizable {
final public String topicID;
final public String topicGUID;
final public String topicName;
final public String[] typeIDs;
final public String id;
final public String name;
final public String[] types;
final public double score;
public ReconCandidate(String topicID, String topicGUID, String topicName, String[] typeIDs, double score) {
this.topicID = topicID;
this.topicGUID = topicGUID;
this.topicName = topicName;
this.typeIDs = typeIDs;
public ReconCandidate(String topicID, String topicName, String[] typeIDs, double score) {
this.id = topicID;
this.name = topicName;
this.types = typeIDs;
this.score = score;
}
public Object getField(String name, Properties bindings) {
if ("id".equals(name)) {
return topicID;
} else if ("guid".equals(name)) {
return topicGUID;
return id;
} else if ("name".equals(name)) {
return topicName;
return this.name;
} else if ("type".equals(name)) {
return typeIDs;
return types;
} else if ("score".equals(name)) {
return score;
}
@ -51,14 +47,13 @@ public class ReconCandidate implements HasFields, Jsonizable {
throws JSONException {
writer.object();
writer.key("id"); writer.value(topicID);
writer.key("guid"); writer.value(topicGUID);
writer.key("name"); writer.value(topicName);
writer.key("id"); writer.value(id);
writer.key("name"); writer.value(name);
writer.key("score"); writer.value(score);
/* if (!options.containsKey("reconCandidateOmitTypes")) */ {
writer.key("types"); writer.array();
for (String typeID : typeIDs) {
for (String typeID : types) {
writer.value(typeID);
}
writer.endArray();
@ -84,7 +79,6 @@ public class ReconCandidate implements HasFields, Jsonizable {
}
String id = null;
String guid = null;
String name = null;
List<String> types = null;
double score = 0;
@ -95,8 +89,6 @@ public class ReconCandidate implements HasFields, Jsonizable {
if ("id".equals(fieldName)) {
id = jp.getText();
} else if ("guid".equals(fieldName)) {
guid = jp.getText();
} else if ("name".equals(fieldName)) {
name = jp.getText();
} else if ("score".equals(fieldName)) {
@ -124,7 +116,6 @@ public class ReconCandidate implements HasFields, Jsonizable {
return new ReconCandidate(
id,
guid,
name,
typesA,
score

View File

@ -201,10 +201,10 @@ public class DataExtensionChange implements Change {
if (value instanceof ReconCandidate) {
ReconCandidate rc = (ReconCandidate) value;
Recon recon;
if (reconMap.containsKey(rc.topicGUID)) {
recon = reconMap.get(rc.topicGUID);
if (reconMap.containsKey(rc.id)) {
recon = reconMap.get(rc.id);
} else {
recon = new Recon(_historyEntryID);
recon = Recon.makeFreebaseRecon(_historyEntryID);
recon.addCandidate(rc);
recon.service = "mql";
recon.match = rc;
@ -213,9 +213,9 @@ public class DataExtensionChange implements Change {
recon.judgmentAction = "auto";
recon.judgmentBatchSize = 1;
reconMap.put(rc.topicGUID, recon);
reconMap.put(rc.id, recon);
}
cell = new Cell(rc.topicName, recon);
cell = new Cell(rc.name, recon);
} else {
cell = new Cell((Serializable) value, null);
}

View File

@ -141,13 +141,12 @@ public class GuidBasedReconConfig extends StrictReconConfig {
ReconCandidate candidate = new ReconCandidate(
result.getString("id"),
guid,
result.getString("name"),
typeIDs,
100
);
Recon recon = new Recon(historyEntryID);
Recon recon = Recon.makeFreebaseRecon(historyEntryID);
recon.addCandidate(candidate);
recon.service = "mql";
recon.judgment = Judgment.Matched;

View File

@ -145,13 +145,12 @@ public class IdBasedReconConfig extends StrictReconConfig {
ReconCandidate candidate = new ReconCandidate(
id,
result.getString("guid"),
result.getString("name"),
typeIDs,
100
);
Recon recon = new Recon(historyEntryID);
Recon recon = Recon.makeFreebaseRecon(historyEntryID);
recon.addCandidate(candidate);
recon.service = "mql";
recon.judgment = Judgment.Matched;

View File

@ -159,13 +159,12 @@ public class KeyBasedReconConfig extends StrictReconConfig {
ReconCandidate candidate = new ReconCandidate(
result.getString("id"),
result.getString("guid"),
result.getString("name"),
typeIDs,
100
);
Recon recon = new Recon(historyEntryID);
Recon recon = Recon.makeFreebaseRecon(historyEntryID);
recon.addCandidate(candidate);
recon.service = "mql";
recon.judgment = Judgment.Matched;

View File

@ -17,8 +17,10 @@ import com.metaweb.gridworks.model.Row;
abstract public class ReconConfig implements Jsonizable {
static public ReconConfig reconstruct(JSONObject obj) throws Exception {
String mode = obj.getString("mode");
if ("heuristic".equals(mode)) {
return HeuristicReconConfig.reconstruct(obj);
if ("standard-service".equals(mode) ||
"heuristic".equals(mode) // legacy
) {
return StandardReconConfig.reconstruct(obj);
} else if ("strict".equals(mode)) {
return StrictReconConfig.reconstruct(obj);
} else if ("extend".equals(mode)) {

View File

@ -1,5 +1,6 @@
package com.metaweb.gridworks.model.recon;
import java.io.DataOutputStream;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.URL;
@ -15,6 +16,8 @@ import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.metaweb.gridworks.expr.ExpressionUtils;
import com.metaweb.gridworks.model.Cell;
@ -27,7 +30,9 @@ import com.metaweb.gridworks.model.RecordModel.RowDependency;
import com.metaweb.gridworks.protograph.FreebaseProperty;
import com.metaweb.gridworks.util.ParsingUtilities;
public class HeuristicReconConfig extends ReconConfig {
public class StandardReconConfig extends ReconConfig {
final static Logger logger = LoggerFactory.getLogger("gridworks-standard-recon");
static public class ColumnDetail {
final public String columnName;
final public FreebaseProperty property;
@ -63,8 +68,10 @@ public class HeuristicReconConfig extends ReconConfig {
JSONObject t = obj.getJSONObject("type");
return new HeuristicReconConfig(
return new StandardReconConfig(
obj.getString("service"),
obj.has("identifierSpace") ? obj.getString("identifierSpace") : null,
obj.has("schemaSpace") ? obj.getString("schemaSpace") : null,
t.getString("id"),
t.getString("name"),
obj.getBoolean("autoMatch"),
@ -72,7 +79,7 @@ public class HeuristicReconConfig extends ReconConfig {
);
}
static protected class HeuristicReconJob extends ReconJob {
static protected class StandardReconJob extends ReconJob {
String text;
String code;
@ -81,20 +88,29 @@ public class HeuristicReconConfig extends ReconConfig {
}
}
final public String service; // either "recon" or "relevance"
final public String service;
final public String identifierSpace;
final public String schemaSpace;
final public String typeID;
final public String typeName;
final public boolean autoMatch;
final public List<ColumnDetail> columnDetails;
public HeuristicReconConfig(
public StandardReconConfig(
String service,
String identifierSpace,
String schemaSpace,
String typeID,
String typeName,
boolean autoMatch,
List<ColumnDetail> columnDetails
) {
this.service = service;
this.identifierSpace = identifierSpace;
this.schemaSpace = schemaSpace;
this.typeID = typeID;
this.typeName = typeName;
this.autoMatch = autoMatch;
@ -105,8 +121,10 @@ public class HeuristicReconConfig extends ReconConfig {
throws JSONException {
writer.object();
writer.key("mode"); writer.value("heuristic");
writer.key("service"); writer.value(service);
writer.key("mode"); writer.value("standard-service");
writer.key("service"); writer.value(service);
writer.key("identifierSpace"); writer.value(identifierSpace);
writer.key("schemaSpace"); writer.value(schemaSpace);
writer.key("type");
writer.object();
writer.key("id"); writer.value(typeID);
@ -127,7 +145,7 @@ public class HeuristicReconConfig extends ReconConfig {
@Override
public int getBatchSize() {
return 10;
return 7;
}
@Override
@ -139,17 +157,18 @@ public class HeuristicReconConfig extends ReconConfig {
public ReconJob createJob(Project project, int rowIndex, Row row,
String columnName, Cell cell) {
HeuristicReconJob job = new HeuristicReconJob();
if ("relevance".equals(service)) {
job.code = job.text = cell.value.toString();
} else {
try {
StringWriter stringWriter = new StringWriter();
JSONWriter jsonWriter = new JSONWriter(stringWriter);
jsonWriter.object();
jsonWriter.key("/type/object/name"); jsonWriter.value(cell.value.toString());
jsonWriter.key("/type/object/type"); jsonWriter.value(typeID);
StandardReconJob job = new StandardReconJob();
try {
StringWriter stringWriter = new StringWriter();
JSONWriter jsonWriter = new JSONWriter(stringWriter);
jsonWriter.object();
jsonWriter.key("query"); jsonWriter.value(cell.value.toString());
jsonWriter.key("type"); jsonWriter.value(typeID);
if (columnDetails.size() > 0) {
jsonWriter.key("properties");
jsonWriter.array();
for (ColumnDetail c : columnDetails) {
int detailCellIndex = project.columnModel.getColumnByName(c.columnName).getCellIndex();
@ -168,72 +187,75 @@ public class HeuristicReconConfig extends ReconConfig {
}
}
}
if (cell2 != null && ExpressionUtils.isNonBlankData(cell2.value)) {
jsonWriter.key(c.property.id);
jsonWriter.object();
jsonWriter.key("pid"); jsonWriter.value(c.property.id);
jsonWriter.key("v");
if (cell2.recon != null && cell2.recon.match != null) {
jsonWriter.object();
jsonWriter.key("id"); jsonWriter.value(cell2.recon.match.topicID);
jsonWriter.key("name"); jsonWriter.value(cell2.recon.match.topicName);
jsonWriter.key("id"); jsonWriter.value(cell2.recon.match.id);
jsonWriter.key("name"); jsonWriter.value(cell2.recon.match.name);
jsonWriter.endObject();
} else {
jsonWriter.value(cell2.value.toString());
}
jsonWriter.endObject();
}
}
jsonWriter.endObject();
job.text = cell.value.toString();
job.code = stringWriter.toString();
} catch (JSONException e) {
//
}
jsonWriter.endArray();
}
jsonWriter.endObject();
job.text = cell.value.toString();
job.code = stringWriter.toString();
} catch (JSONException e) {
//
}
return job;
}
@Override
public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID) {
if ("relevance".equals(service)) {
return batchReconUsingRelevance(jobs, historyEntryID);
} else {
return batchReconUsingReconService(jobs, historyEntryID);
}
}
protected List<Recon> batchReconUsingRelevance(List<ReconJob> jobs, long historyEntryID) {
List<Recon> recons = new ArrayList<Recon>(jobs.size());
try {
StringWriter stringWriter = new StringWriter();
JSONWriter jsonWriter = new JSONWriter(stringWriter);
jsonWriter.object();
for (int i = 0; i < jobs.size(); i++) {
HeuristicReconJob job = (HeuristicReconJob) jobs.get(i);
jsonWriter.key("q" + i + ":search");
jsonWriter.object();
jsonWriter.key("query"); jsonWriter.value(job.text);
jsonWriter.key("limit"); jsonWriter.value(3);
jsonWriter.key("type"); jsonWriter.value(typeID);
jsonWriter.key("type_strict"); jsonWriter.value("should");
jsonWriter.key("type_exclude"); jsonWriter.value("/common/image");
jsonWriter.key("domain_exclude"); jsonWriter.value("/freebase");
jsonWriter.key("stemmed"); jsonWriter.value(1);
jsonWriter.endObject();
StringWriter stringWriter = new StringWriter();
stringWriter.write("{");
for (int i = 0; i < jobs.size(); i++) {
StandardReconJob job = (StandardReconJob) jobs.get(i);
if (i > 0) {
stringWriter.write(",");
}
jsonWriter.endObject();
StringBuffer sb = new StringBuffer(1024);
sb.append("http://api.freebase.com/api/service/search?indent=1&queries=");
sb.append(ParsingUtilities.encode(stringWriter.toString()));
URL url = new URL(sb.toString());
stringWriter.write("\"q" + i + "\":");
stringWriter.write(job.code);
}
stringWriter.write("}");
String queriesString = stringWriter.toString();
try {
URL url = new URL(service);
URLConnection connection = url.openConnection();
connection.setConnectTimeout(5000);
connection.connect();
{
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
connection.setConnectTimeout(30000);
connection.setDoOutput(true);
DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
try {
String body = "queries=" + ParsingUtilities.encode(queriesString);
dos.writeBytes(body);
} finally {
dos.flush();
dos.close();
}
connection.connect();
}
InputStream is = connection.getInputStream();
try {
@ -241,40 +263,48 @@ public class HeuristicReconConfig extends ReconConfig {
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
for (int i = 0; i < jobs.size(); i++) {
HeuristicReconJob job = (HeuristicReconJob) jobs.get(i);
String text = job.text;
String key = "q" + i + ":search";
if (!o.has(key)) {
continue;
}
StandardReconJob job = (StandardReconJob) jobs.get(i);
Recon recon = null;
JSONObject o2 = o.getJSONObject(key);
if (o2.has("result")) {
JSONArray results = o2.getJSONArray("result");
recon = createReconFromRelevanceResults(text, results, historyEntryID);
} else {
recon = new Recon(historyEntryID);
String text = job.text;
String key = "q" + i;
if (o.has(key)) {
JSONObject o2 = o.getJSONObject(key);
if (o2.has("result")) {
JSONArray results = o2.getJSONArray("result");
recon = createReconServiceResults(text, results, historyEntryID);
}
}
recon.service = "recon";
if (recon == null) {
recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
}
recon.service = service;
recons.add(recon);
}
} finally {
is.close();
}
} catch (Exception e) {
e.printStackTrace();
logger.error("Failed to batch recon with load:\n" + queriesString, e);
}
while (recons.size() < jobs.size()) {
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
recon.service = service;
recon.identifierSpace = identifierSpace;
recon.schemaSpace = schemaSpace;
recons.add(recon);
}
return recons;
}
protected Recon createReconFromRelevanceResults(String text, JSONArray results, long historyEntryID) {
Recon recon = new Recon(historyEntryID);
protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) {
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
try {
int length = results.length();
int count = 0;
@ -287,13 +317,14 @@ public class HeuristicReconConfig extends ReconConfig {
JSONArray types = result.getJSONArray("type");
String[] typeIDs = new String[types.length()];
for (int j = 0; j < typeIDs.length; j++) {
typeIDs[j] = types.getJSONObject(j).getString("id");
Object type = types.get(j);
typeIDs[j] = type instanceof String ? (String) type :
((JSONObject) type).getString("id");
}
double score = result.getDouble("relevance:score");
double score = result.getDouble("score");
ReconCandidate candidate = new ReconCandidate(
result.getString("id"),
result.getString("guid"),
result.getString("name"),
typeIDs,
score
@ -306,12 +337,12 @@ public class HeuristicReconConfig extends ReconConfig {
if (count > 0) {
ReconCandidate candidate = recon.candidates.get(0);
recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.topicName));
recon.setFeature(Recon.Feature_nameLevenshtein, StringUtils.getLevenshteinDistance(text, candidate.topicName));
recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.topicName));
recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.name));
recon.setFeature(Recon.Feature_nameLevenshtein, StringUtils.getLevenshteinDistance(text, candidate.name));
recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.name));
recon.setFeature(Recon.Feature_typeMatch, false);
for (String typeID : candidate.typeIDs) {
for (String typeID : candidate.types) {
if (this.typeID.equals(typeID)) {
recon.setFeature(Recon.Feature_typeMatch, true);
if (autoMatch && candidate.score >= 100 && (count == 1 || candidate.score / recon.candidates.get(1).score >= 1.5)) {
@ -330,106 +361,6 @@ public class HeuristicReconConfig extends ReconConfig {
return recon;
}
static final String s_reconService = "http://data.labs.freebase.com/recon/query";
/**
 * Reconciles every job with its own HTTP GET against {@code s_reconService}.
 * <p>
 * The returned list always has one {@link Recon} per job, in job order. When
 * the request or the parsing of its response fails, the stack trace is
 * printed and an empty {@code Recon} stands in for that job.
 *
 * @param jobs           jobs to reconcile; each one is cast to {@code HeuristicReconJob}
 * @param historyEntryID id of the history entry the created recons belong to
 * @return one recon per job, each tagged with the service name "recon"
 */
protected List<Recon> batchReconUsingReconService(List<ReconJob> jobs, long historyEntryID) {
    List<Recon> results = new ArrayList<Recon>(jobs.size());

    for (ReconJob rawJob : jobs) {
        HeuristicReconJob job = (HeuristicReconJob) rawJob;

        Recon recon = null;
        try {
            String address = s_reconService + "?limit=5&q=" + ParsingUtilities.encode(job.code);

            URLConnection connection = new URL(address).openConnection();
            connection.setConnectTimeout(5000);
            connection.connect();

            InputStream is = connection.getInputStream();
            try {
                JSONArray candidates = ParsingUtilities.evaluateJsonStringToArray(
                    ParsingUtilities.inputStreamToString(is));

                recon = createReconFromReconResults(job.text, candidates, historyEntryID);
            } finally {
                is.close();
            }
        } catch (Exception e) {
            // Best effort: a failed lookup must not abort the rest of the batch.
            e.printStackTrace();
        }

        if (recon == null) {
            recon = new Recon(historyEntryID);
        }
        recon.service = "recon";

        results.add(recon);
    }

    return results;
}
/**
 * Builds a {@link Recon} from the JSON array returned by the recon service.
 * <p>
 * Results without a "name" field are skipped, and at most three candidates
 * are kept. The match features (name match, Levenshtein distance, word
 * distance, type match) are computed against the first candidate that is
 * actually kept, which is also the only candidate eligible for auto-matching.
 * <p>
 * A {@link JSONException} stops processing; the stack trace is printed and
 * whatever was collected so far is returned.
 *
 * @param text           the cell text being reconciled
 * @param results        array of result objects with "id", "name", "score" and "type"
 * @param historyEntryID id of the history entry the recon belongs to
 * @return a recon holding the kept candidates; never {@code null}
 */
protected Recon createReconFromReconResults(String text, JSONArray results, long historyEntryID) {
    Recon recon = new Recon(historyEntryID);
    try {
        int length = results.length();
        int count = 0;

        for (int i = 0; i < length && count < 3; i++) {
            JSONObject result = results.getJSONObject(i);
            if (!result.has("name")) {
                continue;
            }

            String id = result.getString("id");
            JSONArray names = result.getJSONArray("name");
            double score = result.getDouble("score");

            JSONArray types = result.getJSONArray("type");
            String[] typeIDs = new String[types.length()];
            for (int j = 0; j < typeIDs.length; j++) {
                typeIDs[j] = types.getString(j);
            }

            ReconCandidate candidate = new ReconCandidate(
                id,
                // NOTE(review): presumably drops a 6-character "/guid/" prefix
                // to form the GUID - confirm against the service's id format.
                "#" + id.substring(6),
                names.getString(0),
                typeIDs,
                score
            );

            // Best match: keyed on the number of candidates kept so far, not on
            // the raw result index, so a skipped nameless first result no
            // longer leaves the features unset.
            if (count == 0) {
                recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.topicName));
                recon.setFeature(Recon.Feature_nameLevenshtein, StringUtils.getLevenshteinDistance(text, candidate.topicName));
                recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.topicName));

                recon.setFeature(Recon.Feature_typeMatch, false);
                for (String typeID : candidate.typeIDs) {
                    if (this.typeID.equals(typeID)) {
                        recon.setFeature(Recon.Feature_typeMatch, true);
                        if (autoMatch && result.has("match") && result.getBoolean("match")) {
                            recon.match = candidate;
                            recon.matchRank = 0;
                            recon.judgment = Judgment.Matched;
                            recon.judgmentAction = "auto";
                        }
                        break;
                    }
                }
            }

            recon.addCandidate(candidate);
            count++;
        }
    } catch (JSONException e) {
        e.printStackTrace();
    }
    return recon;
}
static protected double wordDistance(String s1, String s2) {
Set<String> words1 = breakWords(s1);
Set<String> words2 = breakWords(s2);

View File

@ -179,20 +179,20 @@ public class ExtendDataOperation extends EngineDependentOperation {
int limit,
Map<String, ReconCandidate> reconCandidateMap
) {
Set<String> guids = new HashSet<String>();
Set<String> ids = new HashSet<String>();
int end;
for (end = from; end < limit && guids.size() < 10; end++) {
for (end = from; end < limit && ids.size() < 10; end++) {
int index = rowIndices.get(end);
Row row = _project.rows.get(index);
Cell cell = row.getCell(_cellIndex);
guids.add(cell.recon.match.topicGUID);
ids.add(cell.recon.match.id);
}
Map<String, DataExtension> map = null;
try {
map = _job.extend(guids, reconCandidateMap);
map = _job.extend(ids, reconCandidateMap);
} catch (Exception e) {
map = new HashMap<String, DataExtension>();
}
@ -201,7 +201,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
int index = rowIndices.get(i);
Row row = _project.rows.get(index);
Cell cell = row.getCell(_cellIndex);
String guid = cell.recon.match.topicGUID;
String guid = cell.recon.match.id;
if (map.containsKey(guid)) {
dataExtensions.add(map.get(guid));

View File

@ -47,7 +47,6 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
match = new ReconCandidate(
matchObj.getString("id"),
matchObj.getString("guid"),
matchObj.getString("name"),
typeIDs,
matchObj.getDouble("score")
@ -116,8 +115,8 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
}
} else if (_judgment == Judgment.Matched) {
return "Match topic " +
_match.topicName + " (" +
_match.topicID + ") for cells containing \"" +
_match.name + " (" +
_match.id + ") for cells containing \"" +
_similarValue + "\" in column " + _columnName;
}
throw new InternalError("Can't get here");
@ -139,8 +138,8 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
}
} else if (_judgment == Judgment.Matched) {
return "Match topic " +
_match.topicName + " (" +
_match.topicID + ") for " +
_match.name + " (" +
_match.id + ") for " +
cellChanges.size() + " cells containing \"" +
_similarValue + "\" in column " + _columnName;
}
@ -166,70 +165,70 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
@Override
public void start(Project project) {
// nothing to do
// nothing to do
}
@Override
public void end(Project project) {
// nothing to do
// nothing to do
}
public boolean visit(Project project, int rowIndex, Row row) {
Cell cell = row.getCell(_cellIndex);
if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) {
String value = cell.value instanceof String ?
((String) cell.value) : cell.value.toString();
String value = cell.value instanceof String ?
((String) cell.value) : cell.value.toString();
if (_similarValue.equals(value)) {
Recon recon = null;
if (_judgment == Judgment.New && _shareNewTopics) {
if (_sharedNewRecon == null) {
_sharedNewRecon = new Recon(_historyEntryID);
_sharedNewRecon.judgment = Judgment.New;
_sharedNewRecon.judgmentBatchSize = 0;
_sharedNewRecon.judgmentAction = "similar";
}
_sharedNewRecon.judgmentBatchSize++;
recon = _sharedNewRecon;
} else {
if (_dupReconMap.containsKey(cell.recon.id)) {
recon = _dupReconMap.get(cell.recon.id);
recon.judgmentBatchSize++;
} else {
recon = cell.recon.dup(_historyEntryID);
recon.judgmentBatchSize = 1;
recon.matchRank = -1;
recon.judgmentAction = "similar";
if (_judgment == Judgment.Matched) {
recon.judgment = Recon.Judgment.Matched;
recon.match = _match;
if (recon.candidates != null) {
for (int m = 0; m < recon.candidates.size(); m++) {
if (recon.candidates.get(m).topicGUID.equals(_match.topicGUID)) {
recon.matchRank = m;
break;
}
}
}
} else if (_judgment == Judgment.New) {
recon.judgment = Recon.Judgment.New;
recon.match = null;
} else if (_judgment == Judgment.None) {
recon.judgment = Recon.Judgment.None;
recon.match = null;
}
_dupReconMap.put(cell.recon.id, recon);
}
}
Cell newCell = new Cell(cell.value, recon);
CellChange cellChange = new CellChange(rowIndex, _cellIndex, cell, newCell);
_cellChanges.add(cellChange);
Recon recon = null;
if (_judgment == Judgment.New && _shareNewTopics) {
if (_sharedNewRecon == null) {
_sharedNewRecon = new Recon(_historyEntryID, null, null);
_sharedNewRecon.judgment = Judgment.New;
_sharedNewRecon.judgmentBatchSize = 0;
_sharedNewRecon.judgmentAction = "similar";
}
_sharedNewRecon.judgmentBatchSize++;
recon = _sharedNewRecon;
} else {
if (_dupReconMap.containsKey(cell.recon.id)) {
recon = _dupReconMap.get(cell.recon.id);
recon.judgmentBatchSize++;
} else {
recon = cell.recon.dup(_historyEntryID);
recon.judgmentBatchSize = 1;
recon.matchRank = -1;
recon.judgmentAction = "similar";
if (_judgment == Judgment.Matched) {
recon.judgment = Recon.Judgment.Matched;
recon.match = _match;
if (recon.candidates != null) {
for (int m = 0; m < recon.candidates.size(); m++) {
if (recon.candidates.get(m).id.equals(_match.id)) {
recon.matchRank = m;
break;
}
}
}
} else if (_judgment == Judgment.New) {
recon.judgment = Recon.Judgment.New;
recon.match = null;
} else if (_judgment == Judgment.None) {
recon.judgment = Recon.Judgment.None;
recon.match = null;
}
_dupReconMap.put(cell.recon.id, recon);
}
}
Cell newCell = new Cell(cell.value, recon);
CellChange cellChange = new CellChange(rowIndex, _cellIndex, cell, newCell);
_cellChanges.add(cellChange);
}
}
return false;

View File

@ -106,7 +106,7 @@ public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperatio
recon = sharedRecons.get(s);
recon.judgmentBatchSize++;
} else {
recon = new Recon(historyEntryID);
recon = new Recon(historyEntryID, null, null);
recon.judgment = Judgment.New;
recon.judgmentBatchSize = 1;
recon.judgmentAction = "mass";
@ -114,7 +114,7 @@ public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperatio
sharedRecons.put(s, recon);
}
} else {
recon = cell.recon == null ? new Recon(historyEntryID) : cell.recon.dup(historyEntryID);
recon = cell.recon == null ? new Recon(historyEntryID, null, null) : cell.recon.dup(historyEntryID);
recon.match = null;
recon.matchRank = -1;
recon.judgment = Judgment.New;

View File

@ -27,6 +27,8 @@ import com.metaweb.gridworks.operations.OperationRegistry;
public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOperation {
final protected ReconCandidate match;
final protected String identifierSpace;
final protected String schemaSpace;
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
JSONObject engineConfig = obj.getJSONObject("engineConfig");
@ -44,17 +46,26 @@ public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOpe
obj.getString("columnName"),
new ReconCandidate(
match.getString("id"),
match.getString("guid"),
match.getString("name"),
typeIDs,
100
)
),
obj.getString("identifierSpace"),
obj.getString("schemaSpace")
);
}
public ReconMatchSpecificTopicOperation(JSONObject engineConfig, String columnName, ReconCandidate match) {
public ReconMatchSpecificTopicOperation(
JSONObject engineConfig,
String columnName,
ReconCandidate match,
String identifierSpace,
String schemaSpace
) {
super(engineConfig, columnName, false);
this.match = match;
this.identifierSpace = identifierSpace;
this.schemaSpace = schemaSpace;
}
public void write(JSONWriter writer, Properties options)
@ -67,30 +78,31 @@ public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOpe
writer.key("columnName"); writer.value(_columnName);
writer.key("match");
writer.object();
writer.key("id"); writer.value(match.topicID);
writer.key("guid"); writer.value(match.topicGUID);
writer.key("name"); writer.value(match.topicName);
writer.key("id"); writer.value(match.id);
writer.key("name"); writer.value(match.name);
writer.key("types");
writer.array();
for (String typeID : match.typeIDs) {
for (String typeID : match.types) {
writer.value(typeID);
}
writer.endArray();
writer.endObject();
writer.key("identifierSpace"); writer.value(identifierSpace);
writer.key("schemaSpace"); writer.value(schemaSpace);
writer.endObject();
}
protected String getBriefDescription(Project project) {
return "Match specific topic " +
match.topicName + " (" +
match.topicID + ") to cells in column " + _columnName;
match.name + " (" +
match.id + ") to cells in column " + _columnName;
}
protected String createDescription(Column column,
List<CellChange> cellChanges) {
return "Match specific topic " +
match.topicName + " (" +
match.topicID + ") to " + cellChanges.size() +
match.name + " (" +
match.id + ") to " + cellChanges.size() +
" cells in column " + column.getName();
}
@ -130,7 +142,13 @@ public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOpe
newRecon = dupReconMap.get(reconID);
newRecon.judgmentBatchSize++;
} else {
newRecon = cell.recon != null ? cell.recon.dup(historyEntryID) : new Recon(historyEntryID);
newRecon = cell.recon != null ?
cell.recon.dup(historyEntryID) :
new Recon(
historyEntryID,
identifierSpace,
schemaSpace);
newRecon.match = match;
newRecon.matchRank = -1;
newRecon.judgment = Judgment.Matched;

View File

@ -24,7 +24,7 @@ import com.metaweb.gridworks.model.Recon;
import com.metaweb.gridworks.model.Row;
import com.metaweb.gridworks.model.changes.CellChange;
import com.metaweb.gridworks.model.changes.ReconChange;
import com.metaweb.gridworks.model.recon.HeuristicReconConfig;
import com.metaweb.gridworks.model.recon.StandardReconConfig;
import com.metaweb.gridworks.model.recon.ReconConfig;
import com.metaweb.gridworks.model.recon.ReconJob;
import com.metaweb.gridworks.operations.EngineDependentOperation;
@ -143,7 +143,7 @@ public class ReconOperation extends EngineDependentOperation {
writer.endObject();
writer.endObject();
if (_reconConfig instanceof HeuristicReconConfig) {
if (_reconConfig instanceof StandardReconConfig) {
writer.object();
writer.key("action"); writer.value("createFacet");
writer.key("facetType"); writer.value("range");
@ -242,7 +242,8 @@ public class ReconOperation extends EngineDependentOperation {
List<Recon> recons = _reconConfig.batchRecon(jobs, _historyEntryID);
for (int j = i; j < to; j++) {
Recon recon = recons.get(j - i);
int index = j - i;
Recon recon = index < recons.size() ? recons.get(j - i) : null;
List<ReconEntry> entries = groups.get(j).entries;
if (recon != null) {

View File

@ -53,22 +53,27 @@ public class PreferenceStore implements Jsonizable {
while (i.hasNext()) {
String key = i.next();
Object o = entries.get(key);
if (o instanceof JSONObject) {
try {
JSONObject obj2 = (JSONObject) o;
String className = obj2.getString("class");
Class klass = Class.forName(className);
Method method = klass.getMethod("load", JSONObject.class);
_prefs.put(key, method.invoke(null, obj2));
} catch (Exception e) {
//
e.printStackTrace();
}
} else {
_prefs.put(key, o);
}
_prefs.put(key, loadObject(o));
}
}
}
/**
 * Converts a raw preference value read from JSON into its in-memory form.
 * <p>
 * Anything that is not a {@link JSONObject} is returned unchanged. A
 * {@code JSONObject} must carry a "class" field naming a class that exposes a
 * static {@code load(JSONObject)} method; that method is invoked and its
 * result returned.
 *
 * @param o the raw value
 * @return the loaded object, {@code o} itself for non-object values, or
 *         {@code null} when the object could not be loaded (the stack trace
 *         is printed)
 */
static public Object loadObject(Object o) {
    if (!(o instanceof JSONObject)) {
        return o;
    }

    try {
        JSONObject obj2 = (JSONObject) o;
        String className = obj2.getString("class");

        // NOTE(review): the class name is taken straight from the JSON. If this
        // JSON can originate from a client request, consider restricting it to
        // a known set of preference classes before loading it reflectively.
        Class<?> klass = Class.forName(className);
        Method method = klass.getMethod("load", JSONObject.class);

        return method.invoke(null, obj2);
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
}

View File

@ -123,7 +123,7 @@ public class MqlwriteLikeTransposedNodeFactory implements TransposedNodeFactory
if (cell.recon != null &&
cell.recon.judgment == Recon.Judgment.Matched &&
cell.recon.match != null) {
obj.put(ID, cell.recon.match.topicID);
obj.put(ID, cell.recon.match.id);
} else {
obj.put(ID, (String) null);
obj.put(NAME, cell.value.toString());

View File

@ -201,7 +201,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
cell.recon.match != null) {
objectCell = cell;
id = cell.recon.match.topicID;
id = cell.recon.match.id;
} else if (node.createForNoReconMatch ||
(cell.recon != null && cell.recon.judgment == Judgment.New)) {
if (cell.recon != null && newTopicVars.containsKey(cell.recon.id)) {

View File

@ -57,11 +57,11 @@ public class FreebaseDataExtensionJob {
}
public Map<String, FreebaseDataExtensionJob.DataExtension> extend(
Set<String> guids,
Set<String> ids,
Map<String, ReconCandidate> reconCandidateMap
) throws Exception {
StringWriter writer = new StringWriter();
formulateQuery(guids, extension, writer);
formulateQuery(ids, extension, writer);
String query = writer.toString();
InputStream is = doMqlRead(query);
@ -76,11 +76,11 @@ public class FreebaseDataExtensionJob {
for (int i = 0; i < l; i++) {
JSONObject o2 = a.getJSONObject(i);
String guid = o2.getString("guid");
String id = o2.getString("id");
FreebaseDataExtensionJob.DataExtension ext = collectResult(o2, reconCandidateMap);
if (ext != null) {
map.put(guid, ext);
map.put(id, ext);
}
}
}
@ -125,20 +125,19 @@ public class FreebaseDataExtensionJob {
JSONObject obj,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
String guid = obj.getString("guid");
String id = obj.getString("id");
ReconCandidate rc;
if (reconCandidateMap.containsKey(guid)) {
rc = reconCandidateMap.get(guid);
if (reconCandidateMap.containsKey(id)) {
rc = reconCandidateMap.get(id);
} else {
rc = new ReconCandidate(
obj.getString("id"),
obj.getString("guid"),
obj.getString("name"),
JSONUtilities.getStringArray(obj, "type"),
100
);
reconCandidateMap.put(guid, rc);
reconCandidateMap.put(id, rc);
}
storeCell(rows, row, col, rc, reconCandidateMap);
@ -274,7 +273,7 @@ public class FreebaseDataExtensionJob {
return connection.getInputStream();
}
static protected void formulateQuery(Set<String> guids, JSONObject node, Writer writer) throws JSONException {
static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException {
JSONWriter jsonWriter = new JSONWriter(writer);
jsonWriter.object();
@ -282,12 +281,12 @@ public class FreebaseDataExtensionJob {
jsonWriter.array();
jsonWriter.object();
jsonWriter.key("guid"); jsonWriter.value(null);
jsonWriter.key("guid|=");
jsonWriter.key("id"); jsonWriter.value(null);
jsonWriter.key("id|=");
jsonWriter.array();
for (String guid : guids) {
if (guid != null) {
jsonWriter.value(guid);
for (String id : ids) {
if (id != null) {
jsonWriter.value(id);
}
}
jsonWriter.endArray();
@ -349,7 +348,6 @@ public class FreebaseDataExtensionJob {
if (!hasSubProperties || (node.has("included") && node.getBoolean("included"))) {
writer.key("name"); writer.value(null);
writer.key("id"); writer.value(null);
writer.key("guid"); writer.value(null);
writer.key("type"); writer.array(); writer.endArray();
}

View File

@ -25,7 +25,7 @@ public class Pool implements Jsonizable {
final protected Map<String, Recon> recons = new HashMap<String, Recon>();
public void pool(ReconCandidate candidate) {
candidates.put(candidate.topicID, candidate);
candidates.put(candidate.id, candidate);
}
public void pool(Recon recon) {

View File

@ -57,7 +57,11 @@ function init() {
"scripts/views/data-table/data-table-cell-ui.js",
"scripts/views/data-table/data-table-column-header-ui.js",
"scripts/dialogs/recon-dialog.js",
"scripts/reconciliation/recon-manager.js",
"scripts/reconciliation/recon-dialog.js",
"scripts/reconciliation/freebase-query-panel.js",
"scripts/reconciliation/standard-service-panel.js",
"scripts/dialogs/expression-preview-dialog.js",
"scripts/dialogs/freebase-loading-dialog.js",
"scripts/dialogs/clustering-dialog.js",
@ -133,7 +137,8 @@ function process(path, request, response) {
var context = {};
context.scripts = ClientSideResourceManager.getPaths(lastSegment + "/scripts");
context.styles = ClientSideResourceManager.getPaths(lastSegment + "/styles");
context.projectID = request.getParameter("project");
send(request, response, path + ".vt", context);
}
}

View File

@ -5,8 +5,8 @@
<title>Freebase Gridworks</title>
<link rel="icon" type="image/png" href="images/favicon.png">
<link rel="icon" type="image/png" href="images/favicon.png" />
<script type="text/javascript">var theProject = { id : $projectID };</script>
#foreach($path in $styles)
<link type="text/css" rel="stylesheet" href="$path" />

View File

@ -1,71 +0,0 @@
<div class="dialog-frame" style="width: 800px;">
<div class="dialog-header" bind="dialogHeader"></div>
<div class="dialog-body" bind="dialogBody">
<div id="recon-dialog-tabs" class="gridworks-tabs">
<ul>
<li><a href="#recon-dialog-tabs-heuristic">Heuristic</a></li>
<li><a href="#recon-dialog-tabs-strict">Strict</a></li>
</ul>
<div id="recon-dialog-tabs-heuristic">
<div class="grid-layout layout-normal layout-full"><table>
<tr>
<td>Reconcile each cell to a Freebase topic of type:</td>
<td>Also use relevant details from other columns:</td>
</tr>
<tr>
<td>
<div class="recon-dialog-heuristic-types-container" bind="heuristicTypeContainer">
</div>
<table class="recon-dialog-heuristic-other-type-container recon-dialog-inner-layout">
<tr>
<td width="1"><input type="radio" name="recon-dialog-type-choice" value=""></td>
<td>Search for type: <input size="20" bind="heuristicTypeInput" /></td>
</tr>
</table>
</td>
<td width="50%">
<div class="recon-dialog-heuristic-details-container" bind="heuristicDetailContainer"></div>
</td>
</tr>
<tr>
<td>
<input type="checkbox" checked bind="heuristicAutomatchCheck" /> Auto-match candidates with high confidence
</td>
<td>
Use
<input type="radio" name="recon-dialog-heuristic-service" value="relevance" checked="" /> relevance service
<input type="radio" name="recon-dialog-heuristic-service" value="recon" /> recon service
</td>
</tr>
</table></div>
</div>
<div id="recon-dialog-tabs-strict" style="display: none;">
<p>Each cell contains:</p>
<div class="grid-layout layout-normal layout-full"><table>
<tr><td width="1%"><input type="radio" name="recon-dialog-strict-choice" value="id" checked /></td><td>a Freebase ID, e.g., /en/solar_system</td></tr>
<tr><td><input type="radio" name="recon-dialog-strict-choice" value="guid" /></td><td>a Freebase GUID, e.g., #9202a8c04000641f80000000000354ae</td></tr>
<tr>
<td width="1%"><input type="radio" name="recon-dialog-strict-choice" value="key" /></td>
<td>
<div class="grid-layout layout-tighter layout-full"><table>
<tr><td colspan="2">a Freebase key in</td></tr>
<tr>
<td width="1%"><input type="radio" name="recon-dialog-strict-namespace-choice" value="/wikipedia/en" nsName="Wikipedia EN" checked /></td>
<td>the Wikipedia English namespace</td>
</tr>
<tr>
<td width="1%"><input type="radio" name="recon-dialog-strict-namespace-choice" value="other" /></td>
<td>this namespace: <input bind="strictNamespaceInput" /></td>
</tr>
</table></div>
</td>
</tr>
</table></div>
</div>
</div>
</div>
<div class="dialog-footer" bind="dialogFooter">
<button bind="reconcileButton">Start Reconciling</button>
<button bind="cancelButton">Cancel</button>
</div>
</div>

View File

@ -1,267 +0,0 @@
/*
 * Dialog for reconciling one column.
 *
 *   column: the column to reconcile
 *   types:  suggested types ({ id, name }); only the first ten are offered,
 *           followed by any built-in default type not already among them
 */
function ReconDialog(column, types) {
    var fallbackTypes = {
        "/people/person" : { name: "Person" },
        "/location/location" : { name: "Location" }
    };

    this._column = column;
    this._types = types.slice(0, 10);

    // Drop the defaults that the suggestions already cover.
    $.each(this._types, function() {
        delete fallbackTypes[this.id];
    });

    // Whatever defaults remain go after the suggestions.
    for (var typeID in fallbackTypes) {
        if (!fallbackTypes.hasOwnProperty(typeID)) {
            continue;
        }
        this._types.push({
            id: typeID,
            name: fallbackTypes[typeID].name
        });
    }

    this._createDialog();
}
/*
 * Loads the dialog markup, hooks up the footer buttons, fills in the
 * choices, shows the dialog and then turns its body into tabs.
 */
ReconDialog.prototype._createDialog = function() {
    var self = this;

    var frame = $(DOM.loadHTML("core", "scripts/dialogs/recon-dialog.html"));
    var elmts = DOM.bind(frame);
    this._elmts = elmts;

    elmts.dialogHeader.text("Reconcile column " + this._column.name);
    elmts.reconcileButton.click(function() {
        self._onOK();
    });
    elmts.cancelButton.click(function() {
        self._dismiss();
    });

    this._populateDialog();

    this._level = DialogSystem.showDialog(frame);

    // The tabs are initialized only after the dialog has been shown; the
    // strict tab ships with display:none in the markup and is un-hidden here.
    $("#recon-dialog-tabs").tabs();
    $("#recon-dialog-tabs-strict").css("display", "");

    this._wireEvents();
};
/*
 * Fills the heuristic tab: one radio button per candidate type, and one
 * property input per column other than the one being reconciled.
 *
 * Type names, type ids and column names are inserted as text, never as
 * markup, so a value such as "<b>x</b>" is displayed literally.
 */
ReconDialog.prototype._populateDialog = function() {
    var self = this;

    /*
     *  Populate types in heuristic tab
     */
    var typeTableContainer = $('<div>')
        .addClass("grid-layout layout-tighter")
        .appendTo(this._elmts.heuristicTypeContainer);
    var typeTable = $('<table></table>').appendTo(typeTableContainer)[0];

    var createTypeChoice = function(type, check) {
        var tr = typeTable.insertRow(typeTable.rows.length);
        var td0 = tr.insertCell(0);
        var td1 = tr.insertCell(1);

        td0.width = "1%";
        var radio = $('<input type="radio" name="recon-dialog-type-choice">')
            .attr("value", type.id)
            .attr("typeName", type.name)
            .appendTo(td0)
            .click(function() {
                self._rewirePropertySuggests(this.value);
            });

        if (check) {
            radio.attr("checked", "true");
        }

        // Same layout as before (name, line break, id in a span), but built
        // from text nodes instead of a concatenated HTML string.
        $(td1)
            .append(document.createTextNode(type.name))
            .append('<br/>')
            .append($('<span class="recon-dialog-type-id"></span>').text(type.id));
    };
    for (var t = 0; t < this._types.length; t++) {
        createTypeChoice(this._types[t], t === 0);
    }

    /*
     *  Populate properties in heuristic tab
     */
    var heuristicDetailTableContainer = $('<div>')
        .addClass("grid-layout layout-tighter")
        .appendTo(this._elmts.heuristicDetailContainer);

    var heuristicDetailTable = $(
        '<table>' +
            '<tr><th>Column</th><th>Freebase property</th></tr>' +
        '</table>'
    ).appendTo(heuristicDetailTableContainer)[0];

    function renderDetailColumn(column) {
        var tr = heuristicDetailTable.insertRow(heuristicDetailTable.rows.length);
        var td0 = tr.insertCell(0);
        var td1 = tr.insertCell(1);

        // Column names come from the user's data, so never parse them as HTML.
        $(td0).text(column.name);
        $('<input size="15" name="recon-dialog-heuristic-property" />')
            .attr("columnName", column.name)
            .appendTo(td1);
    }

    var columns = theProject.columnModel.columns;
    for (var c = 0; c < columns.length; c++) {
        var column = columns[c];
        if (column !== this._column) {
            renderDetailColumn(column);
        }
    }
};
/*
 * Attaches the suggest widgets to the free-form type and namespace inputs.
 * Picking a suggestion also checks the radio button(s) that make that input
 * the active choice.
 */
ReconDialog.prototype._wireEvents = function() {
    var self = this;

    var checkRadio = function(name, value) {
        $('input[name="' + name + '"][value="' + value + '"]').attr("checked", "true");
    };

    var onTypeSelected = function(e, data) {
        checkRadio("recon-dialog-type-choice", "");
        self._rewirePropertySuggests(data.id);
    };
    this._elmts.heuristicTypeInput
        .suggestT({ type : '/type/type' })
        .bind("fb-select", onTypeSelected);

    // Property suggestions start out scoped to the first listed type.
    this._rewirePropertySuggests(this._types[0].id);

    var onNamespaceSelected = function(e, data) {
        checkRadio("recon-dialog-strict-choice", "key");
        checkRadio("recon-dialog-strict-namespace-choice", "other");
    };
    this._elmts.strictNamespaceInput
        .suggest({ type: '/type/namespace' })
        .bind("fb-select", onNamespaceSelected);
};
/*
 * Re-creates the property suggest widget on every property input, scoped to
 * the given schema ("/common/topic" when none is given). Choosing a property
 * switches the service choice to the recon service.
 */
ReconDialog.prototype._rewirePropertySuggests = function(schema) {
    var suggestOptions = {
        type: '/type/property',
        schema: schema || "/common/topic"
    };
    var switchToReconService = function(e, data) {
        $('input[name="recon-dialog-heuristic-service"][value="recon"]').attr("checked", "true");
    };

    $('input[name="recon-dialog-heuristic-property"]')
        .unbind()
        .suggestP(suggestOptions)
        .bind("fb-select", switchToReconService);
};
/*
 * Starts reconciliation in the mode of the selected tab: the first tab
 * (index 0) is heuristic, any other tab is strict.
 */
ReconDialog.prototype._onOK = function() {
    var selectedTab = $("#recon-dialog-tabs").tabs('option', 'selected');
    if (selectedTab !== 0) {
        this._onDoStrict();
        return;
    }
    this._onDoHeuristic();
};
/*
 * Dismisses dialogs down to the level just below the one recorded for this
 * dialog when it was shown.
 */
ReconDialog.prototype._dismiss = function() {
    var levelBelow = this._level - 1;
    DialogSystem.dismissUntil(levelBelow);
};
/*
 * Posts a heuristic "reconcile" process for the column and closes the dialog.
 *
 * The type is the checked radio button's type; when the checked button is the
 * empty-valued "search" entry (or nothing is checked), the type picked in the
 * suggest input is used instead. Without a type the user is alerted and
 * nothing is posted.
 */
ReconDialog.prototype._onDoHeuristic = function() {
    var checked = $('input[name="recon-dialog-type-choice"]:checked');
    var radioHasType = checked.length > 0 && checked[0].value !== "";

    var type = radioHasType ?
        { id: checked[0].value, name: checked.attr("typeName") } :
        this._elmts.heuristicTypeInput.data("data.suggest");

    if (!type) {
        alert("Please specify a type.");
        return;
    }

    // Only the columns for which a property was actually picked are sent.
    var columnDetails = [];
    $('input[name="recon-dialog-heuristic-property"]').each(function() {
        var property = $(this).data("data.suggest");
        if (!property || !property.id) {
            return;
        }
        columnDetails.push({
            column: this.getAttribute("columnName"),
            property: {
                id: property.id,
                name: property.name
            }
        });
    });

    var config = {
        mode: "heuristic",
        service: $('input[name="recon-dialog-heuristic-service"]:checked')[0].value,
        type: {
            id: type.id,
            name: type.name
        },
        autoMatch: this._elmts.heuristicAutomatchCheck[0].checked,
        columnDetails: columnDetails
    };

    Gridworks.postProcess(
        "reconcile",
        {},
        {
            columnName: this._column.name,
            config: JSON.stringify(config)
        },
        { cellsChanged: true, columnStatsChanged: true }
    );

    this._dismiss();
};
/*
 * Posts a strict "reconcile" process for the column and closes the dialog.
 *
 * The checked choice says whether cells hold ids, guids or keys. Keys also
 * need a namespace: the preset one, or the one picked in the suggest input.
 * If "other" is chosen but no namespace was picked, the user is alerted and
 * the dialog stays open.
 */
ReconDialog.prototype._onDoStrict = function() {
    var match = $('input[name="recon-dialog-strict-choice"]:checked')[0].value;

    var config;
    if (match == "key") {
        var namespaceRadio = $('input[name="recon-dialog-strict-namespace-choice"]:checked')[0];
        var namespace;

        if (namespaceRadio.value == "other") {
            var suggestion = this._elmts.strictNamespaceInput.data("data.suggest");
            if (!suggestion) {
                alert("Please specify a namespace.");
                return;
            }
            namespace = {
                id: suggestion.id,
                name: suggestion.name
            };
        } else {
            namespace = {
                id: namespaceRadio.value,
                name: namespaceRadio.getAttribute("nsName")
            };
        }

        config = {
            mode: "strict",
            match: "key",
            namespace: namespace
        };
    } else if (match == "id" || match == "guid") {
        config = {
            mode: "strict",
            match: match
        };
    }

    // For any other match value the body parameters stay undefined.
    var bodyParams;
    if (config) {
        bodyParams = {
            columnName: this._column.name,
            config: JSON.stringify(config)
        };
    }

    Gridworks.postProcess(
        "reconcile",
        {},
        bodyParams,
        { cellsChanged: true, columnStatsChanged: true }
    );

    this._dismiss();
};

View File

@ -388,10 +388,6 @@ Gridworks.getPermanentLink = function() {
function onLoad() {
var params = URL.getParameters();
if ("project" in params) {
theProject = {
id: parseInt(params.project,10)
};
var uiState = {};
if ("ui" in params) {
try {

View File

@ -0,0 +1,23 @@
<div class="recon-dialog-service-panel">
<p>Each cell contains:</p>
<div class="grid-layout layout-normal layout-full"><table>
<tr><td width="1%"><input type="radio" name="recon-dialog-strict-choice" value="id" checked /></td><td>a Freebase ID, e.g., /en/solar_system</td></tr>
<tr><td><input type="radio" name="recon-dialog-strict-choice" value="guid" /></td><td>a Freebase GUID, e.g., #9202a8c04000641f80000000000354ae</td></tr>
<tr>
<td width="1%"><input type="radio" name="recon-dialog-strict-choice" value="key" /></td>
<td>
<div class="grid-layout layout-tighter layout-full"><table>
<tr><td colspan="2">a Freebase key in</td></tr>
<tr>
<td width="1%"><input type="radio" name="recon-dialog-strict-namespace-choice" value="/wikipedia/en" nsName="Wikipedia EN" checked /></td>
<td>the Wikipedia English namespace</td>
</tr>
<tr>
<td width="1%"><input type="radio" name="recon-dialog-strict-namespace-choice" value="other" /></td>
<td>this namespace: <input bind="strictNamespaceInput" /></td>
</tr>
</table></div>
</td>
</tr>
</table></div>
</div>

View File

@ -0,0 +1,100 @@
/*
 * Panel for reconciling a column's cells as Freebase ids, guids or keys.
 *
 *   column:    the column to reconcile
 *   service:   the service record this panel was created for
 *   container: element the panel's markup is appended to
 */
function ReconFreebaseQueryPanel(column, service, container) {
    this._container = container;
    this._service = service;
    this._column = column;

    // The UI is built right away, inside the container.
    this._constructUI();
}
/*
 * Makes the panel visible.
 */
ReconFreebaseQueryPanel.prototype.activate = function() {
    var panel = this._panel;
    panel.show();
};
/*
 * Hides the panel without removing it.
 */
ReconFreebaseQueryPanel.prototype.deactivate = function() {
    var panel = this._panel;
    panel.hide();
};
/*
 * Removes the panel's markup from the page and clears the references to the
 * panel, column, service and container.
 */
ReconFreebaseQueryPanel.prototype.dispose = function() {
    var panel = this._panel;
    panel.remove();

    this._container = null;
    this._service = null;
    this._column = null;
    this._panel = null;
};
/*
 * Loads the panel markup into the container, binds its named elements and
 * wires up their events.
 */
ReconFreebaseQueryPanel.prototype._constructUI = function() {
    this._panel = $(DOM.loadHTML("core", "scripts/reconciliation/freebase-query-panel.html"))
        .appendTo(this._container);
    this._elmts = DOM.bind(this._panel);

    this._wireEvents();
};
/*
 * Attaches the namespace suggest widget. Picking a namespace checks this
 * panel's "key" and "other namespace" radio buttons.
 */
ReconFreebaseQueryPanel.prototype._wireEvents = function() {
    var self = this;

    var checkRadio = function(name, value) {
        self._panel
            .find('input[name="' + name + '"][value="' + value + '"]')
            .attr("checked", "true");
    };
    var onNamespaceSelected = function(e, data) {
        checkRadio("recon-dialog-strict-choice", "key");
        checkRadio("recon-dialog-strict-namespace-choice", "other");
    };

    this._elmts.strictNamespaceInput
        .suggest({ type: '/type/namespace' })
        .bind("fb-select", onNamespaceSelected);
};
/*
 * Posts a strict "reconcile" process for the column, based on this panel's
 * choices.
 *
 * The radio buttons are looked up inside this panel only, the same way
 * _wireEvents sets them. Nothing is posted when no choice is checked, when
 * the checked value is not one of "id", "guid" or "key", or when "other
 * namespace" is chosen without a namespace having been picked.
 */
ReconFreebaseQueryPanel.prototype.start = function() {
    var panel = this._panel;
    var checkedRadio = function(name) {
        var found = panel.find('input[name="' + name + '"]:checked');
        return found.length > 0 ? found[0] : null;
    };

    var matchChoice = checkedRadio("recon-dialog-strict-choice");
    if (matchChoice === null) {
        alert("Please specify what each cell contains.");
        return;
    }

    var config = {
        mode: "strict",
        match: matchChoice.value
    };

    if (config.match === "key") {
        var namespaceChoice = checkedRadio("recon-dialog-strict-namespace-choice");
        if (namespaceChoice === null) {
            alert("Please specify a namespace.");
            return;
        }

        if (namespaceChoice.value === "other") {
            var suggest = this._elmts.strictNamespaceInput.data("data.suggest");
            if (!suggest) {
                alert("Please specify a namespace.");
                return;
            }
            config.namespace = {
                id: suggest.id,
                name: suggest.name
            };
        } else {
            config.namespace = {
                id: namespaceChoice.value,
                name: namespaceChoice.getAttribute("nsName")
            };
        }
    } else if (config.match !== "id" && config.match !== "guid") {
        // Unknown choice: there is no configuration to send.
        return;
    }

    Gridworks.postProcess(
        "reconcile",
        {},
        {
            columnName: this._column.name,
            config: JSON.stringify(config)
        },
        { cellsChanged: true, columnStatsChanged: true }
    );
};

View File

@ -0,0 +1,19 @@
<div class="dialog-frame" style="width: 900px;">
<!--
Reconciliation dialog frame (presumably the template loaded by ReconDialog).
Elements carrying a `bind` attribute are looked up by name from script:
dialogHeader, serviceList, servicePanelContainer, reconcileButton and
cancelButton are used by the ReconDialog code. addStandardServiceButton has no
handler in the visible ReconDialog code. TODO confirm whether it is wired elsewhere.
-->
<div class="dialog-header" bind="dialogHeader"></div>
<div class="dialog-body" bind="dialogBody">
<div class="grid-layout layout-normal layout-full"><table><tr>
<td width="1%">
<!-- Left column: list of available reconciliation services. -->
<div class="recon-dialog-service-header">Services and Extensions</div>
<div class="recon-dialog-service-list" bind="serviceList"></div>
<div class="recon-dialog-service-controls">
<button bind="addStandardServiceButton">Add Standard Service...</button>
</div>
</td>
<!-- Right column: container handed to the selected service's panel. -->
<td><div class="recon-dialog-service-panel-container" bind="servicePanelContainer"></div></td>
</tr></table></div>
</div>
<div class="dialog-footer" bind="dialogFooter">
<button bind="reconcileButton">Start Reconciling</button>
<button bind="cancelButton">Cancel</button>
</div>
</div>

View File

@ -0,0 +1,101 @@
/**
 * Dialog that lets the user pick a reconciliation service for a column and
 * start reconciling with it.
 *
 * @param column column object to reconcile; its `name` is shown in the header
 * @param types  accepted for compatibility with callers; not read here
 */
function ReconDialog(column, types) {
    var dialog = this;

    dialog._column = column;
    dialog._selectedServiceRecordIndex = -1;  // -1 means no service selected yet
    dialog._serviceRecords = [];

    dialog._createDialog();
}
/**
 * Builds the dialog from its template, sets the header text, hooks up the
 * two footer buttons, shows the dialog and fills in the service list.
 */
ReconDialog.prototype._createDialog = function() {
    var owner = this;
    var frame = $(DOM.loadHTML("core", "scripts/reconciliation/recon-dialog.html"));
    var elmts = DOM.bind(frame);

    this._elmts = elmts;

    elmts.dialogHeader.text("Reconcile column " + this._column.name);
    elmts.reconcileButton.click(function() {
        owner._onOK();
    });
    elmts.cancelButton.click(function() {
        owner._dismiss();
    });

    // Remember the dialog level so _dismiss can close exactly this dialog.
    this._level = DialogSystem.showDialog(frame);

    this._populateDialog();
};
/**
 * Handler for the "Start Reconciling" button: asks the selected service's
 * panel (if any) to start, then closes the dialog.
 */
ReconDialog.prototype._onOK = function() {
    var index = this._selectedServiceRecordIndex;
    var handler = (index >= 0) ? this._serviceRecords[index].handler : null;

    if (handler) {
        handler.start();
    }

    this._dismiss();
};
/**
 * Disposes every service panel that was created, forgets the service records
 * and closes this dialog.
 */
ReconDialog.prototype._dismiss = function() {
    var records = this._serviceRecords;

    for (var k = 0; k < records.length; k++) {
        var handler = records[k].handler;
        if (handler) {
            handler.dispose();
        }
    }

    this._serviceRecords = null;

    DialogSystem.dismissUntil(this._level - 1);
};
/**
 * Creates one selector link per registered reconciliation service in the
 * service list and selects the first service. Does nothing when no services
 * are registered.
 */
ReconDialog.prototype._populateDialog = function() {
    var owner = this;
    var services = ReconciliationManager.getAllServices();

    if (!(services.length > 0)) {
        return;
    }

    // Builds the record and its selector link for one service.
    function addService(service) {
        var record = {
            service: service,
            handler: null   // created lazily on first selection
        };

        var link = $('<a>')
            .attr("href", "javascript:{}")
            .addClass("recon-dialog-service-selector")
            .text(service.name);

        record.selector = link
            .appendTo(owner._elmts.serviceList)
            .click(function() {
                owner._selectService(record);
            });

        owner._serviceRecords.push(record);
    }

    for (var k = 0; k < services.length; k++) {
        addService(services[k]);
    }

    this._selectService(this._serviceRecords[0]);
};
/**
 * Makes the given service record the selected one: un-highlights and
 * deactivates the previously selected service, highlights the new selector,
 * and activates the record's panel, creating it on first selection.
 *
 * Does nothing if the record is not in the list or is already selected.
 *
 * The handler constructor is resolved before any UI state is changed, so a
 * bad handler name fails without leaving the dialog half-switched.
 *
 * @param record one of the entries of this._serviceRecords
 * @throws TypeError if the service's ui.handler does not name a function
 */
ReconDialog.prototype._selectService = function(record) {
    var index = -1;
    for (var i = 0; i < this._serviceRecords.length; i++) {
        if (record === this._serviceRecords[i]) {
            index = i;
            break;
        }
    }
    if (index < 0 || index === this._selectedServiceRecordIndex) {
        return;
    }

    var handlerConstructor = null;
    if (!record.handler) {
        // NOTE(review): eval() runs whatever string the service record carries
        // in ui.handler. Standard-service records are reloaded from stored
        // preferences, so this string is not guaranteed to be code we wrote.
        // Consider replacing with a lookup in a registry of known handlers.
        handlerConstructor = eval(record.service.ui.handler);
        if (typeof handlerConstructor !== "function") {
            throw new TypeError(
                "Reconciliation service UI handler is not a constructor: " +
                record.service.ui.handler);
        }
    }

    if (this._selectedServiceRecordIndex >= 0) {
        var oldRecord = this._serviceRecords[this._selectedServiceRecordIndex];
        // Always clear the highlight, whether or not a panel was created.
        oldRecord.selector.removeClass("selected");
        if (oldRecord.handler) {
            oldRecord.handler.deactivate();
        }
    }

    record.selector.addClass("selected");
    if (record.handler) {
        record.handler.activate();
    } else {
        record.handler = new handlerConstructor(
            this._column, record.service, this._elmts.servicePanelContainer);
    }

    this._selectedServiceRecordIndex = index;
};

View File

@ -0,0 +1,65 @@
/*
 * Registry of the reconciliation services known to the client.
 */
var ReconciliationManager = {
    // services registered by core and extensions
    customServices: [],

    // services registered by user
    standardServices: []
};
/**
 * Returns a new array holding the custom services followed by the standard
 * services. The registry arrays themselves are not modified.
 */
ReconciliationManager.getAllServices = function() {
    var custom = ReconciliationManager.customServices;
    var standard = ReconciliationManager.standardServices;

    return custom.concat(standard);
};
/**
 * Adds a service record to the list of custom services.
 *
 * @param service service record to register
 */
ReconciliationManager.registerService = function(service) {
    var registry = ReconciliationManager.customServices;
    registry.push(service);
};
/**
 * Fetches a standard reconciliation service's description via JSONP and, on
 * success, records it as a standard service and saves the list.
 *
 * The description is tagged with the service url and with the name of the UI
 * handler used for standard services before it is stored. A response that is
 * not an object is ignored rather than dereferenced.
 *
 * @param url endpoint of the reconciliation service
 */
ReconciliationManager.registerStandardService = function(url) {
    // Plain indexOf, so this does not depend on the String.prototype.contains
    // extension having been loaded first.
    var separator = (url.indexOf("?") >= 0) ? "&" : "?";

    $.ajax({
        async: false,
        url: url + separator + "callback=?",
        success: function(data) {
            if (!data || typeof data !== "object") {
                return;
            }

            data.url = url;
            data.ui = { "handler" : "ReconStandardServicePanel" };

            ReconciliationManager.standardServices.push(data);
            ReconciliationManager.save();
        },
        dataType: "jsonp"
    });
};
/**
 * Stores the list of standard services, serialized as JSON, under the
 * "standard-reconciliation-services" preference.
 *
 * @param f optional callback invoked with no arguments once the POST succeeds
 */
ReconciliationManager.save = function(f) {
    var query = $.param({
        name: "standard-reconciliation-services"
    });
    var serialized = JSON.stringify(ReconciliationManager.standardServices);

    var onSaved = function(data) {
        if (f) {
            f();
        }
    };

    $.ajax({
        async: false,
        type: "POST",
        url: "/command/set-preference?" + query,
        data: { "value" : serialized },
        success: onSaved,
        dataType: "json"
    });
};
/*
 * Start-up: register the built-in Freebase query service, then load the
 * stored standard services. When nothing is stored yet, register the default
 * standard service instead.
 */
(function() {
    var freebaseQueryService = {
        "name" : "Freebase Query-based Reconciliation",
        "ui" : { "handler" : "ReconFreebaseQueryPanel" }
    };
    ReconciliationManager.customServices.push(freebaseQueryService);

    var onPreferenceLoaded = function(data) {
        var nothingStored = !data.value || data.value == "null";
        if (nothingStored) {
            ReconciliationManager.registerStandardService(
                "http://gridworks-helper.dfhuynh.user.dev.freebaseapps.com/reconcile");
        } else {
            ReconciliationManager.standardServices = JSON.parse(data.value);
        }
    };

    $.ajax({
        async: false,
        url: "/command/get-preference?" + $.param({
            name: "standard-reconciliation-services"
        }),
        success: onPreferenceLoaded,
        dataType: "json"
    });
})();

View File

@ -0,0 +1,22 @@
<div class="recon-dialog-service-panel recon-dialog-standard-service-panel">
<!--
Panel for a standard reconciliation service (presumably the template loaded by
ReconStandardServicePanel). Elements with a `bind` attribute are looked up by
name from script: typeContainer and detailContainer are filled in at runtime,
typeInput takes a free-form type, automatchCheck toggles auto-matching.
-->
<div class="grid-layout layout-normal layout-full"><table>
<tr>
<td>Reconcile each cell to an entity of one of these types:</td>
<td>Also use relevant details from other columns:</td>
</tr>
<tr>
<td><div class="type-container" bind="typeContainer"></div></td>
<td width="50%"><div class="detail-container" bind="detailContainer"></div></td>
</tr>
<tr>
<!-- The empty-valued radio stands for "use the type typed into typeInput". -->
<td><input type="radio" name="type-choice" value="">
Or enter a specific type: <input size="20" bind="typeInput" /></td>
<td>
</td>
</tr>
<tr>
<td><input type="checkbox" checked bind="automatchCheck" /> Auto-match candidates with high confidence</td>
<td></td>
</tr>
</table></div>
</div>

View File

@ -0,0 +1,257 @@
/**
 * Panel for reconciling a column against a standard reconciliation service.
 *
 * Stores its arguments, starts with an empty list of candidate types and
 * then builds the panel UI.
 *
 * @param column    column object to reconcile
 * @param service   service record; its `url` identifies the service
 * @param container target the panel markup is appended to
 */
function ReconStandardServicePanel(column, service, container) {
    var panel = this;

    panel._container = container;
    panel._service = service;
    panel._column = column;
    panel._types = [];

    panel._constructUI();
}
/**
 * Asks the server to guess candidate types for the column and stores them in
 * this._types, showing a busy indicator meanwhile.
 *
 * When no types come back (empty list, missing `types` field, or a failed
 * request) and the service declares `defaultTypes`, those are used instead,
 * de-duplicated by id. The busy indicator is always dismissed and `f` is
 * always called, so the panel can still be populated after a failure.
 *
 * @param f callback invoked with no arguments once this._types is set
 */
ReconStandardServicePanel.prototype._guessTypes = function(f) {
    var self = this;
    var dismissBusy = DialogSystem.showBusy();

    // Stores the final type list, falling back to the service's defaults.
    var finish = function(guessedTypes) {
        self._types = guessedTypes;

        if (self._types.length === 0 && "defaultTypes" in self._service) {
            var defaultTypes = {};
            $.each(self._service["defaultTypes"], function(index, type) {
                defaultTypes[type.id] = type.name;
            });

            for (var id in defaultTypes) {
                if (defaultTypes.hasOwnProperty(id)) {
                    self._types.push({
                        id: id,
                        // The map value is the name itself, not an object.
                        name: defaultTypes[id]
                    });
                }
            }
        }

        dismissBusy();
        f();
    };

    $.ajax({
        type: "POST",
        url: "/command/guess-types-of-column?" + $.param({
            project: theProject.id,
            columnName: this._column.name,
            service: this._service.url
        }),
        data: null,
        dataType: "json",
        success: function(data) {
            // An error response carries no `types`; treat it as "none guessed".
            finish((data && data.types) ? data.types : []);
        },
        error: function() {
            finish([]);
        }
    });
};
/**
 * Loads the panel template into the container and binds its named elements.
 * The panel is populated and wired only after the type guesses have arrived.
 */
ReconStandardServicePanel.prototype._constructUI = function() {
    var owner = this;
    var markup = DOM.loadHTML("core", "scripts/reconciliation/standard-service-panel.html");

    this._panel = $(markup).appendTo(this._container);
    this._elmts = DOM.bind(this._panel);

    var onTypesReady = function() {
        owner._populatePanel();
        owner._wireEvents();
    };
    this._guessTypes(onTypesReady);
};
/** Makes this panel visible. */
ReconStandardServicePanel.prototype.activate = function() {
    var panel = this._panel;
    panel.show();
};
/** Hides this panel without destroying it. */
ReconStandardServicePanel.prototype.deactivate = function() {
    var panel = this._panel;
    panel.hide();
};
/**
 * Removes the panel from the document and drops the references taken in the
 * constructor. The panel must not be used after this call.
 */
ReconStandardServicePanel.prototype.dispose = function() {
    var panel = this._panel;
    panel.remove();

    this._container = null;
    this._service = null;
    this._column = null;
    this._panel = null;
};
/**
 * Fills the panel with one radio button per candidate type (the first one
 * checked) and one row per other column of the project, each with an
 * "include" checkbox and a property input.
 *
 * Type names, type ids and column names are inserted as text, never as
 * markup, because they come from the reconciliation service and from user
 * data.
 */
ReconStandardServicePanel.prototype._populatePanel = function() {
    var self = this;

    /*
     *  Populate types
     */
    var typeTableContainer = $('<div>')
        .addClass("grid-layout layout-tightest")
        .appendTo(this._elmts.typeContainer);

    var typeTable = $('<table></table>').appendTo(typeTableContainer)[0];

    // Adds one row: a radio button plus the type's name (and id, if different).
    var createTypeChoice = function(type, check) {
        // A type is either a bare id string or an object with id and name.
        var typeID = typeof type == "string" ? type : type.id;
        var typeName = typeof type == "string" ? type : (type.name || type.id);

        var tr = typeTable.insertRow(typeTable.rows.length);
        var td0 = tr.insertCell(0);
        var td1 = tr.insertCell(1);

        td0.width = "1%";
        var radio = $('<input type="radio" name="type-choice">')
            .attr("value", typeID)
            .attr("typeName", typeName)
            .appendTo(td0)
            .click(function() {
                self._rewirePropertySuggests(this.value);
            });

        if (check) {
            radio.attr("checked", "true");
        }

        var label = $(td1).text(typeName);
        if (typeName != typeID) {
            label.append('<br/>');
            $('<span class="type-id"></span>').text(typeID).appendTo(label);
        }
    };

    for (var i = 0; i < this._types.length; i++) {
        createTypeChoice(this._types[i], i === 0);
    }

    /*
     *  Populate properties
     */
    var detailTableContainer = $('<div>')
        .addClass("grid-layout layout-tightest")
        .appendTo(this._elmts.detailContainer);

    var detailTable = $(
        '<table>' +
            '<tr><th>Column</th><th>Include?</th><th>As Property</th></tr>' +
        '</table>'
    ).appendTo(detailTableContainer)[0];

    // Adds one row for a column other than the one being reconciled.
    function renderDetailColumn(column) {
        var tr = detailTable.insertRow(detailTable.rows.length);
        var td0 = tr.insertCell(0);
        var td1 = tr.insertCell(1);
        var td2 = tr.insertCell(2);

        $(td0).text(column.name);
        $('<input type="checkbox" />')
            .attr("columnName", column.name)
            .appendTo(td1);
        $('<input size="25" name="property" />')
            .attr("columnName", column.name)
            .appendTo(td2);
    }

    var columns = theProject.columnModel.columns;
    for (var c = 0; c < columns.length; c++) {
        var column = columns[c];
        if (column !== this._column) {
            renderDetailColumn(column);
        }
    }
};
/**
 * For services in the Freebase identifier space only: attaches the type
 * suggest widget to the free-form type input and points the property inputs
 * at the first candidate type's schema.
 *
 * Picking a type from the suggest widget checks the empty-valued "specific
 * type" radio and re-targets the property inputs to the picked type.
 *
 * Copes with an empty type list and with types given as bare id strings. In
 * both cases the original code dereferenced `.id` on a missing or string value.
 */
ReconStandardServicePanel.prototype._wireEvents = function() {
    if (!this._isInFreebaseIdentifierSpace()) {
        return;
    }

    var self = this;
    this._elmts.typeInput
        .suggestT({ type : '/type/type' })
        .bind("fb-select", function(e, data) {
            self._panel
                .find('input[name="type-choice"][value=""]')
                .attr("checked", "true");

            self._rewirePropertySuggests(data.id);
        });

    var firstType = (this._types && this._types.length > 0) ? this._types[0] : null;
    var firstTypeID = null;
    if (firstType) {
        firstTypeID = (typeof firstType == "string") ? firstType : firstType.id;
    }

    // A null id makes _rewirePropertySuggests fall back to its default schema.
    this._rewirePropertySuggests(firstTypeID);
};
/**
 * Re-attaches the property suggest widget on every property input so that it
 * suggests properties of the given type. No-op for services outside the
 * Freebase identifier space.
 *
 * @param type type id string, or object with an `id`; when falsy the schema
 *             "/common/topic" is used
 */
ReconStandardServicePanel.prototype._rewirePropertySuggests = function(type) {
    if (!this._isInFreebaseIdentifierSpace()) {
        return;
    }

    var schema;
    if (type) {
        schema = (typeof type == "string") ? type : type.id;
    } else {
        schema = "/common/topic";
    }

    var propertyInputs = this._panel.find('input[name="property"]');
    propertyInputs.unbind().suggestP({
        type: '/type/property',
        schema: schema
    });
};
/**
 * Tells whether this panel's service identifies entities in the Freebase
 * namespace, i.e. whether its identifier space starts with
 * "http://rdf.freebase.com/".
 *
 * The identifier space is read from `identifierSpace`, the key start() sends
 * to the server, and falls back to the older `identifier-space` key, so both
 * spellings of the service metadata are recognized.
 *
 * @return true or false
 */
ReconStandardServicePanel.prototype._isInFreebaseIdentifierSpace = function() {
    var service = this._service;
    var space = ("identifierSpace" in service) ?
        service.identifierSpace :
        service["identifier-space"];

    // indexOf instead of startsWith: no reliance on a String.prototype patch.
    return typeof space == "string" &&
        space.indexOf("http://rdf.freebase.com/") === 0;
};
/**
 * Starts reconciling the column against this panel's standard service.
 *
 * The type is taken from the checked type radio when it has a non-empty
 * value. Otherwise it comes from the free-form type input: the suggest
 * widget's data for Freebase services, the raw text for others. If no type
 * can be determined the user is alerted and nothing is posted. The original
 * code threw on `type.id` in that case.
 *
 * Every property input with a suggested or typed property contributes one
 * column detail.
 */
ReconStandardServicePanel.prototype.start = function() {
    var type;
    var choices = this._panel.find('input[name="type-choice"]:checked');
    if (choices.length > 0 && choices[0].value != "") {
        type = {
            id: choices[0].value,
            name: choices.attr("typeName")
        };
    } else if (this._isInFreebaseIdentifierSpace()) {
        type = this._elmts.typeInput.data("data.suggest");
    } else {
        var typedType = this._elmts.typeInput[0].value;
        type = {
            id: typedType,
            name: typedType
        };
    }

    if (!type) {
        alert("Please specify a type.");
        return;
    }

    // NOTE(review): the "Include?" checkbox of each row is not consulted here;
    // a row is included whenever its property input is non-empty. Confirm intent.
    var columnDetails = [];
    this._panel.find('input[name="property"]').each(function() {
        var property = null;

        var suggested = $(this).data("data.suggest");
        if (suggested && suggested.id) {
            property = {
                id: suggested.id,
                name: suggested.name
            };
        } else {
            var typedProperty = $.trim(this.value);
            if (typedProperty) {
                property = {
                    id: typedProperty,
                    name: typedProperty
                };
            }
        }

        if (property) {
            columnDetails.push({
                column: this.getAttribute("columnName"),
                property: property
            });
        }
    });

    Gridworks.postProcess(
        "reconcile",
        {},
        {
            columnName: this._column.name,
            config: JSON.stringify({
                mode: "standard-service",
                service: this._service.url,
                identifierSpace: this._service.identifierSpace,
                schemaSpace: this._service.schemaSpace,
                type: {
                    id: type.id,
                    name: type.name
                },
                autoMatch: this._elmts.automatchCheck[0].checked,
                columnDetails: columnDetails
            })
        },
        { cellsChanged: true, columnStatsChanged: true }
    );
};

View File

@ -1,3 +1,15 @@
/*
 * Returns the string with leading and trailing whitespace removed.
 * Installed only when the runtime has no native String.prototype.trim, so a
 * built-in implementation is never replaced.
 */
if (!String.prototype.trim) {
    String.prototype.trim = function() {
        return this.replace(/^\s+/, '').replace(/\s+$/, '');
    };
}
/*
 * Tells whether the string begins with s.
 * Installed only when the runtime has no native String.prototype.startsWith,
 * so the built-in (including its optional position argument) is preserved
 * for every script on the page.
 */
if (!String.prototype.startsWith) {
    String.prototype.startsWith = function(s) {
        return this.length >= s.length && this.substring(0, s.length) == s;
    };
}
/*
 * Tells whether the string ends with s.
 * Installed only when the runtime has no native String.prototype.endsWith,
 * so the built-in (including its optional end-position argument) is
 * preserved for every script on the page.
 */
if (!String.prototype.endsWith) {
    String.prototype.endsWith = function(s) {
        return this.length >= s.length && this.substring(this.length - s.length) == s;
    };
}
/*
 * Tells whether s occurs anywhere in the string.
 */
String.prototype.contains = function(s) {
    var position = this.indexOf(s);
    return position !== -1;
};

View File

@ -195,6 +195,8 @@ DataTableCellUI.prototype._doJudgment = function(judgment, params) {
params.row = this._rowIndex;
params.cell = this._cellIndex;
params.judgment = judgment;
params.identifierSpace = (this._cell.r) ? this._cell.r.identifierSpace : null;
params.schemaSpace = (this._cell.r) ? this._cell.r.schemaSpace : null;
this._postProcessOneCell("recon-judge-one-cell", params, true);
};
@ -203,7 +205,8 @@ DataTableCellUI.prototype._doJudgmentForSimilarCells = function(judgment, params
params.columnName = Gridworks.cellIndexToColumn(this._cellIndex).name;
params.similarValue = this._cell.v;
params.judgment = judgment;
params.identifierSpace = (this._cell.r) ? this._cell.r.identifierSpace : null;
params.schemaSpace = (this._cell.r) ? this._cell.r.schemaSpace : null;
this._postProcessSeveralCells("recon-judge-similar-cells", params, true);
};

View File

@ -704,60 +704,7 @@ DataTableColumnHeaderUI.prototype._doTextTransformPrompt = function() {
};
DataTableColumnHeaderUI.prototype._doReconcile = function() {
var self = this;
var dismissBusy = DialogSystem.showBusy();
$.post(
"/command/guess-types-of-column?" + $.param({ project: theProject.id, columnName: this._column.name }),
null,
function(data) {
if (data.code != "ok") {
dismissBusy();
new ReconDialog(self._column, []);
} else {
data.types = data.types.slice(0, 20);
var ids = $.map(data.types, function(elmt) { return elmt.id; });
if (!ids.length) {
dismissBusy();
new ReconDialog(self._column, []);
} else {
var query = [{
"id|=" : ids,
"id" : null,
"/freebase/type_profile/kind" : []
}];
$.getJSON(
"http://api.freebase.com/api/service/mqlread?" + $.param({ "query" : JSON.stringify({ "query" : query }) }) + "&callback=?",
null,
function(o) {
dismissBusy();
var kindMap = {};
$.each(o.result, function() {
var m = kindMap[this.id] = {};
$.each(this["/freebase/type_profile/kind"], function() {
m[this] = true;
});
});
new ReconDialog(self._column, $.map(data.types, function(type) {
if (type.id in kindMap) {
var m = kindMap[type.id];
if (!("Role" in m) && !("Annotation" in m)) {
return type;
}
}
return null;
}));
},
"jsonp"
);
}
}
},
"json"
);
new ReconDialog(this._column);
};
DataTableColumnHeaderUI.prototype._doReconDiscardJudgments = function() {

View File

@ -1,21 +1,47 @@
.recon-dialog-type-id {
.recon-dialog-service-header {
padding: 5px 10px;
font-weight: bold;
font-size: 120%;
}
.recon-dialog-service-list {
border: 1px solid #aaa;
padding: 1px;
overflow: auto;
width: 200px;
height: 400px;
}
.recon-dialog-service-controls {
padding: 5px 0px;
}
.recon-dialog-service-controls > button {
display: block;
width: 100%;
}
a.recon-dialog-service-selector {
display: block;
padding: 10px;
text-decoration: none;
color: black;
}
a.recon-dialog-service-selector:hover {
background: #eee;
}
a.recon-dialog-service-selector.selected {
background: #eee;
font-weight: bold;
}
.recon-dialog-standard-service-panel .type-id {
color: #888;
}
.recon-dialog-heuristic-types-container {
border: 1px solid #ccc;
padding: 10px;
max-height: 200px;
overflow: auto;
}
.recon-dialog-heuristic-other-type-container {
margin: 10px;
}
.recon-dialog-heuristic-details-container {
.recon-dialog-standard-service-panel .type-container,
.recon-dialog-standard-service-panel .detail-container {
border: 1px solid #ccc;
padding: 10px;
max-height: 300px;
height: 300px !important;
overflow: auto;
}