First pass at generalizing the standard reconciliation service UI. Many pieces are still Freebase-centric.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1032 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
f0ed50e468
commit
058e86b4c8
@ -110,6 +110,9 @@ public class GridworksServlet extends Butterfly {
|
||||
{"upload-data", "com.metaweb.gridworks.commands.freebase.UploadDataCommand"},
|
||||
{"mqlread", "com.metaweb.gridworks.commands.freebase.MQLReadCommand"},
|
||||
{"mqlwrite", "com.metaweb.gridworks.commands.freebase.MQLWriteCommand"},
|
||||
|
||||
{"get-preference", "com.metaweb.gridworks.commands.GetPreferenceCommand"},
|
||||
{"set-preference", "com.metaweb.gridworks.commands.SetPreferenceCommand"}
|
||||
};
|
||||
|
||||
public static String getVersion() {
|
||||
|
@ -0,0 +1,54 @@
|
||||
package com.metaweb.gridworks.commands;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.metaweb.gridworks.ProjectManager;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
import com.metaweb.gridworks.preference.PreferenceStore;
|
||||
import com.metaweb.gridworks.preference.TopList;
|
||||
|
||||
public class GetPreferenceCommand extends Command {
|
||||
@Override
|
||||
public void doGet(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
Project project = request.getParameter("project") != null ? getProject(request) : null;
|
||||
PreferenceStore ps = project != null ?
|
||||
project.getMetadata().getPreferenceStore() :
|
||||
ProjectManager.singleton.getPreferenceStore();
|
||||
|
||||
String prefName = request.getParameter("name");
|
||||
Object pref = ps.get(prefName);
|
||||
|
||||
try {
|
||||
response.setCharacterEncoding("UTF-8");
|
||||
response.setHeader("Content-Type", "application/json");
|
||||
|
||||
JSONWriter writer = new JSONWriter(response.getWriter());
|
||||
|
||||
writer.object();
|
||||
writer.key("value");
|
||||
if (pref == null || pref instanceof String || pref instanceof Number || pref instanceof Boolean) {
|
||||
writer.value(pref);
|
||||
} else if (pref instanceof TopList) {
|
||||
TopList tl = (TopList) pref;
|
||||
tl.write(writer, new Properties());
|
||||
} else {
|
||||
writer.value(pref.toString());
|
||||
}
|
||||
|
||||
writer.endObject();
|
||||
} catch (JSONException e) {
|
||||
respondException(response, e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
package com.metaweb.gridworks.commands;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONTokener;
|
||||
|
||||
import com.metaweb.gridworks.ProjectManager;
|
||||
import com.metaweb.gridworks.model.Project;
|
||||
import com.metaweb.gridworks.preference.PreferenceStore;
|
||||
|
||||
public class SetPreferenceCommand extends Command {
|
||||
@Override
|
||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
Project project = request.getParameter("project") != null ? getProject(request) : null;
|
||||
PreferenceStore ps = project != null ?
|
||||
project.getMetadata().getPreferenceStore() :
|
||||
ProjectManager.singleton.getPreferenceStore();
|
||||
|
||||
String prefName = request.getParameter("name");
|
||||
String valueString = request.getParameter("value");
|
||||
|
||||
try {
|
||||
JSONTokener t = new JSONTokener(valueString);
|
||||
Object o = t.nextValue();
|
||||
|
||||
ps.put(prefName, PreferenceStore.loadObject(o));
|
||||
|
||||
respond(response, "{ \"code\" : \"ok\" }");
|
||||
} catch (JSONException e) {
|
||||
respondException(response, e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -50,28 +50,28 @@ public class PreviewExtendDataCommand extends Command {
|
||||
int cellIndex = project.columnModel.getColumnByName(columnName).getCellIndex();
|
||||
|
||||
List<String> topicNames = new ArrayList<String>();
|
||||
List<String> topicGuids = new ArrayList<String>();
|
||||
Set<String> guids = new HashSet<String>();
|
||||
List<String> topicIds = new ArrayList<String>();
|
||||
Set<String> ids = new HashSet<String>();
|
||||
for (int i = 0; i < length; i++) {
|
||||
int rowIndex = rowIndices.getInt(i);
|
||||
if (rowIndex >= 0 && rowIndex < project.rows.size()) {
|
||||
Row row = project.rows.get(rowIndex);
|
||||
Cell cell = row.getCell(cellIndex);
|
||||
if (cell != null && cell.recon != null && cell.recon.match != null) {
|
||||
topicNames.add(cell.recon.match.topicName);
|
||||
topicGuids.add(cell.recon.match.topicGUID);
|
||||
guids.add(cell.recon.match.topicGUID);
|
||||
topicNames.add(cell.recon.match.name);
|
||||
topicIds.add(cell.recon.match.id);
|
||||
ids.add(cell.recon.match.id);
|
||||
} else {
|
||||
topicNames.add(null);
|
||||
topicGuids.add(null);
|
||||
guids.add(null);
|
||||
topicIds.add(null);
|
||||
ids.add(null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
|
||||
FreebaseDataExtensionJob job = new FreebaseDataExtensionJob(json);
|
||||
Map<String, DataExtension> map = job.extend(guids, reconCandidateMap);
|
||||
Map<String, DataExtension> map = job.extend(ids, reconCandidateMap);
|
||||
|
||||
response.setCharacterEncoding("UTF-8");
|
||||
response.setHeader("Content-Type", "application/json");
|
||||
@ -102,11 +102,11 @@ public class PreviewExtendDataCommand extends Command {
|
||||
writer.key("rows");
|
||||
writer.array();
|
||||
for (int r = 0; r < topicNames.size(); r++) {
|
||||
String guid = topicGuids.get(r);
|
||||
String id = topicIds.get(r);
|
||||
String topicName = topicNames.get(r);
|
||||
|
||||
if (guid != null && map.containsKey(guid)) {
|
||||
DataExtension ext = map.get(guid);
|
||||
if (id != null && map.containsKey(id)) {
|
||||
DataExtension ext = map.get(id);
|
||||
boolean first = true;
|
||||
|
||||
if (ext.data.length > 0) {
|
||||
@ -123,8 +123,8 @@ public class PreviewExtendDataCommand extends Command {
|
||||
if (cell != null && cell instanceof ReconCandidate) {
|
||||
ReconCandidate rc = (ReconCandidate) cell;
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(rc.topicID);
|
||||
writer.key("name"); writer.value(rc.topicName);
|
||||
writer.key("id"); writer.value(rc.id);
|
||||
writer.key("name"); writer.value(rc.name);
|
||||
writer.endObject();
|
||||
} else {
|
||||
writer.value(cell);
|
||||
@ -138,9 +138,9 @@ public class PreviewExtendDataCommand extends Command {
|
||||
}
|
||||
|
||||
writer.array();
|
||||
if (guid != null) {
|
||||
if (id != null) {
|
||||
writer.object();
|
||||
writer.key("id"); writer.value("/guid/" + guid.substring(1));
|
||||
writer.key("id"); writer.value(id);
|
||||
writer.key("name"); writer.value(topicName);
|
||||
writer.endObject();
|
||||
} else {
|
||||
|
@ -1,5 +1,6 @@
|
||||
package com.metaweb.gridworks.commands.freebase;
|
||||
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringWriter;
|
||||
@ -38,6 +39,7 @@ public class GuessTypesOfColumnCommand extends Command {
|
||||
try {
|
||||
Project project = getProject(request);
|
||||
String columnName = request.getParameter("columnName");
|
||||
String serviceUrl = request.getParameter("service");
|
||||
|
||||
response.setCharacterEncoding("UTF-8");
|
||||
response.setHeader("Content-Type", "application/json");
|
||||
@ -54,7 +56,7 @@ public class GuessTypesOfColumnCommand extends Command {
|
||||
writer.key("code"); writer.value("ok");
|
||||
writer.key("types"); writer.array();
|
||||
|
||||
List<TypeGroup> typeGroups = guessTypes(project, column);
|
||||
List<TypeGroup> typeGroups = guessTypes(project, column, serviceUrl);
|
||||
for (TypeGroup tg : typeGroups) {
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(tg.id);
|
||||
@ -87,7 +89,7 @@ public class GuessTypesOfColumnCommand extends Command {
|
||||
* @param column
|
||||
* @return
|
||||
*/
|
||||
protected List<TypeGroup> guessTypes(Project project, Column column) {
|
||||
protected List<TypeGroup> guessTypes(Project project, Column column, String serviceUrl) {
|
||||
Map<String, TypeGroup> map = new HashMap<String, TypeGroup>();
|
||||
|
||||
int cellIndex = column.getCellIndex();
|
||||
@ -115,7 +117,7 @@ public class GuessTypesOfColumnCommand extends Command {
|
||||
|
||||
jsonWriter.object();
|
||||
for (int i = 0; i < samples.size(); i++) {
|
||||
jsonWriter.key("q" + i + ":search");
|
||||
jsonWriter.key("q" + i);
|
||||
jsonWriter.object();
|
||||
|
||||
jsonWriter.key("query"); jsonWriter.value(samples.get(i));
|
||||
@ -125,14 +127,26 @@ public class GuessTypesOfColumnCommand extends Command {
|
||||
}
|
||||
jsonWriter.endObject();
|
||||
|
||||
StringBuffer sb = new StringBuffer(1024);
|
||||
sb.append("http://api.freebase.com/api/service/search?queries=");
|
||||
sb.append(ParsingUtilities.encode(stringWriter.toString()));
|
||||
|
||||
URL url = new URL(sb.toString());
|
||||
String queriesString = stringWriter.toString();
|
||||
URL url = new URL(serviceUrl);
|
||||
URLConnection connection = url.openConnection();
|
||||
connection.setConnectTimeout(5000);
|
||||
connection.connect();
|
||||
{
|
||||
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
|
||||
connection.setConnectTimeout(30000);
|
||||
connection.setDoOutput(true);
|
||||
|
||||
DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
|
||||
try {
|
||||
String body = "queries=" + ParsingUtilities.encode(queriesString);
|
||||
|
||||
dos.writeBytes(body);
|
||||
} finally {
|
||||
dos.flush();
|
||||
dos.close();
|
||||
}
|
||||
|
||||
connection.connect();
|
||||
}
|
||||
|
||||
InputStream is = connection.getInputStream();
|
||||
try {
|
||||
@ -140,7 +154,7 @@ public class GuessTypesOfColumnCommand extends Command {
|
||||
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
|
||||
|
||||
for (int i = 0; i < samples.size(); i++) {
|
||||
String key = "q" + i + ":search";
|
||||
String key = "q" + i;
|
||||
if (!o.has(key)) {
|
||||
continue;
|
||||
}
|
||||
@ -161,23 +175,24 @@ public class GuessTypesOfColumnCommand extends Command {
|
||||
int typeCount = types.length();
|
||||
|
||||
for (int t = 0; t < typeCount; t++) {
|
||||
JSONObject type = types.getJSONObject(t);
|
||||
String id = type.getString("id");
|
||||
if (id.equals("/common/topic") ||
|
||||
id.equals("/base/ontologies/ontology_instance") ||
|
||||
(id.startsWith("/base/") && id.endsWith("/topic")) ||
|
||||
id.startsWith("/user/") ||
|
||||
id.startsWith("/freebase/")
|
||||
) {
|
||||
continue;
|
||||
Object type = types.get(t);
|
||||
String typeID;
|
||||
String typeName;
|
||||
|
||||
if (type instanceof String) {
|
||||
typeID = typeName = (String) type;
|
||||
} else {
|
||||
typeID = ((JSONObject) type).getString("id");
|
||||
typeName = ((JSONObject) type).getString("name");
|
||||
}
|
||||
|
||||
if (map.containsKey(id)) {
|
||||
TypeGroup tg = map.get(id);
|
||||
tg.score += score;
|
||||
double score2 = score * (typeCount - t) / (double) typeCount;
|
||||
if (map.containsKey(typeID)) {
|
||||
TypeGroup tg = map.get(typeID);
|
||||
tg.score += score2;
|
||||
tg.count++;
|
||||
} else {
|
||||
map.put(id, new TypeGroup(id, type.getString("name"), score));
|
||||
map.put(typeID, new TypeGroup(typeID, typeName, score2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -44,7 +44,6 @@ public class ReconJudgeOneCellCommand extends Command {
|
||||
|
||||
match = new ReconCandidate(
|
||||
topicID,
|
||||
request.getParameter("topicGUID"),
|
||||
request.getParameter("topicName"),
|
||||
request.getParameter("types").split(","),
|
||||
scoreString != null ? Double.parseDouble(scoreString) : 100
|
||||
@ -57,7 +56,9 @@ public class ReconJudgeOneCellCommand extends Command {
|
||||
judgment,
|
||||
rowIndex,
|
||||
cellIndex,
|
||||
match
|
||||
match,
|
||||
request.getParameter("identifierSpace"),
|
||||
request.getParameter("schemaSpace")
|
||||
);
|
||||
|
||||
HistoryEntry historyEntry = project.processManager.queueProcess(process);
|
||||
@ -88,10 +89,13 @@ public class ReconJudgeOneCellCommand extends Command {
|
||||
|
||||
protected static class JudgeOneCellProcess extends QuickHistoryEntryProcess {
|
||||
|
||||
final int rowIndex;
|
||||
final int cellIndex;
|
||||
final Judgment judgment;
|
||||
final ReconCandidate match;
|
||||
final int rowIndex;
|
||||
final int cellIndex;
|
||||
final Judgment judgment;
|
||||
final ReconCandidate match;
|
||||
final String identifierSpace;
|
||||
final String schemaSpace;
|
||||
|
||||
Cell newCell;
|
||||
|
||||
JudgeOneCellProcess(
|
||||
@ -100,7 +104,9 @@ public class ReconJudgeOneCellCommand extends Command {
|
||||
Judgment judgment,
|
||||
int rowIndex,
|
||||
int cellIndex,
|
||||
ReconCandidate match
|
||||
ReconCandidate match,
|
||||
String identifierSpace,
|
||||
String schemaSpace
|
||||
) {
|
||||
super(project, briefDescription);
|
||||
|
||||
@ -108,6 +114,8 @@ public class ReconJudgeOneCellCommand extends Command {
|
||||
this.rowIndex = rowIndex;
|
||||
this.cellIndex = cellIndex;
|
||||
this.match = match;
|
||||
this.identifierSpace = identifierSpace;
|
||||
this.schemaSpace = schemaSpace;
|
||||
}
|
||||
|
||||
protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception {
|
||||
@ -125,7 +133,7 @@ public class ReconJudgeOneCellCommand extends Command {
|
||||
|
||||
newCell = new Cell(
|
||||
cell.value,
|
||||
cell.recon == null ? new Recon(historyEntryID) : cell.recon.dup(historyEntryID)
|
||||
cell.recon == null ? new Recon(historyEntryID, identifierSpace, schemaSpace) : cell.recon.dup(historyEntryID)
|
||||
);
|
||||
|
||||
String cellDescription =
|
||||
@ -152,16 +160,17 @@ public class ReconJudgeOneCellCommand extends Command {
|
||||
} else {
|
||||
newCell.recon.judgment = Recon.Judgment.Matched;
|
||||
newCell.recon.match = this.match;
|
||||
|
||||
for (int m = 0; m < newCell.recon.candidates.size(); m++) {
|
||||
if (newCell.recon.candidates.get(m).topicGUID.equals(this.match.topicGUID)) {
|
||||
newCell.recon.matchRank = m;
|
||||
break;
|
||||
if (newCell.recon.candidates != null) {
|
||||
for (int m = 0; m < newCell.recon.candidates.size(); m++) {
|
||||
if (newCell.recon.candidates.get(m).id.equals(this.match.id)) {
|
||||
newCell.recon.matchRank = m;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
description = "Match " + this.match.topicName +
|
||||
" (" + match.topicID + ") to " +
|
||||
|
||||
description = "Match " + this.match.name +
|
||||
" (" + match.id + ") to " +
|
||||
cellDescription;
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,6 @@ public class ReconJudgeSimilarCellsCommand extends EngineDependentCommand {
|
||||
|
||||
match = new ReconCandidate(
|
||||
topicID,
|
||||
request.getParameter("topicGUID"),
|
||||
request.getParameter("topicName"),
|
||||
request.getParameter("types").split(","),
|
||||
scoreString != null ? Double.parseDouble(scoreString) : 100
|
||||
|
@ -19,12 +19,17 @@ public class ReconMatchSpecificTopicCommand extends EngineDependentCommand {
|
||||
String columnName = request.getParameter("columnName");
|
||||
ReconCandidate match = new ReconCandidate(
|
||||
request.getParameter("topicID"),
|
||||
request.getParameter("topicGUID"),
|
||||
request.getParameter("topicName"),
|
||||
request.getParameter("types").split(","),
|
||||
100
|
||||
);
|
||||
|
||||
return new ReconMatchSpecificTopicOperation(engineConfig, columnName, match);
|
||||
return new ReconMatchSpecificTopicOperation(
|
||||
engineConfig,
|
||||
columnName,
|
||||
match,
|
||||
request.getParameter("identifierSpace"),
|
||||
request.getParameter("schemaSpace")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -86,11 +86,11 @@ public class XlsExporter implements Exporter {
|
||||
Cell cell = row.cells.get(cellIndex);
|
||||
if (cell != null) {
|
||||
if (cell.recon != null && cell.recon.match != null) {
|
||||
c.setCellValue(cell.recon.match.topicName);
|
||||
c.setCellValue(cell.recon.match.name);
|
||||
|
||||
HSSFHyperlink hl = new HSSFHyperlink(HSSFHyperlink.LINK_URL);
|
||||
hl.setLabel(cell.recon.match.topicName);
|
||||
hl.setAddress("http://www.freebase.com/view" + cell.recon.match.topicID);
|
||||
hl.setLabel(cell.recon.match.name);
|
||||
hl.setAddress("http://www.freebase.com/view" + cell.recon.match.id);
|
||||
|
||||
c.setHyperlink(hl);
|
||||
} else if (cell.value != null) {
|
||||
|
@ -207,9 +207,9 @@ public class ExcelImporter implements Importer {
|
||||
recon = reconMap.get(id);
|
||||
recon.judgmentBatchSize++;
|
||||
} else {
|
||||
recon = new Recon(0);
|
||||
recon = new Recon(0, null, null);
|
||||
recon.service = "import";
|
||||
recon.match = new ReconCandidate(id, "", value.toString(), new String[0], 100);
|
||||
recon.match = new ReconCandidate(id, value.toString(), new String[0], 100);
|
||||
recon.matchRank = 0;
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.judgmentAction = "auto";
|
||||
|
@ -59,8 +59,11 @@ public class Recon implements HasFields, Jsonizable {
|
||||
}
|
||||
|
||||
final public long id;
|
||||
public Object[] features = new Object[Feature_max];
|
||||
public String service = "unknown";
|
||||
public String identifierSpace = null;
|
||||
public String schemaSpace = null;
|
||||
|
||||
public Object[] features = new Object[Feature_max];
|
||||
public List<ReconCandidate> candidates;
|
||||
|
||||
public Judgment judgment = Judgment.None;
|
||||
@ -71,9 +74,18 @@ public class Recon implements HasFields, Jsonizable {
|
||||
public ReconCandidate match = null;
|
||||
public int matchRank = -1;
|
||||
|
||||
public Recon(long judgmentHistoryEntry) {
|
||||
static public Recon makeFreebaseRecon(long judgmentHistoryEntry) {
|
||||
return new Recon(
|
||||
judgmentHistoryEntry,
|
||||
"http://rdf.freebase.com/ns/type.object.id",
|
||||
"http://rdf.freebase.com/ns/type.object.id");
|
||||
}
|
||||
|
||||
public Recon(long judgmentHistoryEntry, String identifierSpace, String schemaSpace) {
|
||||
id = System.currentTimeMillis() * 1000000 + Math.round(Math.random() * 1000000);
|
||||
this.judgmentHistoryEntry = judgmentHistoryEntry;
|
||||
this.identifierSpace = identifierSpace;
|
||||
this.schemaSpace = schemaSpace;
|
||||
}
|
||||
|
||||
protected Recon(long id, long judgmentHistoryEntry) {
|
||||
@ -82,7 +94,7 @@ public class Recon implements HasFields, Jsonizable {
|
||||
}
|
||||
|
||||
public Recon dup(long judgmentHistoryEntry) {
|
||||
Recon r = new Recon(judgmentHistoryEntry);
|
||||
Recon r = new Recon(judgmentHistoryEntry, identifierSpace, schemaSpace);
|
||||
|
||||
System.arraycopy(features, 0, r.features, 0, features.length);
|
||||
|
||||
@ -165,6 +177,12 @@ public class Recon implements HasFields, Jsonizable {
|
||||
return matchRank;
|
||||
} else if ("features".equals(name)) {
|
||||
return new Features();
|
||||
} else if ("service".equals(name)) {
|
||||
return service;
|
||||
} else if ("identifierSpace".equals(name)) {
|
||||
return identifierSpace;
|
||||
} else if ("schemaSpace".equals(name)) {
|
||||
return schemaSpace;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
@ -195,6 +213,10 @@ public class Recon implements HasFields, Jsonizable {
|
||||
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(id);
|
||||
writer.key("service"); writer.value(service);
|
||||
writer.key("identifierSpace"); writer.value(identifierSpace);
|
||||
writer.key("schemaSpace"); writer.value(schemaSpace);
|
||||
|
||||
if (saveMode) {
|
||||
writer.key("judgmentHistoryEntry"); writer.value(judgmentHistoryEntry);
|
||||
}
|
||||
@ -202,13 +224,13 @@ public class Recon implements HasFields, Jsonizable {
|
||||
writer.key("j"); writer.value(judgmentToString());
|
||||
if (match != null) {
|
||||
writer.key("m");
|
||||
writer.value(match.topicID);
|
||||
writer.value(match.id);
|
||||
}
|
||||
if (match == null || saveMode) {
|
||||
writer.key("c"); writer.array();
|
||||
if (candidates != null) {
|
||||
for (ReconCandidate c : candidates) {
|
||||
writer.value(c.topicID);
|
||||
writer.value(c.id);
|
||||
}
|
||||
}
|
||||
writer.endArray();
|
||||
@ -222,7 +244,6 @@ public class Recon implements HasFields, Jsonizable {
|
||||
}
|
||||
writer.endArray();
|
||||
|
||||
writer.key("service"); writer.value(service);
|
||||
writer.key("judgmentAction"); writer.value(judgmentAction);
|
||||
writer.key("judgmentBatchSize"); writer.value(judgmentBatchSize);
|
||||
|
||||
@ -317,6 +338,10 @@ public class Recon implements HasFields, Jsonizable {
|
||||
}
|
||||
} else if ("service".equals(fieldName)) {
|
||||
recon.service = jp.getText();
|
||||
} else if ("identifierSpace".equals(fieldName)) {
|
||||
recon.identifierSpace = jp.getText();
|
||||
} else if ("schemaSpace".equals(fieldName)) {
|
||||
recon.schemaSpace = jp.getText();
|
||||
} else if ("judgmentAction".equals(fieldName)) {
|
||||
recon.judgmentAction = jp.getText();
|
||||
} else if ("judgmentBatchSize".equals(fieldName)) {
|
||||
|
@ -14,29 +14,25 @@ import com.metaweb.gridworks.Jsonizable;
|
||||
import com.metaweb.gridworks.expr.HasFields;
|
||||
|
||||
public class ReconCandidate implements HasFields, Jsonizable {
|
||||
final public String topicID;
|
||||
final public String topicGUID;
|
||||
final public String topicName;
|
||||
final public String[] typeIDs;
|
||||
final public String id;
|
||||
final public String name;
|
||||
final public String[] types;
|
||||
final public double score;
|
||||
|
||||
public ReconCandidate(String topicID, String topicGUID, String topicName, String[] typeIDs, double score) {
|
||||
this.topicID = topicID;
|
||||
this.topicGUID = topicGUID;
|
||||
this.topicName = topicName;
|
||||
this.typeIDs = typeIDs;
|
||||
public ReconCandidate(String topicID, String topicName, String[] typeIDs, double score) {
|
||||
this.id = topicID;
|
||||
this.name = topicName;
|
||||
this.types = typeIDs;
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
public Object getField(String name, Properties bindings) {
|
||||
if ("id".equals(name)) {
|
||||
return topicID;
|
||||
} else if ("guid".equals(name)) {
|
||||
return topicGUID;
|
||||
return id;
|
||||
} else if ("name".equals(name)) {
|
||||
return topicName;
|
||||
return this.name;
|
||||
} else if ("type".equals(name)) {
|
||||
return typeIDs;
|
||||
return types;
|
||||
} else if ("score".equals(name)) {
|
||||
return score;
|
||||
}
|
||||
@ -51,14 +47,13 @@ public class ReconCandidate implements HasFields, Jsonizable {
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(topicID);
|
||||
writer.key("guid"); writer.value(topicGUID);
|
||||
writer.key("name"); writer.value(topicName);
|
||||
writer.key("id"); writer.value(id);
|
||||
writer.key("name"); writer.value(name);
|
||||
writer.key("score"); writer.value(score);
|
||||
|
||||
/* if (!options.containsKey("reconCandidateOmitTypes")) */ {
|
||||
writer.key("types"); writer.array();
|
||||
for (String typeID : typeIDs) {
|
||||
for (String typeID : types) {
|
||||
writer.value(typeID);
|
||||
}
|
||||
writer.endArray();
|
||||
@ -84,7 +79,6 @@ public class ReconCandidate implements HasFields, Jsonizable {
|
||||
}
|
||||
|
||||
String id = null;
|
||||
String guid = null;
|
||||
String name = null;
|
||||
List<String> types = null;
|
||||
double score = 0;
|
||||
@ -95,8 +89,6 @@ public class ReconCandidate implements HasFields, Jsonizable {
|
||||
|
||||
if ("id".equals(fieldName)) {
|
||||
id = jp.getText();
|
||||
} else if ("guid".equals(fieldName)) {
|
||||
guid = jp.getText();
|
||||
} else if ("name".equals(fieldName)) {
|
||||
name = jp.getText();
|
||||
} else if ("score".equals(fieldName)) {
|
||||
@ -124,7 +116,6 @@ public class ReconCandidate implements HasFields, Jsonizable {
|
||||
|
||||
return new ReconCandidate(
|
||||
id,
|
||||
guid,
|
||||
name,
|
||||
typesA,
|
||||
score
|
||||
|
@ -201,10 +201,10 @@ public class DataExtensionChange implements Change {
|
||||
if (value instanceof ReconCandidate) {
|
||||
ReconCandidate rc = (ReconCandidate) value;
|
||||
Recon recon;
|
||||
if (reconMap.containsKey(rc.topicGUID)) {
|
||||
recon = reconMap.get(rc.topicGUID);
|
||||
if (reconMap.containsKey(rc.id)) {
|
||||
recon = reconMap.get(rc.id);
|
||||
} else {
|
||||
recon = new Recon(_historyEntryID);
|
||||
recon = Recon.makeFreebaseRecon(_historyEntryID);
|
||||
recon.addCandidate(rc);
|
||||
recon.service = "mql";
|
||||
recon.match = rc;
|
||||
@ -213,9 +213,9 @@ public class DataExtensionChange implements Change {
|
||||
recon.judgmentAction = "auto";
|
||||
recon.judgmentBatchSize = 1;
|
||||
|
||||
reconMap.put(rc.topicGUID, recon);
|
||||
reconMap.put(rc.id, recon);
|
||||
}
|
||||
cell = new Cell(rc.topicName, recon);
|
||||
cell = new Cell(rc.name, recon);
|
||||
} else {
|
||||
cell = new Cell((Serializable) value, null);
|
||||
}
|
||||
|
@ -141,13 +141,12 @@ public class GuidBasedReconConfig extends StrictReconConfig {
|
||||
|
||||
ReconCandidate candidate = new ReconCandidate(
|
||||
result.getString("id"),
|
||||
guid,
|
||||
result.getString("name"),
|
||||
typeIDs,
|
||||
100
|
||||
);
|
||||
|
||||
Recon recon = new Recon(historyEntryID);
|
||||
Recon recon = Recon.makeFreebaseRecon(historyEntryID);
|
||||
recon.addCandidate(candidate);
|
||||
recon.service = "mql";
|
||||
recon.judgment = Judgment.Matched;
|
||||
|
@ -145,13 +145,12 @@ public class IdBasedReconConfig extends StrictReconConfig {
|
||||
|
||||
ReconCandidate candidate = new ReconCandidate(
|
||||
id,
|
||||
result.getString("guid"),
|
||||
result.getString("name"),
|
||||
typeIDs,
|
||||
100
|
||||
);
|
||||
|
||||
Recon recon = new Recon(historyEntryID);
|
||||
Recon recon = Recon.makeFreebaseRecon(historyEntryID);
|
||||
recon.addCandidate(candidate);
|
||||
recon.service = "mql";
|
||||
recon.judgment = Judgment.Matched;
|
||||
|
@ -159,13 +159,12 @@ public class KeyBasedReconConfig extends StrictReconConfig {
|
||||
|
||||
ReconCandidate candidate = new ReconCandidate(
|
||||
result.getString("id"),
|
||||
result.getString("guid"),
|
||||
result.getString("name"),
|
||||
typeIDs,
|
||||
100
|
||||
);
|
||||
|
||||
Recon recon = new Recon(historyEntryID);
|
||||
Recon recon = Recon.makeFreebaseRecon(historyEntryID);
|
||||
recon.addCandidate(candidate);
|
||||
recon.service = "mql";
|
||||
recon.judgment = Judgment.Matched;
|
||||
|
@ -17,8 +17,10 @@ import com.metaweb.gridworks.model.Row;
|
||||
abstract public class ReconConfig implements Jsonizable {
|
||||
static public ReconConfig reconstruct(JSONObject obj) throws Exception {
|
||||
String mode = obj.getString("mode");
|
||||
if ("heuristic".equals(mode)) {
|
||||
return HeuristicReconConfig.reconstruct(obj);
|
||||
if ("standard-service".equals(mode) ||
|
||||
"heuristic".equals(mode) // legacy
|
||||
) {
|
||||
return StandardReconConfig.reconstruct(obj);
|
||||
} else if ("strict".equals(mode)) {
|
||||
return StrictReconConfig.reconstruct(obj);
|
||||
} else if ("extend".equals(mode)) {
|
||||
|
@ -1,5 +1,6 @@
|
||||
package com.metaweb.gridworks.model.recon;
|
||||
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.net.URL;
|
||||
@ -15,6 +16,8 @@ import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.metaweb.gridworks.expr.ExpressionUtils;
|
||||
import com.metaweb.gridworks.model.Cell;
|
||||
@ -27,7 +30,9 @@ import com.metaweb.gridworks.model.RecordModel.RowDependency;
|
||||
import com.metaweb.gridworks.protograph.FreebaseProperty;
|
||||
import com.metaweb.gridworks.util.ParsingUtilities;
|
||||
|
||||
public class HeuristicReconConfig extends ReconConfig {
|
||||
public class StandardReconConfig extends ReconConfig {
|
||||
final static Logger logger = LoggerFactory.getLogger("gridworks-standard-recon");
|
||||
|
||||
static public class ColumnDetail {
|
||||
final public String columnName;
|
||||
final public FreebaseProperty property;
|
||||
@ -63,8 +68,10 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
|
||||
JSONObject t = obj.getJSONObject("type");
|
||||
|
||||
return new HeuristicReconConfig(
|
||||
return new StandardReconConfig(
|
||||
obj.getString("service"),
|
||||
obj.has("identifierSpace") ? obj.getString("identifierSpace") : null,
|
||||
obj.has("schemaSpace") ? obj.getString("schemaSpace") : null,
|
||||
t.getString("id"),
|
||||
t.getString("name"),
|
||||
obj.getBoolean("autoMatch"),
|
||||
@ -72,7 +79,7 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
);
|
||||
}
|
||||
|
||||
static protected class HeuristicReconJob extends ReconJob {
|
||||
static protected class StandardReconJob extends ReconJob {
|
||||
String text;
|
||||
String code;
|
||||
|
||||
@ -81,20 +88,29 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
}
|
||||
}
|
||||
|
||||
final public String service; // either "recon" or "relevance"
|
||||
final public String service;
|
||||
final public String identifierSpace;
|
||||
final public String schemaSpace;
|
||||
|
||||
final public String typeID;
|
||||
final public String typeName;
|
||||
final public boolean autoMatch;
|
||||
final public List<ColumnDetail> columnDetails;
|
||||
|
||||
public HeuristicReconConfig(
|
||||
public StandardReconConfig(
|
||||
String service,
|
||||
String identifierSpace,
|
||||
String schemaSpace,
|
||||
|
||||
String typeID,
|
||||
String typeName,
|
||||
boolean autoMatch,
|
||||
List<ColumnDetail> columnDetails
|
||||
) {
|
||||
this.service = service;
|
||||
this.identifierSpace = identifierSpace;
|
||||
this.schemaSpace = schemaSpace;
|
||||
|
||||
this.typeID = typeID;
|
||||
this.typeName = typeName;
|
||||
this.autoMatch = autoMatch;
|
||||
@ -105,8 +121,10 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
throws JSONException {
|
||||
|
||||
writer.object();
|
||||
writer.key("mode"); writer.value("heuristic");
|
||||
writer.key("service"); writer.value(service);
|
||||
writer.key("mode"); writer.value("standard-service");
|
||||
writer.key("service"); writer.value(service);
|
||||
writer.key("identifierSpace"); writer.value(identifierSpace);
|
||||
writer.key("schemaSpace"); writer.value(schemaSpace);
|
||||
writer.key("type");
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(typeID);
|
||||
@ -127,7 +145,7 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
|
||||
@Override
|
||||
public int getBatchSize() {
|
||||
return 10;
|
||||
return 7;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -139,17 +157,18 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
public ReconJob createJob(Project project, int rowIndex, Row row,
|
||||
String columnName, Cell cell) {
|
||||
|
||||
HeuristicReconJob job = new HeuristicReconJob();
|
||||
if ("relevance".equals(service)) {
|
||||
job.code = job.text = cell.value.toString();
|
||||
} else {
|
||||
try {
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
JSONWriter jsonWriter = new JSONWriter(stringWriter);
|
||||
|
||||
jsonWriter.object();
|
||||
jsonWriter.key("/type/object/name"); jsonWriter.value(cell.value.toString());
|
||||
jsonWriter.key("/type/object/type"); jsonWriter.value(typeID);
|
||||
StandardReconJob job = new StandardReconJob();
|
||||
|
||||
try {
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
JSONWriter jsonWriter = new JSONWriter(stringWriter);
|
||||
|
||||
jsonWriter.object();
|
||||
jsonWriter.key("query"); jsonWriter.value(cell.value.toString());
|
||||
jsonWriter.key("type"); jsonWriter.value(typeID);
|
||||
if (columnDetails.size() > 0) {
|
||||
jsonWriter.key("properties");
|
||||
jsonWriter.array();
|
||||
|
||||
for (ColumnDetail c : columnDetails) {
|
||||
int detailCellIndex = project.columnModel.getColumnByName(c.columnName).getCellIndex();
|
||||
@ -168,72 +187,75 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cell2 != null && ExpressionUtils.isNonBlankData(cell2.value)) {
|
||||
jsonWriter.key(c.property.id);
|
||||
jsonWriter.object();
|
||||
|
||||
jsonWriter.key("pid"); jsonWriter.value(c.property.id);
|
||||
jsonWriter.key("v");
|
||||
if (cell2.recon != null && cell2.recon.match != null) {
|
||||
jsonWriter.object();
|
||||
jsonWriter.key("id"); jsonWriter.value(cell2.recon.match.topicID);
|
||||
jsonWriter.key("name"); jsonWriter.value(cell2.recon.match.topicName);
|
||||
jsonWriter.key("id"); jsonWriter.value(cell2.recon.match.id);
|
||||
jsonWriter.key("name"); jsonWriter.value(cell2.recon.match.name);
|
||||
jsonWriter.endObject();
|
||||
} else {
|
||||
jsonWriter.value(cell2.value.toString());
|
||||
}
|
||||
|
||||
jsonWriter.endObject();
|
||||
}
|
||||
}
|
||||
jsonWriter.endObject();
|
||||
|
||||
job.text = cell.value.toString();
|
||||
job.code = stringWriter.toString();
|
||||
} catch (JSONException e) {
|
||||
//
|
||||
}
|
||||
|
||||
jsonWriter.endArray();
|
||||
}
|
||||
jsonWriter.endObject();
|
||||
|
||||
job.text = cell.value.toString();
|
||||
job.code = stringWriter.toString();
|
||||
} catch (JSONException e) {
|
||||
//
|
||||
}
|
||||
return job;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID) {
|
||||
if ("relevance".equals(service)) {
|
||||
return batchReconUsingRelevance(jobs, historyEntryID);
|
||||
} else {
|
||||
return batchReconUsingReconService(jobs, historyEntryID);
|
||||
}
|
||||
}
|
||||
|
||||
protected List<Recon> batchReconUsingRelevance(List<ReconJob> jobs, long historyEntryID) {
|
||||
List<Recon> recons = new ArrayList<Recon>(jobs.size());
|
||||
|
||||
try {
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
JSONWriter jsonWriter = new JSONWriter(stringWriter);
|
||||
|
||||
jsonWriter.object();
|
||||
for (int i = 0; i < jobs.size(); i++) {
|
||||
HeuristicReconJob job = (HeuristicReconJob) jobs.get(i);
|
||||
|
||||
jsonWriter.key("q" + i + ":search");
|
||||
|
||||
jsonWriter.object();
|
||||
jsonWriter.key("query"); jsonWriter.value(job.text);
|
||||
jsonWriter.key("limit"); jsonWriter.value(3);
|
||||
jsonWriter.key("type"); jsonWriter.value(typeID);
|
||||
jsonWriter.key("type_strict"); jsonWriter.value("should");
|
||||
jsonWriter.key("type_exclude"); jsonWriter.value("/common/image");
|
||||
jsonWriter.key("domain_exclude"); jsonWriter.value("/freebase");
|
||||
jsonWriter.key("stemmed"); jsonWriter.value(1);
|
||||
jsonWriter.endObject();
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
|
||||
stringWriter.write("{");
|
||||
for (int i = 0; i < jobs.size(); i++) {
|
||||
StandardReconJob job = (StandardReconJob) jobs.get(i);
|
||||
if (i > 0) {
|
||||
stringWriter.write(",");
|
||||
}
|
||||
jsonWriter.endObject();
|
||||
|
||||
StringBuffer sb = new StringBuffer(1024);
|
||||
sb.append("http://api.freebase.com/api/service/search?indent=1&queries=");
|
||||
sb.append(ParsingUtilities.encode(stringWriter.toString()));
|
||||
|
||||
URL url = new URL(sb.toString());
|
||||
stringWriter.write("\"q" + i + "\":");
|
||||
stringWriter.write(job.code);
|
||||
}
|
||||
stringWriter.write("}");
|
||||
String queriesString = stringWriter.toString();
|
||||
|
||||
try {
|
||||
URL url = new URL(service);
|
||||
URLConnection connection = url.openConnection();
|
||||
connection.setConnectTimeout(5000);
|
||||
connection.connect();
|
||||
{
|
||||
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
|
||||
connection.setConnectTimeout(30000);
|
||||
connection.setDoOutput(true);
|
||||
|
||||
DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
|
||||
try {
|
||||
String body = "queries=" + ParsingUtilities.encode(queriesString);
|
||||
|
||||
dos.writeBytes(body);
|
||||
} finally {
|
||||
dos.flush();
|
||||
dos.close();
|
||||
}
|
||||
|
||||
connection.connect();
|
||||
}
|
||||
|
||||
InputStream is = connection.getInputStream();
|
||||
try {
|
||||
@ -241,40 +263,48 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
|
||||
|
||||
for (int i = 0; i < jobs.size(); i++) {
|
||||
HeuristicReconJob job = (HeuristicReconJob) jobs.get(i);
|
||||
|
||||
String text = job.text;
|
||||
String key = "q" + i + ":search";
|
||||
if (!o.has(key)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
StandardReconJob job = (StandardReconJob) jobs.get(i);
|
||||
Recon recon = null;
|
||||
|
||||
JSONObject o2 = o.getJSONObject(key);
|
||||
if (o2.has("result")) {
|
||||
JSONArray results = o2.getJSONArray("result");
|
||||
|
||||
recon = createReconFromRelevanceResults(text, results, historyEntryID);
|
||||
} else {
|
||||
recon = new Recon(historyEntryID);
|
||||
String text = job.text;
|
||||
String key = "q" + i;
|
||||
if (o.has(key)) {
|
||||
JSONObject o2 = o.getJSONObject(key);
|
||||
if (o2.has("result")) {
|
||||
JSONArray results = o2.getJSONArray("result");
|
||||
|
||||
recon = createReconServiceResults(text, results, historyEntryID);
|
||||
}
|
||||
}
|
||||
|
||||
recon.service = "recon";
|
||||
if (recon == null) {
|
||||
recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
||||
}
|
||||
recon.service = service;
|
||||
|
||||
recons.add(recon);
|
||||
}
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
logger.error("Failed to batch recon with load:\n" + queriesString, e);
|
||||
}
|
||||
|
||||
while (recons.size() < jobs.size()) {
|
||||
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
||||
recon.service = service;
|
||||
recon.identifierSpace = identifierSpace;
|
||||
recon.schemaSpace = schemaSpace;
|
||||
|
||||
recons.add(recon);
|
||||
}
|
||||
|
||||
return recons;
|
||||
}
|
||||
|
||||
protected Recon createReconFromRelevanceResults(String text, JSONArray results, long historyEntryID) {
|
||||
Recon recon = new Recon(historyEntryID);
|
||||
protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) {
|
||||
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
||||
try {
|
||||
int length = results.length();
|
||||
int count = 0;
|
||||
@ -287,13 +317,14 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
JSONArray types = result.getJSONArray("type");
|
||||
String[] typeIDs = new String[types.length()];
|
||||
for (int j = 0; j < typeIDs.length; j++) {
|
||||
typeIDs[j] = types.getJSONObject(j).getString("id");
|
||||
Object type = types.get(j);
|
||||
typeIDs[j] = type instanceof String ? (String) type :
|
||||
((JSONObject) type).getString("id");
|
||||
}
|
||||
|
||||
double score = result.getDouble("relevance:score");
|
||||
double score = result.getDouble("score");
|
||||
ReconCandidate candidate = new ReconCandidate(
|
||||
result.getString("id"),
|
||||
result.getString("guid"),
|
||||
result.getString("name"),
|
||||
typeIDs,
|
||||
score
|
||||
@ -306,12 +337,12 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
if (count > 0) {
|
||||
ReconCandidate candidate = recon.candidates.get(0);
|
||||
|
||||
recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.topicName));
|
||||
recon.setFeature(Recon.Feature_nameLevenshtein, StringUtils.getLevenshteinDistance(text, candidate.topicName));
|
||||
recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.topicName));
|
||||
recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.name));
|
||||
recon.setFeature(Recon.Feature_nameLevenshtein, StringUtils.getLevenshteinDistance(text, candidate.name));
|
||||
recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.name));
|
||||
|
||||
recon.setFeature(Recon.Feature_typeMatch, false);
|
||||
for (String typeID : candidate.typeIDs) {
|
||||
for (String typeID : candidate.types) {
|
||||
if (this.typeID.equals(typeID)) {
|
||||
recon.setFeature(Recon.Feature_typeMatch, true);
|
||||
if (autoMatch && candidate.score >= 100 && (count == 1 || candidate.score / recon.candidates.get(1).score >= 1.5)) {
|
||||
@ -330,106 +361,6 @@ public class HeuristicReconConfig extends ReconConfig {
|
||||
return recon;
|
||||
}
|
||||
|
||||
// Base URL that batchReconUsingReconService queries, one HTTP request per job.
static final String s_reconService = "http://data.labs.freebase.com/recon/query";
|
||||
|
||||
/**
 * Reconciles each job with its own HTTP request to s_reconService and returns one Recon
 * per job, in the same order as the input list.
 *
 * For every job the request URL is s_reconService + "?limit=5&q=" + the encoded job.code.
 * The response body is parsed as a JSON array and converted by createReconFromReconResults.
 * Any exception for a job is printed and that job falls back to an empty Recon, so a single
 * failure does not abort the batch. Every returned Recon has service set to "recon".
 *
 * NOTE(review): the hunk header (106 lines before, 6 after) suggests this whole method is
 * removed by the commit shown in this diff - confirm against the repository.
 *
 * @param jobs           jobs to reconcile; each element is cast to HeuristicReconJob
 * @param historyEntryID id passed to every Recon that is created
 * @return a list with exactly jobs.size() Recon objects
 */
protected List<Recon> batchReconUsingReconService(List<ReconJob> jobs, long historyEntryID) {
|
||||
List<Recon> recons = new ArrayList<Recon>(jobs.size());
|
||||
|
||||
for (int i = 0; i < jobs.size(); i++) {
|
||||
HeuristicReconJob job = (HeuristicReconJob) jobs.get(i);
|
||||
Recon recon = null;
|
||||
try {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(s_reconService + "?limit=5&q=");
|
||||
sb.append(ParsingUtilities.encode(job.code));
|
||||
|
||||
URL url = new URL(sb.toString());
|
||||
URLConnection connection = url.openConnection();
|
||||
connection.setConnectTimeout(5000);
|
||||
connection.connect();
|
||||
|
||||
InputStream is = connection.getInputStream();
|
||||
try {
|
||||
String s = ParsingUtilities.inputStreamToString(is);
|
||||
JSONArray a = ParsingUtilities.evaluateJsonStringToArray(s);
|
||||
|
||||
recon = createReconFromReconResults(job.text, a, historyEntryID);
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
// recon is still null when the request or parsing failed; substitute an empty Recon
|
||||
if (recon == null) {
|
||||
recon = new Recon(historyEntryID);
|
||||
}
|
||||
recon.service = "recon";
|
||||
recons.add(recon);
|
||||
}
|
||||
|
||||
return recons;
|
||||
}
|
||||
|
||||
/**
 * Builds a Recon from the JSON array returned by the recon service.
 *
 * Results without a "name" key are skipped, and at most 3 candidates are kept. Each candidate
 * takes its id, the first entry of the "name" array, the "type" strings and the "score".
 * Name and type features are computed only when the very first array element (i == 0) is
 * accepted, comparing it against the cell text. If that element carries this config's typeID,
 * and autoMatch is set and the result has "match": true, the Recon is marked as auto-matched.
 *
 * A JSONException is printed and ends processing; the Recon is returned with whatever
 * candidates were added before the failure.
 *
 * NOTE(review): the hunk header (106 lines before, 6 after) suggests this whole method is
 * removed by the commit shown in this diff - confirm against the repository.
 *
 * @param text           the cell text the candidates are compared against
 * @param results        JSON array of result objects from the service
 * @param historyEntryID id passed to the new Recon
 * @return the populated Recon (never null)
 */
protected Recon createReconFromReconResults(String text, JSONArray results, long historyEntryID) {
|
||||
Recon recon = new Recon(historyEntryID);
|
||||
try {
|
||||
int length = results.length();
|
||||
int count = 0;
|
||||
for (int i = 0; i < length && count < 3; i++) {
|
||||
JSONObject result = results.getJSONObject(i);
|
||||
if (!result.has("name")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String id = result.getString("id");
|
||||
JSONArray names = result.getJSONArray("name");
|
||||
double score = result.getDouble("score");
|
||||
|
||||
JSONArray types = result.getJSONArray("type");
|
||||
String[] typeIDs = new String[types.length()];
|
||||
for (int j = 0; j < typeIDs.length; j++) {
|
||||
typeIDs[j] = types.getString(j);
|
||||
}
|
||||
|
||||
ReconCandidate candidate = new ReconCandidate(
|
||||
id,
|
||||
// presumably turns a "/guid/..." id into the "#..." GUID form - TODO confirm
|
||||
"#" + id.substring(6),
|
||||
names.getString(0),
|
||||
typeIDs,
|
||||
score
|
||||
);
|
||||
|
||||
// best match
|
||||
if (i == 0) {
|
||||
recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.topicName));
|
||||
recon.setFeature(Recon.Feature_nameLevenshtein, StringUtils.getLevenshteinDistance(text, candidate.topicName));
|
||||
recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.topicName));
|
||||
|
||||
recon.setFeature(Recon.Feature_typeMatch, false);
|
||||
for (String typeID : candidate.typeIDs) {
|
||||
if (this.typeID.equals(typeID)) {
|
||||
recon.setFeature(Recon.Feature_typeMatch, true);
|
||||
if (autoMatch && result.has("match") && result.getBoolean("match")) {
|
||||
recon.match = candidate;
|
||||
recon.matchRank = 0;
|
||||
recon.judgment = Judgment.Matched;
|
||||
recon.judgmentAction = "auto";
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
recon.addCandidate(candidate);
|
||||
count++;
|
||||
}
|
||||
} catch (JSONException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return recon;
|
||||
}
|
||||
|
||||
static protected double wordDistance(String s1, String s2) {
|
||||
Set<String> words1 = breakWords(s1);
|
||||
Set<String> words2 = breakWords(s2);
|
@ -179,20 +179,20 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
int limit,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) {
|
||||
Set<String> guids = new HashSet<String>();
|
||||
Set<String> ids = new HashSet<String>();
|
||||
|
||||
int end;
|
||||
for (end = from; end < limit && guids.size() < 10; end++) {
|
||||
for (end = from; end < limit && ids.size() < 10; end++) {
|
||||
int index = rowIndices.get(end);
|
||||
Row row = _project.rows.get(index);
|
||||
Cell cell = row.getCell(_cellIndex);
|
||||
|
||||
guids.add(cell.recon.match.topicGUID);
|
||||
ids.add(cell.recon.match.id);
|
||||
}
|
||||
|
||||
Map<String, DataExtension> map = null;
|
||||
try {
|
||||
map = _job.extend(guids, reconCandidateMap);
|
||||
map = _job.extend(ids, reconCandidateMap);
|
||||
} catch (Exception e) {
|
||||
map = new HashMap<String, DataExtension>();
|
||||
}
|
||||
@ -201,7 +201,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
|
||||
int index = rowIndices.get(i);
|
||||
Row row = _project.rows.get(index);
|
||||
Cell cell = row.getCell(_cellIndex);
|
||||
String guid = cell.recon.match.topicGUID;
|
||||
String guid = cell.recon.match.id;
|
||||
|
||||
if (map.containsKey(guid)) {
|
||||
dataExtensions.add(map.get(guid));
|
||||
|
@ -47,7 +47,6 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
|
||||
match = new ReconCandidate(
|
||||
matchObj.getString("id"),
|
||||
matchObj.getString("guid"),
|
||||
matchObj.getString("name"),
|
||||
typeIDs,
|
||||
matchObj.getDouble("score")
|
||||
@ -116,8 +115,8 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
}
|
||||
} else if (_judgment == Judgment.Matched) {
|
||||
return "Match topic " +
|
||||
_match.topicName + " (" +
|
||||
_match.topicID + ") for cells containing \"" +
|
||||
_match.name + " (" +
|
||||
_match.id + ") for cells containing \"" +
|
||||
_similarValue + "\" in column " + _columnName;
|
||||
}
|
||||
throw new InternalError("Can't get here");
|
||||
@ -139,8 +138,8 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
}
|
||||
} else if (_judgment == Judgment.Matched) {
|
||||
return "Match topic " +
|
||||
_match.topicName + " (" +
|
||||
_match.topicID + ") for " +
|
||||
_match.name + " (" +
|
||||
_match.id + ") for " +
|
||||
cellChanges.size() + " cells containing \"" +
|
||||
_similarValue + "\" in column " + _columnName;
|
||||
}
|
||||
@ -166,70 +165,70 @@ public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOper
|
||||
|
||||
/**
 * Start hook; intentionally empty.
 *
 * NOTE(review): the comment line appears twice because this rendered diff shows both the
 * pre- and post-change copy of it (apparently a whitespace-only change) - confirm.
 */
@Override
|
||||
public void start(Project project) {
|
||||
// nothing to do
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
/**
 * End hook; intentionally empty.
 *
 * NOTE(review): the comment line appears twice because this rendered diff shows both the
 * pre- and post-change copy of it (apparently a whitespace-only change) - confirm.
 */
@Override
|
||||
public void end(Project project) {
|
||||
// nothing to do
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
public boolean visit(Project project, int rowIndex, Row row) {
|
||||
Cell cell = row.getCell(_cellIndex);
|
||||
if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) {
|
||||
String value = cell.value instanceof String ?
|
||||
((String) cell.value) : cell.value.toString();
|
||||
|
||||
String value = cell.value instanceof String ?
|
||||
((String) cell.value) : cell.value.toString();
|
||||
|
||||
if (_similarValue.equals(value)) {
|
||||
Recon recon = null;
|
||||
if (_judgment == Judgment.New && _shareNewTopics) {
|
||||
if (_sharedNewRecon == null) {
|
||||
_sharedNewRecon = new Recon(_historyEntryID);
|
||||
_sharedNewRecon.judgment = Judgment.New;
|
||||
_sharedNewRecon.judgmentBatchSize = 0;
|
||||
_sharedNewRecon.judgmentAction = "similar";
|
||||
}
|
||||
_sharedNewRecon.judgmentBatchSize++;
|
||||
|
||||
recon = _sharedNewRecon;
|
||||
} else {
|
||||
if (_dupReconMap.containsKey(cell.recon.id)) {
|
||||
recon = _dupReconMap.get(cell.recon.id);
|
||||
recon.judgmentBatchSize++;
|
||||
} else {
|
||||
recon = cell.recon.dup(_historyEntryID);
|
||||
recon.judgmentBatchSize = 1;
|
||||
recon.matchRank = -1;
|
||||
recon.judgmentAction = "similar";
|
||||
|
||||
if (_judgment == Judgment.Matched) {
|
||||
recon.judgment = Recon.Judgment.Matched;
|
||||
recon.match = _match;
|
||||
|
||||
if (recon.candidates != null) {
|
||||
for (int m = 0; m < recon.candidates.size(); m++) {
|
||||
if (recon.candidates.get(m).topicGUID.equals(_match.topicGUID)) {
|
||||
recon.matchRank = m;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (_judgment == Judgment.New) {
|
||||
recon.judgment = Recon.Judgment.New;
|
||||
recon.match = null;
|
||||
} else if (_judgment == Judgment.None) {
|
||||
recon.judgment = Recon.Judgment.None;
|
||||
recon.match = null;
|
||||
}
|
||||
|
||||
_dupReconMap.put(cell.recon.id, recon);
|
||||
}
|
||||
}
|
||||
|
||||
Cell newCell = new Cell(cell.value, recon);
|
||||
|
||||
CellChange cellChange = new CellChange(rowIndex, _cellIndex, cell, newCell);
|
||||
_cellChanges.add(cellChange);
|
||||
Recon recon = null;
|
||||
if (_judgment == Judgment.New && _shareNewTopics) {
|
||||
if (_sharedNewRecon == null) {
|
||||
_sharedNewRecon = new Recon(_historyEntryID, null, null);
|
||||
_sharedNewRecon.judgment = Judgment.New;
|
||||
_sharedNewRecon.judgmentBatchSize = 0;
|
||||
_sharedNewRecon.judgmentAction = "similar";
|
||||
}
|
||||
_sharedNewRecon.judgmentBatchSize++;
|
||||
|
||||
recon = _sharedNewRecon;
|
||||
} else {
|
||||
if (_dupReconMap.containsKey(cell.recon.id)) {
|
||||
recon = _dupReconMap.get(cell.recon.id);
|
||||
recon.judgmentBatchSize++;
|
||||
} else {
|
||||
recon = cell.recon.dup(_historyEntryID);
|
||||
recon.judgmentBatchSize = 1;
|
||||
recon.matchRank = -1;
|
||||
recon.judgmentAction = "similar";
|
||||
|
||||
if (_judgment == Judgment.Matched) {
|
||||
recon.judgment = Recon.Judgment.Matched;
|
||||
recon.match = _match;
|
||||
|
||||
if (recon.candidates != null) {
|
||||
for (int m = 0; m < recon.candidates.size(); m++) {
|
||||
if (recon.candidates.get(m).id.equals(_match.id)) {
|
||||
recon.matchRank = m;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (_judgment == Judgment.New) {
|
||||
recon.judgment = Recon.Judgment.New;
|
||||
recon.match = null;
|
||||
} else if (_judgment == Judgment.None) {
|
||||
recon.judgment = Recon.Judgment.None;
|
||||
recon.match = null;
|
||||
}
|
||||
|
||||
_dupReconMap.put(cell.recon.id, recon);
|
||||
}
|
||||
}
|
||||
|
||||
Cell newCell = new Cell(cell.value, recon);
|
||||
|
||||
CellChange cellChange = new CellChange(rowIndex, _cellIndex, cell, newCell);
|
||||
_cellChanges.add(cellChange);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
@ -106,7 +106,7 @@ public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperatio
|
||||
recon = sharedRecons.get(s);
|
||||
recon.judgmentBatchSize++;
|
||||
} else {
|
||||
recon = new Recon(historyEntryID);
|
||||
recon = new Recon(historyEntryID, null, null);
|
||||
recon.judgment = Judgment.New;
|
||||
recon.judgmentBatchSize = 1;
|
||||
recon.judgmentAction = "mass";
|
||||
@ -114,7 +114,7 @@ public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperatio
|
||||
sharedRecons.put(s, recon);
|
||||
}
|
||||
} else {
|
||||
recon = cell.recon == null ? new Recon(historyEntryID) : cell.recon.dup(historyEntryID);
|
||||
recon = cell.recon == null ? new Recon(historyEntryID, null, null) : cell.recon.dup(historyEntryID);
|
||||
recon.match = null;
|
||||
recon.matchRank = -1;
|
||||
recon.judgment = Judgment.New;
|
||||
|
@ -27,6 +27,8 @@ import com.metaweb.gridworks.operations.OperationRegistry;
|
||||
|
||||
public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOperation {
|
||||
final protected ReconCandidate match;
|
||||
final protected String identifierSpace;
|
||||
final protected String schemaSpace;
|
||||
|
||||
static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
|
||||
JSONObject engineConfig = obj.getJSONObject("engineConfig");
|
||||
@ -44,17 +46,26 @@ public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOpe
|
||||
obj.getString("columnName"),
|
||||
new ReconCandidate(
|
||||
match.getString("id"),
|
||||
match.getString("guid"),
|
||||
match.getString("name"),
|
||||
typeIDs,
|
||||
100
|
||||
)
|
||||
),
|
||||
obj.getString("identifierSpace"),
|
||||
obj.getString("schemaSpace")
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Creates the operation for the given column and match candidate, storing the identifier
 * space and schema space alongside the match.
 *
 * NOTE(review): this rendered diff shows two signatures back to back. The one-line,
 * three-parameter form appears to be the removed line and the multi-line, five-parameter
 * form the added one - confirm against the repository.
 *
 * @param engineConfig    engine configuration, passed to the superclass
 * @param columnName      name of the column, passed to the superclass
 * @param match           the candidate stored in the match field
 * @param identifierSpace stored in the identifierSpace field
 * @param schemaSpace     stored in the schemaSpace field
 */
public ReconMatchSpecificTopicOperation(JSONObject engineConfig, String columnName, ReconCandidate match) {
|
||||
public ReconMatchSpecificTopicOperation(
|
||||
JSONObject engineConfig,
|
||||
String columnName,
|
||||
ReconCandidate match,
|
||||
String identifierSpace,
|
||||
String schemaSpace
|
||||
) {
|
||||
super(engineConfig, columnName, false);
|
||||
this.match = match;
|
||||
this.identifierSpace = identifierSpace;
|
||||
this.schemaSpace = schemaSpace;
|
||||
}
|
||||
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
@ -67,30 +78,31 @@ public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOpe
|
||||
writer.key("columnName"); writer.value(_columnName);
|
||||
writer.key("match");
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(match.topicID);
|
||||
writer.key("guid"); writer.value(match.topicGUID);
|
||||
writer.key("name"); writer.value(match.topicName);
|
||||
writer.key("id"); writer.value(match.id);
|
||||
writer.key("name"); writer.value(match.name);
|
||||
writer.key("types");
|
||||
writer.array();
|
||||
for (String typeID : match.typeIDs) {
|
||||
for (String typeID : match.types) {
|
||||
writer.value(typeID);
|
||||
}
|
||||
writer.endArray();
|
||||
writer.endObject();
|
||||
writer.key("identifierSpace"); writer.value(identifierSpace);
|
||||
writer.key("schemaSpace"); writer.value(schemaSpace);
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
/**
 * Returns a short description of the form
 * "Match specific topic NAME (ID) to cells in column COLUMN".
 *
 * NOTE(review): this rendered diff interleaves old and new lines. The topicName/topicID
 * lines appear to be the removed ones and the name/id lines their replacements - confirm.
 */
protected String getBriefDescription(Project project) {
|
||||
return "Match specific topic " +
|
||||
match.topicName + " (" +
|
||||
match.topicID + ") to cells in column " + _columnName;
|
||||
match.name + " (" +
|
||||
match.id + ") to cells in column " + _columnName;
|
||||
}
|
||||
|
||||
/**
 * Returns a description of the form
 * "Match specific topic NAME (ID) to N cells in column COLUMN", where N is the number of
 * cell changes and COLUMN is the given column's name.
 *
 * NOTE(review): this rendered diff interleaves old and new lines. The topicName/topicID
 * lines appear to be the removed ones and the name/id lines their replacements - confirm.
 */
protected String createDescription(Column column,
|
||||
List<CellChange> cellChanges) {
|
||||
return "Match specific topic " +
|
||||
match.topicName + " (" +
|
||||
match.topicID + ") to " + cellChanges.size() +
|
||||
match.name + " (" +
|
||||
match.id + ") to " + cellChanges.size() +
|
||||
" cells in column " + column.getName();
|
||||
}
|
||||
|
||||
@ -130,7 +142,13 @@ public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOpe
|
||||
newRecon = dupReconMap.get(reconID);
|
||||
newRecon.judgmentBatchSize++;
|
||||
} else {
|
||||
newRecon = cell.recon != null ? cell.recon.dup(historyEntryID) : new Recon(historyEntryID);
|
||||
newRecon = cell.recon != null ?
|
||||
cell.recon.dup(historyEntryID) :
|
||||
new Recon(
|
||||
historyEntryID,
|
||||
identifierSpace,
|
||||
schemaSpace);
|
||||
|
||||
newRecon.match = match;
|
||||
newRecon.matchRank = -1;
|
||||
newRecon.judgment = Judgment.Matched;
|
||||
|
@ -24,7 +24,7 @@ import com.metaweb.gridworks.model.Recon;
|
||||
import com.metaweb.gridworks.model.Row;
|
||||
import com.metaweb.gridworks.model.changes.CellChange;
|
||||
import com.metaweb.gridworks.model.changes.ReconChange;
|
||||
import com.metaweb.gridworks.model.recon.HeuristicReconConfig;
|
||||
import com.metaweb.gridworks.model.recon.StandardReconConfig;
|
||||
import com.metaweb.gridworks.model.recon.ReconConfig;
|
||||
import com.metaweb.gridworks.model.recon.ReconJob;
|
||||
import com.metaweb.gridworks.operations.EngineDependentOperation;
|
||||
@ -143,7 +143,7 @@ public class ReconOperation extends EngineDependentOperation {
|
||||
writer.endObject();
|
||||
writer.endObject();
|
||||
|
||||
if (_reconConfig instanceof HeuristicReconConfig) {
|
||||
if (_reconConfig instanceof StandardReconConfig) {
|
||||
writer.object();
|
||||
writer.key("action"); writer.value("createFacet");
|
||||
writer.key("facetType"); writer.value("range");
|
||||
@ -242,7 +242,8 @@ public class ReconOperation extends EngineDependentOperation {
|
||||
|
||||
List<Recon> recons = _reconConfig.batchRecon(jobs, _historyEntryID);
|
||||
for (int j = i; j < to; j++) {
|
||||
Recon recon = recons.get(j - i);
|
||||
int index = j - i;
|
||||
Recon recon = index < recons.size() ? recons.get(j - i) : null;
|
||||
List<ReconEntry> entries = groups.get(j).entries;
|
||||
|
||||
if (recon != null) {
|
||||
|
@ -53,22 +53,27 @@ public class PreferenceStore implements Jsonizable {
|
||||
while (i.hasNext()) {
|
||||
String key = i.next();
|
||||
Object o = entries.get(key);
|
||||
if (o instanceof JSONObject) {
|
||||
try {
|
||||
JSONObject obj2 = (JSONObject) o;
|
||||
String className = obj2.getString("class");
|
||||
Class klass = Class.forName(className);
|
||||
Method method = klass.getMethod("load", JSONObject.class);
|
||||
|
||||
_prefs.put(key, method.invoke(null, obj2));
|
||||
} catch (Exception e) {
|
||||
//
|
||||
e.printStackTrace();
|
||||
}
|
||||
} else {
|
||||
_prefs.put(key, o);
|
||||
}
|
||||
_prefs.put(key, loadObject(o));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Converts a raw JSON value into a preference object.
 *
 * A JSONObject is expected to carry a "class" key naming a class that exposes a static
 * load(JSONObject) method; that method is looked up reflectively and invoked with the
 * JSONObject, and its result is returned. Any other value is returned unchanged.
 *
 * If anything fails for a JSONObject (missing "class", unknown class, no load method, or an
 * exception inside load), the stack trace is printed and null is returned.
 *
 * NOTE(review): the class name comes straight from the JSON data. If that data can be
 * influenced by untrusted input, this could invoke load(JSONObject) on any class on the
 * classpath - confirm where the JSON originates.
 *
 * @param o a value read from JSON
 * @return the loaded object, the original value, or null on failure
 */
@SuppressWarnings("unchecked")
|
||||
static public Object loadObject(Object o) {
|
||||
if (o instanceof JSONObject) {
|
||||
try {
|
||||
JSONObject obj2 = (JSONObject) o;
|
||||
String className = obj2.getString("class");
|
||||
Class klass = Class.forName(className);
|
||||
Method method = klass.getMethod("load", JSONObject.class);
|
||||
|
||||
// null receiver: load is invoked as a static method
|
||||
return method.invoke(null, obj2);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
}
|
||||
} else {
|
||||
return o;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -123,7 +123,7 @@ public class MqlwriteLikeTransposedNodeFactory implements TransposedNodeFactory
|
||||
if (cell.recon != null &&
|
||||
cell.recon.judgment == Recon.Judgment.Matched &&
|
||||
cell.recon.match != null) {
|
||||
obj.put(ID, cell.recon.match.topicID);
|
||||
obj.put(ID, cell.recon.match.id);
|
||||
} else {
|
||||
obj.put(ID, (String) null);
|
||||
obj.put(NAME, cell.value.toString());
|
||||
|
@ -201,7 +201,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
||||
cell.recon.match != null) {
|
||||
|
||||
objectCell = cell;
|
||||
id = cell.recon.match.topicID;
|
||||
id = cell.recon.match.id;
|
||||
} else if (node.createForNoReconMatch ||
|
||||
(cell.recon != null && cell.recon.judgment == Judgment.New)) {
|
||||
if (cell.recon != null && newTopicVars.containsKey(cell.recon.id)) {
|
||||
|
@ -57,11 +57,11 @@ public class FreebaseDataExtensionJob {
|
||||
}
|
||||
|
||||
public Map<String, FreebaseDataExtensionJob.DataExtension> extend(
|
||||
Set<String> guids,
|
||||
Set<String> ids,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) throws Exception {
|
||||
StringWriter writer = new StringWriter();
|
||||
formulateQuery(guids, extension, writer);
|
||||
formulateQuery(ids, extension, writer);
|
||||
|
||||
String query = writer.toString();
|
||||
InputStream is = doMqlRead(query);
|
||||
@ -76,11 +76,11 @@ public class FreebaseDataExtensionJob {
|
||||
|
||||
for (int i = 0; i < l; i++) {
|
||||
JSONObject o2 = a.getJSONObject(i);
|
||||
String guid = o2.getString("guid");
|
||||
String id = o2.getString("id");
|
||||
FreebaseDataExtensionJob.DataExtension ext = collectResult(o2, reconCandidateMap);
|
||||
|
||||
if (ext != null) {
|
||||
map.put(guid, ext);
|
||||
map.put(id, ext);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -125,20 +125,19 @@ public class FreebaseDataExtensionJob {
|
||||
JSONObject obj,
|
||||
Map<String, ReconCandidate> reconCandidateMap
|
||||
) throws JSONException {
|
||||
String guid = obj.getString("guid");
|
||||
String id = obj.getString("id");
|
||||
ReconCandidate rc;
|
||||
if (reconCandidateMap.containsKey(guid)) {
|
||||
rc = reconCandidateMap.get(guid);
|
||||
if (reconCandidateMap.containsKey(id)) {
|
||||
rc = reconCandidateMap.get(id);
|
||||
} else {
|
||||
rc = new ReconCandidate(
|
||||
obj.getString("id"),
|
||||
obj.getString("guid"),
|
||||
obj.getString("name"),
|
||||
JSONUtilities.getStringArray(obj, "type"),
|
||||
100
|
||||
);
|
||||
|
||||
reconCandidateMap.put(guid, rc);
|
||||
reconCandidateMap.put(id, rc);
|
||||
}
|
||||
|
||||
storeCell(rows, row, col, rc, reconCandidateMap);
|
||||
@ -274,7 +273,7 @@ public class FreebaseDataExtensionJob {
|
||||
return connection.getInputStream();
|
||||
}
|
||||
|
||||
static protected void formulateQuery(Set<String> guids, JSONObject node, Writer writer) throws JSONException {
|
||||
static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException {
|
||||
JSONWriter jsonWriter = new JSONWriter(writer);
|
||||
|
||||
jsonWriter.object();
|
||||
@ -282,12 +281,12 @@ public class FreebaseDataExtensionJob {
|
||||
jsonWriter.array();
|
||||
jsonWriter.object();
|
||||
|
||||
jsonWriter.key("guid"); jsonWriter.value(null);
|
||||
jsonWriter.key("guid|=");
|
||||
jsonWriter.key("id"); jsonWriter.value(null);
|
||||
jsonWriter.key("id|=");
|
||||
jsonWriter.array();
|
||||
for (String guid : guids) {
|
||||
if (guid != null) {
|
||||
jsonWriter.value(guid);
|
||||
for (String id : ids) {
|
||||
if (id != null) {
|
||||
jsonWriter.value(id);
|
||||
}
|
||||
}
|
||||
jsonWriter.endArray();
|
||||
@ -349,7 +348,6 @@ public class FreebaseDataExtensionJob {
|
||||
if (!hasSubProperties || (node.has("included") && node.getBoolean("included"))) {
|
||||
writer.key("name"); writer.value(null);
|
||||
writer.key("id"); writer.value(null);
|
||||
writer.key("guid"); writer.value(null);
|
||||
writer.key("type"); writer.array(); writer.endArray();
|
||||
}
|
||||
|
||||
|
@ -25,7 +25,7 @@ public class Pool implements Jsonizable {
|
||||
final protected Map<String, Recon> recons = new HashMap<String, Recon>();
|
||||
|
||||
/**
 * Stores the candidate in the candidates map, keyed by its identifier.
 *
 * NOTE(review): this rendered diff shows both versions of the put call. The topicID line
 * appears to be the removed one and the id line its replacement - confirm.
 */
public void pool(ReconCandidate candidate) {
|
||||
candidates.put(candidate.topicID, candidate);
|
||||
candidates.put(candidate.id, candidate);
|
||||
}
|
||||
|
||||
public void pool(Recon recon) {
|
||||
|
@ -57,7 +57,11 @@ function init() {
|
||||
"scripts/views/data-table/data-table-cell-ui.js",
|
||||
"scripts/views/data-table/data-table-column-header-ui.js",
|
||||
|
||||
"scripts/dialogs/recon-dialog.js",
|
||||
"scripts/reconciliation/recon-manager.js",
|
||||
"scripts/reconciliation/recon-dialog.js",
|
||||
"scripts/reconciliation/freebase-query-panel.js",
|
||||
"scripts/reconciliation/standard-service-panel.js",
|
||||
|
||||
"scripts/dialogs/expression-preview-dialog.js",
|
||||
"scripts/dialogs/freebase-loading-dialog.js",
|
||||
"scripts/dialogs/clustering-dialog.js",
|
||||
@ -133,7 +137,8 @@ function process(path, request, response) {
|
||||
var context = {};
|
||||
context.scripts = ClientSideResourceManager.getPaths(lastSegment + "/scripts");
|
||||
context.styles = ClientSideResourceManager.getPaths(lastSegment + "/styles");
|
||||
|
||||
context.projectID = request.getParameter("project");
|
||||
|
||||
send(request, response, path + ".vt", context);
|
||||
}
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
|
||||
<title>Freebase Gridworks</title>
|
||||
|
||||
<link rel="icon" type="image/png" href="images/favicon.png">
|
||||
|
||||
<link rel="icon" type="image/png" href="images/favicon.png" />
|
||||
<script type="text/javascript">var theProject = { id : $projectID };</script>
|
||||
|
||||
#foreach($path in $styles)
|
||||
<link type="text/css" rel="stylesheet" href="$path" />
|
||||
|
@ -1,71 +0,0 @@
|
||||
<div class="dialog-frame" style="width: 800px;">
|
||||
<div class="dialog-header" bind="dialogHeader"></div>
|
||||
<div class="dialog-body" bind="dialogBody">
|
||||
<div id="recon-dialog-tabs" class="gridworks-tabs">
|
||||
<ul>
|
||||
<li><a href="#recon-dialog-tabs-heuristic">Heuristic</a></li>
|
||||
<li><a href="#recon-dialog-tabs-strict">Strict</a></li>
|
||||
</ul>
|
||||
<div id="recon-dialog-tabs-heuristic">
|
||||
<div class="grid-layout layout-normal layout-full"><table>
|
||||
<tr>
|
||||
<td>Reconcile each cell to a Freebase topic of type:</td>
|
||||
<td>Also use relevant details from other columns:</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<div class="recon-dialog-heuristic-types-container" bind="heuristicTypeContainer">
|
||||
</div>
|
||||
<table class="recon-dialog-heuristic-other-type-container recon-dialog-inner-layout">
|
||||
<tr>
|
||||
<td width="1"><input type="radio" name="recon-dialog-type-choice" value=""></td>
|
||||
<td>Search for type: <input size="20" bind="heuristicTypeInput" /></td>
|
||||
<tr>
|
||||
</table>
|
||||
</td>
|
||||
<td width="50%">
|
||||
<div class="recon-dialog-heuristic-details-container" bind="heuristicDetailContainer"></div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<input type="checkbox" checked bind="heuristicAutomatchCheck" /> Auto-match candidates with high confidence
|
||||
</td>
|
||||
<td>
|
||||
Use
|
||||
<input type="radio" name="recon-dialog-heuristic-service" value="relevance" checked="" /> relevance service
|
||||
<input type="radio" name="recon-dialog-heuristic-service" value="recon" /> recon service
|
||||
</td>
|
||||
</tr>
|
||||
</table></div>
|
||||
</div>
|
||||
<div id="recon-dialog-tabs-strict" style="display: none;">
|
||||
<p>Each cell contains:</p>
|
||||
<div class="grid-layout layout-normal layout-full"><table>
|
||||
<tr><td width="1%"><input type="radio" name="recon-dialog-strict-choice" value="id" checked /></td><td>a Freebase ID, e.g., /en/solar_system</td></tr>
|
||||
<tr><td><input type="radio" name="recon-dialog-strict-choice" value="guid" /></td><td>a Freebase GUID, e.g., #9202a8c04000641f80000000000354ae</td></tr>
|
||||
<tr>
|
||||
<td width="1%"><input type="radio" name="recon-dialog-strict-choice" value="key" /></td>
|
||||
<td>
|
||||
<div class="grid-layout layout-tighter layout-full"><table>
|
||||
<tr><td colspan="2">a Freebase key in</td></tr>
|
||||
<tr>
|
||||
<td width="1%"><input type="radio" name="recon-dialog-strict-namespace-choice" value="/wikipedia/en" nsName="Wikipedia EN" checked /></td>
|
||||
<td>the Wikipedia English namespace</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="1%"><input type="radio" name="recon-dialog-strict-namespace-choice" value="other" /></td>
|
||||
<td>this namespace: <input bind="strictNamespaceInput" /></td>
|
||||
</tr>
|
||||
</table></div>
|
||||
</td>
|
||||
</tr>
|
||||
</table></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="dialog-footer" bind="dialogFooter">
|
||||
<button bind="reconcileButton">Start Reconciling</button>
|
||||
<button bind="cancelButton">Cancel</button>
|
||||
</div>
|
||||
</div>
|
@ -1,267 +0,0 @@
|
||||
function ReconDialog(column, types) {
|
||||
this._column = column;
|
||||
this._types = types.slice(0, 10);
|
||||
|
||||
var defaultTypes = {
|
||||
"/people/person" : {
|
||||
name: "Person"
|
||||
},
|
||||
"/location/location" : {
|
||||
name: "Location"
|
||||
}
|
||||
};
|
||||
$.each(this._types, function() {
|
||||
delete defaultTypes[this.id];
|
||||
});
|
||||
for (var id in defaultTypes) {
|
||||
if (defaultTypes.hasOwnProperty(id)) {
|
||||
this._types.push({
|
||||
id: id,
|
||||
name: defaultTypes[id].name
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
this._createDialog();
|
||||
}
|
||||
|
||||
ReconDialog.prototype._createDialog = function() {
|
||||
var self = this;
|
||||
var dialog = $(DOM.loadHTML("core", "scripts/dialogs/recon-dialog.html"));
|
||||
|
||||
this._elmts = DOM.bind(dialog);
|
||||
this._elmts.dialogHeader.text("Reconcile column " + this._column.name);
|
||||
this._elmts.reconcileButton.click(function() { self._onOK(); });
|
||||
this._elmts.cancelButton.click(function() { self._dismiss(); });
|
||||
|
||||
this._populateDialog();
|
||||
|
||||
this._level = DialogSystem.showDialog(dialog);
|
||||
|
||||
$("#recon-dialog-tabs").tabs();
|
||||
$("#recon-dialog-tabs-strict").css("display", "");
|
||||
|
||||
this._wireEvents();
|
||||
};
|
||||
|
||||
ReconDialog.prototype._populateDialog = function() {
|
||||
var self = this;
|
||||
|
||||
/*
|
||||
* Populate types in heuristic tab
|
||||
*/
|
||||
var typeTableContainer = $('<div>').addClass("grid-layout layout-tighter").appendTo(this._elmts.heuristicTypeContainer);
|
||||
var typeTable = $('<table></table>').appendTo(typeTableContainer)[0];
|
||||
var createTypeChoice = function(type, check) {
|
||||
var tr = typeTable.insertRow(typeTable.rows.length);
|
||||
var td0 = tr.insertCell(0);
|
||||
var td1 = tr.insertCell(1);
|
||||
|
||||
td0.width = "1%";
|
||||
var radio = $('<input type="radio" name="recon-dialog-type-choice">')
|
||||
.attr("value", type.id)
|
||||
.attr("typeName", type.name)
|
||||
.appendTo(td0)
|
||||
.click(function() {
|
||||
self._rewirePropertySuggests(this.value);
|
||||
});
|
||||
|
||||
if (check) {
|
||||
radio.attr("checked", "true");
|
||||
}
|
||||
|
||||
$(td1).html(type.name + '<br/><span class="recon-dialog-type-id">' + type.id + '</span>');
|
||||
};
|
||||
for (var i = 0; i < this._types.length; i++) {
|
||||
createTypeChoice(this._types[i], i === 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Populate properties in heuristic tab
|
||||
*/
|
||||
var heuristicDetailTableContainer = $('<div>')
|
||||
.addClass("grid-layout layout-tighter")
|
||||
.appendTo(this._elmts.heuristicDetailContainer);
|
||||
|
||||
var heuristicDetailTable = $(
|
||||
'<table>' +
|
||||
'<tr><th>Column</th><th>Freebase property</th></tr>' +
|
||||
'</table>'
|
||||
).appendTo(heuristicDetailTableContainer)[0];
|
||||
|
||||
function renderDetailColumn(column) {
|
||||
var tr = heuristicDetailTable.insertRow(heuristicDetailTable.rows.length);
|
||||
var td0 = tr.insertCell(0);
|
||||
var td1 = tr.insertCell(1);
|
||||
|
||||
$(td0).html(column.name);
|
||||
$('<input size="15" name="recon-dialog-heuristic-property" />')
|
||||
.attr("columnName", column.name)
|
||||
.appendTo(td1);
|
||||
}
|
||||
var columns = theProject.columnModel.columns;
|
||||
for (var i = 0; i < columns.length; i++) {
|
||||
var column = columns[i];
|
||||
if (column !== this._column) {
|
||||
renderDetailColumn(column);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
ReconDialog.prototype._wireEvents = function() {
|
||||
var self = this;
|
||||
|
||||
this._elmts.heuristicTypeInput
|
||||
.suggestT({ type : '/type/type' })
|
||||
.bind("fb-select", function(e, data) {
|
||||
$('input[name="recon-dialog-type-choice"][value=""]').attr("checked", "true");
|
||||
|
||||
self._rewirePropertySuggests(data.id);
|
||||
});
|
||||
|
||||
this._rewirePropertySuggests(this._types[0].id);
|
||||
|
||||
this._elmts.strictNamespaceInput
|
||||
.suggest({ type: '/type/namespace' })
|
||||
.bind("fb-select", function(e, data) {
|
||||
$('input[name="recon-dialog-strict-choice"][value="key"]').attr("checked", "true");
|
||||
$('input[name="recon-dialog-strict-namespace-choice"][value="other"]').attr("checked", "true");
|
||||
});
|
||||
};
|
||||
|
||||
ReconDialog.prototype._rewirePropertySuggests = function(schema) {
|
||||
var inputs = $('input[name="recon-dialog-heuristic-property"]');
|
||||
|
||||
inputs.unbind().suggestP({
|
||||
type: '/type/property',
|
||||
schema: schema || "/common/topic"
|
||||
}).bind("fb-select", function(e, data) {
|
||||
$('input[name="recon-dialog-heuristic-service"][value="recon"]').attr("checked", "true");
|
||||
});
|
||||
};
|
||||
|
||||
ReconDialog.prototype._onOK = function() {
|
||||
var tab = $("#recon-dialog-tabs").tabs('option', 'selected');
|
||||
if (tab === 0) {
|
||||
this._onDoHeuristic();
|
||||
} else {
|
||||
this._onDoStrict();
|
||||
}
|
||||
};
|
||||
|
||||
ReconDialog.prototype._dismiss = function() {
|
||||
DialogSystem.dismissUntil(this._level - 1);
|
||||
};
|
||||
|
||||
ReconDialog.prototype._onDoHeuristic = function() {
|
||||
var type = this._elmts.heuristicTypeInput.data("data.suggest");
|
||||
|
||||
var choices = $('input[name="recon-dialog-type-choice"]:checked');
|
||||
if (choices !== null && choices.length > 0 && choices[0].value != "") {
|
||||
type = {
|
||||
id: choices[0].value,
|
||||
name: choices.attr("typeName")
|
||||
};
|
||||
}
|
||||
|
||||
if (!type) {
|
||||
alert("Please specify a type.");
|
||||
} else {
|
||||
var columnDetails = [];
|
||||
var propertyInputs = $('input[name="recon-dialog-heuristic-property"]');
|
||||
$.each(propertyInputs, function() {
|
||||
var property = $(this).data("data.suggest");
|
||||
if (property && property.id) {
|
||||
columnDetails.push({
|
||||
column: this.getAttribute("columnName"),
|
||||
property: {
|
||||
id: property.id,
|
||||
name: property.name
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
Gridworks.postProcess(
|
||||
"reconcile",
|
||||
{},
|
||||
{
|
||||
columnName: this._column.name,
|
||||
config: JSON.stringify({
|
||||
mode: "heuristic",
|
||||
service: $('input[name="recon-dialog-heuristic-service"]:checked')[0].value,
|
||||
type: {
|
||||
id: type.id,
|
||||
name: type.name
|
||||
},
|
||||
autoMatch: this._elmts.heuristicAutomatchCheck[0].checked,
|
||||
columnDetails: columnDetails
|
||||
})
|
||||
},
|
||||
{ cellsChanged: true, columnStatsChanged: true }
|
||||
);
|
||||
|
||||
this._dismiss();
|
||||
}
|
||||
};
|
||||
|
||||
ReconDialog.prototype._onDoStrict = function() {
|
||||
var bodyParams;
|
||||
|
||||
var match = $('input[name="recon-dialog-strict-choice"]:checked')[0].value;
|
||||
if (match == "key") {
|
||||
var namespaceChoice = $('input[name="recon-dialog-strict-namespace-choice"]:checked')[0];
|
||||
var namespace;
|
||||
|
||||
if (namespaceChoice.value == "other") {
|
||||
var suggest = this._elmts.strictNamespaceInput.data("data.suggest");
|
||||
if (!suggest) {
|
||||
alert("Please specify a namespace.");
|
||||
return;
|
||||
}
|
||||
namespace = {
|
||||
id: suggest.id,
|
||||
name: suggest.name
|
||||
};
|
||||
} else {
|
||||
namespace = {
|
||||
id: namespaceChoice.value,
|
||||
name: namespaceChoice.getAttribute("nsName")
|
||||
};
|
||||
}
|
||||
|
||||
bodyParams = {
|
||||
columnName: this._column.name,
|
||||
config: JSON.stringify({
|
||||
mode: "strict",
|
||||
match: "key",
|
||||
namespace: namespace
|
||||
})
|
||||
};
|
||||
} else if (match == "id") {
|
||||
bodyParams = {
|
||||
columnName: this._column.name,
|
||||
config: JSON.stringify({
|
||||
mode: "strict",
|
||||
match: "id"
|
||||
})
|
||||
};
|
||||
} else if (match == "guid") {
|
||||
bodyParams = {
|
||||
columnName: this._column.name,
|
||||
config: JSON.stringify({
|
||||
mode: "strict",
|
||||
match: "guid"
|
||||
})
|
||||
};
|
||||
}
|
||||
|
||||
Gridworks.postProcess(
|
||||
"reconcile",
|
||||
{},
|
||||
bodyParams,
|
||||
{ cellsChanged: true, columnStatsChanged: true }
|
||||
);
|
||||
|
||||
this._dismiss();
|
||||
};
|
@ -388,10 +388,6 @@ Gridworks.getPermanentLink = function() {
|
||||
function onLoad() {
|
||||
var params = URL.getParameters();
|
||||
if ("project" in params) {
|
||||
theProject = {
|
||||
id: parseInt(params.project,10)
|
||||
};
|
||||
|
||||
var uiState = {};
|
||||
if ("ui" in params) {
|
||||
try {
|
||||
|
@ -0,0 +1,23 @@
|
||||
<div class="recon-dialog-service-panel">
|
||||
<p>Each cell contains:</p>
|
||||
<div class="grid-layout layout-normal layout-full"><table>
|
||||
<tr><td width="1%"><input type="radio" name="recon-dialog-strict-choice" value="id" checked /></td><td>a Freebase ID, e.g., /en/solar_system</td></tr>
|
||||
<tr><td><input type="radio" name="recon-dialog-strict-choice" value="guid" /></td><td>a Freebase GUID, e.g., #9202a8c04000641f80000000000354ae</td></tr>
|
||||
<tr>
|
||||
<td width="1%"><input type="radio" name="recon-dialog-strict-choice" value="key" /></td>
|
||||
<td>
|
||||
<div class="grid-layout layout-tighter layout-full"><table>
|
||||
<tr><td colspan="2">a Freebase key in</td></tr>
|
||||
<tr>
|
||||
<td width="1%"><input type="radio" name="recon-dialog-strict-namespace-choice" value="/wikipedia/en" nsName="Wikipedia EN" checked /></td>
|
||||
<td>the Wikipedia English namespace</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="1%"><input type="radio" name="recon-dialog-strict-namespace-choice" value="other" /></td>
|
||||
<td>this namespace: <input bind="strictNamespaceInput" /></td>
|
||||
</tr>
|
||||
</table></div>
|
||||
</td>
|
||||
</tr>
|
||||
</table></div>
|
||||
</div>
|
@ -0,0 +1,100 @@
|
||||
/*
 * Panel of the reconcile dialog that offers Freebase query-based ("strict")
 * reconciliation for a single column.
 *
 *   column    - column being reconciled; its name is sent with the request
 *   service   - service record this panel was created for
 *   container - element the panel's HTML gets appended to
 */
function ReconFreebaseQueryPanel(column, service, container) {
    this._container = container;
    this._service = service;
    this._column = column;

    // The UI is built immediately, so all fields above must be set first.
    this._constructUI();
}
|
||||
|
||||
/* Shows this panel's root element. */
ReconFreebaseQueryPanel.prototype.activate = function() {
    var panel = this._panel;
    panel.show();
};
|
||||
|
||||
/* Hides this panel's root element without removing it from the DOM. */
ReconFreebaseQueryPanel.prototype.deactivate = function() {
    var panel = this._panel;
    panel.hide();
};
|
||||
|
||||
/*
 * Removes the panel from the DOM and drops every reference held by this
 * object. The instance must not be used afterwards.
 */
ReconFreebaseQueryPanel.prototype.dispose = function() {
    var panel = this._panel;
    panel.remove();

    this._panel = null;
    this._column = this._service = this._container = null;
};
|
||||
|
||||
/*
 * Loads the panel's HTML template, appends it to the container, collects the
 * elements carrying a bind attribute into this._elmts, and wires the events.
 */
ReconFreebaseQueryPanel.prototype._constructUI = function() {
    var html = DOM.loadHTML("core", "scripts/reconciliation/freebase-query-panel.html");

    this._panel = $(html).appendTo(this._container);
    this._elmts = DOM.bind(this._panel);

    this._wireEvents();
};
|
||||
|
||||
/*
 * Attaches the namespace suggest widget to the "this namespace" input.
 * Picking a suggestion also checks the "key" and "other namespace" radio
 * buttons of this panel.
 */
ReconFreebaseQueryPanel.prototype._wireEvents = function() {
    var self = this;

    var onNamespacePicked = function(e, data) {
        // Look the panel up at event time, as the handler is bound once.
        var panel = self._panel;
        panel.find('input[name="recon-dialog-strict-choice"][value="key"]').attr("checked", "true");
        panel.find('input[name="recon-dialog-strict-namespace-choice"][value="other"]').attr("checked", "true");
    };

    var input = this._elmts.strictNamespaceInput;
    input.suggest({ type: '/type/namespace' }).bind("fb-select", onNamespacePicked);
};
|
||||
|
||||
/*
 * Starts a strict reconciliation of the panel's column according to the
 * radio buttons currently checked in this panel.
 *
 * Returns false when nothing was started because the user still has to pick
 * a namespace (an alert is shown); returns true once the "reconcile" process
 * has been posted. Callers that ignore the return value keep working.
 */
ReconFreebaseQueryPanel.prototype.start = function() {
    // Scope the lookups to this panel, as _wireEvents does, so that inputs
    // with the same name elsewhere in the page cannot be picked up.
    var panel = this._panel;
    var match = panel.find('input[name="recon-dialog-strict-choice"]:checked')[0].value;

    var config;
    if (match == "key") {
        var namespaceChoice = panel.find('input[name="recon-dialog-strict-namespace-choice"]:checked')[0];
        var namespace;

        if (namespaceChoice.value == "other") {
            // The suggest widget stores the picked topic under "data.suggest".
            var suggest = this._elmts.strictNamespaceInput.data("data.suggest");
            if (!suggest) {
                alert("Please specify a namespace.");
                return false;
            }
            namespace = {
                id: suggest.id,
                name: suggest.name
            };
        } else {
            namespace = {
                id: namespaceChoice.value,
                name: namespaceChoice.getAttribute("nsName")
            };
        }

        config = {
            mode: "strict",
            match: "key",
            namespace: namespace
        };
    } else if (match == "id" || match == "guid") {
        config = {
            mode: "strict",
            match: match
        };
    }

    // As before, an unrecognized match value leaves bodyParams undefined.
    var bodyParams;
    if (config) {
        bodyParams = {
            columnName: this._column.name,
            config: JSON.stringify(config)
        };
    }

    Gridworks.postProcess(
        "reconcile",
        {},
        bodyParams,
        { cellsChanged: true, columnStatsChanged: true }
    );
    return true;
};
|
@ -0,0 +1,19 @@
|
||||
<div class="dialog-frame" style="width: 900px;">
|
||||
<div class="dialog-header" bind="dialogHeader"></div>
|
||||
<div class="dialog-body" bind="dialogBody">
|
||||
<div class="grid-layout layout-normal layout-full"><table><tr>
|
||||
<td width="1%">
|
||||
<div class="recon-dialog-service-header">Services and Extensions</div>
|
||||
<div class="recon-dialog-service-list" bind="serviceList"></div>
|
||||
<div class="recon-dialog-service-controls">
|
||||
<button bind="addStandardServiceButton">Add Standard Service...</button>
|
||||
</div>
|
||||
</td>
|
||||
<td><div class="recon-dialog-service-panel-container" bind="servicePanelContainer"></div></td>
|
||||
</tr></table></div>
|
||||
</div>
|
||||
<div class="dialog-footer" bind="dialogFooter">
|
||||
<button bind="reconcileButton">Start Reconciling</button>
|
||||
<button bind="cancelButton">Cancel</button>
|
||||
</div>
|
||||
</div>
|
101
main/webapp/modules/core/scripts/reconciliation/recon-dialog.js
Normal file
101
main/webapp/modules/core/scripts/reconciliation/recon-dialog.js
Normal file
@ -0,0 +1,101 @@
|
||||
/*
 * Dialog that lets the user pick a reconciliation service for a column and
 * start reconciling with it.
 *
 *   column - column to reconcile
 *   types  - accepted for compatibility; not used by this constructor
 */
function ReconDialog(column, types) {
    // -1 means that no service is selected yet.
    this._selectedServiceRecordIndex = -1;
    this._serviceRecords = [];
    this._column = column;

    this._createDialog();
}
|
||||
|
||||
/*
 * Builds the dialog from its HTML template, hooks up the two footer buttons,
 * shows the dialog and then fills in the list of services.
 */
ReconDialog.prototype._createDialog = function() {
    var self = this;
    var dialog = $(DOM.loadHTML("core", "scripts/reconciliation/recon-dialog.html"));
    var elmts = this._elmts = DOM.bind(dialog);

    elmts.dialogHeader.text("Reconcile column " + this._column.name);
    elmts.reconcileButton.click(function() {
        self._onOK();
    });
    elmts.cancelButton.click(function() {
        self._dismiss();
    });

    // The dialog is shown before it is populated.
    this._level = DialogSystem.showDialog(dialog);
    this._populateDialog();
};
|
||||
|
||||
/*
 * Handler of the "Start Reconciling" button: asks the selected service's
 * handler to start and then closes the dialog.
 *
 * A handler may return exactly false from start() to signal that nothing
 * was started (e.g. the user still has to complete the form); the dialog
 * then stays open. Any other return value, including undefined, closes the
 * dialog as before.
 */
ReconDialog.prototype._onOK = function() {
    var index = this._selectedServiceRecordIndex;
    if (index >= 0) {
        var handler = this._serviceRecords[index].handler;
        if (handler && handler.start() === false) {
            return;
        }
    }
    this._dismiss();
};
|
||||
|
||||
/*
 * Disposes every handler that was created while the dialog was open, drops
 * the service records and closes the dialog.
 */
ReconDialog.prototype._dismiss = function() {
    var records = this._serviceRecords;
    for (var r = 0; r < records.length; r++) {
        var handler = records[r].handler;
        if (handler) {
            handler.dispose();
        }
    }
    this._serviceRecords = null;

    DialogSystem.dismissUntil(this._level - 1);
};
|
||||
|
||||
/*
 * Renders one selector link per registered service into the service list
 * and selects the first service. Does nothing when no service is registered.
 */
ReconDialog.prototype._populateDialog = function() {
    var self = this;
    var services = ReconciliationManager.getAllServices();
    if (services.length === 0) {
        return;
    }

    $.each(services, function(index, service) {
        // The handler is created lazily, the first time the service is selected.
        var record = { service: service, handler: null };

        var link = $('<a>');
        link.attr("href", "javascript:{}");
        link.addClass("recon-dialog-service-selector");
        link.text(service.name);
        link.appendTo(self._elmts.serviceList);
        link.click(function() {
            self._selectService(record);
        });

        record.selector = link;
        self._serviceRecords.push(record);
    });

    this._selectService(this._serviceRecords[0]);
};
|
||||
|
||||
/*
 * Makes the given service record the selected one: deactivates the handler
 * of the previously selected record, highlights the new selector, and either
 * re-activates the record's existing handler or creates it.
 * Unknown records and the already selected record are ignored.
 */
ReconDialog.prototype._selectService = function(record) {
    var index = $.inArray(record, this._serviceRecords);
    var previousIndex = this._selectedServiceRecordIndex;
    if (index < 0 || index === previousIndex) {
        return;
    }

    if (previousIndex >= 0) {
        var previous = this._serviceRecords[previousIndex];
        if (previous.handler) {
            previous.selector.removeClass("selected");
            previous.handler.deactivate();
        }
    }

    record.selector.addClass("selected");
    if (!record.handler) {
        // NOTE(review): the handler name comes from the service description,
        // which for standard services is read back from stored preferences;
        // eval() will execute whatever string is there. Consider a lookup
        // table of known handler constructors instead.
        var handlerConstructor = eval(record.service.ui.handler);

        record.handler = new handlerConstructor(
            this._column, record.service, this._elmts.servicePanelContainer);
    } else {
        record.handler.activate();
    }

    this._selectedServiceRecordIndex = index;
};
|
@ -0,0 +1,65 @@
|
||||
/*
 * Registry of the reconciliation services offered in the reconcile dialog.
 */
var ReconciliationManager = {
    // services registered by core and extensions
    customServices: [],

    // services registered by user
    standardServices: []
};
|
||||
|
||||
/*
 * Returns a new array holding the custom services followed by the standard
 * services; the two registries themselves are not modified.
 */
ReconciliationManager.getAllServices = function() {
    var custom = ReconciliationManager.customServices;
    var standard = ReconciliationManager.standardServices;
    return custom.concat(standard);
};
|
||||
|
||||
/* Appends a service description to the list of custom services. */
ReconciliationManager.registerService = function(service) {
    var registry = ReconciliationManager.customServices;
    registry.push(service);
};
|
||||
|
||||
/*
 * Registers a standard reconciliation service by fetching its description
 * from the given URL via JSONP.
 *
 * The request is asynchronous: the service is appended to standardServices
 * and the list is saved only once the response arrives, so the service is
 * NOT yet registered when this function returns. (jQuery ignores
 * "async: false" for JSONP requests, so the option has been dropped rather
 * than left in to suggest otherwise.)
 *
 *   url - endpoint of the service; "callback=?" is appended for JSONP
 */
ReconciliationManager.registerStandardService = function(url) {
    // Plain indexOf keeps this independent of String.prototype extensions
    // that are defined in another script.
    var separator = url.indexOf("?") >= 0 ? "&" : "?";

    $.ajax({
        url: url + separator + "callback=?",
        dataType: "jsonp",
        success: function(data) {
            // Remember where the description came from and which panel
            // implementation drives its UI.
            data.url = url;
            data.ui = { "handler" : "ReconStandardServicePanel" };

            ReconciliationManager.standardServices.push(data);
            ReconciliationManager.save();
        }
    });
};
|
||||
|
||||
/*
 * Stores the list of standard services as the preference
 * "standard-reconciliation-services" with a synchronous POST.
 *
 *   f - optional callback invoked, without arguments, on success
 */
ReconciliationManager.save = function(f) {
    var query = $.param({
        name: "standard-reconciliation-services"
    });
    var payload = { "value" : JSON.stringify(ReconciliationManager.standardServices) };

    var notify = function() {
        if (f) {
            f();
        }
    };

    $.ajax({
        async: false,
        type: "POST",
        url: "/command/set-preference?" + query,
        data: payload,
        dataType: "json",
        success: notify
    });
};
|
||||
|
||||
/*
 * Bootstrap, run once when this script loads: registers the built-in
 * Freebase query-based service, then loads the user's standard services
 * from the preferences. When none are stored, a default standard service
 * is registered instead.
 */
(function() {
    ReconciliationManager.customServices.push({
        "name" : "Freebase Query-based Reconciliation",
        "ui" : { "handler" : "ReconFreebaseQueryPanel" }
    });

    var onPreferenceLoaded = function(data) {
        var stored = data.value;
        // A missing value or the string "null" both count as "nothing stored".
        if (!stored || stored == "null") {
            ReconciliationManager.registerStandardService(
                "http://gridworks-helper.dfhuynh.user.dev.freebaseapps.com/reconcile");
            return;
        }
        ReconciliationManager.standardServices = JSON.parse(stored);
    };

    var query = $.param({
        name: "standard-reconciliation-services"
    });

    $.ajax({
        async: false,
        url: "/command/get-preference?" + query,
        success: onPreferenceLoaded,
        dataType: "json"
    });
})();
|
@ -0,0 +1,22 @@
|
||||
<div class="recon-dialog-service-panel recon-dialog-standard-service-panel">
|
||||
<div class="grid-layout layout-normal layout-full"><table>
|
||||
<tr>
|
||||
<td>Reconcile each cell to an entity of one of these types:</td>
|
||||
<td>Also use relevant details from other columns:</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div class="type-container" bind="typeContainer"></div></td>
|
||||
<td width="50%"><div class="detail-container" bind="detailContainer"></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><input type="radio" name="type-choice" value="">
|
||||
Or enter a specific type: <input size="20" bind="typeInput" /></td>
|
||||
<td>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><input type="checkbox" checked bind="automatchCheck" /> Auto-match candidates with high confidence</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
</table></div>
|
||||
</div>
|
@ -0,0 +1,257 @@
|
||||
/*
 * Panel of the reconcile dialog that drives a standard reconciliation
 * service for a single column.
 *
 *   column    - column being reconciled
 *   service   - description of the service (its url is sent to the server)
 *   container - element the panel's HTML gets appended to
 */
function ReconStandardServicePanel(column, service, container) {
    this._types = [];   // filled in by _guessTypes
    this._container = container;
    this._service = service;
    this._column = column;

    this._constructUI();
}
|
||||
|
||||
/*
 * Asks the server to guess the types of the cells in the column, using this
 * panel's service, and stores the result in this._types. A busy indicator is
 * shown while the request is in flight.
 *
 * When no type is guessed and the service declares "defaultTypes", those are
 * used instead (one entry per distinct id).
 *
 *   f - callback invoked, without arguments, once this._types is ready
 */
ReconStandardServicePanel.prototype._guessTypes = function(f) {
    var self = this;
    var dismissBusy = DialogSystem.showBusy();

    $.post(
        "/command/guess-types-of-column?" + $.param({
            project: theProject.id,
            columnName: this._column.name,
            service: this._service.url
        }),
        null,
        function(data) {
            // A response without a "types" list (e.g. an error reply) is
            // treated as "nothing guessed" rather than failing below and
            // leaving the busy indicator on screen.
            self._types = (data && data.types) || [];

            if (self._types.length === 0 && "defaultTypes" in self._service) {
                // id -> name; keying by id removes duplicate ids.
                var defaultTypes = {};
                $.each(self._service["defaultTypes"], function() {
                    defaultTypes[this.id] = this.name;
                });

                for (var id in defaultTypes) {
                    if (defaultTypes.hasOwnProperty(id)) {
                        self._types.push({
                            id: id,
                            // The map value already is the name.
                            name: defaultTypes[id]
                        });
                    }
                }
            }

            dismissBusy();
            f();
        }
    );
};
|
||||
|
||||
/*
 * Loads the panel's HTML template into the container and binds its
 * elements. The panel is populated and wired only after the type guesses
 * have come back.
 */
ReconStandardServicePanel.prototype._constructUI = function() {
    var panelObject = this;
    var html = DOM.loadHTML("core", "scripts/reconciliation/standard-service-panel.html");

    this._panel = $(html).appendTo(this._container);
    this._elmts = DOM.bind(this._panel);

    var onTypesReady = function() {
        panelObject._populatePanel();
        panelObject._wireEvents();
    };
    this._guessTypes(onTypesReady);
};
|
||||
|
||||
/* Shows this panel's root element. */
ReconStandardServicePanel.prototype.activate = function() {
    var panel = this._panel;
    panel.show();
};
|
||||
|
||||
/* Hides this panel's root element without removing it from the DOM. */
ReconStandardServicePanel.prototype.deactivate = function() {
    var panel = this._panel;
    panel.hide();
};
|
||||
|
||||
/*
 * Removes the panel from the DOM and drops the references to the panel,
 * column, service and container. The instance must not be used afterwards.
 */
ReconStandardServicePanel.prototype.dispose = function() {
    var panel = this._panel;
    panel.remove();

    this._panel = null;
    this._column = this._service = this._container = null;
};
|
||||
|
||||
/*
 * Fills the panel with
 *   - one radio button per entry of this._types (the first one checked), and
 *   - one row per project column other than the reconciled one, holding an
 *     "include" checkbox and an input for the property to map it to.
 *
 * Entries of this._types may be plain strings (used as both id and name) or
 * objects with an id and an optional name.
 *
 * Type names/ids come from the reconciliation service and column names from
 * user data, so they are inserted as text, never as HTML.
 */
ReconStandardServicePanel.prototype._populatePanel = function() {
    var self = this;

    /*
     *  Populate types
     */
    var typeTableContainer = $('<div>')
        .addClass("grid-layout layout-tightest")
        .appendTo(this._elmts.typeContainer);

    var typeTable = $('<table></table>').appendTo(typeTableContainer)[0];

    var createTypeChoice = function(type, check) {
        var typeID = typeof type == "string" ? type : type.id;
        var typeName = typeof type == "string" ? type : (type.name || type.id);

        var tr = typeTable.insertRow(typeTable.rows.length);
        var td0 = tr.insertCell(0);
        var td1 = tr.insertCell(1);

        td0.width = "1%";
        var radio = $('<input type="radio" name="type-choice">')
            .attr("value", typeID)
            .attr("typeName", typeName)
            .appendTo(td0)
            .click(function() {
                self._rewirePropertySuggests(this.value);
            });

        if (check) {
            radio.attr("checked", "true");
        }

        // Show the id on a second line only when it differs from the name.
        $(td1).text(typeName);
        if (typeName != typeID) {
            $(td1)
                .append('<br/>')
                .append($('<span class="type-id"></span>').text(typeID));
        }
    };

    for (var t = 0; t < this._types.length; t++) {
        createTypeChoice(this._types[t], t === 0);
    }

    /*
     *  Populate properties
     */
    var detailTableContainer = $('<div>')
        .addClass("grid-layout layout-tightest")
        .appendTo(this._elmts.detailContainer);

    var detailTable = $(
        '<table>' +
            '<tr><th>Column</th><th>Include?</th><th>As Property</th></tr>' +
        '</table>'
    ).appendTo(detailTableContainer)[0];

    var renderDetailColumn = function(column) {
        var tr = detailTable.insertRow(detailTable.rows.length);
        var td0 = tr.insertCell(0);
        var td1 = tr.insertCell(1);
        var td2 = tr.insertCell(2);

        $(td0).text(column.name);
        $('<input type="checkbox" />')
            .attr("columnName", column.name)
            .appendTo(td1);
        $('<input size="25" name="property" />')
            .attr("columnName", column.name)
            .appendTo(td2);
    };

    var columns = theProject.columnModel.columns;
    for (var c = 0; c < columns.length; c++) {
        // The column being reconciled cannot serve as a detail of itself.
        if (columns[c] !== this._column) {
            renderDetailColumn(columns[c]);
        }
    }
};
|
||||
|
||||
/*
 * For services in the Freebase identifier space only: attaches the Freebase
 * type suggest widget to the free-form type input and points the property
 * inputs at the schema of the first listed type.
 *
 * Picking a suggested type checks the "enter a specific type" radio button
 * and re-targets the property inputs to that type.
 */
ReconStandardServicePanel.prototype._wireEvents = function() {
    if (!this._isInFreebaseIdentifierSpace()) {
        return;
    }

    var self = this;
    this._elmts.typeInput
        .suggestT({ type : '/type/type' })
        .bind("fb-select", function(e, data) {
            self._panel
                .find('input[name="type-choice"][value=""]')
                .attr("checked", "true");

            self._rewirePropertySuggests(data.id);
        });

    // There may be no types at all, and an entry may be a plain string
    // instead of an object; _rewirePropertySuggests accepts a string, an
    // object with an id, or nothing.
    var firstType = this._types.length > 0 ? this._types[0] : null;
    this._rewirePropertySuggests(firstType);
};
|
||||
|
||||
/*
 * Re-attaches the Freebase property suggest widget to every property input
 * of this panel, restricted to the schema of the given type. Does nothing
 * for services outside the Freebase identifier space.
 *
 *   type - a type id string, an object with an id, or a falsy value
 *          (in which case the schema "/common/topic" is used)
 */
ReconStandardServicePanel.prototype._rewirePropertySuggests = function(type) {
    if (!this._isInFreebaseIdentifierSpace()) {
        return;
    }

    var inputs = this._panel.find('input[name="property"]');
    // Drop the handlers of the previous widget before attaching a new one.
    inputs.unbind();

    var schema;
    if (!type) {
        schema = "/common/topic";
    } else if (typeof type == "string") {
        schema = type;
    } else {
        schema = type.id;
    }

    inputs.suggestP({
        type: '/type/property',
        schema: schema
    });
};
|
||||
|
||||
/*
 * Tells whether the service identifies entities in the Freebase identifier
 * space, i.e. whether its identifier space starts with
 * "http://rdf.freebase.com/".
 *
 * The identifier space is read from either "identifierSpace" (the key the
 * rest of this panel reads when starting reconciliation) or the hyphenated
 * "identifier-space". A missing or non-string value yields false.
 */
ReconStandardServicePanel.prototype._isInFreebaseIdentifierSpace = function() {
    var service = this._service;
    var space = service["identifierSpace"] || service["identifier-space"];

    // indexOf(...) === 0 is a prefix test that needs no String extension.
    return typeof space == "string" &&
        space.indexOf("http://rdf.freebase.com/") === 0;
};
|
||||
|
||||
/*
 * Starts reconciling the panel's column against the service, using the
 * chosen type, the auto-match checkbox and the column-to-property details.
 *
 * The type is the checked radio button when it carries a value; otherwise it
 * is taken from the free-form input: the suggested topic for Freebase
 * services, the typed text for any other service.
 *
 * Returns false when nothing was started because no type could be
 * determined (an alert is shown); returns true once the "reconcile" process
 * has been posted. Callers that ignore the return value keep working.
 */
ReconStandardServicePanel.prototype.start = function() {
    var type = this._isInFreebaseIdentifierSpace() ?
        this._elmts.typeInput.data("data.suggest") :
        {
            id: this._elmts.typeInput[0].value,
            name: this._elmts.typeInput[0].value
        };

    var choices = this._panel.find('input[name="type-choice"]:checked');
    if (choices !== null && choices.length > 0 && choices[0].value != "") {
        type = {
            id: choices[0].value,
            name: choices.attr("typeName")
        };
    }

    // In Freebase mode nothing may have been suggested or selected; without
    // this guard reading type.id below would throw.
    if (!type) {
        alert("Please specify a type.");
        return false;
    }

    var columnDetails = [];
    $.each(
        this._panel.find('input[name="property"]'),
        function() {
            var property = null;

            // Prefer the topic picked in the suggest widget; fall back to
            // whatever text was typed into the input.
            var suggested = $(this).data("data.suggest");
            if (suggested && suggested.id) {
                property = {
                    id: suggested.id,
                    name: suggested.name
                };
            } else {
                var typed = $.trim(this.value);
                if (typed) {
                    property = {
                        id: typed,
                        name: typed
                    };
                }
            }

            if (property) {
                columnDetails.push({
                    column: this.getAttribute("columnName"),
                    property: property
                });
            }
        }
    );

    Gridworks.postProcess(
        "reconcile",
        {},
        {
            columnName: this._column.name,
            config: JSON.stringify({
                mode: "standard-service",
                service: this._service.url,
                identifierSpace: this._service.identifierSpace,
                schemaSpace: this._service.schemaSpace,
                type: {
                    id: type.id,
                    name: type.name
                },
                autoMatch: this._elmts.automatchCheck[0].checked,
                columnDetails: columnDetails
            })
        },
        { cellsChanged: true, columnStatsChanged: true }
    );
    return true;
};
|
||||
|
@ -1,3 +1,15 @@
|
||||
/* Returns the string with leading and trailing whitespace removed. */
String.prototype.trim = function() {
    // One pass: strip the leading run and the trailing run of whitespace.
    return this.replace(/^\s+|\s+$/g, '');
};
|
||||
|
||||
/* Tells whether this string begins with the string s. */
String.prototype.startsWith = function(s) {
    var prefixLength = s.length;
    if (!(this.length >= prefixLength)) {
        return false;
    }
    return this.slice(0, prefixLength) == s;
};
|
||||
|
||||
/* Tells whether this string ends with the string s. */
String.prototype.endsWith = function(s) {
    var suffixLength = s.length;
    if (!(this.length >= suffixLength)) {
        return false;
    }
    return this.slice(this.length - suffixLength) == s;
};
|
||||
|
||||
/* Tells whether s occurs anywhere in this string. */
String.prototype.contains = function(s) {
    var position = this.indexOf(s);
    return position !== -1;
};
|
@ -195,6 +195,8 @@ DataTableCellUI.prototype._doJudgment = function(judgment, params) {
|
||||
params.row = this._rowIndex;
|
||||
params.cell = this._cellIndex;
|
||||
params.judgment = judgment;
|
||||
params.identifierSpace = (this._cell.r) ? this._cell.r.identifierSpace : null;
|
||||
params.schemaSpace = (this._cell.r) ? this._cell.r.schemaSpace : null;
|
||||
this._postProcessOneCell("recon-judge-one-cell", params, true);
|
||||
};
|
||||
|
||||
@ -203,7 +205,8 @@ DataTableCellUI.prototype._doJudgmentForSimilarCells = function(judgment, params
|
||||
params.columnName = Gridworks.cellIndexToColumn(this._cellIndex).name;
|
||||
params.similarValue = this._cell.v;
|
||||
params.judgment = judgment;
|
||||
|
||||
params.identifierSpace = (this._cell.r) ? this._cell.r.identifierSpace : null;
|
||||
params.schemaSpace = (this._cell.r) ? this._cell.r.schemaSpace : null;
|
||||
this._postProcessSeveralCells("recon-judge-similar-cells", params, true);
|
||||
};
|
||||
|
||||
|
@ -704,60 +704,7 @@ DataTableColumnHeaderUI.prototype._doTextTransformPrompt = function() {
|
||||
};
|
||||
|
||||
/**
 * Opens the reconciliation dialog for this header's column.
 *
 * The dialog is constructed exactly once, synchronously, from the column
 * alone. No type guessing is requested here and no busy indicator is shown.
 * The previous flow posted to guess-types-of-column, then queried a
 * hard-coded api.freebase.com MQL endpoint, and opened a ReconDialog from
 * its callback in addition to the one created below, so two dialogs
 * appeared for a single request.
 */
DataTableColumnHeaderUI.prototype._doReconcile = function() {
    new ReconDialog(this._column);
};
|
||||
|
||||
DataTableColumnHeaderUI.prototype._doReconDiscardJudgments = function() {
|
||||
|
@ -1,21 +1,47 @@
|
||||
/*
 * Styles for the reconciliation dialog.
 *
 * Every rule below is a complete, balanced rule set. Selector openers that
 * had no closing brace of their own were removed so they no longer swallow
 * the rules that follow them.
 */

.recon-dialog-service-header {
    padding: 5px 10px;
    font-weight: bold;
    font-size: 120%;
}

/* Fixed 200x400 box; content scrolls when it overflows. */
.recon-dialog-service-list {
    border: 1px solid #aaa;
    padding: 1px;
    overflow: auto;
    width: 200px;
    height: 400px;
}

.recon-dialog-service-controls {
    padding: 5px 0px;
}

/* Each direct-child button takes a full row of the controls area. */
.recon-dialog-service-controls > button {
    display: block;
    width: 100%;
}

/* Selector links are rendered as padded blocks without link decoration. */
a.recon-dialog-service-selector {
    display: block;
    padding: 10px;
    text-decoration: none;
    color: black;
}

a.recon-dialog-service-selector:hover {
    background: #eee;
}

/* Same background as hover, plus bold text. */
a.recon-dialog-service-selector.selected {
    background: #eee;
    font-weight: bold;
}

.recon-dialog-standard-service-panel .type-id {
    color: #888;
}

.recon-dialog-heuristic-types-container {
    border: 1px solid #ccc;
    padding: 10px;
    max-height: 200px;
    overflow: auto;
}

.recon-dialog-heuristic-other-type-container {
    margin: 10px;
}

/*
 * Both containers are pinned to 300px and scroll on overflow. A separate
 * max-height of 300px would be redundant next to the !important height,
 * so only the height is declared.
 */
.recon-dialog-standard-service-panel .type-container,
.recon-dialog-standard-service-panel .detail-container {
    border: 1px solid #ccc;
    padding: 10px;
    height: 300px !important;
    overflow: auto;
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user