diff --git a/src/main/java/com/metaweb/gridworks/GridworksServlet.java b/src/main/java/com/metaweb/gridworks/GridworksServlet.java
index f5d8f078d..8a5682ad9 100644
--- a/src/main/java/com/metaweb/gridworks/GridworksServlet.java
+++ b/src/main/java/com/metaweb/gridworks/GridworksServlet.java
@@ -39,6 +39,7 @@ import com.metaweb.gridworks.commands.recon.DiscardReconcileCommand;
 import com.metaweb.gridworks.commands.recon.JudgeOneCellCommand;
 import com.metaweb.gridworks.commands.recon.ReconcileCommand;
 import com.metaweb.gridworks.commands.util.GetExpressionLanguageInfoCommand;
+import com.metaweb.gridworks.commands.util.GuessTypesOfColumnCommand;
 import com.metaweb.gridworks.commands.util.PreviewExpressionCommand;
 import com.metaweb.gridworks.commands.util.PreviewProtographCommand;
 
@@ -82,6 +83,7 @@ public class GridworksServlet extends HttpServlet {
         _commands.put("preview-expression", new PreviewExpressionCommand());
         _commands.put("get-expression-language-info", new GetExpressionLanguageInfoCommand());
         _commands.put("preview-protograph", new PreviewProtographCommand());
+        _commands.put("guess-types-of-column", new GuessTypesOfColumnCommand());
     }
 
     @Override
diff --git a/src/main/java/com/metaweb/gridworks/commands/util/GuessTypesOfColumnCommand.java b/src/main/java/com/metaweb/gridworks/commands/util/GuessTypesOfColumnCommand.java
new file mode 100644
index 000000000..ffe52c39d
--- /dev/null
+++ b/src/main/java/com/metaweb/gridworks/commands/util/GuessTypesOfColumnCommand.java
@@ -0,0 +1,255 @@
+package com.metaweb.gridworks.commands.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+import com.metaweb.gridworks.commands.Command;
+import com.metaweb.gridworks.expr.ExpressionUtils;
+import com.metaweb.gridworks.model.Column;
+import com.metaweb.gridworks.model.Project;
+import com.metaweb.gridworks.model.Row;
+import com.metaweb.gridworks.util.ParsingUtilities;
+
+/**
+ * Suggests Freebase types for a column: samples a few non-blank cells, sends
+ * each sample to the Freebase search service, and ranks the types of the
+ * returned topics by their accumulated relevance score.
+ */
+public class GuessTypesOfColumnCommand extends Command {
+    /** Maximum number of non-blank cell values used as search queries. */
+    private static final int SAMPLE_LIMIT = 10;
+
+    /** Number of search results requested per sample. */
+    private static final int RESULTS_PER_SAMPLE = 3;
+
+    /** Milliseconds allowed for connecting to the search service. */
+    private static final int CONNECT_TIMEOUT_MS = 5000;
+
+    /** Milliseconds allowed per read, so a stalled reply cannot block forever. */
+    private static final int READ_TIMEOUT_MS = 10000;
+
+    /** Search service endpoint; the URL-encoded query batch is appended to it. */
+    private static final String SEARCH_URL_PREFIX =
+        "http://api.freebase.com/api/service/search?indent=1&queries=";
+
+    /**
+     * Writes a JSON reply of the form {"code":"ok","types":[{"id":..,"name":..}]},
+     * or {"code":"error","message":"No such column"} when the "columnName"
+     * request parameter does not name a column of the project.
+     */
+    @Override
+    public void doPost(HttpServletRequest request, HttpServletResponse response)
+            throws ServletException, IOException {
+
+        try {
+            Project project = getProject(request);
+            String columnName = request.getParameter("columnName");
+            Column column = project.columnModel.getColumnByName(columnName);
+
+            // Guess before writing anything: once "code":"ok" has been emitted
+            // the reply can no longer be turned into a well-formed error.
+            List<TypeGroup> typeGroups =
+                (column == null) ? null : guessTypes(project, column);
+
+            // Type names may contain non-ASCII characters.
+            response.setCharacterEncoding("UTF-8");
+            response.setHeader("Content-Type", "application/json");
+
+            JSONWriter writer = new JSONWriter(response.getWriter());
+            writer.object();
+            if (column == null) {
+                writer.key("code"); writer.value("error");
+                writer.key("message"); writer.value("No such column");
+            } else {
+                writer.key("code"); writer.value("ok");
+                writer.key("types"); writer.array();
+                for (TypeGroup tg : typeGroups) {
+                    writer.object();
+                    writer.key("id"); writer.value(tg.id);
+                    writer.key("name"); writer.value(tg.name);
+                    writer.endObject();
+                }
+                writer.endArray();
+            }
+            writer.endObject();
+        } catch (Exception e) {
+            respondException(response, e);
+        }
+    }
+
+    /**
+     * Guesses the types of a column's cells.
+     *
+     * @param project the project whose rows are sampled
+     * @param column the column whose cells are sampled
+     * @return the candidate types, highest accumulated score first; empty or
+     *         partial when the search service cannot be reached or its reply
+     *         cannot be processed
+     */
+    protected List<TypeGroup> guessTypes(Project project, Column column) {
+        Map<String, TypeGroup> groupsById = new HashMap<String, TypeGroup>();
+
+        List<String> samples = collectSamples(project, column);
+        if (!samples.isEmpty()) { // no samples, nothing to ask the service
+            try {
+                JSONObject reply = fetchSearchResults(samples);
+                for (int i = 0; i < samples.size(); i++) {
+                    accumulateTypes(reply, queryKey(i), groupsById);
+                }
+            } catch (Exception e) {
+                // Best effort: the guesses are only suggestions, so a failed
+                // lookup returns whatever was gathered instead of an error.
+                e.printStackTrace();
+            }
+        }
+
+        List<TypeGroup> types = new ArrayList<TypeGroup>(groupsById.values());
+        Collections.sort(types, new Comparator<TypeGroup>() {
+            public int compare(TypeGroup o1, TypeGroup o2) {
+                return Double.compare(o2.score, o1.score);
+            }
+        });
+
+        return types;
+    }
+
+    /** Returns up to SAMPLE_LIMIT non-blank values of the column, in row order. */
+    private static List<String> collectSamples(Project project, Column column) {
+        int cellIndex = column.getCellIndex();
+
+        List<String> samples = new ArrayList<String>(SAMPLE_LIMIT);
+        for (Row row : project.rows) {
+            Object value = row.getCellValue(cellIndex);
+            if (!ExpressionUtils.isBlank(value)) {
+                samples.add(value.toString());
+                if (samples.size() >= SAMPLE_LIMIT) {
+                    break;
+                }
+            }
+        }
+        return samples;
+    }
+
+    /** Returns the key under which the i-th sample is queried and answered. */
+    private static String queryKey(int i) {
+        return "q" + i + ":search";
+    }
+
+    /**
+     * Sends all samples to the search service in one request and returns the
+     * parsed JSON reply. Declared to throw Exception because the exceptions
+     * declared by the ParsingUtilities helpers are not known here.
+     */
+    private static JSONObject fetchSearchResults(List<String> samples) throws Exception {
+        StringWriter stringWriter = new StringWriter();
+        JSONWriter jsonWriter = new JSONWriter(stringWriter);
+
+        jsonWriter.object();
+        for (int i = 0; i < samples.size(); i++) {
+            jsonWriter.key(queryKey(i));
+            jsonWriter.object();
+
+            jsonWriter.key("query"); jsonWriter.value(samples.get(i));
+            jsonWriter.key("limit"); jsonWriter.value(RESULTS_PER_SAMPLE);
+            jsonWriter.key("type_exclude"); jsonWriter.value("/common/image");
+            jsonWriter.key("domain_exclude"); jsonWriter.value("/freebase");
+
+            jsonWriter.endObject();
+        }
+        jsonWriter.endObject();
+
+        URL url = new URL(SEARCH_URL_PREFIX + ParsingUtilities.encode(stringWriter.toString()));
+        URLConnection connection = url.openConnection();
+        connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
+        connection.setReadTimeout(READ_TIMEOUT_MS);
+        connection.connect();
+
+        InputStream is = connection.getInputStream();
+        try {
+            String s = ParsingUtilities.inputStreamToString(is);
+            return ParsingUtilities.evaluateJsonStringToObject(s);
+        } finally {
+            is.close();
+        }
+    }
+
+    /**
+     * Adds the relevance score of every result found under the given key to
+     * each of that result's types, creating groups for types not seen before.
+     */
+    private static void accumulateTypes(
+            JSONObject reply, String key, Map<String, TypeGroup> groupsById)
+            throws JSONException {
+        if (!reply.has(key)) {
+            return;
+        }
+
+        JSONObject answer = reply.getJSONObject(key);
+        if (!answer.has("result")) {
+            return;
+        }
+
+        JSONArray results = answer.getJSONArray("result");
+        for (int j = 0; j < results.length(); j++) {
+            JSONObject result = results.getJSONObject(j);
+            double score = result.getDouble("relevance:score");
+
+            JSONArray types = result.getJSONArray("type");
+            for (int t = 0; t < types.length(); t++) {
+                JSONObject type = types.getJSONObject(t);
+                String id = type.getString("id");
+                if (isExcludedType(id)) {
+                    continue;
+                }
+
+                TypeGroup group = groupsById.get(id);
+                if (group == null) {
+                    groupsById.put(id, new TypeGroup(id, type.getString("name"), score));
+                } else {
+                    group.score += score;
+                }
+            }
+        }
+    }
+
+    /**
+     * Tells whether a type id is left out of the guesses: /common/topic, any
+     * /base/.../topic, and everything under /user/.
+     */
+    private static boolean isExcludedType(String id) {
+        return id.equals("/common/topic")
+            || (id.startsWith("/base/") && id.endsWith("/topic"))
+            || id.startsWith("/user/");
+    }
+
+    /** A candidate type together with the relevance score accumulated for it. */
+    static protected class TypeGroup {
+        String id;
+        String name;
+        double score;
+
+        TypeGroup(String id, String name, double score) {
+            this.id = id;
+            this.name = name;
+            this.score = score;
+        }
+    }
+}
diff --git a/src/main/webapp/scripts/project/data-table-column-header-ui.js b/src/main/webapp/scripts/project/data-table-column-header-ui.js
index 255d71246..a4b51b2f7 100644
--- a/src/main/webapp/scripts/project/data-table-column-header-ui.js
+++ b/src/main/webapp/scripts/project/data-table-column-header-ui.js
@@ -187,7 +187,7 @@ DataTableColumnHeaderUI.prototype._createMenuForColumnHeader = function(elmt) {
                 label: "Start Reconciling ...",
                 tooltip: "Reconcile text in this column with topics on Freebase",
                 click: function() {
-                    new ReconDialog(self._column);
+                    self._doReconcile();
                 }
             },
             {},
@@ -375,6 +375,19 @@ DataTableColumnHeaderUI.prototype._doTextTransformPrompt = function() {
     );
 };
 
+DataTableColumnHeaderUI.prototype._doReconcile = function() {
+    var self = this;
+    $.post(
+        "/command/guess-types-of-column?" + $.param({ project: theProject.id, columnName: this._column.headerLabel }),
+        null,
+        function(data) {
+            new ReconDialog(self._column, data.code == "ok" ? 
data.types : []); + }, + "json" + ); + +}; + DataTableColumnHeaderUI.prototype._doDiscardReconResults = function() { this._dataTableView.doPostThenUpdate( "discard-reconcile", diff --git a/src/main/webapp/scripts/project/recon-dialog.js b/src/main/webapp/scripts/project/recon-dialog.js index ce0b0b85c..d102aee4c 100644 --- a/src/main/webapp/scripts/project/recon-dialog.js +++ b/src/main/webapp/scripts/project/recon-dialog.js @@ -1,5 +1,6 @@ -function ReconDialog(column) { +function ReconDialog(column, types) { this._column = column; + this._types = types; this._createDialog(); } @@ -14,26 +15,58 @@ ReconDialog.prototype._createDialog = function() { $('

').text("Reconcile cell values to topics of type:").appendTo(body); + if (this._types.length > 0) { + var createTypeChoice = function(type) { + var div = $('
').appendTo(body); + $('') + .attr("value", type.id) + .appendTo(div); + + $('').text(" " + type.name).appendTo(div); + $('').text(" (" + type.id + ")").appendTo(div); + }; + for (var i = 0; i < this._types.length && i < 7; i++) { + createTypeChoice(this._types[i]); + } + + var divCustom = $('
').appendTo(body); + $('') + .attr("value", "") + .appendTo(divCustom); + + $('').text(" Other:").appendTo(divCustom); + } + var type = null; var input = $('').appendTo($('

').appendTo(body)); input.suggest({ type : '/type/type' }).bind("fb-select", function(e, data) { type = data.id; + $('input[name="recon-dialog-type-choice"][value=""]').attr("checked", "true"); }); $('').text("Start Reconciling").click(function() { - DialogSystem.dismissUntil(level - 1); - $.post( - "/command/reconcile?" + $.param({ project: theProject.id, columnName: self._column.headerLabel, type: type }), - { engine: JSON.stringify(ui.browsingEngine.getJSON()) }, - function(data) { - if (data.code != "error") { - ui.processWidget.update(); - } else { - alert(data.message); - } - }, - "json" - ); + var choices = $('input[name="recon-dialog-type-choice"]:checked'); + if (choices != null && choices.length > 0 && choices[0].value != "") { + type = choices[0].value; + } + + if (type == null) { + alert("Please specify a type."); + } else { + DialogSystem.dismissUntil(level - 1); + $.post( + "/command/reconcile?" + $.param({ project: theProject.id, columnName: self._column.headerLabel, type: type }), + { engine: JSON.stringify(ui.browsingEngine.getJSON()) }, + function(data) { + if (data.code != "error") { + ui.processWidget.update(); + } else { + alert(data.message); + } + }, + "json" + ); + } }).appendTo(footer); $('').text("Cancel").click(function() { @@ -44,3 +77,4 @@ ReconDialog.prototype._createDialog = function() { input[0].focus(); }; +