diff --git a/src/main/java/com/metaweb/gridworks/commands/util/GuessTypesOfColumnCommand.java b/src/main/java/com/metaweb/gridworks/commands/util/GuessTypesOfColumnCommand.java index 1fb5d05b0..8a0c2117d 100644 --- a/src/main/java/com/metaweb/gridworks/commands/util/GuessTypesOfColumnCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/util/GuessTypesOfColumnCommand.java @@ -57,6 +57,8 @@ public class GuessTypesOfColumnCommand extends Command { writer.object(); writer.key("id"); writer.value(tg.id); writer.key("name"); writer.value(tg.name); + writer.key("score"); writer.value(tg.score); + writer.key("count"); writer.value(tg.count); writer.endObject(); } @@ -72,12 +74,14 @@ public class GuessTypesOfColumnCommand extends Command { } } + final static int s_sampleSize = 20; + protected List guessTypes(Project project, Column column) { Map map = new HashMap(); int cellIndex = column.getCellIndex(); - List samples = new ArrayList(10); + List samples = new ArrayList(s_sampleSize); Set sampleSet = new HashSet(); for (Row row : project.rows) { @@ -87,7 +91,7 @@ public class GuessTypesOfColumnCommand extends Command { if (!sampleSet.contains(s)) { samples.add(s); sampleSet.add(s); - if (samples.size() >= 10) { + if (samples.size() >= s_sampleSize) { break; } } @@ -105,8 +109,6 @@ public class GuessTypesOfColumnCommand extends Command { jsonWriter.key("query"); jsonWriter.value(samples.get(i)); jsonWriter.key("limit"); jsonWriter.value(3); - jsonWriter.key("type_exclude"); jsonWriter.value("/common/image"); - jsonWriter.key("domain_exclude"); jsonWriter.value("/freebase"); jsonWriter.endObject(); } @@ -142,7 +144,7 @@ public class GuessTypesOfColumnCommand extends Command { for (int j = 0; j < count; j++) { JSONObject result = results.getJSONObject(j); - double score = result.getDouble("relevance:score"); + double score = 1.0 / (1 + j); //result.getDouble("relevance:score"); JSONArray types = result.getJSONArray("type"); int typeCount = types.length(); @@ -151,14 +153,18 @@ public class GuessTypesOfColumnCommand extends Command { JSONObject type = types.getJSONObject(t); String id = type.getString("id"); if (id.equals("/common/topic") || + id.equals("/base/ontologies/ontology_instance") || (id.startsWith("/base/") && id.endsWith("/topic")) || - id.startsWith("/user/") + id.startsWith("/user/") || + id.startsWith("/freebase/") ) { continue; } if (map.containsKey(id)) { - map.get(id).score += score; + TypeGroup tg = map.get(id); + tg.score += score; + tg.count++; } else { map.put(id, new TypeGroup(id, type.getString("name"), score)); } @@ -175,7 +181,11 @@ public class GuessTypesOfColumnCommand extends Command { List types = new ArrayList(map.values()); Collections.sort(types, new Comparator() { public int compare(TypeGroup o1, TypeGroup o2) { - return (int) Math.signum(o2.score - o1.score); + int c = Math.min(s_sampleSize, o2.count) - Math.min(s_sampleSize, o1.count); + if (c != 0) { + return c; + } + return (int) Math.signum(o2.score / o2.count - o1.score / o1.count); } }); @@ -185,12 +195,14 @@ public class GuessTypesOfColumnCommand extends Command { static protected class TypeGroup { String id; String name; + int count; double score; TypeGroup(String id, String name, double score) { this.id = id; this.name = name; this.score = score; + this.count = 1; } } } diff --git a/src/main/webapp/scripts/project/data-table-column-header-ui.js b/src/main/webapp/scripts/project/data-table-column-header-ui.js index 218ef0209..1beb9a7b2 100644 --- a/src/main/webapp/scripts/project/data-table-column-header-ui.js +++ b/src/main/webapp/scripts/project/data-table-column-header-ui.js @@ -393,8 +393,45 @@ DataTableColumnHeaderUI.prototype._doReconcile = function() { "/command/guess-types-of-column?" + $.param({ project: theProject.id, columnName: this._column.headerLabel }), null, function(data) { - dismissBusy(); - new ReconDialog(self._column, data.code == "ok" ? data.types : []); + if (data.code != "ok") { + dismissBusy(); + new ReconDialog(self._column, []); + } else { + data.types = data.types.slice(0, 20); + + var ids = $.map(data.types, function(elmt) { return elmt.id; }); + var query = [{ + "id|=" : ids, + "id" : null, + "/freebase/type_profile/kind" : [] + }]; + $.getJSON( + "http://api.freebase.com/api/service/mqlread?" + $.param({ "query" : JSON.stringify({ "query" : query }) }) + "&callback=?", + null, + function(o) { + dismissBusy(); + + var kindMap = {}; + $.each(o.result, function() { + var m = kindMap[this.id] = {}; + $.each(this["/freebase/type_profile/kind"], function() { + m[this] = true; + }); + }); + + new ReconDialog(self._column, $.map(data.types, function(type) { + if (type.id in kindMap) { + var m = kindMap[type.id]; + if (!("Role" in m) && !("Annotation" in m)) { + return type; + } + } + return null; + })); + }, + "jsonp" + ); + } }, "json" ); diff --git a/src/main/webapp/scripts/project/recon-dialog.js b/src/main/webapp/scripts/project/recon-dialog.js index 976dbb648..dd6b6e750 100644 --- a/src/main/webapp/scripts/project/recon-dialog.js +++ b/src/main/webapp/scripts/project/recon-dialog.js @@ -7,13 +7,13 @@ function ReconDialog(column, types) { ReconDialog.prototype._createDialog = function() { var self = this; var frame = DialogSystem.createDialog(); - frame.width("400px"); + frame.width("500px"); var header = $('
').addClass("dialog-header").text("Reconcile column " + this._column.headerLabel).appendTo(frame); var body = $('
').addClass("dialog-body").appendTo(frame); var footer = $('
').addClass("dialog-footer").appendTo(frame); - $('

').text("Reconcile cell values to topics of type:").appendTo(body); + $('

').text("Reconcile cell values to Freebase topics of type:").appendTo(body); if (this._types.length > 0) { var createTypeChoice = function(type) { @@ -35,11 +35,14 @@ ReconDialog.prototype._createDialog = function() { .attr("value", "") .appendTo(divCustom); - $('').text(" Other:").appendTo(divCustom); + $('').text(" Other: ").appendTo(divCustom); + + var input = $('').appendTo(divCustom); + } else { + var input = $('').appendTo($('

').appendTo(body)); } var type = null; - var input = $('').appendTo($('

').appendTo(body)); input.suggest({ type : '/type/type' }).bind("fb-select", function(e, data) { type = { id: data.id, @@ -48,6 +51,11 @@ ReconDialog.prototype._createDialog = function() { $('input[name="recon-dialog-type-choice"][value=""]').attr("checked", "true"); }); + var optionDiv = $('

').appendTo(body); + var autoMatchCheckbox = $('').appendTo(optionDiv); + $('').text(" Auto-match correctly-typed candidates scoring at least ").appendTo(optionDiv); + var minScoreInput = $('').attr("value", "100").appendTo(optionDiv); + $('').text("Start Reconciling").click(function() { var choices = $('input[name="recon-dialog-type-choice"]:checked'); if (choices != null && choices.length > 0 && choices[0].value != "") { @@ -62,7 +70,14 @@ ReconDialog.prototype._createDialog = function() { } else { DialogSystem.dismissUntil(level - 1); $.post( - "/command/reconcile?" + $.param({ project: theProject.id, columnName: self._column.headerLabel, typeID: type.id, typeName: type.name }), + "/command/reconcile?" + $.param({ + project: theProject.id, + columnName: self._column.headerLabel, + typeID: type.id, + typeName: type.name, + autoMatch: autoMatchCheckbox[0].checked, + minScore: minScoreInput[0].value + }), { engine: JSON.stringify(ui.browsingEngine.getJSON()) }, function(data) { if (data.code != "error") {