From e61655506a19161418adead9f797597d8b06be02 Mon Sep 17 00:00:00 2001 From: David Huynh Date: Fri, 13 Aug 2010 16:26:33 +0000 Subject: [PATCH] Added new command to import QA results, so any reconciliation action that yields conflicting or uncertain opinions among reviewers can be examined inside Gridworks. Added new customized facets for checking QA results. git-svn-id: http://google-refine.googlecode.com/svn/trunk@1156 7d457c2a-affb-35e4-300a-418c747d4874 --- .../google/gridworks/GridworksServlet.java | 1 + .../freebase/ImportQADataCommand.java | 36 ++++++ .../commands/freebase/UploadDataCommand.java | 2 +- .../src/com/google/gridworks/model/Recon.java | 33 ++++-- .../model/changes/MassCellChange.java | 4 +- .../model/changes/MassReconChange.java | 111 ++++++++++++++++++ .../operations/OperationRegistry.java | 3 + .../recon/ImportQADataOperation.java | 106 +++++++++++++++++ main/src/com/google/gridworks/util/Pool.java | 5 +- .../modules/core/scripts/project/menu-bar.js | 13 ++ .../views/data-table/menu-reconcile.js | 44 +++++++ 11 files changed, 348 insertions(+), 10 deletions(-) create mode 100644 main/src/com/google/gridworks/commands/freebase/ImportQADataCommand.java create mode 100644 main/src/com/google/gridworks/model/changes/MassReconChange.java create mode 100644 main/src/com/google/gridworks/operations/recon/ImportQADataOperation.java diff --git a/main/src/com/google/gridworks/GridworksServlet.java b/main/src/com/google/gridworks/GridworksServlet.java index 442ec6c79..a4ce74207 100644 --- a/main/src/com/google/gridworks/GridworksServlet.java +++ b/main/src/com/google/gridworks/GridworksServlet.java @@ -116,6 +116,7 @@ public class GridworksServlet extends Butterfly { {"user-badges", "com.google.gridworks.commands.auth.GetUserBadgesCommand"}, {"upload-data", "com.google.gridworks.commands.freebase.UploadDataCommand"}, + {"import-qa-data", "com.google.gridworks.commands.freebase.ImportQADataCommand"}, {"mqlread", "com.google.gridworks.commands.freebase.MQLReadCommand"}, {"mqlwrite", "com.google.gridworks.commands.freebase.MQLWriteCommand"}, diff --git a/main/src/com/google/gridworks/commands/freebase/ImportQADataCommand.java b/main/src/com/google/gridworks/commands/freebase/ImportQADataCommand.java new file mode 100644 index 000000000..abc7e599f --- /dev/null +++ b/main/src/com/google/gridworks/commands/freebase/ImportQADataCommand.java @@ -0,0 +1,36 @@ +package com.google.gridworks.commands.freebase; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.gridworks.ProjectManager; +import com.google.gridworks.commands.Command; +import com.google.gridworks.model.AbstractOperation; +import com.google.gridworks.model.Project; +import com.google.gridworks.operations.recon.ImportQADataOperation; +import com.google.gridworks.process.Process; + +public class ImportQADataCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + ProjectManager.singleton.setBusy(true); + try { + Project project = getProject(request); + + AbstractOperation op = new ImportQADataOperation(); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } finally { + ProjectManager.singleton.setBusy(false); + } + } +} diff --git a/main/src/com/google/gridworks/commands/freebase/UploadDataCommand.java b/main/src/com/google/gridworks/commands/freebase/UploadDataCommand.java index bcebcddef..22958f33b 100644 --- a/main/src/com/google/gridworks/commands/freebase/UploadDataCommand.java +++ b/main/src/com/google/gridworks/commands/freebase/UploadDataCommand.java @@ -21,7 +21,7 @@ import com.google.gridworks.util.FreebaseUtils; import com.google.gridworks.util.ParsingUtilities; public class UploadDataCommand extends Command { - final static protected String s_dataLoadJobIDPref = "core/freebaseDataLoadJobID"; + final static public String s_dataLoadJobIDPref = "core/freebaseDataLoadJobID"; @Override public void doPost(HttpServletRequest request, HttpServletResponse response) diff --git a/main/src/com/google/gridworks/model/Recon.java b/main/src/com/google/gridworks/model/Recon.java index 092b1ad4e..6834a6ded 100644 --- a/main/src/com/google/gridworks/model/Recon.java +++ b/main/src/com/google/gridworks/model/Recon.java @@ -48,7 +48,8 @@ public class Recon implements HasFields, Jsonizable { static final public int Feature_nameMatch = 1; static final public int Feature_nameLevenshtein = 2; static final public int Feature_nameWordDistance = 3; - static final public int Feature_max = 4; + static final public int Feature_qaResult = 4; + static final public int Feature_max = 5; static final protected Map s_featureMap = new HashMap(); static { @@ -56,6 +57,7 @@ public class Recon implements HasFields, Jsonizable { s_featureMap.put("nameMatch", Feature_nameMatch); s_featureMap.put("nameLevenshtein", Feature_nameLevenshtein); s_featureMap.put("nameWordDistance", Feature_nameWordDistance); + s_featureMap.put("qaResult", Feature_qaResult); } final public long id; @@ -93,9 +95,25 @@ public class Recon implements HasFields, Jsonizable { this.judgmentHistoryEntry = judgmentHistoryEntry; } + public Recon dup() { + Recon r = new Recon(id, judgmentHistoryEntry); + r.identifierSpace = identifierSpace; + r.schemaSpace = schemaSpace; + + copyTo(r); + + return r; + } + public Recon dup(long judgmentHistoryEntry) { Recon r = new Recon(judgmentHistoryEntry, identifierSpace, schemaSpace); + copyTo(r); + + return r; + } + + protected void copyTo(Recon r) { System.arraycopy(features, 0, r.features, 0, features.length); if (candidates != null) { @@ -111,8 +129,6 @@ public class Recon implements HasFields, Jsonizable { r.match = match; r.matchRank = matchRank; - - return r; } public void addCandidate(ReconCandidate candidate) { @@ -213,14 +229,14 @@ public class Recon implements HasFields, Jsonizable { writer.object(); writer.key("id"); writer.value(id); - writer.key("service"); writer.value(service); - writer.key("identifierSpace"); writer.value(identifierSpace); - writer.key("schemaSpace"); writer.value(schemaSpace); - if (saveMode) { writer.key("judgmentHistoryEntry"); writer.value(judgmentHistoryEntry); } + writer.key("service"); writer.value(service); + writer.key("identifierSpace"); writer.value(identifierSpace); + writer.key("schemaSpace"); writer.value(schemaSpace); + writer.key("j"); writer.value(judgmentToString()); if (match != null) { writer.key("m"); @@ -283,6 +299,9 @@ public class Recon implements HasFields, Jsonizable { id = jp.getLongValue(); } else if ("judgmentHistoryEntry".equals(fieldName)) { judgmentHistoryEntry = jp.getLongValue(); + if (recon != null) { + recon.judgmentHistoryEntry = judgmentHistoryEntry; + } } else { if (recon == null) { recon = new Recon(id, judgmentHistoryEntry); diff --git a/main/src/com/google/gridworks/model/changes/MassCellChange.java b/main/src/com/google/gridworks/model/changes/MassCellChange.java index 8c8d023a7..f0ac8b4ed 100644 --- a/main/src/com/google/gridworks/model/changes/MassCellChange.java +++ b/main/src/com/google/gridworks/model/changes/MassCellChange.java @@ -87,7 +87,9 @@ public class MassCellChange implements Change { } public void save(Writer writer, Properties options) throws IOException { - writer.write("commonColumnName="); writer.write(_commonColumnName); writer.write('\n'); + if (_commonColumnName != null) { + writer.write("commonColumnName="); writer.write(_commonColumnName); writer.write('\n'); + } writer.write("updateRowContextDependencies="); writer.write(Boolean.toString(_updateRowContextDependencies)); writer.write('\n'); writer.write("cellChangeCount="); writer.write(Integer.toString(_cellChanges.length)); writer.write('\n'); for (CellChange c : _cellChanges) { diff --git a/main/src/com/google/gridworks/model/changes/MassReconChange.java b/main/src/com/google/gridworks/model/changes/MassReconChange.java new file mode 100644 index 000000000..5d391dd24 --- /dev/null +++ b/main/src/com/google/gridworks/model/changes/MassReconChange.java @@ -0,0 +1,111 @@ +package com.google.gridworks.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.gridworks.history.Change; +import com.google.gridworks.model.Cell; +import com.google.gridworks.model.Project; +import com.google.gridworks.model.Recon; +import com.google.gridworks.model.Row; +import com.google.gridworks.util.Pool; + +public class MassReconChange implements Change { + final protected Map _newRecons; + final protected Map _oldRecons; + + public MassReconChange(Map newRecons, Map oldRecons) { + _newRecons = newRecons; + _oldRecons = oldRecons; + } + + public void apply(Project project) { + switchRecons(project, _newRecons); + } + + public void revert(Project project) { + switchRecons(project, _oldRecons); + } + + protected void switchRecons(Project project, Map reconMap) { + synchronized (project) { + for (int r = 0; r < project.rows.size(); r++) { + Row row = project.rows.get(r); + + for (int c = 0; c < row.cells.size(); c++) { + Cell cell = row.cells.get(c); + if (cell != null && cell.recon != null) { + Recon recon = cell.recon; + + if (reconMap.containsKey(recon.id)) { + row.setCell(c, new Cell(cell.value, reconMap.get(recon.id))); + } + } + } + } + } + } + + public void save(Writer writer, Properties options) throws IOException { + writeRecons(writer, options, _oldRecons, "oldReconCount"); + writeRecons(writer, options, _newRecons, "newReconCount"); + writer.write("/ec/\n"); // end of change marker + } + + protected void writeRecons(Writer writer, Properties options, Map recons, String key) throws IOException { + writer.write(key + "="); writer.write(Integer.toString(recons.size())); writer.write('\n'); + for (Recon recon : recons.values()) { + Pool pool = (Pool) options.get("pool"); + pool.poolReconCandidates(recon); + + JSONWriter jsonWriter = new JSONWriter(writer); + try { + recon.write(jsonWriter, options); + } catch (JSONException e) { + e.printStackTrace(); + } + writer.write("\n"); + } + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + Map oldRecons = new HashMap(); + Map newRecons = new HashMap(); + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("oldReconCount".equals(field)) { + loadRecons(reader, pool, oldRecons, value); + } else if ("newReconCount".equals(field)) { + loadRecons(reader, pool, newRecons, value); + } + } + + MassReconChange change = new MassReconChange(newRecons, oldRecons); + + return change; + } + + static protected void loadRecons(LineNumberReader reader, Pool pool, Map recons, String countString) throws Exception { + int count = Integer.parseInt(countString); + + for (int i = 0; i < count; i++) { + String line = reader.readLine(); + Recon recon = Recon.loadStreaming(line, pool); + + recons.put(recon.id, recon); + } + } +} + diff --git a/main/src/com/google/gridworks/operations/OperationRegistry.java b/main/src/com/google/gridworks/operations/OperationRegistry.java index 844ed46af..001eb219d 100644 --- a/main/src/com/google/gridworks/operations/OperationRegistry.java +++ b/main/src/com/google/gridworks/operations/OperationRegistry.java @@ -21,6 +21,7 @@ import com.google.gridworks.operations.column.ColumnRemovalOperation; import com.google.gridworks.operations.column.ColumnRenameOperation; import com.google.gridworks.operations.column.ColumnSplitOperation; import com.google.gridworks.operations.column.ExtendDataOperation; +import com.google.gridworks.operations.recon.ImportQADataOperation; import com.google.gridworks.operations.recon.ReconDiscardJudgmentsOperation; import com.google.gridworks.operations.recon.ReconJudgeSimilarCellsOperation; import com.google.gridworks.operations.recon.ReconMarkNewTopicsOperation; @@ -73,6 +74,8 @@ public abstract class OperationRegistry { register("text-transform", TextTransformOperation.class); register("mass-edit", MassEditOperation.class); + register("import-qa-data", ImportQADataOperation.class); + register("denormalize", DenormalizeOperation.class); } diff --git a/main/src/com/google/gridworks/operations/recon/ImportQADataOperation.java b/main/src/com/google/gridworks/operations/recon/ImportQADataOperation.java new file mode 100644 index 000000000..abe2f2985 --- /dev/null +++ b/main/src/com/google/gridworks/operations/recon/ImportQADataOperation.java @@ -0,0 +1,106 @@ +package com.google.gridworks.operations.recon; + +import java.io.InputStreamReader; +import java.io.LineNumberReader; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.gridworks.commands.freebase.UploadDataCommand; +import com.google.gridworks.history.HistoryEntry; +import com.google.gridworks.model.AbstractOperation; +import com.google.gridworks.model.Cell; +import com.google.gridworks.model.Project; +import com.google.gridworks.model.Recon; +import com.google.gridworks.model.Row; +import com.google.gridworks.model.changes.MassReconChange; +import com.google.gridworks.operations.OperationRegistry; +import com.google.gridworks.util.ParsingUtilities; + +public class ImportQADataOperation extends AbstractOperation { + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new ImportQADataOperation(); + } + + public ImportQADataOperation() { + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.endObject(); + } + + @Override + protected String getBriefDescription(Project project) { + return "Import QA DAta"; + } + + @Override + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Integer jobID = (Integer) project.getMetadata().getPreferenceStore().get(UploadDataCommand.s_dataLoadJobIDPref); + if (jobID == null) { + throw new InternalError("Project is not associated with any data loading job."); + } + + Map reconIDToResult = new HashMap(); + + URL url = new URL("http://gridworks-loads.dfhuynh.user.dev.freebaseapps.com/get_answers/" + jobID); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setReadTimeout(30000); // 30 seconds + + LineNumberReader reader = new LineNumberReader(new InputStreamReader(conn.getInputStream())); + try { + String line; + while ((line = reader.readLine()) != null) { + JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(line); + long reconID = Long.parseLong(obj.getString("recon_id").substring(3)); + + reconIDToResult.put(reconID, obj.getString("result")); + } + } finally { + reader.close(); + } + + Map oldRecons = new HashMap(); + Map newRecons = new HashMap(); + + for (int r = 0; r < project.rows.size(); r++) { + Row row = project.rows.get(r); + + for (int c = 0; c < row.cells.size(); c++) { + Cell cell = row.cells.get(c); + if (cell != null && cell.recon != null) { + Recon oldRecon = cell.recon; + + if (reconIDToResult.containsKey(oldRecon.id)) { + Recon newRecon = oldRecon.dup(); + newRecon.setFeature(Recon.Feature_qaResult, reconIDToResult.get(oldRecon.id)); + + reconIDToResult.remove(oldRecon.id); + + oldRecons.put(oldRecon.id, oldRecon); + newRecons.put(oldRecon.id, newRecon); + } + } + } + } + + return new HistoryEntry( + historyEntryID, + project, + getBriefDescription(project), + this, + new MassReconChange(newRecons, oldRecons) + ); + } +} diff --git a/main/src/com/google/gridworks/util/Pool.java b/main/src/com/google/gridworks/util/Pool.java index 54bcad7ab..780a26d99 100644 --- a/main/src/com/google/gridworks/util/Pool.java +++ b/main/src/com/google/gridworks/util/Pool.java @@ -30,7 +30,10 @@ public class Pool implements Jsonizable { public void pool(Recon recon) { recons.put(Long.toString(recon.id), recon); - + poolReconCandidates(recon); + } + + public void poolReconCandidates(Recon recon) { if (recon.match != null) { pool(recon.match); } diff --git a/main/webapp/modules/core/scripts/project/menu-bar.js b/main/webapp/modules/core/scripts/project/menu-bar.js index 9af9e8105..1e8dadfb2 100644 --- a/main/webapp/modules/core/scripts/project/menu-bar.js +++ b/main/webapp/modules/core/scripts/project/menu-bar.js @@ -100,6 +100,11 @@ MenuBar.MenuItems = [ "id" : "core/load-info-freebase", label: "Load into Freebase ...", click: function() { MenuBar.handlers.loadIntoFreebase(); } + }, + { + "id" : "core/import-qa-data", + label: "Import QA Data", + click: function() { MenuBar.handlers.importQAData(); } } ] } @@ -321,3 +326,11 @@ MenuBar.handlers.loadIntoFreebase = function() { new FreebaseLoadingDialog(); }; +MenuBar.handlers.importQAData = function() { + Gridworks.postProcess( + "import-qa-data", + {}, + {}, + { cellsChanged: true } + ); +}; diff --git a/main/webapp/modules/core/scripts/views/data-table/menu-reconcile.js b/main/webapp/modules/core/scripts/views/data-table/menu-reconcile.js index 3c2a9c23a..5caebd22e 100644 --- a/main/webapp/modules/core/scripts/views/data-table/menu-reconcile.js +++ b/main/webapp/modules/core/scripts/views/data-table/menu-reconcile.js @@ -210,6 +210,50 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) { } ] }, + { + label: "QA Facets", + submenu: [ + { + label: "QA Results", + click: function() { + ui.browsingEngine.addFacet( + "list", + { + "name" : column.name + " QA Results", + "columnName" : column.name, + "expression" : "cell.recon.features.qaResult" + } + ); + } + }, + { + label: "Judgment Actions", + click: function() { + ui.browsingEngine.addFacet( + "list", + { + "name" : column.name + " Judgment Actions", + "columnName" : column.name, + "expression" : "cell.recon.judgmentAction" + } + ); + } + }, + { + label: "Judgment History Entries", + click: function() { + ui.browsingEngine.addFacet( + "list", + { + "name" : column.name + " History Entries", + "columnName" : column.name, + "expression" : "cell.recon.judgmentHistoryEntry" + } + ); + } + } + ] + }, { label: "Actions", submenu: [