Added new command to import QA results, so any reconciliation action that yields conflicting or uncertain opinions among reviewers can be examined inside Gridworks.

Added new customized facets for checking QA results. 

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1156 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-08-13 16:26:33 +00:00
parent 096caeac7b
commit e61655506a
11 changed files with 348 additions and 10 deletions

View File

@ -116,6 +116,7 @@ public class GridworksServlet extends Butterfly {
{"user-badges", "com.google.gridworks.commands.auth.GetUserBadgesCommand"},
{"upload-data", "com.google.gridworks.commands.freebase.UploadDataCommand"},
{"import-qa-data", "com.google.gridworks.commands.freebase.ImportQADataCommand"},
{"mqlread", "com.google.gridworks.commands.freebase.MQLReadCommand"},
{"mqlwrite", "com.google.gridworks.commands.freebase.MQLWriteCommand"},

View File

@ -0,0 +1,36 @@
package com.google.gridworks.commands.freebase;
import java.io.IOException;
import java.util.Properties;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import com.google.gridworks.ProjectManager;
import com.google.gridworks.commands.Command;
import com.google.gridworks.model.AbstractOperation;
import com.google.gridworks.model.Project;
import com.google.gridworks.operations.recon.ImportQADataOperation;
import com.google.gridworks.process.Process;
/**
 * Command that runs an {@link ImportQADataOperation} against the project
 * identified by the request and reports the outcome to the client.
 */
public class ImportQADataCommand extends Command {

    /**
     * Builds the import operation's process for the requested project,
     * performs it and writes the response. The project manager is flagged
     * busy for the duration of the call and the flag is always cleared
     * afterwards; any exception raised on the way is reported through
     * {@code respondException}.
     */
    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {

        ProjectManager.singleton.setBusy(true);
        try {
            Project targetProject = getProject(request);

            AbstractOperation importOperation = new ImportQADataOperation();
            Process importProcess =
                importOperation.createProcess(targetProject, new Properties());

            performProcessAndRespond(request, response, targetProject, importProcess);
        } catch (Exception failure) {
            respondException(response, failure);
        } finally {
            // Clear the busy flag on every path, including failures.
            ProjectManager.singleton.setBusy(false);
        }
    }
}

View File

@ -21,7 +21,7 @@ import com.google.gridworks.util.FreebaseUtils;
import com.google.gridworks.util.ParsingUtilities;
public class UploadDataCommand extends Command {
final static protected String s_dataLoadJobIDPref = "core/freebaseDataLoadJobID";
final static public String s_dataLoadJobIDPref = "core/freebaseDataLoadJobID";
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response)

View File

@ -48,7 +48,8 @@ public class Recon implements HasFields, Jsonizable {
static final public int Feature_nameMatch = 1;
static final public int Feature_nameLevenshtein = 2;
static final public int Feature_nameWordDistance = 3;
static final public int Feature_max = 4;
static final public int Feature_qaResult = 4;
static final public int Feature_max = 5;
static final protected Map<String, Integer> s_featureMap = new HashMap<String, Integer>();
static {
@ -56,6 +57,7 @@ public class Recon implements HasFields, Jsonizable {
s_featureMap.put("nameMatch", Feature_nameMatch);
s_featureMap.put("nameLevenshtein", Feature_nameLevenshtein);
s_featureMap.put("nameWordDistance", Feature_nameWordDistance);
s_featureMap.put("qaResult", Feature_qaResult);
}
final public long id;
@ -93,9 +95,25 @@ public class Recon implements HasFields, Jsonizable {
this.judgmentHistoryEntry = judgmentHistoryEntry;
}
/**
 * Creates a copy of this recon that keeps the same id and the same
 * judgment history entry. The identifier and schema spaces are carried
 * over explicitly; the remaining state is transferred by
 * {@link #copyTo(Recon)}.
 *
 * @return the newly created copy
 */
public Recon dup() {
    Recon copy = new Recon(id, judgmentHistoryEntry);

    copy.identifierSpace = identifierSpace;
    copy.schemaSpace = schemaSpace;

    copyTo(copy);
    return copy;
}
/**
 * Creates a copy of this recon that is bound to the given judgment history
 * entry instead of this recon's own. The copy is built with the
 * (judgmentHistoryEntry, identifierSpace, schemaSpace) constructor and then
 * filled in by {@link #copyTo(Recon)}.
 * NOTE(review): presumably that constructor assigns a fresh id - confirm
 * against its definition.
 *
 * @param judgmentHistoryEntry history entry id to record on the copy
 * @return the newly created copy
 */
public Recon dup(long judgmentHistoryEntry) {
    Recon copy = new Recon(judgmentHistoryEntry, identifierSpace, schemaSpace);

    copyTo(copy);
    return copy;
}
protected void copyTo(Recon r) {
System.arraycopy(features, 0, r.features, 0, features.length);
if (candidates != null) {
@ -111,8 +129,6 @@ public class Recon implements HasFields, Jsonizable {
r.match = match;
r.matchRank = matchRank;
return r;
}
public void addCandidate(ReconCandidate candidate) {
@ -213,14 +229,14 @@ public class Recon implements HasFields, Jsonizable {
writer.object();
writer.key("id"); writer.value(id);
writer.key("service"); writer.value(service);
writer.key("identifierSpace"); writer.value(identifierSpace);
writer.key("schemaSpace"); writer.value(schemaSpace);
if (saveMode) {
writer.key("judgmentHistoryEntry"); writer.value(judgmentHistoryEntry);
}
writer.key("service"); writer.value(service);
writer.key("identifierSpace"); writer.value(identifierSpace);
writer.key("schemaSpace"); writer.value(schemaSpace);
writer.key("j"); writer.value(judgmentToString());
if (match != null) {
writer.key("m");
@ -283,6 +299,9 @@ public class Recon implements HasFields, Jsonizable {
id = jp.getLongValue();
} else if ("judgmentHistoryEntry".equals(fieldName)) {
judgmentHistoryEntry = jp.getLongValue();
if (recon != null) {
recon.judgmentHistoryEntry = judgmentHistoryEntry;
}
} else {
if (recon == null) {
recon = new Recon(id, judgmentHistoryEntry);

View File

@ -87,7 +87,9 @@ public class MassCellChange implements Change {
}
public void save(Writer writer, Properties options) throws IOException {
if (_commonColumnName != null) {
writer.write("commonColumnName="); writer.write(_commonColumnName); writer.write('\n');
}
writer.write("updateRowContextDependencies="); writer.write(Boolean.toString(_updateRowContextDependencies)); writer.write('\n');
writer.write("cellChangeCount="); writer.write(Integer.toString(_cellChanges.length)); writer.write('\n');
for (CellChange c : _cellChanges) {

View File

@ -0,0 +1,111 @@
package com.google.gridworks.model.changes;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Writer;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONWriter;
import com.google.gridworks.history.Change;
import com.google.gridworks.model.Cell;
import com.google.gridworks.model.Project;
import com.google.gridworks.model.Recon;
import com.google.gridworks.model.Row;
import com.google.gridworks.util.Pool;
/**
 * A history change that swaps the {@link Recon} attached to cells, keyed by
 * recon id. Applying the change installs the "new" recons; reverting it
 * installs the "old" ones.
 *
 * <p>Serialized form: an {@code oldReconCount=N} line followed by N lines of
 * recon JSON, a {@code newReconCount=M} line followed by M lines of recon
 * JSON, and finally the {@code /ec/} end-of-change marker.
 */
public class MassReconChange implements Change {
    /** Recons to install when the change is applied, keyed by recon id. */
    final protected Map<Long, Recon> _newRecons;
    /** Recons to restore when the change is reverted, keyed by recon id. */
    final protected Map<Long, Recon> _oldRecons;

    /**
     * @param newRecons recons to install on apply, keyed by recon id
     * @param oldRecons recons to restore on revert, keyed by recon id
     */
    public MassReconChange(Map<Long, Recon> newRecons, Map<Long, Recon> oldRecons) {
        _newRecons = newRecons;
        _oldRecons = oldRecons;
    }

    /** Installs the new recons on every cell whose recon id is in the map. */
    public void apply(Project project) {
        switchRecons(project, _newRecons);
    }

    /** Restores the old recons on every cell whose recon id is in the map. */
    public void revert(Project project) {
        switchRecons(project, _oldRecons);
    }

    /**
     * Walks every cell of the project and, for each cell whose current recon
     * id is a key of {@code reconMap}, replaces the cell with a new one that
     * keeps the value but carries the mapped recon. Runs while holding the
     * project's monitor.
     *
     * @param project  project whose cells are rewritten
     * @param reconMap replacement recons keyed by recon id
     */
    protected void switchRecons(Project project, Map<Long, Recon> reconMap) {
        synchronized (project) {
            for (int r = 0; r < project.rows.size(); r++) {
                Row row = project.rows.get(r);

                for (int c = 0; c < row.cells.size(); c++) {
                    Cell cell = row.cells.get(c);
                    if (cell != null && cell.recon != null) {
                        Recon recon = cell.recon;

                        if (reconMap.containsKey(recon.id)) {
                            row.setCell(c, new Cell(cell.value, reconMap.get(recon.id)));
                        }
                    }
                }
            }
        }
    }

    /**
     * Writes the old recons, then the new recons, then the end-of-change
     * marker.
     *
     * @throws IOException if writing fails or a recon cannot be serialized
     */
    public void save(Writer writer, Properties options) throws IOException {
        writeRecons(writer, options, _oldRecons, "oldReconCount");
        writeRecons(writer, options, _newRecons, "newReconCount");
        writer.write("/ec/\n"); // end of change marker
    }

    /**
     * Writes a {@code key=count} header line followed by one line of JSON per
     * recon. Each recon's candidates are handed to the pool found under the
     * {@code "pool"} option before the recon is written.
     *
     * @param writer  destination
     * @param options save options; must hold the pool under {@code "pool"}
     * @param recons  recons to write
     * @param key     header name, e.g. {@code oldReconCount}
     * @throws IOException if writing fails or a recon cannot be serialized
     */
    protected void writeRecons(Writer writer, Properties options, Map<Long, Recon> recons, String key) throws IOException {
        writer.write(key + "="); writer.write(Integer.toString(recons.size())); writer.write('\n');
        for (Recon recon : recons.values()) {
            Pool pool = (Pool) options.get("pool");

            pool.poolReconCandidates(recon);

            JSONWriter jsonWriter = new JSONWriter(writer);
            try {
                recon.write(jsonWriter, options);
            } catch (JSONException e) {
                // A half-written record would corrupt the saved change and only
                // surface later at load time, so fail the save instead of
                // printing the stack trace and carrying on.
                IOException failure = new IOException(
                    "Failed to serialize recon " + recon.id + ": " + e.getMessage());
                failure.initCause(e);
                throw failure;
            }
            writer.write("\n");
        }
    }

    /**
     * Reads a change previously written by {@link #save(Writer, Properties)}.
     * Reading stops at the {@code /ec/} marker or at end of input. Lines that
     * are not of the form {@code name=value}, and fields other than the two
     * recon counts, are ignored.
     *
     * @param reader source positioned at the first line of the change
     * @param pool   pool passed through to {@code Recon.loadStreaming}
     * @return the reconstructed change
     * @throws Exception if the data is truncated or a recon cannot be parsed
     */
    static public Change load(LineNumberReader reader, Pool pool) throws Exception {
        Map<Long, Recon> oldRecons = new HashMap<Long, Recon>();
        Map<Long, Recon> newRecons = new HashMap<Long, Recon>();

        String line;
        while ((line = reader.readLine()) != null && !"/ec/".equals(line)) {
            int equal = line.indexOf('=');
            if (equal < 0) {
                // Not a name=value line (e.g. blank); nothing to read from it.
                continue;
            }

            String field = line.substring(0, equal);
            String value = line.substring(equal + 1);

            if ("oldReconCount".equals(field)) {
                loadRecons(reader, pool, oldRecons, value);
            } else if ("newReconCount".equals(field)) {
                loadRecons(reader, pool, newRecons, value);
            }
        }

        MassReconChange change = new MassReconChange(newRecons, oldRecons);

        return change;
    }

    /**
     * Reads {@code countString} recon lines from the reader into
     * {@code recons}, keyed by recon id.
     *
     * @param reader      source positioned at the first recon line
     * @param pool        pool passed through to {@code Recon.loadStreaming}
     * @param recons      map that receives the loaded recons
     * @param countString decimal number of recon lines to read
     * @throws Exception if the count is not a number, the input ends early,
     *                   or a line does not yield a recon
     */
    static protected void loadRecons(LineNumberReader reader, Pool pool, Map<Long, Recon> recons, String countString) throws Exception {
        int count = Integer.parseInt(countString);

        for (int i = 0; i < count; i++) {
            String line = reader.readLine();
            if (line == null) {
                throw new IOException(
                    "Unexpected end of change data after line " + reader.getLineNumber() +
                    ": expected " + count + " recons but found only " + i);
            }

            Recon recon = Recon.loadStreaming(line, pool);
            if (recon == null) {
                throw new IOException(
                    "Could not parse recon at line " + reader.getLineNumber());
            }

            recons.put(recon.id, recon);
        }
    }
}

View File

@ -21,6 +21,7 @@ import com.google.gridworks.operations.column.ColumnRemovalOperation;
import com.google.gridworks.operations.column.ColumnRenameOperation;
import com.google.gridworks.operations.column.ColumnSplitOperation;
import com.google.gridworks.operations.column.ExtendDataOperation;
import com.google.gridworks.operations.recon.ImportQADataOperation;
import com.google.gridworks.operations.recon.ReconDiscardJudgmentsOperation;
import com.google.gridworks.operations.recon.ReconJudgeSimilarCellsOperation;
import com.google.gridworks.operations.recon.ReconMarkNewTopicsOperation;
@ -73,6 +74,8 @@ public abstract class OperationRegistry {
register("text-transform", TextTransformOperation.class);
register("mass-edit", MassEditOperation.class);
register("import-qa-data", ImportQADataOperation.class);
register("denormalize", DenormalizeOperation.class);
}

View File

@ -0,0 +1,106 @@
package com.google.gridworks.operations.recon;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.gridworks.commands.freebase.UploadDataCommand;
import com.google.gridworks.history.HistoryEntry;
import com.google.gridworks.model.AbstractOperation;
import com.google.gridworks.model.Cell;
import com.google.gridworks.model.Project;
import com.google.gridworks.model.Recon;
import com.google.gridworks.model.Row;
import com.google.gridworks.model.changes.MassReconChange;
import com.google.gridworks.operations.OperationRegistry;
import com.google.gridworks.util.ParsingUtilities;
/**
 * Operation that downloads QA results for the project's data-loading job and
 * records each result as the {@code qaResult} feature of the matching recon.
 * The operation has no parameters of its own.
 */
public class ImportQADataOperation extends AbstractOperation {
    /** Timeout, in milliseconds, applied to both connecting and reading. */
    static final private int s_timeoutMillis = 30000; // 30 seconds

    /**
     * Rebuilds the operation from its JSON form. Nothing is read from
     * {@code obj} because the operation carries no state.
     */
    static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception {
        return new ImportQADataOperation();
    }

    public ImportQADataOperation() {
    }

    /**
     * Writes the operation as a JSON object holding its registered name
     * ({@code op}) and its brief description ({@code description}).
     */
    public void write(JSONWriter writer, Properties options)
            throws JSONException {

        writer.object();
        writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass()));
        writer.key("description"); writer.value(getBriefDescription(null));
        writer.endObject();
    }

    @Override
    protected String getBriefDescription(Project project) {
        return "Import QA Data";
    }

    /**
     * Fetches the QA answers for the data-loading job recorded in the
     * project's preferences and builds a {@link MassReconChange} that gives
     * every matching recon a copy carrying the QA result.
     *
     * @param project        project whose recons are updated
     * @param historyEntryID id to assign to the returned history entry
     * @return a history entry wrapping the recon change
     * @throws IllegalStateException if the project has no data-loading job id
     * @throws Exception             if the answers cannot be fetched or parsed
     */
    @Override
    protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception {
        Integer jobID = (Integer) project.getMetadata().getPreferenceStore().get(UploadDataCommand.s_dataLoadJobIDPref);
        if (jobID == null) {
            // An Exception rather than an Error, so callers that catch
            // Exception can report it to the client.
            throw new IllegalStateException("Project is not associated with any data loading job.");
        }

        Map<Long, String> reconIDToResult = fetchQAResults(jobID);

        Map<Long, Recon> oldRecons = new HashMap<Long, Recon>();
        Map<Long, Recon> newRecons = new HashMap<Long, Recon>();

        for (int r = 0; r < project.rows.size(); r++) {
            Row row = project.rows.get(r);

            for (int c = 0; c < row.cells.size(); c++) {
                Cell cell = row.cells.get(c);
                if (cell != null && cell.recon != null) {
                    Recon oldRecon = cell.recon;

                    if (reconIDToResult.containsKey(oldRecon.id)) {
                        Recon newRecon = oldRecon.dup();
                        newRecon.setFeature(Recon.Feature_qaResult, reconIDToResult.get(oldRecon.id));

                        // Each answer is consumed once; MassReconChange later
                        // matches cells by recon id when it swaps recons in.
                        reconIDToResult.remove(oldRecon.id);

                        oldRecons.put(oldRecon.id, oldRecon);
                        newRecons.put(oldRecon.id, newRecon);
                    }
                }
            }
        }

        return new HistoryEntry(
            historyEntryID,
            project,
            getBriefDescription(project),
            this,
            new MassReconChange(newRecons, oldRecons)
        );
    }

    /**
     * Downloads the answers for one data-loading job. The response is read as
     * UTF-8 text with one JSON object per line; blank lines are skipped. Each
     * object supplies {@code recon_id}, whose first three characters are
     * dropped before the rest is parsed as a long, and {@code result}.
     *
     * @param jobID id of the data-loading job
     * @return QA result keyed by recon id
     * @throws Exception if the request fails or a line cannot be parsed
     */
    static private Map<Long, String> fetchQAResults(Integer jobID) throws Exception {
        Map<Long, String> reconIDToResult = new HashMap<Long, String>();

        URL url = new URL("http://gridworks-loads.dfhuynh.user.dev.freebaseapps.com/get_answers/" + jobID);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setConnectTimeout(s_timeoutMillis);
        conn.setReadTimeout(s_timeoutMillis);

        // Decode explicitly as UTF-8 instead of the platform default charset.
        LineNumberReader reader = new LineNumberReader(new InputStreamReader(conn.getInputStream(), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }

                JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(line);
                long reconID = Long.parseLong(obj.getString("recon_id").substring(3));

                reconIDToResult.put(reconID, obj.getString("result"));
            }
        } finally {
            reader.close();
        }

        return reconIDToResult;
    }
}

View File

@ -30,7 +30,10 @@ public class Pool implements Jsonizable {
/**
 * Registers the recon in this pool under its id rendered as a decimal
 * string, then pools the recon's candidates via
 * {@link #poolReconCandidates(Recon)}.
 *
 * @param recon recon to register
 */
public void pool(Recon recon) {
    String reconKey = Long.toString(recon.id);
    recons.put(reconKey, recon);

    poolReconCandidates(recon);
}
public void poolReconCandidates(Recon recon) {
if (recon.match != null) {
pool(recon.match);
}

View File

@ -100,6 +100,11 @@ MenuBar.MenuItems = [
"id" : "core/load-info-freebase",
label: "Load into Freebase ...",
click: function() { MenuBar.handlers.loadIntoFreebase(); }
},
{
"id" : "core/import-qa-data",
label: "Import QA Data",
click: function() { MenuBar.handlers.importQAData(); }
}
]
}
@ -321,3 +326,11 @@ MenuBar.handlers.loadIntoFreebase = function() {
new FreebaseLoadingDialog();
};
// Menu handler for "Import QA Data": posts the "import-qa-data" process with
// two empty argument objects and asks for a cells-changed update afterwards.
MenuBar.handlers.importQAData = function() {
    var updateOptions = { cellsChanged: true };

    Gridworks.postProcess("import-qa-data", {}, {}, updateOptions);
};

View File

@ -210,6 +210,50 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
}
]
},
{
label: "QA Facets",
submenu: [
{
label: "QA Results",
click: function() {
ui.browsingEngine.addFacet(
"list",
{
"name" : column.name + " QA Results",
"columnName" : column.name,
"expression" : "cell.recon.features.qaResult"
}
);
}
},
{
label: "Judgment Actions",
click: function() {
ui.browsingEngine.addFacet(
"list",
{
"name" : column.name + " Judgment Actions",
"columnName" : column.name,
"expression" : "cell.recon.judgmentAction"
}
);
}
},
{
label: "Judgment History Entries",
click: function() {
ui.browsingEngine.addFacet(
"list",
{
"name" : column.name + " History Entries",
"columnName" : column.name,
"expression" : "cell.recon.judgmentHistoryEntry"
}
);
}
}
]
},
{
label: "Actions",
submenu: [