For each recon group, retry up to 3 times if the service keeps failing. Log more errors for debugging purposes.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1578 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-10-16 00:19:31 +00:00
parent f50880905e
commit a62638e88d
5 changed files with 62 additions and 19 deletions

View File

@ -2,6 +2,7 @@ package com.google.refine.freebase.model.recon;
import org.json.JSONObject; import org.json.JSONObject;
import com.google.refine.model.Recon;
import com.google.refine.model.recon.ReconConfig; import com.google.refine.model.recon.ReconConfig;
abstract public class StrictReconConfig extends ReconConfig { abstract public class StrictReconConfig extends ReconConfig {
@ -18,4 +19,9 @@ abstract public class StrictReconConfig extends ReconConfig {
} }
return null; return null;
} }
/**
 * Creates a fresh, unjudged Freebase-flavored Recon tied to the given
 * history entry. Used as a fallback when the recon service yields no
 * usable result (see the retry loop in ReconOperation below, where it is
 * invoked after the final failed attempt).
 *
 * @param historyEntryID ID of the history entry the new recon belongs to
 * @return a new Recon produced by {@code Recon.makeFreebaseRecon}
 */
@Override
public Recon createNewRecon(long historyEntryID) {
return Recon.makeFreebaseRecon(historyEntryID);
}
} }

View File

@ -80,7 +80,7 @@ public class Recon implements HasFields, Jsonizable {
return new Recon( return new Recon(
judgmentHistoryEntry, judgmentHistoryEntry,
"http://rdf.freebase.com/ns/type.object.mid", "http://rdf.freebase.com/ns/type.object.mid",
"http://rdf.freebase.com/ns/type.object.mid"); "http://rdf.freebase.com/ns/type.object.id");
} }
public Recon(long judgmentHistoryEntry, String identifierSpace, String schemaSpace) { public Recon(long judgmentHistoryEntry, String identifierSpace, String schemaSpace) {

View File

@ -82,6 +82,8 @@ abstract public class ReconConfig implements Jsonizable {
abstract public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID); abstract public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID);
abstract public Recon createNewRecon(long historyEntryID);
public void save(Writer writer) { public void save(Writer writer) {
JSONWriter jsonWriter = new JSONWriter(writer); JSONWriter jsonWriter = new JSONWriter(writer);
try { try {

View File

@ -179,6 +179,7 @@ public class StandardReconConfig extends ReconConfig {
jsonWriter.key("query"); jsonWriter.value(cell.value.toString()); jsonWriter.key("query"); jsonWriter.value(cell.value.toString());
if (typeID != null) { if (typeID != null) {
jsonWriter.key("type"); jsonWriter.value(typeID); jsonWriter.key("type"); jsonWriter.value(typeID);
jsonWriter.key("type_strict"); jsonWriter.value("should");
} }
if (columnDetails.size() > 0) { if (columnDetails.size() > 0) {
@ -289,14 +290,16 @@ public class StandardReconConfig extends ReconConfig {
JSONArray results = o2.getJSONArray("result"); JSONArray results = o2.getJSONArray("result");
recon = createReconServiceResults(text, results, historyEntryID); recon = createReconServiceResults(text, results, historyEntryID);
} else {
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
} }
} else {
logger.warn("Service error for text: " + text + "\n Job code: " + job.code);
} }
if (recon == null) { if (recon != null) {
recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
}
recon.service = service; recon.service = service;
}
recons.add(recon); recons.add(recon);
} }
} finally { } finally {
@ -318,6 +321,13 @@ public class StandardReconConfig extends ReconConfig {
return recons; return recons;
} }
/**
 * Creates a fresh Recon using this config's identifier space and schema
 * space, and stamps it with this config's {@code service} so it matches
 * recons produced from actual service responses. Serves as the fallback
 * recon when the service fails repeatedly (see ReconOperation's retry
 * logic below).
 *
 * @param historyEntryID ID of the history entry the new recon belongs to
 * @return a new Recon with service, identifierSpace and schemaSpace set
 */
@Override
public Recon createNewRecon(long historyEntryID) {
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
recon.service = service;
return recon;
}
protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) { protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) {
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace); Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
try { try {

View File

@ -9,6 +9,8 @@ import java.util.Properties;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.json.JSONWriter; import org.json.JSONWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.browsing.Engine; import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows; import com.google.refine.browsing.FilteredRows;
@ -33,6 +35,8 @@ import com.google.refine.process.LongRunningProcess;
import com.google.refine.process.Process; import com.google.refine.process.Process;
public class ReconOperation extends EngineDependentOperation { public class ReconOperation extends EngineDependentOperation {
final static Logger logger = LoggerFactory.getLogger("recon-operation");
final protected String _columnName; final protected String _columnName;
final protected ReconConfig _reconConfig; final protected ReconConfig _reconConfig;
@ -92,6 +96,7 @@ public class ReconOperation extends EngineDependentOperation {
static protected class JobGroup { static protected class JobGroup {
final public ReconJob job; final public ReconJob job;
final public List<ReconEntry> entries = new ArrayList<ReconEntry>(); final public List<ReconEntry> entries = new ArrayList<ReconEntry>();
public int trials = 0;
public JobGroup(ReconJob job) { public JobGroup(ReconJob job) {
this.job = job; this.job = job;
@ -228,27 +233,47 @@ public class ReconOperation extends EngineDependentOperation {
group.entries.add(entry); group.entries.add(entry);
} }
int batchSize = _reconConfig.getBatchSize();
int done = 0;
List<CellChange> cellChanges = new ArrayList<CellChange>(_entries.size()); List<CellChange> cellChanges = new ArrayList<CellChange>(_entries.size());
List<JobGroup> groups = new ArrayList<JobGroup>(jobKeyToGroup.values()); List<JobGroup> groups = new ArrayList<JobGroup>(jobKeyToGroup.values());
int batchSize = _reconConfig.getBatchSize(); List<ReconJob> jobs = new ArrayList<ReconJob>(batchSize);
for (int i = 0; i < groups.size(); i += batchSize) { Map<ReconJob, JobGroup> jobToGroup = new HashMap<ReconJob, ReconOperation.JobGroup>();
int to = Math.min(i + batchSize, groups.size());
List<ReconJob> jobs = new ArrayList<ReconJob>(to - i); for (int i = 0; i < groups.size(); /* don't increment here */) {
for (int j = i; j < to; j++) { while (jobs.size() < batchSize && i < groups.size()) {
jobs.add(groups.get(j).job); JobGroup group = groups.get(i++);
jobs.add(group.job);
jobToGroup.put(group.job, group);
} }
List<Recon> recons = _reconConfig.batchRecon(jobs, _historyEntryID); List<Recon> recons = _reconConfig.batchRecon(jobs, _historyEntryID);
for (int j = i; j < to; j++) { for (int j = jobs.size() - 1; j >= 0; j--) {
int index = j - i; ReconJob job = jobs.get(j);
Recon recon = index < recons.size() ? recons.get(j - i) : null; Recon recon = j < recons.size() ? recons.get(j) : null;
List<ReconEntry> entries = groups.get(j).entries; JobGroup group = jobToGroup.get(job);
List<ReconEntry> entries = group.entries;
if (recon != null) { if (recon == null) {
recon.judgmentBatchSize = entries.size(); group.trials++;
if (group.trials < 3) {
logger.warn("Re-trying job including cell containing: " + entries.get(0).cell.value);
continue; // try again next time
} }
logger.warn("Failed after 3 trials for job including cell containing: " + entries.get(0).cell.value);
}
jobToGroup.remove(job);
jobs.remove(j);
done++;
if (recon == null) {
recon = _reconConfig.createNewRecon(_historyEntryID);
}
recon.judgmentBatchSize = entries.size();
for (ReconEntry entry : entries) { for (ReconEntry entry : entries) {
Cell oldCell = entry.cell; Cell oldCell = entry.cell;
@ -264,7 +289,7 @@ public class ReconOperation extends EngineDependentOperation {
} }
} }
_progress = i * 100 / groups.size(); _progress = done * 100 / groups.size();
try { try {
Thread.sleep(50); Thread.sleep(50);
} catch (InterruptedException e) { } catch (InterruptedException e) {