For each recon group, try at least 3 times if the service keeps failing. Log errors more for debugging purposes.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1578 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-10-16 00:19:31 +00:00
parent f50880905e
commit a62638e88d
5 changed files with 62 additions and 19 deletions

View File

@ -2,6 +2,7 @@ package com.google.refine.freebase.model.recon;
import org.json.JSONObject;
import com.google.refine.model.Recon;
import com.google.refine.model.recon.ReconConfig;
abstract public class StrictReconConfig extends ReconConfig {
@ -18,4 +19,9 @@ abstract public class StrictReconConfig extends ReconConfig {
}
return null;
}
@Override
public Recon createNewRecon(long historyEntryID) {
return Recon.makeFreebaseRecon(historyEntryID);
}
}

View File

@ -80,7 +80,7 @@ public class Recon implements HasFields, Jsonizable {
return new Recon(
judgmentHistoryEntry,
"http://rdf.freebase.com/ns/type.object.mid",
"http://rdf.freebase.com/ns/type.object.mid");
"http://rdf.freebase.com/ns/type.object.id");
}
public Recon(long judgmentHistoryEntry, String identifierSpace, String schemaSpace) {

View File

@ -82,6 +82,8 @@ abstract public class ReconConfig implements Jsonizable {
abstract public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID);
abstract public Recon createNewRecon(long historyEntryID);
public void save(Writer writer) {
JSONWriter jsonWriter = new JSONWriter(writer);
try {

View File

@ -179,6 +179,7 @@ public class StandardReconConfig extends ReconConfig {
jsonWriter.key("query"); jsonWriter.value(cell.value.toString());
if (typeID != null) {
jsonWriter.key("type"); jsonWriter.value(typeID);
jsonWriter.key("type_strict"); jsonWriter.value("should");
}
if (columnDetails.size() > 0) {
@ -289,14 +290,16 @@ public class StandardReconConfig extends ReconConfig {
JSONArray results = o2.getJSONArray("result");
recon = createReconServiceResults(text, results, historyEntryID);
} else {
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
}
} else {
logger.warn("Service error for text: " + text + "\n Job code: " + job.code);
}
if (recon == null) {
recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
if (recon != null) {
recon.service = service;
}
recon.service = service;
recons.add(recon);
}
} finally {
@ -317,6 +320,13 @@ public class StandardReconConfig extends ReconConfig {
return recons;
}
@Override
public Recon createNewRecon(long historyEntryID) {
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
recon.service = service;
return recon;
}
protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) {
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);

View File

@ -9,6 +9,8 @@ import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
@ -33,6 +35,8 @@ import com.google.refine.process.LongRunningProcess;
import com.google.refine.process.Process;
public class ReconOperation extends EngineDependentOperation {
final static Logger logger = LoggerFactory.getLogger("recon-operation");
final protected String _columnName;
final protected ReconConfig _reconConfig;
@ -92,6 +96,7 @@ public class ReconOperation extends EngineDependentOperation {
static protected class JobGroup {
final public ReconJob job;
final public List<ReconEntry> entries = new ArrayList<ReconEntry>();
public int trials = 0;
public JobGroup(ReconJob job) {
this.job = job;
@ -228,28 +233,48 @@ public class ReconOperation extends EngineDependentOperation {
group.entries.add(entry);
}
int batchSize = _reconConfig.getBatchSize();
int done = 0;
List<CellChange> cellChanges = new ArrayList<CellChange>(_entries.size());
List<JobGroup> groups = new ArrayList<JobGroup>(jobKeyToGroup.values());
int batchSize = _reconConfig.getBatchSize();
for (int i = 0; i < groups.size(); i += batchSize) {
int to = Math.min(i + batchSize, groups.size());
List<ReconJob> jobs = new ArrayList<ReconJob>(to - i);
for (int j = i; j < to; j++) {
jobs.add(groups.get(j).job);
List<ReconJob> jobs = new ArrayList<ReconJob>(batchSize);
Map<ReconJob, JobGroup> jobToGroup = new HashMap<ReconJob, ReconOperation.JobGroup>();
for (int i = 0; i < groups.size(); /* don't increment here */) {
while (jobs.size() < batchSize && i < groups.size()) {
JobGroup group = groups.get(i++);
jobs.add(group.job);
jobToGroup.put(group.job, group);
}
List<Recon> recons = _reconConfig.batchRecon(jobs, _historyEntryID);
for (int j = i; j < to; j++) {
int index = j - i;
Recon recon = index < recons.size() ? recons.get(j - i) : null;
List<ReconEntry> entries = groups.get(j).entries;
for (int j = jobs.size() - 1; j >= 0; j--) {
ReconJob job = jobs.get(j);
Recon recon = j < recons.size() ? recons.get(j) : null;
JobGroup group = jobToGroup.get(job);
List<ReconEntry> entries = group.entries;
if (recon != null) {
recon.judgmentBatchSize = entries.size();
if (recon == null) {
group.trials++;
if (group.trials < 3) {
logger.warn("Re-trying job including cell containing: " + entries.get(0).cell.value);
continue; // try again next time
}
logger.warn("Failed after 3 trials for job including cell containing: " + entries.get(0).cell.value);
}
jobToGroup.remove(job);
jobs.remove(j);
done++;
if (recon == null) {
recon = _reconConfig.createNewRecon(_historyEntryID);
}
recon.judgmentBatchSize = entries.size();
for (ReconEntry entry : entries) {
Cell oldCell = entry.cell;
Cell newCell = new Cell(oldCell.value, recon);
@ -264,7 +289,7 @@ public class ReconOperation extends EngineDependentOperation {
}
}
_progress = i * 100 / groups.size();
_progress = done * 100 / groups.size();
try {
Thread.sleep(50);
} catch (InterruptedException e) {