For each recon group, try up to 3 times if the service keeps failing. Log more errors for debugging purposes.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1578 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
f50880905e
commit
a62638e88d
@ -2,6 +2,7 @@ package com.google.refine.freebase.model.recon;
|
|||||||
|
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.refine.model.Recon;
|
||||||
import com.google.refine.model.recon.ReconConfig;
|
import com.google.refine.model.recon.ReconConfig;
|
||||||
|
|
||||||
abstract public class StrictReconConfig extends ReconConfig {
|
abstract public class StrictReconConfig extends ReconConfig {
|
||||||
@ -18,4 +19,9 @@ abstract public class StrictReconConfig extends ReconConfig {
|
|||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Recon createNewRecon(long historyEntryID) {
|
||||||
|
return Recon.makeFreebaseRecon(historyEntryID);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -80,7 +80,7 @@ public class Recon implements HasFields, Jsonizable {
|
|||||||
return new Recon(
|
return new Recon(
|
||||||
judgmentHistoryEntry,
|
judgmentHistoryEntry,
|
||||||
"http://rdf.freebase.com/ns/type.object.mid",
|
"http://rdf.freebase.com/ns/type.object.mid",
|
||||||
"http://rdf.freebase.com/ns/type.object.mid");
|
"http://rdf.freebase.com/ns/type.object.id");
|
||||||
}
|
}
|
||||||
|
|
||||||
public Recon(long judgmentHistoryEntry, String identifierSpace, String schemaSpace) {
|
public Recon(long judgmentHistoryEntry, String identifierSpace, String schemaSpace) {
|
||||||
|
@ -82,6 +82,8 @@ abstract public class ReconConfig implements Jsonizable {
|
|||||||
|
|
||||||
abstract public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID);
|
abstract public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID);
|
||||||
|
|
||||||
|
abstract public Recon createNewRecon(long historyEntryID);
|
||||||
|
|
||||||
public void save(Writer writer) {
|
public void save(Writer writer) {
|
||||||
JSONWriter jsonWriter = new JSONWriter(writer);
|
JSONWriter jsonWriter = new JSONWriter(writer);
|
||||||
try {
|
try {
|
||||||
|
@ -179,6 +179,7 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
jsonWriter.key("query"); jsonWriter.value(cell.value.toString());
|
jsonWriter.key("query"); jsonWriter.value(cell.value.toString());
|
||||||
if (typeID != null) {
|
if (typeID != null) {
|
||||||
jsonWriter.key("type"); jsonWriter.value(typeID);
|
jsonWriter.key("type"); jsonWriter.value(typeID);
|
||||||
|
jsonWriter.key("type_strict"); jsonWriter.value("should");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (columnDetails.size() > 0) {
|
if (columnDetails.size() > 0) {
|
||||||
@ -289,14 +290,16 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
JSONArray results = o2.getJSONArray("result");
|
JSONArray results = o2.getJSONArray("result");
|
||||||
|
|
||||||
recon = createReconServiceResults(text, results, historyEntryID);
|
recon = createReconServiceResults(text, results, historyEntryID);
|
||||||
|
} else {
|
||||||
|
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
logger.warn("Service error for text: " + text + "\n Job code: " + job.code);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (recon == null) {
|
if (recon != null) {
|
||||||
recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
|
||||||
}
|
|
||||||
recon.service = service;
|
recon.service = service;
|
||||||
|
}
|
||||||
recons.add(recon);
|
recons.add(recon);
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
@ -318,6 +321,13 @@ public class StandardReconConfig extends ReconConfig {
|
|||||||
return recons;
|
return recons;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Recon createNewRecon(long historyEntryID) {
|
||||||
|
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
||||||
|
recon.service = service;
|
||||||
|
return recon;
|
||||||
|
}
|
||||||
|
|
||||||
protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) {
|
protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) {
|
||||||
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
|
||||||
try {
|
try {
|
||||||
|
@ -9,6 +9,8 @@ import java.util.Properties;
|
|||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
import org.json.JSONWriter;
|
import org.json.JSONWriter;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.refine.browsing.Engine;
|
import com.google.refine.browsing.Engine;
|
||||||
import com.google.refine.browsing.FilteredRows;
|
import com.google.refine.browsing.FilteredRows;
|
||||||
@ -33,6 +35,8 @@ import com.google.refine.process.LongRunningProcess;
|
|||||||
import com.google.refine.process.Process;
|
import com.google.refine.process.Process;
|
||||||
|
|
||||||
public class ReconOperation extends EngineDependentOperation {
|
public class ReconOperation extends EngineDependentOperation {
|
||||||
|
final static Logger logger = LoggerFactory.getLogger("recon-operation");
|
||||||
|
|
||||||
final protected String _columnName;
|
final protected String _columnName;
|
||||||
final protected ReconConfig _reconConfig;
|
final protected ReconConfig _reconConfig;
|
||||||
|
|
||||||
@ -92,6 +96,7 @@ public class ReconOperation extends EngineDependentOperation {
|
|||||||
static protected class JobGroup {
|
static protected class JobGroup {
|
||||||
final public ReconJob job;
|
final public ReconJob job;
|
||||||
final public List<ReconEntry> entries = new ArrayList<ReconEntry>();
|
final public List<ReconEntry> entries = new ArrayList<ReconEntry>();
|
||||||
|
public int trials = 0;
|
||||||
|
|
||||||
public JobGroup(ReconJob job) {
|
public JobGroup(ReconJob job) {
|
||||||
this.job = job;
|
this.job = job;
|
||||||
@ -228,27 +233,47 @@ public class ReconOperation extends EngineDependentOperation {
|
|||||||
group.entries.add(entry);
|
group.entries.add(entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int batchSize = _reconConfig.getBatchSize();
|
||||||
|
int done = 0;
|
||||||
|
|
||||||
List<CellChange> cellChanges = new ArrayList<CellChange>(_entries.size());
|
List<CellChange> cellChanges = new ArrayList<CellChange>(_entries.size());
|
||||||
List<JobGroup> groups = new ArrayList<JobGroup>(jobKeyToGroup.values());
|
List<JobGroup> groups = new ArrayList<JobGroup>(jobKeyToGroup.values());
|
||||||
|
|
||||||
int batchSize = _reconConfig.getBatchSize();
|
List<ReconJob> jobs = new ArrayList<ReconJob>(batchSize);
|
||||||
for (int i = 0; i < groups.size(); i += batchSize) {
|
Map<ReconJob, JobGroup> jobToGroup = new HashMap<ReconJob, ReconOperation.JobGroup>();
|
||||||
int to = Math.min(i + batchSize, groups.size());
|
|
||||||
|
|
||||||
List<ReconJob> jobs = new ArrayList<ReconJob>(to - i);
|
for (int i = 0; i < groups.size(); /* don't increment here */) {
|
||||||
for (int j = i; j < to; j++) {
|
while (jobs.size() < batchSize && i < groups.size()) {
|
||||||
jobs.add(groups.get(j).job);
|
JobGroup group = groups.get(i++);
|
||||||
|
|
||||||
|
jobs.add(group.job);
|
||||||
|
jobToGroup.put(group.job, group);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Recon> recons = _reconConfig.batchRecon(jobs, _historyEntryID);
|
List<Recon> recons = _reconConfig.batchRecon(jobs, _historyEntryID);
|
||||||
for (int j = i; j < to; j++) {
|
for (int j = jobs.size() - 1; j >= 0; j--) {
|
||||||
int index = j - i;
|
ReconJob job = jobs.get(j);
|
||||||
Recon recon = index < recons.size() ? recons.get(j - i) : null;
|
Recon recon = j < recons.size() ? recons.get(j) : null;
|
||||||
List<ReconEntry> entries = groups.get(j).entries;
|
JobGroup group = jobToGroup.get(job);
|
||||||
|
List<ReconEntry> entries = group.entries;
|
||||||
|
|
||||||
if (recon != null) {
|
if (recon == null) {
|
||||||
recon.judgmentBatchSize = entries.size();
|
group.trials++;
|
||||||
|
if (group.trials < 3) {
|
||||||
|
logger.warn("Re-trying job including cell containing: " + entries.get(0).cell.value);
|
||||||
|
continue; // try again next time
|
||||||
}
|
}
|
||||||
|
logger.warn("Failed after 3 trials for job including cell containing: " + entries.get(0).cell.value);
|
||||||
|
}
|
||||||
|
|
||||||
|
jobToGroup.remove(job);
|
||||||
|
jobs.remove(j);
|
||||||
|
done++;
|
||||||
|
|
||||||
|
if (recon == null) {
|
||||||
|
recon = _reconConfig.createNewRecon(_historyEntryID);
|
||||||
|
}
|
||||||
|
recon.judgmentBatchSize = entries.size();
|
||||||
|
|
||||||
for (ReconEntry entry : entries) {
|
for (ReconEntry entry : entries) {
|
||||||
Cell oldCell = entry.cell;
|
Cell oldCell = entry.cell;
|
||||||
@ -264,7 +289,7 @@ public class ReconOperation extends EngineDependentOperation {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_progress = i * 100 / groups.size();
|
_progress = done * 100 / groups.size();
|
||||||
try {
|
try {
|
||||||
Thread.sleep(50);
|
Thread.sleep(50);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
Loading…
Reference in New Issue
Block a user