From a62638e88d333e3b8d555fa23c8d6111313b3b46 Mon Sep 17 00:00:00 2001 From: David Huynh Date: Sat, 16 Oct 2010 00:19:31 +0000 Subject: [PATCH] For each recon group, try at least 3 times if the service keeps failing. Log errors more for debugging purposes. git-svn-id: http://google-refine.googlecode.com/svn/trunk@1578 7d457c2a-affb-35e4-300a-418c747d4874 --- .../model/recon/StrictReconConfig.java | 6 +++ main/src/com/google/refine/model/Recon.java | 2 +- .../refine/model/recon/ReconConfig.java | 2 + .../model/recon/StandardReconConfig.java | 18 +++++-- .../operations/recon/ReconOperation.java | 53 ++++++++++++++----- 5 files changed, 62 insertions(+), 19 deletions(-) diff --git a/extensions/freebase/src/com/google/refine/freebase/model/recon/StrictReconConfig.java b/extensions/freebase/src/com/google/refine/freebase/model/recon/StrictReconConfig.java index e15a1b7d5..af39c1642 100644 --- a/extensions/freebase/src/com/google/refine/freebase/model/recon/StrictReconConfig.java +++ b/extensions/freebase/src/com/google/refine/freebase/model/recon/StrictReconConfig.java @@ -2,6 +2,7 @@ package com.google.refine.freebase.model.recon; import org.json.JSONObject; +import com.google.refine.model.Recon; import com.google.refine.model.recon.ReconConfig; abstract public class StrictReconConfig extends ReconConfig { @@ -18,4 +19,9 @@ abstract public class StrictReconConfig extends ReconConfig { } return null; } + + @Override + public Recon createNewRecon(long historyEntryID) { + return Recon.makeFreebaseRecon(historyEntryID); + } } diff --git a/main/src/com/google/refine/model/Recon.java b/main/src/com/google/refine/model/Recon.java index c6f70fcbf..4b73397c8 100644 --- a/main/src/com/google/refine/model/Recon.java +++ b/main/src/com/google/refine/model/Recon.java @@ -80,7 +80,7 @@ public class Recon implements HasFields, Jsonizable { return new Recon( judgmentHistoryEntry, "http://rdf.freebase.com/ns/type.object.mid", - "http://rdf.freebase.com/ns/type.object.mid"); + "http://rdf.freebase.com/ns/type.object.id"); } public Recon(long judgmentHistoryEntry, String identifierSpace, String schemaSpace) { diff --git a/main/src/com/google/refine/model/recon/ReconConfig.java b/main/src/com/google/refine/model/recon/ReconConfig.java index 96bb9d81f..aacef0b73 100644 --- a/main/src/com/google/refine/model/recon/ReconConfig.java +++ b/main/src/com/google/refine/model/recon/ReconConfig.java @@ -82,6 +82,8 @@ abstract public class ReconConfig implements Jsonizable { abstract public List batchRecon(List jobs, long historyEntryID); + abstract public Recon createNewRecon(long historyEntryID); + public void save(Writer writer) { JSONWriter jsonWriter = new JSONWriter(writer); try { diff --git a/main/src/com/google/refine/model/recon/StandardReconConfig.java b/main/src/com/google/refine/model/recon/StandardReconConfig.java index 309a844f1..e36abbdb8 100644 --- a/main/src/com/google/refine/model/recon/StandardReconConfig.java +++ b/main/src/com/google/refine/model/recon/StandardReconConfig.java @@ -179,6 +179,7 @@ public class StandardReconConfig extends ReconConfig { jsonWriter.key("query"); jsonWriter.value(cell.value.toString()); if (typeID != null) { jsonWriter.key("type"); jsonWriter.value(typeID); + jsonWriter.key("type_strict"); jsonWriter.value("should"); } if (columnDetails.size() > 0) { @@ -289,14 +290,16 @@ public class StandardReconConfig extends ReconConfig { JSONArray results = o2.getJSONArray("result"); recon = createReconServiceResults(text, results, historyEntryID); + } else { + logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString()); } + } else { + logger.warn("Service error for text: " + text + "\n Job code: " + job.code); } - if (recon == null) { - recon = new Recon(historyEntryID, identifierSpace, schemaSpace); + if (recon != null) { + recon.service = service; } - recon.service = service; - recons.add(recon); } } finally { @@ -317,6 +320,13 @@ public class StandardReconConfig extends ReconConfig { return recons; } + + @Override + public Recon createNewRecon(long historyEntryID) { + Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace); + recon.service = service; + return recon; + } protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) { Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace); diff --git a/main/src/com/google/refine/operations/recon/ReconOperation.java b/main/src/com/google/refine/operations/recon/ReconOperation.java index 98a68e168..a4b8d15e7 100644 --- a/main/src/com/google/refine/operations/recon/ReconOperation.java +++ b/main/src/com/google/refine/operations/recon/ReconOperation.java @@ -9,6 +9,8 @@ import java.util.Properties; import org.json.JSONException; import org.json.JSONObject; import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.refine.browsing.Engine; import com.google.refine.browsing.FilteredRows; @@ -33,6 +35,8 @@ import com.google.refine.process.LongRunningProcess; import com.google.refine.process.Process; public class ReconOperation extends EngineDependentOperation { + final static Logger logger = LoggerFactory.getLogger("recon-operation"); + final protected String _columnName; final protected ReconConfig _reconConfig; @@ -92,6 +96,7 @@ public class ReconOperation extends EngineDependentOperation { static protected class JobGroup { final public ReconJob job; final public List entries = new ArrayList(); + public int trials = 0; public JobGroup(ReconJob job) { this.job = job; @@ -228,28 +233,48 @@ public class ReconOperation extends EngineDependentOperation { group.entries.add(entry); } + int batchSize = _reconConfig.getBatchSize(); + int done = 0; + List cellChanges = new ArrayList(_entries.size()); List groups = new ArrayList(jobKeyToGroup.values()); - int batchSize = _reconConfig.getBatchSize(); - for (int i = 0; i < groups.size(); i += batchSize) { - int to = Math.min(i + batchSize, groups.size()); - - List jobs = new ArrayList(to - i); - for (int j = i; j < to; j++) { - jobs.add(groups.get(j).job); + List jobs = new ArrayList(batchSize); + Map jobToGroup = new HashMap(); + + for (int i = 0; i < groups.size(); /* don't increment here */) { + while (jobs.size() < batchSize && i < groups.size()) { + JobGroup group = groups.get(i++); + + jobs.add(group.job); + jobToGroup.put(group.job, group); } List recons = _reconConfig.batchRecon(jobs, _historyEntryID); - for (int j = i; j < to; j++) { - int index = j - i; - Recon recon = index < recons.size() ? recons.get(j - i) : null; - List entries = groups.get(j).entries; + for (int j = jobs.size() - 1; j >= 0; j--) { + ReconJob job = jobs.get(j); + Recon recon = j < recons.size() ? recons.get(j) : null; + JobGroup group = jobToGroup.get(job); + List entries = group.entries; - if (recon != null) { - recon.judgmentBatchSize = entries.size(); + if (recon == null) { + group.trials++; + if (group.trials < 3) { + logger.warn("Re-trying job including cell containing: " + entries.get(0).cell.value); + continue; // try again next time + } + logger.warn("Failed after 3 trials for job including cell containing: " + entries.get(0).cell.value); } + jobToGroup.remove(job); + jobs.remove(j); + done++; + + if (recon == null) { + recon = _reconConfig.createNewRecon(_historyEntryID); + } + recon.judgmentBatchSize = entries.size(); + for (ReconEntry entry : entries) { Cell oldCell = entry.cell; Cell newCell = new Cell(oldCell.value, recon); @@ -264,7 +289,7 @@ public class ReconOperation extends EngineDependentOperation { } } - _progress = i * 100 / groups.size(); + _progress = done * 100 / groups.size(); try { Thread.sleep(50); } catch (InterruptedException e) {