Improve recon error handling

This commit is contained in:
Tom Morris 2013-02-26 00:00:03 -05:00
parent a2b60d3d4b
commit 95e13eac50
2 changed files with 110 additions and 96 deletions

View File

@ -37,8 +37,8 @@ import java.io.DataOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.StringWriter; import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.URL; import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
@ -86,11 +86,11 @@ public class GuessTypesOfColumnCommand extends Command {
writer.key("code"); writer.value("error"); writer.key("code"); writer.value("error");
writer.key("message"); writer.value("No such column"); writer.key("message"); writer.value("No such column");
} else { } else {
try {
writer.key("code"); writer.value("ok");
writer.key("types"); writer.array();
List<TypeGroup> typeGroups = guessTypes(project, column, serviceUrl); List<TypeGroup> typeGroups = guessTypes(project, column, serviceUrl);
writer.key("code"); writer.value("ok");
writer.key("types"); writer.array();
for (TypeGroup tg : typeGroups) { for (TypeGroup tg : typeGroups) {
writer.object(); writer.object();
writer.key("id"); writer.value(tg.id); writer.key("id"); writer.value(tg.id);
@ -101,9 +101,6 @@ public class GuessTypesOfColumnCommand extends Command {
} }
writer.endArray(); writer.endArray();
} catch (Exception e) {
writer.key("code"); writer.value("error");
}
} }
writer.endObject(); writer.endObject();
@ -112,7 +109,7 @@ public class GuessTypesOfColumnCommand extends Command {
} }
} }
final static int s_sampleSize = 10; final static int SAMPLE_SIZE = 10;
/** /**
* Run relevance searches for the first n cells in the given column and * Run relevance searches for the first n cells in the given column and
@ -122,13 +119,15 @@ public class GuessTypesOfColumnCommand extends Command {
* @param project * @param project
* @param column * @param column
* @return * @return
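* @throws JSONException if the response from the service cannot be processed as the expected JSON
* @throws IOException if the request to serviceUrl fails or the service answers with an HTTP status of 400 or above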
*/ */
protected List<TypeGroup> guessTypes(Project project, Column column, String serviceUrl) { protected List<TypeGroup> guessTypes(Project project, Column column, String serviceUrl)
throws JSONException, IOException {
Map<String, TypeGroup> map = new HashMap<String, TypeGroup>(); Map<String, TypeGroup> map = new HashMap<String, TypeGroup>();
int cellIndex = column.getCellIndex(); int cellIndex = column.getCellIndex();
List<String> samples = new ArrayList<String>(s_sampleSize); List<String> samples = new ArrayList<String>(SAMPLE_SIZE);
Set<String> sampleSet = new HashSet<String>(); Set<String> sampleSet = new HashSet<String>();
for (Row row : project.rows) { for (Row row : project.rows) {
@ -138,7 +137,7 @@ public class GuessTypesOfColumnCommand extends Command {
if (!sampleSet.contains(s)) { if (!sampleSet.contains(s)) {
samples.add(s); samples.add(s);
sampleSet.add(s); sampleSet.add(s);
if (samples.size() >= s_sampleSize) { if (samples.size() >= SAMPLE_SIZE) {
break; break;
} }
} }
@ -160,13 +159,13 @@ public class GuessTypesOfColumnCommand extends Command {
} }
jsonWriter.endObject(); jsonWriter.endObject();
} catch (JSONException e) { } catch (JSONException e) {
// ignore logger.error("Error constructing query", e);
} }
String queriesString = stringWriter.toString(); String queriesString = stringWriter.toString();
try { try {
URL url = new URL(serviceUrl); URL url = new URL(serviceUrl);
URLConnection connection = url.openConnection(); HttpURLConnection connection = (HttpURLConnection) url.openConnection();
{ {
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
connection.setConnectTimeout(30000); connection.setConnectTimeout(30000);
@ -184,68 +183,76 @@ public class GuessTypesOfColumnCommand extends Command {
connection.connect(); connection.connect();
} }
InputStream is = connection.getInputStream(); if (connection.getResponseCode() >= 400) {
try { InputStream is = connection.getErrorStream();
String s = ParsingUtilities.inputStreamToString(is); throw new IOException("Failed - code:"
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); + Integer.toString(connection.getResponseCode())
// The conditional must be parenthesized: '+' binds tighter than '==', so without
// the parentheses the whole concatenated string is compared to null (always false),
// the "Failed - code:" prefix is lost and a null error stream is still read.
+ " message: " + (is == null ? "" : ParsingUtilities.inputStreamToString(is)));
for (int i = 0; i < samples.size(); i++) { } else {
String key = "q" + i; InputStream is = connection.getInputStream();
if (!o.has(key)) { try {
continue; String s = ParsingUtilities.inputStreamToString(is);
} JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
JSONObject o2 = o.getJSONObject(key); for (int i = 0; i < samples.size(); i++) {
if (!(o2.has("result"))) { String key = "q" + i;
continue; if (!o.has(key)) {
} continue;
}
JSONArray results = o2.getJSONArray("result");
int count = results.length(); JSONObject o2 = o.getJSONObject(key);
if (!(o2.has("result"))) {
for (int j = 0; j < count; j++) { continue;
JSONObject result = results.getJSONObject(j); }
double score = 1.0 / (1 + j); // score by each result's rank
JSONArray results = o2.getJSONArray("result");
JSONArray types = result.getJSONArray("type"); int count = results.length();
int typeCount = types.length();
for (int j = 0; j < count; j++) {
for (int t = 0; t < typeCount; t++) { JSONObject result = results.getJSONObject(j);
Object type = types.get(t); double score = 1.0 / (1 + j); // score by each result's rank
String typeID;
String typeName; JSONArray types = result.getJSONArray("type");
int typeCount = types.length();
if (type instanceof String) {
typeID = typeName = (String) type; for (int t = 0; t < typeCount; t++) {
} else { Object type = types.get(t);
typeID = ((JSONObject) type).getString("id"); String typeID;
typeName = ((JSONObject) type).getString("name"); String typeName;
}
if (type instanceof String) {
double score2 = score * (typeCount - t) / typeCount; typeID = typeName = (String) type;
if (map.containsKey(typeID)) { } else {
TypeGroup tg = map.get(typeID); typeID = ((JSONObject) type).getString("id");
tg.score += score2; typeName = ((JSONObject) type).getString("name");
tg.count++; }
} else {
map.put(typeID, new TypeGroup(typeID, typeName, score2)); double score2 = score * (typeCount - t) / typeCount;
if (map.containsKey(typeID)) {
TypeGroup tg = map.get(typeID);
tg.score += score2;
tg.count++;
} else {
map.put(typeID, new TypeGroup(typeID, typeName, score2));
}
} }
} }
} }
} finally {
is.close();
} }
} finally {
is.close();
} }
} catch (Exception e) { } catch (IOException e) {
logger.error("Failed to guess cell types for load\n" + queriesString, e); logger.error("Failed to guess cell types for load\n" + queriesString, e);
throw e;
} }
List<TypeGroup> types = new ArrayList<TypeGroup>(map.values()); List<TypeGroup> types = new ArrayList<TypeGroup>(map.values());
Collections.sort(types, new Comparator<TypeGroup>() { Collections.sort(types, new Comparator<TypeGroup>() {
@Override @Override
public int compare(TypeGroup o1, TypeGroup o2) { public int compare(TypeGroup o1, TypeGroup o2) {
int c = Math.min(s_sampleSize, o2.count) - Math.min(s_sampleSize, o1.count); int c = Math.min(SAMPLE_SIZE, o2.count) - Math.min(SAMPLE_SIZE, o1.count);
if (c != 0) { if (c != 0) {
return c; return c;
} }

View File

@ -34,10 +34,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.model.recon; package com.google.refine.model.recon;
import java.io.DataOutputStream; import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.StringWriter; import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.URL; import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
@ -289,7 +290,7 @@ public class StandardReconConfig extends ReconConfig {
try { try {
URL url = new URL(service); URL url = new URL(service);
URLConnection connection = url.openConnection(); HttpURLConnection connection = (HttpURLConnection) url.openConnection();
{ {
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
connection.setConnectTimeout(30000); connection.setConnectTimeout(30000);
@ -308,44 +309,50 @@ public class StandardReconConfig extends ReconConfig {
connection.connect(); connection.connect();
} }
InputStream is = connection.getInputStream(); if (connection.getResponseCode() >= 400) {
try { // TODO: Retry with backoff on 500 errors?
String s = ParsingUtilities.inputStreamToString(is); InputStream is = connection.getErrorStream();
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); throw new IOException("Failed - code:"
+ Integer.toString(connection.getResponseCode())
for (int i = 0; i < jobs.size(); i++) { + " message: " + is == null ? "" : ParsingUtilities.inputStreamToString(is));
StandardReconJob job = (StandardReconJob) jobs.get(i); } else {
Recon recon = null; InputStream is = connection.getInputStream();
try {
String text = job.text; String s = ParsingUtilities.inputStreamToString(is);
String key = "q" + i; JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
if (o.has(key)) {
JSONObject o2 = o.getJSONObject(key); for (int i = 0; i < jobs.size(); i++) {
if (o2.has("result")) { StandardReconJob job = (StandardReconJob) jobs.get(i);
JSONArray results = o2.getJSONArray("result"); Recon recon = null;
recon = createReconServiceResults(text, results, historyEntryID); String text = job.text;
String key = "q" + i;
if (o.has(key)) {
JSONObject o2 = o.getJSONObject(key);
if (o2.has("result")) {
JSONArray results = o2.getJSONArray("result");
recon = createReconServiceResults(text, results, historyEntryID);
} else {
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
}
} else { } else {
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString()); logger.warn("Service error for text: " + text + "\n Job code: " + job.code);
} }
} else {
logger.warn("Service error for text: " + text + "\n Job code: " + job.code); if (recon != null) {
recon.service = service;
}
recons.add(recon);
} }
} finally {
if (recon != null) { is.close();
recon.service = service;
}
recons.add(recon);
} }
} finally {
is.close();
} }
// } catch (IOException e) {
// // TODO: Retry on HTTP 500 errors?
} catch (Exception e) { } catch (Exception e) {
logger.error("Failed to batch recon with load:\n" + queriesString, e); logger.error("Failed to batch recon with load:\n" + queriesString, e);
} }
while (recons.size() < jobs.size()) { while (recons.size() < jobs.size()) {
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace); Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
recon.service = service; recon.service = service;