From df45d06b2b78da9328f26f81ec1a328ccc4162d3 Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Thu, 26 Jan 2012 18:47:30 +0000 Subject: [PATCH] Issue 523 - On URL fetch error, return HTTP error code, message, and contents of error stream (HTML page) if available git-svn-id: http://google-refine.googlecode.com/svn/trunk@2429 7d457c2a-affb-35e4-300a-418c747d4874 --- ...ColumnAdditionByFetchingURLsOperation.java | 58 +++++++++++++------ 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java b/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java index 61740d41b..e5dd0ca8a 100644 --- a/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java +++ b/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java @@ -33,8 +33,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.refine.operations.column; +import java.io.IOException; import java.io.InputStream; import java.io.Serializable; +import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; @@ -265,29 +267,49 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat try { URLConnection urlConnection = url.openConnection(); - urlConnection.connect(); +// urlConnection.setRequestProperty(_headerKey, _headerValue); - InputStream is = urlConnection.getInputStream(); try { - String encoding = urlConnection.getContentEncoding(); - if (encoding == null) { - String contentType = urlConnection.getContentType(); - if (contentType != null) { - final String charsetEqual = "charset="; - int c = contentType.lastIndexOf(charsetEqual); - if (c > 0) { - encoding = contentType.substring(c + charsetEqual.length()); + InputStream is = urlConnection.getInputStream(); + try { + String encoding = urlConnection.getContentEncoding(); + if (encoding == null) { + String contentType = urlConnection.getContentType(); + if (contentType != null) { + final String charsetEqual = "charset="; + int c = contentType.lastIndexOf(charsetEqual); + if (c > 0) { + encoding = contentType.substring(c + charsetEqual.length()); + } } } + return new CellAtRow( + urlData.row, + new Cell( + ParsingUtilities.inputStreamToString( + is, encoding != null ? encoding : "UTF-8"), + null)); + + } finally { + is.close(); } - return new CellAtRow( - urlData.row, - new Cell( - ParsingUtilities.inputStreamToString( - is, encoding != null ? encoding : "UTF-8"), - null)); - } finally { - is.close(); + } catch (IOException e) { + String message; + if (urlConnection instanceof HttpURLConnection) { + int status = ((HttpURLConnection)urlConnection).getResponseCode(); + String errorString = ""; + InputStream errorStream = ((HttpURLConnection)urlConnection).getErrorStream(); + if (errorStream != null) { + errorString = ParsingUtilities.inputStreamToString(errorStream); + } + message = String.format("HTTP error %d : %s | %s",status, + ((HttpURLConnection)urlConnection).getResponseMessage(), + errorString); + } else { + message = e.toString(); + } + return _onError == OnError.StoreError ? + new CellAtRow(urlData.row, new Cell(new EvalError(message), null)) : null; } } catch (Exception e) { return _onError == OnError.StoreError ?