Fixed Issue 502 in google-refine: Fetch URLs does not return the exact HTTP payload, like Create Project from URLs does.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2398 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2011-12-02 20:44:13 +00:00
parent 4ecd83bcf8
commit ae771a7ccb
2 changed files with 27 additions and 3 deletions

View File

@ -37,6 +37,7 @@ import java.io.InputStream;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
@ -263,9 +264,28 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
}
try {
InputStream is = url.openStream();
URLConnection urlConnection = url.openConnection();
urlConnection.connect();
InputStream is = urlConnection.getInputStream();
try {
return new CellAtRow(urlData.row, new Cell(ParsingUtilities.inputStreamToString(is), null));
String encoding = urlConnection.getContentEncoding();
if (encoding == null) {
String contentType = urlConnection.getContentType();
if (contentType != null) {
final String charsetEqual = "charset=";
int c = contentType.lastIndexOf(charsetEqual);
if (c > 0) {
encoding = contentType.substring(c + charsetEqual.length());
}
}
}
return new CellAtRow(
urlData.row,
new Cell(
ParsingUtilities.inputStreamToString(
is, encoding != null ? encoding : "UTF-8"),
null));
} finally {
is.close();
}

View File

@ -87,7 +87,11 @@ public class ParsingUtilities {
}
/**
 * Convenience overload that converts an input stream to a string using the
 * fixed encoding name "UTF-8".
 *
 * The result is whatever the two-argument
 * {@code inputStreamToString(InputStream, String)} overload returns for the
 * same stream and "UTF-8".
 *
 * @param is the stream to read from
 * @return the string produced by the two-argument overload
 * @throws IOException declared by this method; presumably propagated from
 *         reading the stream (confirm against the two-argument overload)
 */
static public String inputStreamToString(InputStream is) throws IOException {
// NOTE(review): this Reader is never referenced below and is not closed here.
// It appears to be the pre-change line of this diff hunk, superseded by the
// delegating return on the next line — confirm before relying on it.
Reader reader = new InputStreamReader(is, "UTF-8");
// Delegate to the encoding-aware overload with the default encoding.
return inputStreamToString(is, "UTF-8");
}
static public String inputStreamToString(InputStream is, String encoding) throws IOException {
Reader reader = new InputStreamReader(is, encoding);
try {
return readerToString(reader);
} finally {