Switch to Apache HTTP client for downloads - fixes #748
This commit is contained in:
parent
d7531bbbd8
commit
4f7da9d18e
@ -370,7 +370,10 @@ public class RefineServlet extends Butterfly {
|
||||
}
|
||||
|
||||
/**
 * Adds a "User-Agent" request property to the given HTTP connection.
 *
 * NOTE(review): this listing is a rendered diff, so both the removed line
 * (inline "OpenRefine/" + FULL_VERSION) and the added line (getUserAgent())
 * appear below; presumably only the getUserAgent() call remains after the
 * commit -- confirm against the actual file.
 *
 * @param httpConnection the connection that receives the header
 */
static public void setUserAgent(HttpURLConnection httpConnection) {
|
||||
httpConnection.addRequestProperty("User-Agent", "OpenRefine/" + FULL_VERSION);
|
||||
httpConnection.addRequestProperty("User-Agent", getUserAgent());
|
||||
}
|
||||
|
||||
static public String getUserAgent() {
|
||||
return "OpenRefine/" + FULL_VERSION;
|
||||
}
|
||||
}
|
@ -42,7 +42,6 @@ import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.ArrayList;
|
||||
@ -65,6 +64,12 @@ import org.apache.commons.fileupload.ProgressListener;
|
||||
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||
import org.apache.commons.fileupload.util.Streams;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.DecompressingHttpClient;
|
||||
import org.apache.http.impl.client.DefaultHttpClient;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
import org.apache.tools.bzip2.CBZip2InputStream;
|
||||
import org.apache.tools.tar.TarEntry;
|
||||
import org.apache.tools.tar.TarInputStream;
|
||||
@ -210,16 +215,15 @@ public class ImportingUtilities {
|
||||
}
|
||||
});
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
List tempFiles = upload.parseRequest(request);
|
||||
@SuppressWarnings("unchecked")
|
||||
List<FileItem> tempFiles = (List<FileItem>)upload.parseRequest(request);
|
||||
|
||||
progress.setProgress("Uploading data ...", -1);
|
||||
parts: for (Object obj : tempFiles) {
|
||||
parts: for (FileItem fileItem : tempFiles) {
|
||||
if (progress.isCanceled()) {
|
||||
break;
|
||||
}
|
||||
|
||||
FileItem fileItem = (FileItem) obj;
|
||||
InputStream stream = fileItem.getInputStream();
|
||||
|
||||
String name = fileItem.getFieldName().toLowerCase();
|
||||
@ -244,10 +248,10 @@ public class ImportingUtilities {
|
||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||
|
||||
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
|
||||
JSONUtilities.append(fileRecords, fileRecord);
|
||||
|
||||
clipboardCount++;
|
||||
|
||||
JSONUtilities.append(fileRecords, fileRecord);
|
||||
} else if (name.equals("download")) {
|
||||
String urlString = Streams.asString(stream);
|
||||
URL url = new URL(urlString);
|
||||
@ -272,56 +276,62 @@ public class ImportingUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
URLConnection urlConnection = url.openConnection();
|
||||
urlConnection.setConnectTimeout(5000);
|
||||
if (urlConnection instanceof HttpURLConnection) {
|
||||
HttpURLConnection httpConnection = (HttpURLConnection) urlConnection;
|
||||
RefineServlet.setUserAgent(httpConnection);
|
||||
}
|
||||
// TODO: Set Accept-Encoding on connection so we don't get stuff we can't handle?
|
||||
urlConnection.connect();
|
||||
if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
|
||||
DecompressingHttpClient httpclient =
|
||||
new DecompressingHttpClient(new DefaultHttpClient());
|
||||
HttpGet httpGet = new HttpGet(url.toURI());
|
||||
httpGet.setHeader("User-Agent", RefineServlet.getUserAgent());
|
||||
|
||||
HttpResponse response = httpclient.execute(httpGet);
|
||||
|
||||
InputStream stream2 = urlConnection.getInputStream();
|
||||
try {
|
||||
String localname = url.getPath();
|
||||
if (localname.isEmpty() || localname.endsWith("/")) {
|
||||
localname = localname + "temp";
|
||||
response.getStatusLine();
|
||||
HttpEntity entity = response.getEntity();
|
||||
if (entity == null) {
|
||||
throw new Exception("No content found in " + url.toString());
|
||||
}
|
||||
File file = allocateFile(rawDataDir, localname);
|
||||
|
||||
int contentLength = urlConnection.getContentLength();
|
||||
if (contentLength > 0) {
|
||||
update.totalExpectedSize += contentLength;
|
||||
InputStream stream2 = entity.getContent();
|
||||
String encoding = null;
|
||||
if (entity.getContentEncoding() != null) {
|
||||
encoding = entity.getContentEncoding().getValue();
|
||||
}
|
||||
|
||||
JSONUtilities.safePut(fileRecord, "declaredEncoding", urlConnection.getContentEncoding());
|
||||
JSONUtilities.safePut(fileRecord, "declaredMimeType", urlConnection.getContentType());
|
||||
JSONUtilities.safePut(fileRecord, "fileName", file.getName());
|
||||
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
|
||||
|
||||
progress.setProgress("Downloading " + urlString,
|
||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||
|
||||
long actualLength = saveStreamToFile(stream2, file, update);
|
||||
JSONUtilities.safePut(fileRecord, "size", actualLength);
|
||||
if (actualLength == 0) {
|
||||
throw new Exception("No content found in " + urlString);
|
||||
} else if (contentLength >= 0) {
|
||||
update.totalExpectedSize += (actualLength - contentLength);
|
||||
} else {
|
||||
update.totalExpectedSize += actualLength;
|
||||
JSONUtilities.safePut(fileRecord, "declaredEncoding", encoding);
|
||||
String contentType = null;
|
||||
if (entity.getContentType().getValue() != null) {
|
||||
contentType = entity.getContentType().getValue();
|
||||
}
|
||||
progress.setProgress("Saving " + urlString + " locally",
|
||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||
|
||||
if (postProcessRetrievedFile(rawDataDir, file, fileRecord, fileRecords, progress)) {
|
||||
JSONUtilities.safePut(fileRecord, "declaredMimeType", contentType);
|
||||
if (saveStream(stream2, url, rawDataDir, progress, update,
|
||||
fileRecord, fileRecords,
|
||||
entity.getContentLength())) {
|
||||
archiveCount++;
|
||||
}
|
||||
downloadCount++;
|
||||
EntityUtils.consume(entity);
|
||||
} finally {
|
||||
httpGet.releaseConnection();
|
||||
}
|
||||
} else {
|
||||
// Fallback handling for non HTTP connections (only FTP?)
|
||||
URLConnection urlConnection = url.openConnection();
|
||||
urlConnection.setConnectTimeout(5000);
|
||||
urlConnection.connect();
|
||||
InputStream stream2 = urlConnection.getInputStream();
|
||||
JSONUtilities.safePut(fileRecord, "declaredEncoding",
|
||||
urlConnection.getContentEncoding());
|
||||
JSONUtilities.safePut(fileRecord, "declaredMimeType",
|
||||
urlConnection.getContentType());
|
||||
try {
|
||||
if (saveStream(stream2, url, rawDataDir, progress,
|
||||
update, fileRecord, fileRecords,
|
||||
urlConnection.getContentLength())) {
|
||||
archiveCount++;
|
||||
}
|
||||
|
||||
downloadCount++;
|
||||
} finally {
|
||||
stream2.close();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
String value = Streams.asString(stream);
|
||||
parameters.put(name, value);
|
||||
@ -361,8 +371,8 @@ public class ImportingUtilities {
|
||||
}
|
||||
|
||||
// Delete all temp files.
|
||||
for (Object obj : tempFiles) {
|
||||
((FileItem)obj).delete();
|
||||
for (FileItem fileItem : tempFiles) {
|
||||
fileItem.delete();
|
||||
}
|
||||
|
||||
JSONUtilities.safePut(retrievalRecord, "uploadCount", uploadCount);
|
||||
@ -371,6 +381,37 @@ public class ImportingUtilities {
|
||||
JSONUtilities.safePut(retrievalRecord, "archiveCount", archiveCount);
|
||||
}
|
||||
|
||||
private static boolean saveStream(InputStream stream, URL url, File rawDataDir, final Progress progress,
|
||||
final SavingUpdate update, JSONObject fileRecord, JSONArray fileRecords, long length)
|
||||
throws IOException, Exception {
|
||||
String localname = url.getPath();
|
||||
if (localname.isEmpty() || localname.endsWith("/")) {
|
||||
localname = localname + "temp";
|
||||
}
|
||||
File file = allocateFile(rawDataDir, localname);
|
||||
|
||||
JSONUtilities.safePut(fileRecord, "fileName", file.getName());
|
||||
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
|
||||
|
||||
update.totalExpectedSize += length;
|
||||
|
||||
progress.setProgress("Downloading " + url.toString(),
|
||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||
|
||||
long actualLength = saveStreamToFile(stream, file, update);
|
||||
JSONUtilities.safePut(fileRecord, "size", actualLength);
|
||||
if (actualLength == 0) {
|
||||
throw new Exception("No content found in " + url.toString());
|
||||
} else if (length >= 0) {
|
||||
update.totalExpectedSize += (actualLength - length);
|
||||
} else {
|
||||
update.totalExpectedSize += actualLength;
|
||||
}
|
||||
progress.setProgress("Saving " + url.toString() + " locally",
|
||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||
return postProcessRetrievedFile(rawDataDir, file, fileRecord, fileRecords, progress);
|
||||
}
|
||||
|
||||
static public String getRelativePath(File file, File dir) {
|
||||
String location = file.getAbsolutePath().substring(dir.getAbsolutePath().length());
|
||||
return (location.startsWith(File.separator)) ? location.substring(1) : location;
|
||||
@ -627,17 +668,13 @@ public class ImportingUtilities {
|
||||
static public InputStream tryOpenAsCompressedFile(File file, String mimeType, String contentEncoding) {
|
||||
String fileName = file.getName();
|
||||
try {
|
||||
/*
|
||||
* TODO: Do we need to support MIME types as well as content encodings?
|
||||
* application/x-bzip2
|
||||
* application/x-gzip
|
||||
* multipart/x-gzip
|
||||
*/
|
||||
if (fileName.endsWith(".gz")
|
||||
|| "gzip".equals(contentEncoding)
|
||||
|| "x-gzip".equals(contentEncoding)) {
|
||||
|| "x-gzip".equals(contentEncoding)
|
||||
|| "application/x-gzip".equals(mimeType)) {
|
||||
return new GZIPInputStream(new FileInputStream(file));
|
||||
} else if (fileName.endsWith(".bz2")) {
|
||||
} else if (fileName.endsWith(".bz2")
|
||||
||"application/x-bzip2".equals(mimeType)) {
|
||||
InputStream is = new FileInputStream(file);
|
||||
is.mark(4);
|
||||
if (!(is.read() == 'B' && is.read() == 'Z')) {
|
||||
|
Loading…
Reference in New Issue
Block a user