Switch to Apache HTTP client for downloads - fixes #748
This commit is contained in:
parent
d7531bbbd8
commit
4f7da9d18e
@ -370,7 +370,10 @@ public class RefineServlet extends Butterfly {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static public void setUserAgent(HttpURLConnection httpConnection) {
|
static public void setUserAgent(HttpURLConnection httpConnection) {
|
||||||
httpConnection.addRequestProperty("User-Agent", "OpenRefine/" + FULL_VERSION);
|
httpConnection.addRequestProperty("User-Agent", getUserAgent());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public String getUserAgent() {
|
||||||
|
return "OpenRefine/" + FULL_VERSION;
|
||||||
|
}
|
||||||
}
|
}
|
@ -42,7 +42,6 @@ import java.io.InputStream;
|
|||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.HttpURLConnection;
|
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLConnection;
|
import java.net.URLConnection;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -65,6 +64,12 @@ import org.apache.commons.fileupload.ProgressListener;
|
|||||||
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
||||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||||
import org.apache.commons.fileupload.util.Streams;
|
import org.apache.commons.fileupload.util.Streams;
|
||||||
|
import org.apache.http.HttpEntity;
|
||||||
|
import org.apache.http.HttpResponse;
|
||||||
|
import org.apache.http.client.methods.HttpGet;
|
||||||
|
import org.apache.http.impl.client.DecompressingHttpClient;
|
||||||
|
import org.apache.http.impl.client.DefaultHttpClient;
|
||||||
|
import org.apache.http.util.EntityUtils;
|
||||||
import org.apache.tools.bzip2.CBZip2InputStream;
|
import org.apache.tools.bzip2.CBZip2InputStream;
|
||||||
import org.apache.tools.tar.TarEntry;
|
import org.apache.tools.tar.TarEntry;
|
||||||
import org.apache.tools.tar.TarInputStream;
|
import org.apache.tools.tar.TarInputStream;
|
||||||
@ -210,16 +215,15 @@ public class ImportingUtilities {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@SuppressWarnings("rawtypes")
|
@SuppressWarnings("unchecked")
|
||||||
List tempFiles = upload.parseRequest(request);
|
List<FileItem> tempFiles = (List<FileItem>)upload.parseRequest(request);
|
||||||
|
|
||||||
progress.setProgress("Uploading data ...", -1);
|
progress.setProgress("Uploading data ...", -1);
|
||||||
parts: for (Object obj : tempFiles) {
|
parts: for (FileItem fileItem : tempFiles) {
|
||||||
if (progress.isCanceled()) {
|
if (progress.isCanceled()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
FileItem fileItem = (FileItem) obj;
|
|
||||||
InputStream stream = fileItem.getInputStream();
|
InputStream stream = fileItem.getInputStream();
|
||||||
|
|
||||||
String name = fileItem.getFieldName().toLowerCase();
|
String name = fileItem.getFieldName().toLowerCase();
|
||||||
@ -244,10 +248,10 @@ public class ImportingUtilities {
|
|||||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||||
|
|
||||||
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
|
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
|
||||||
|
JSONUtilities.append(fileRecords, fileRecord);
|
||||||
|
|
||||||
clipboardCount++;
|
clipboardCount++;
|
||||||
|
|
||||||
JSONUtilities.append(fileRecords, fileRecord);
|
|
||||||
} else if (name.equals("download")) {
|
} else if (name.equals("download")) {
|
||||||
String urlString = Streams.asString(stream);
|
String urlString = Streams.asString(stream);
|
||||||
URL url = new URL(urlString);
|
URL url = new URL(urlString);
|
||||||
@ -271,56 +275,62 @@ public class ImportingUtilities {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
URLConnection urlConnection = url.openConnection();
|
|
||||||
urlConnection.setConnectTimeout(5000);
|
|
||||||
if (urlConnection instanceof HttpURLConnection) {
|
|
||||||
HttpURLConnection httpConnection = (HttpURLConnection) urlConnection;
|
|
||||||
RefineServlet.setUserAgent(httpConnection);
|
|
||||||
}
|
|
||||||
// TODO: Set Accept-Encoding on connection so we don't get stuff we can't handle?
|
|
||||||
urlConnection.connect();
|
|
||||||
|
|
||||||
InputStream stream2 = urlConnection.getInputStream();
|
|
||||||
try {
|
|
||||||
String localname = url.getPath();
|
|
||||||
if (localname.isEmpty() || localname.endsWith("/")) {
|
|
||||||
localname = localname + "temp";
|
|
||||||
}
|
|
||||||
File file = allocateFile(rawDataDir, localname);
|
|
||||||
|
|
||||||
int contentLength = urlConnection.getContentLength();
|
|
||||||
if (contentLength > 0) {
|
|
||||||
update.totalExpectedSize += contentLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
JSONUtilities.safePut(fileRecord, "declaredEncoding", urlConnection.getContentEncoding());
|
|
||||||
JSONUtilities.safePut(fileRecord, "declaredMimeType", urlConnection.getContentType());
|
|
||||||
JSONUtilities.safePut(fileRecord, "fileName", file.getName());
|
|
||||||
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
|
|
||||||
|
|
||||||
progress.setProgress("Downloading " + urlString,
|
if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
|
||||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
DecompressingHttpClient httpclient =
|
||||||
|
new DecompressingHttpClient(new DefaultHttpClient());
|
||||||
long actualLength = saveStreamToFile(stream2, file, update);
|
HttpGet httpGet = new HttpGet(url.toURI());
|
||||||
JSONUtilities.safePut(fileRecord, "size", actualLength);
|
httpGet.setHeader("User-Agent", RefineServlet.getUserAgent());
|
||||||
if (actualLength == 0) {
|
|
||||||
throw new Exception("No content found in " + urlString);
|
|
||||||
} else if (contentLength >= 0) {
|
|
||||||
update.totalExpectedSize += (actualLength - contentLength);
|
|
||||||
} else {
|
|
||||||
update.totalExpectedSize += actualLength;
|
|
||||||
}
|
|
||||||
progress.setProgress("Saving " + urlString + " locally",
|
|
||||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
|
||||||
|
|
||||||
if (postProcessRetrievedFile(rawDataDir, file, fileRecord, fileRecords, progress)) {
|
|
||||||
archiveCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
downloadCount++;
|
HttpResponse response = httpclient.execute(httpGet);
|
||||||
} finally {
|
|
||||||
stream2.close();
|
try {
|
||||||
|
response.getStatusLine();
|
||||||
|
HttpEntity entity = response.getEntity();
|
||||||
|
if (entity == null) {
|
||||||
|
throw new Exception("No content found in " + url.toString());
|
||||||
|
}
|
||||||
|
InputStream stream2 = entity.getContent();
|
||||||
|
String encoding = null;
|
||||||
|
if (entity.getContentEncoding() != null) {
|
||||||
|
encoding = entity.getContentEncoding().getValue();
|
||||||
|
}
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredEncoding", encoding);
|
||||||
|
String contentType = null;
|
||||||
|
if (entity.getContentType().getValue() != null) {
|
||||||
|
contentType = entity.getContentType().getValue();
|
||||||
|
}
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredMimeType", contentType);
|
||||||
|
if (saveStream(stream2, url, rawDataDir, progress, update,
|
||||||
|
fileRecord, fileRecords,
|
||||||
|
entity.getContentLength())) {
|
||||||
|
archiveCount++;
|
||||||
|
}
|
||||||
|
downloadCount++;
|
||||||
|
EntityUtils.consume(entity);
|
||||||
|
} finally {
|
||||||
|
httpGet.releaseConnection();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Fallback handling for non HTTP connections (only FTP?)
|
||||||
|
URLConnection urlConnection = url.openConnection();
|
||||||
|
urlConnection.setConnectTimeout(5000);
|
||||||
|
urlConnection.connect();
|
||||||
|
InputStream stream2 = urlConnection.getInputStream();
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredEncoding",
|
||||||
|
urlConnection.getContentEncoding());
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredMimeType",
|
||||||
|
urlConnection.getContentType());
|
||||||
|
try {
|
||||||
|
if (saveStream(stream2, url, rawDataDir, progress,
|
||||||
|
update, fileRecord, fileRecords,
|
||||||
|
urlConnection.getContentLength())) {
|
||||||
|
archiveCount++;
|
||||||
|
}
|
||||||
|
downloadCount++;
|
||||||
|
} finally {
|
||||||
|
stream2.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
String value = Streams.asString(stream);
|
String value = Streams.asString(stream);
|
||||||
@ -361,8 +371,8 @@ public class ImportingUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Delete all temp files.
|
// Delete all temp files.
|
||||||
for (Object obj : tempFiles) {
|
for (FileItem fileItem : tempFiles) {
|
||||||
((FileItem)obj).delete();
|
fileItem.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
JSONUtilities.safePut(retrievalRecord, "uploadCount", uploadCount);
|
JSONUtilities.safePut(retrievalRecord, "uploadCount", uploadCount);
|
||||||
@ -370,6 +380,37 @@ public class ImportingUtilities {
|
|||||||
JSONUtilities.safePut(retrievalRecord, "clipboardCount", clipboardCount);
|
JSONUtilities.safePut(retrievalRecord, "clipboardCount", clipboardCount);
|
||||||
JSONUtilities.safePut(retrievalRecord, "archiveCount", archiveCount);
|
JSONUtilities.safePut(retrievalRecord, "archiveCount", archiveCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean saveStream(InputStream stream, URL url, File rawDataDir, final Progress progress,
|
||||||
|
final SavingUpdate update, JSONObject fileRecord, JSONArray fileRecords, long length)
|
||||||
|
throws IOException, Exception {
|
||||||
|
String localname = url.getPath();
|
||||||
|
if (localname.isEmpty() || localname.endsWith("/")) {
|
||||||
|
localname = localname + "temp";
|
||||||
|
}
|
||||||
|
File file = allocateFile(rawDataDir, localname);
|
||||||
|
|
||||||
|
JSONUtilities.safePut(fileRecord, "fileName", file.getName());
|
||||||
|
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
|
||||||
|
|
||||||
|
update.totalExpectedSize += length;
|
||||||
|
|
||||||
|
progress.setProgress("Downloading " + url.toString(),
|
||||||
|
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||||
|
|
||||||
|
long actualLength = saveStreamToFile(stream, file, update);
|
||||||
|
JSONUtilities.safePut(fileRecord, "size", actualLength);
|
||||||
|
if (actualLength == 0) {
|
||||||
|
throw new Exception("No content found in " + url.toString());
|
||||||
|
} else if (length >= 0) {
|
||||||
|
update.totalExpectedSize += (actualLength - length);
|
||||||
|
} else {
|
||||||
|
update.totalExpectedSize += actualLength;
|
||||||
|
}
|
||||||
|
progress.setProgress("Saving " + url.toString() + " locally",
|
||||||
|
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||||
|
return postProcessRetrievedFile(rawDataDir, file, fileRecord, fileRecords, progress);
|
||||||
|
}
|
||||||
|
|
||||||
static public String getRelativePath(File file, File dir) {
|
static public String getRelativePath(File file, File dir) {
|
||||||
String location = file.getAbsolutePath().substring(dir.getAbsolutePath().length());
|
String location = file.getAbsolutePath().substring(dir.getAbsolutePath().length());
|
||||||
@ -627,17 +668,13 @@ public class ImportingUtilities {
|
|||||||
static public InputStream tryOpenAsCompressedFile(File file, String mimeType, String contentEncoding) {
|
static public InputStream tryOpenAsCompressedFile(File file, String mimeType, String contentEncoding) {
|
||||||
String fileName = file.getName();
|
String fileName = file.getName();
|
||||||
try {
|
try {
|
||||||
/*
|
|
||||||
* TODO: Do we need to support MIME types as well as content encodings?
|
|
||||||
* application/x-bzip2
|
|
||||||
* application/x-gzip
|
|
||||||
* multipart/x-gzip
|
|
||||||
*/
|
|
||||||
if (fileName.endsWith(".gz")
|
if (fileName.endsWith(".gz")
|
||||||
|| "gzip".equals(contentEncoding)
|
|| "gzip".equals(contentEncoding)
|
||||||
|| "x-gzip".equals(contentEncoding)) {
|
|| "x-gzip".equals(contentEncoding)
|
||||||
|
|| "application/x-gzip".equals(mimeType)) {
|
||||||
return new GZIPInputStream(new FileInputStream(file));
|
return new GZIPInputStream(new FileInputStream(file));
|
||||||
} else if (fileName.endsWith(".bz2")) {
|
} else if (fileName.endsWith(".bz2")
|
||||||
|
||"application/x-bzip2".equals(mimeType)) {
|
||||||
InputStream is = new FileInputStream(file);
|
InputStream is = new FileInputStream(file);
|
||||||
is.mark(4);
|
is.mark(4);
|
||||||
if (!(is.read() == 'B' && is.read() == 'Z')) {
|
if (!(is.read() == 'B' && is.read() == 'Z')) {
|
||||||
|
Loading…
Reference in New Issue
Block a user