Enable gzip compression (#2475)

* Enable gzip compression

* Add test for gzip parser
This commit is contained in:
chuhao zeng 2020-03-26 03:42:55 -04:00 committed by GitHub
parent 1f98bfdbc3
commit 70b4c6a6d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 47 additions and 1 deletions

View File

@ -344,6 +344,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
try { try {
URLConnection urlConnection = url.openConnection(); URLConnection urlConnection = url.openConnection();
urlConnection.setRequestProperty("Accept-Encoding", "gzip");
if (_httpHeadersJson != null) { if (_httpHeadersJson != null) {
for (int i = 0; i < _httpHeadersJson.size(); i++) { for (int i = 0; i < _httpHeadersJson.size(); i++) {
String headerLabel = _httpHeadersJson.get(i).name; String headerLabel = _httpHeadersJson.get(i).name;
@ -354,6 +355,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
} }
} }
try { try {
InputStream is = urlConnection.getInputStream(); InputStream is = urlConnection.getInputStream();
try { try {

View File

@ -68,6 +68,9 @@ import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.ser.FilterProvider; import com.fasterxml.jackson.databind.ser.FilterProvider;
import com.fasterxml.jackson.databind.ser.impl.SimpleFilterProvider; import com.fasterxml.jackson.databind.ser.impl.SimpleFilterProvider;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipException;
public class ParsingUtilities { public class ParsingUtilities {
public static JsonFactory jsonFactory = new JsonFactory(); public static JsonFactory jsonFactory = new JsonFactory();
static { static {
@ -133,7 +136,14 @@ public class ParsingUtilities {
} }
static public String inputStreamToString(InputStream is, String encoding) throws IOException { static public String inputStreamToString(InputStream is, String encoding) throws IOException {
Reader reader = new InputStreamReader(is, encoding); Reader reader;
if (encoding.equals("gzip")) {
InputStream inputStream = new GZIPInputStream(is);
reader = new InputStreamReader(inputStream);
} else {
reader = new InputStreamReader(is, encoding);
}
try { try {
return readerToString(reader); return readerToString(reader);
} finally { } finally {

View File

@ -188,6 +188,7 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
Assert.assertFalse(process.isRunning()); Assert.assertFalse(process.isRunning());
} }
/** /**
* Fetch invalid URLs * Fetch invalid URLs
* https://github.com/OpenRefine/OpenRefine/issues/1219 * https://github.com/OpenRefine/OpenRefine/issues/1219

View File

@ -33,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.util; package com.google.refine.util;
import java.io.*;
import java.util.zip.GZIPOutputStream;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.util.HashMap; import java.util.HashMap;
@ -126,4 +129,34 @@ public class ParsingUtilitiesTests extends RefineTest {
String result = sub.replace(message); String result = sub.replace(message);
Assert.assertTrue(result.contains("1234")); Assert.assertTrue(result.contains("1234"));
} }
/**
 * Checks that {@code ParsingUtilities.inputStreamToString} returns the
 * original text when given a gzip-compressed stream and the encoding
 * argument {@code "gzip"}.
 *
 * @throws IOException if compressing the sample or reading it back fails;
 *         the exception is allowed to propagate so that the test report
 *         carries the real cause and stack trace
 */
@Test
public void testParseGZIPInutstream() throws IOException {
    // The sample body contains only ASCII characters.
    String sampleBody = "<HTML>\n" +
            "\n" +
            "<HEAD>\n" +
            "\n" +
            "<TITLE>Your Title Here</TITLE>\n" +
            "\n" +
            "</HEAD>\n" +
            "\n" +
            "<BODY BGCOLOR=\"FFFFFF\">\n" +
            "\n" +
            "</BODY>\n" +
            "\n" +
            "</HTML>";

    // Compress the sample in memory. Closing the gzip stream writes the
    // trailer, so the bytes must only be read after the try block exits.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try (GZIPOutputStream gzip = new GZIPOutputStream(buffer)) {
        gzip.write(sampleBody.getBytes("UTF-8"));
    }
    byte[] compressed = buffer.toByteArray();

    String res = ParsingUtilities.inputStreamToString(new ByteArrayInputStream(compressed), "gzip");
    Assert.assertEquals(res, sampleBody);
}
} }