Enable gzip compression (#2475)
* Enable gzip compression * Add test for gzip parser
This commit is contained in:
parent
1f98bfdbc3
commit
70b4c6a6d0
@ -344,6 +344,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
URLConnection urlConnection = url.openConnection();
|
URLConnection urlConnection = url.openConnection();
|
||||||
|
urlConnection.setRequestProperty("Accept-Encoding", "gzip");
|
||||||
if (_httpHeadersJson != null) {
|
if (_httpHeadersJson != null) {
|
||||||
for (int i = 0; i < _httpHeadersJson.size(); i++) {
|
for (int i = 0; i < _httpHeadersJson.size(); i++) {
|
||||||
String headerLabel = _httpHeadersJson.get(i).name;
|
String headerLabel = _httpHeadersJson.get(i).name;
|
||||||
@ -354,6 +355,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
InputStream is = urlConnection.getInputStream();
|
InputStream is = urlConnection.getInputStream();
|
||||||
try {
|
try {
|
||||||
|
@ -68,6 +68,9 @@ import com.fasterxml.jackson.databind.node.ObjectNode;
|
|||||||
import com.fasterxml.jackson.databind.ser.FilterProvider;
|
import com.fasterxml.jackson.databind.ser.FilterProvider;
|
||||||
import com.fasterxml.jackson.databind.ser.impl.SimpleFilterProvider;
|
import com.fasterxml.jackson.databind.ser.impl.SimpleFilterProvider;
|
||||||
|
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
import java.util.zip.ZipException;
|
||||||
|
|
||||||
public class ParsingUtilities {
|
public class ParsingUtilities {
|
||||||
public static JsonFactory jsonFactory = new JsonFactory();
|
public static JsonFactory jsonFactory = new JsonFactory();
|
||||||
static {
|
static {
|
||||||
@ -133,7 +136,14 @@ public class ParsingUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static public String inputStreamToString(InputStream is, String encoding) throws IOException {
|
static public String inputStreamToString(InputStream is, String encoding) throws IOException {
|
||||||
Reader reader = new InputStreamReader(is, encoding);
|
Reader reader;
|
||||||
|
if (encoding.equals("gzip")) {
|
||||||
|
InputStream inputStream = new GZIPInputStream(is);
|
||||||
|
reader = new InputStreamReader(inputStream);
|
||||||
|
} else {
|
||||||
|
reader = new InputStreamReader(is, encoding);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return readerToString(reader);
|
return readerToString(reader);
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -188,6 +188,7 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
|
|||||||
Assert.assertFalse(process.isRunning());
|
Assert.assertFalse(process.isRunning());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetch invalid URLs
|
* Fetch invalid URLs
|
||||||
* https://github.com/OpenRefine/OpenRefine/issues/1219
|
* https://github.com/OpenRefine/OpenRefine/issues/1219
|
||||||
|
@ -33,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.util;
|
package com.google.refine.util;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
|
||||||
import java.time.OffsetDateTime;
|
import java.time.OffsetDateTime;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -126,4 +129,34 @@ public class ParsingUtilitiesTests extends RefineTest {
|
|||||||
String result = sub.replace(message);
|
String result = sub.replace(message);
|
||||||
Assert.assertTrue(result.contains("1234"));
|
Assert.assertTrue(result.contains("1234"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParseGZIPInutstream() throws IOException {
|
||||||
|
// Test decompressing gzip
|
||||||
|
try {
|
||||||
|
String sampleBody = "<HTML>\n" +
|
||||||
|
"\n" +
|
||||||
|
"<HEAD>\n" +
|
||||||
|
"\n" +
|
||||||
|
"<TITLE>Your Title Here</TITLE>\n" +
|
||||||
|
"\n" +
|
||||||
|
"</HEAD>\n" +
|
||||||
|
"\n" +
|
||||||
|
"<BODY BGCOLOR=\"FFFFFF\">\n" +
|
||||||
|
"\n" +
|
||||||
|
"</BODY>\n" +
|
||||||
|
"\n" +
|
||||||
|
"</HTML>";
|
||||||
|
ByteArrayOutputStream obj=new ByteArrayOutputStream();
|
||||||
|
GZIPOutputStream gzip = new GZIPOutputStream(obj);
|
||||||
|
gzip.write(sampleBody.getBytes("UTF-8"));
|
||||||
|
gzip.close();
|
||||||
|
byte[] compressed = obj.toByteArray();
|
||||||
|
|
||||||
|
String res = ParsingUtilities.inputStreamToString(new ByteArrayInputStream(compressed), "gzip");
|
||||||
|
Assert.assertEquals(res, sampleBody);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user