diff --git a/main/src/com/google/refine/importing/ImportingManager.java b/main/src/com/google/refine/importing/ImportingManager.java index 02425cdc7..2bbd75244 100644 --- a/main/src/com/google/refine/importing/ImportingManager.java +++ b/main/src/com/google/refine/importing/ImportingManager.java @@ -256,14 +256,21 @@ public class ImportingManager { } static public String getFormatFromMimeType(String mimeType) { - return mimeTypeToFormat.get(mimeType); + String format = mimeTypeToFormat.get(mimeType); + if (format == null) { + // Try Structured Name Syntax Suffix + // https://tools.ietf.org/html/rfc6838#section-4.2.8 + if (mimeType.contains("+")) { + // These are registered with a leading plus sign (+) to show they're suffixes + mimeType = "+" + mimeType.split("\\+")[1]; + format = mimeTypeToFormat.get(mimeType); + } + } + return format; } static public String getFormat(String fileName, String mimeType) { String fileNameFormat = getFormatFromFileName(fileName); - if (mimeType != null) { - mimeType = mimeType.split(";")[0]; - } String mimeTypeFormat = mimeType == null ? 
null : getFormatFromMimeType(mimeType); if (mimeTypeFormat == null) { return fileNameFormat; diff --git a/main/src/com/google/refine/importing/ImportingUtilities.java b/main/src/com/google/refine/importing/ImportingUtilities.java index b8a7ae84f..827fce6f4 100644 --- a/main/src/com/google/refine/importing/ImportingUtilities.java +++ b/main/src/com/google/refine/importing/ImportingUtilities.java @@ -44,6 +44,7 @@ import java.io.Reader; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLConnection; +import java.nio.charset.Charset; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Collections; @@ -74,6 +75,7 @@ import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.client.CredentialsProvider; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; +import org.apache.http.entity.ContentType; import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; @@ -313,17 +315,20 @@ public class ImportingUtilities { throw new Exception("No content found in " + url.toString()); } InputStream stream2 = entity.getContent(); - String encoding = null; - if (entity.getContentEncoding() != null) { - encoding = entity.getContentEncoding().getValue(); + + String mimeType = null; + String charset = null; + ContentType contentType = ContentType.get(entity); + if (contentType != null) { + mimeType = contentType.getMimeType(); + Charset cs = contentType.getCharset(); + if (cs != null) { + charset = cs.toString(); + } } - JSONUtilities.safePut(fileRecord, "declaredEncoding", encoding); - String contentType = null; - if (entity.getContentType() != null) { - contentType = entity.getContentType().getValue(); - } - JSONUtilities.safePut(fileRecord, "declaredMimeType", contentType); - if (saveStream(stream2, url, rawDataDir, progress, update, + 
JSONUtilities.safePut(fileRecord, "declaredMimeType", mimeType); + JSONUtilities.safePut(fileRecord, "declaredEncoding", charset); + if (saveStream(stream2, url, rawDataDir, progress, update, fileRecord, fileRecords, entity.getContentLength())) { archiveCount++; @@ -803,8 +808,9 @@ public class ImportingUtilities { } }); - // Default to text/line-based to to avoid parsing as binary/excel. - String bestFormat = formats.size() > 0 ? formats.get(0) : "text/line-based"; + // Default to "text" to avoid parsing as "binary/excel". + // "text" is more general than "text/line-based", so a better starting point + String bestFormat = formats.size() > 0 ? formats.get(0) : "text"; if (JSONUtilities.getInt(retrievalRecord, "archiveCount", 0) == 0) { // If there's no archive, then select everything for (int i = 0; i < count; i++) { diff --git a/main/tests/server/src/com/google/refine/importing/ImportingManagerTests.java b/main/tests/server/src/com/google/refine/importing/ImportingManagerTests.java new file mode 100644 index 000000000..c5fdf8a1f --- /dev/null +++ b/main/tests/server/src/com/google/refine/importing/ImportingManagerTests.java @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright (C) 2018, OpenRefine contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ +package com.google.refine.importing; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import com.google.refine.importers.ImporterTest; + +public class ImportingManagerTests extends ImporterTest { + + @Override + @BeforeMethod + public void setUp(){ + super.setUp(); + } + + + @Test + public void testStructuredNameSuffixFallback() { + ImportingManager.registerMimeType("+json", "text/json"); + assertEquals(ImportingManager.getFormatFromMimeType("application/sparql-results+json"), "text/json"); + } + +} diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js index f9cf062d7..255c5d163 100644 --- a/main/webapp/modules/core/MOD-INF/controller.js +++ b/main/webapp/modules/core/MOD-INF/controller.js @@ -280,6 +280,7 @@ function registerImporting() { IM.registerMimeType("text/turtle", "text/rdf/ttl"); IM.registerMimeType("application/xml", "text/xml"); IM.registerMimeType("text/xml", "text/xml"); + IM.registerMimeType("+xml", "text/xml"); // suffix will be tried only 
as fallback IM.registerMimeType("application/rdf+xml", "text/rdf/xml"); IM.registerMimeType("application/ld+json", "text/rdf/ld+json"); IM.registerMimeType("application/atom+xml", "text/xml"); @@ -299,6 +300,7 @@ function registerImporting() { IM.registerMimeType("application/json", "text/json"); IM.registerMimeType("application/javascript", "text/json"); IM.registerMimeType("text/json", "text/json"); + IM.registerMimeType("+json", "text/json"); // suffix will be tried only as fallback IM.registerMimeType("application/marc", "text/marc");