From 42e8c86a7858b978da075c5fbc38a8a10ae7d74b Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Sat, 27 Oct 2018 06:34:33 +0100 Subject: [PATCH 1/3] Add support for JSON-LD import --- .../importers/RdfJsonldTripleImporter.java | 43 +++++++++++++++++++ .../refine/importers/RdfTripleImporter.java | 6 ++- .../importers/RdfTripleImporterTests.java | 38 ++++++++++++++++ .../webapp/modules/core/MOD-INF/controller.js | 4 ++ 4 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 main/src/com/google/refine/importers/RdfJsonldTripleImporter.java diff --git a/main/src/com/google/refine/importers/RdfJsonldTripleImporter.java b/main/src/com/google/refine/importers/RdfJsonldTripleImporter.java new file mode 100644 index 000000000..7f5b728b4 --- /dev/null +++ b/main/src/com/google/refine/importers/RdfJsonldTripleImporter.java @@ -0,0 +1,43 @@ +/* + +Copyright 2012, Thomas F. Morris +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +package com.google.refine.importers; + + +public class RdfJsonldTripleImporter extends RdfTripleImporter { + + public RdfJsonldTripleImporter() { + super(RdfTripleImporter.Mode.JSONLD); + } + +} diff --git a/main/src/com/google/refine/importers/RdfTripleImporter.java b/main/src/com/google/refine/importers/RdfTripleImporter.java index cbfbd8818..96b70c156 100644 --- a/main/src/com/google/refine/importers/RdfTripleImporter.java +++ b/main/src/com/google/refine/importers/RdfTripleImporter.java @@ -62,7 +62,8 @@ public class RdfTripleImporter extends ImportingParserBase { RDFXML, NT, N3, - TTL + TTL, + JSONLD } public RdfTripleImporter() { @@ -88,6 +89,9 @@ public class RdfTripleImporter extends ImportingParserBase { case TTL: model.read(input, null, "TTL"); break; + case JSONLD: + model.read(input, null, "JSON-LD"); + break; case RDFXML: model.read(input, null); break; diff --git a/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java index 315ea02d2..82a7fd5b2 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java @@ -233,5 +233,43 @@ public class RdfTripleImporterTests extends ImporterTest { Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://meetings.example.com/cal#m1"); 
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "mailto:fred@example.com"); Assert.assertEquals(project.rows.get(0).cells.get(3).value, "Fred"); + } + + @Test + public void canParseJsonld() throws UnsupportedEncodingException { + String sampleJsonld = "{\n "+ + " \"@context\": {\n "+ + " \"m\": \"http://www.example.org/meeting_organization#\",\n "+ + " \"p\": \"http://www.example.org/personal_details#\",\n "+ + " \"rdf\": \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\",\n "+ + " \"rdfs\": \"http://www.w3.org/2000/01/rdf-schema#\",\n "+ + " \"xsd\": \"http://www.w3.org/2001/XMLSchema#\"\n "+ + " },\n "+ + " \"@id\": \"http://www.example.org/people#fred\",\n "+ + " \"m:attending\": {\n "+ + " \"@id\": \"http://meetings.example.com/cal#m1\"\n "+ + " },\n "+ + " \"p:GivenName\": \"Fred\",\n "+ + " \"p:hasEmail\": {\n "+ + " \"@id\": \"mailto:fred@example.com\"\n "+ + " }\n "+ + "}"; + + InputStream input = new ByteArrayInputStream(sampleJsonld.getBytes("UTF-8")); + + SUT = new RdfTripleImporter(RdfTripleImporter.Mode.JSONLD); + parseOneFile(SUT, input); + + Assert.assertEquals(project.columnModel.columns.size(), 4); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://www.example.org/personal_details#hasEmail"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://www.example.org/personal_details#GivenName"); + Assert.assertEquals(project.columnModel.columns.get(3).getName(), "http://www.example.org/meeting_organization#attending"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 4); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://www.example.org/people#fred"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "mailto:fred@example.com"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "Fred"); + 
Assert.assertEquals(project.rows.get(0).cells.get(3).value, "http://meetings.example.com/cal#m1"); } } diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js index 6229bb789..e7ef96a65 100644 --- a/main/webapp/modules/core/MOD-INF/controller.js +++ b/main/webapp/modules/core/MOD-INF/controller.js @@ -220,6 +220,7 @@ function registerImporting() { IM.registerFormat("binary/text/xml/xls/xlsx", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter()); IM.registerFormat("text/xml/ods", "Open Document Format spreadsheets (.ods)", "ExcelParserUI", new Packages.com.google.refine.importers.OdsImporter()); IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter()); + IM.registerFormat("text/ld+json", "JSON-LD files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfJsonldTripleImporter()); IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter()); IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter()); IM.registerFormat("text/wiki", "Wikitext", "WikitextParserUI", new Packages.com.google.refine.importers.WikitextImporter()); @@ -240,6 +241,7 @@ function registerImporting() { IM.registerExtension(".json", "text/json"); IM.registerExtension(".js", "text/json"); + IM.registerExtension(".jsonld", "text/ld+json"); IM.registerExtension(".xls", "binary/text/xml/xls/xlsx"); IM.registerExtension(".xlsx", "binary/text/xml/xls/xlsx"); @@ -264,6 +266,7 @@ function registerImporting() { IM.registerMimeType("text/fixed-width", "text/line-based/fixed-width"); IM.registerMimeType("text/rdf+n3", "text/rdf+n3"); + IM.registerMimeType("text/rdf+ttl", "text/rdf+ttl"); IM.registerMimeType("application/msexcel", "binary/text/xml/xls/xlsx"); IM.registerMimeType("application/x-msexcel", 
"binary/text/xml/xls/xlsx"); @@ -280,6 +283,7 @@ function registerImporting() { IM.registerMimeType("text/json", "text/json"); IM.registerMimeType("application/rdf+xml", "text/xml/rdf"); + IM.registerMimeType("application/ld+json", "text/ld+json"); IM.registerMimeType("application/marc", "text/marc"); From c3e49e79abace4a0bdb344e036b65e6e17beef17 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Sat, 27 Oct 2018 19:04:07 +0100 Subject: [PATCH 2/3] Add support for N-triples and Turtle --- .../refine/importers/RdfTripleImporter.java | 2 + .../importers/RdfTripleImporterTests.java | 52 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/main/src/com/google/refine/importers/RdfTripleImporter.java b/main/src/com/google/refine/importers/RdfTripleImporter.java index 96b70c156..64b60634f 100644 --- a/main/src/com/google/refine/importers/RdfTripleImporter.java +++ b/main/src/com/google/refine/importers/RdfTripleImporter.java @@ -86,6 +86,8 @@ public class RdfTripleImporter extends ImportingParserBase { model.read(input, null, "NT"); break; case N3: + model.read(input, null, "N3"); + break; case TTL: model.read(input, null, "TTL"); break; diff --git a/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java index 82a7fd5b2..2fbc4235e 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java @@ -235,6 +235,58 @@ public class RdfTripleImporterTests extends ImporterTest { Assert.assertEquals(project.rows.get(0).cells.get(3).value, "Fred"); } + @Test + public void canParseTtl() throws UnsupportedEncodingException { + String sampleRdf = "@prefix p: <http://www.example.org/personal_details#> .\n" + + "@prefix m: <http://www.example.org/meeting_organization#> .\n\n" + + "<http://www.example.org/people#fred>\n" + + "p:GivenName \"Fred\";\n" + + "p:hasEmail <mailto:fred@example.com> ;\n" + + "m:attending <http://meetings.example.com/cal#m1> .\n"; + + InputStream input = new 
ByteArrayInputStream(sampleRdf.getBytes("UTF-8")); + + SUT = new RdfTripleImporter(RdfTripleImporter.Mode.TTL); + parseOneFile(SUT, input); + + Assert.assertEquals(project.columnModel.columns.size(), 4); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://www.example.org/meeting_organization#attending"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://www.example.org/personal_details#hasEmail"); + Assert.assertEquals(project.columnModel.columns.get(3).getName(), "http://www.example.org/personal_details#GivenName"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 4); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://www.example.org/people#fred"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://meetings.example.com/cal#m1"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "mailto:fred@example.com"); + Assert.assertEquals(project.rows.get(0).cells.get(3).value, "Fred"); + } + + @Test + public void canParseNTriples() throws UnsupportedEncodingException { + String sampleRdf = "<http://www.example.org/people#fred> <http://www.example.org/meeting_organization#attending> <http://meetings.example.com/cal#m1> . \n" + + "<http://www.example.org/people#fred> <http://www.example.org/personal_details#hasEmail> <mailto:fred@example.com> . \n" + + "<http://www.example.org/people#fred> <http://www.example.org/personal_details#GivenName> \"Fred\" . 
"; + + InputStream input = new ByteArrayInputStream(sampleRdf.getBytes("UTF-8")); + + SUT = new RdfTripleImporter(RdfTripleImporter.Mode.NT); + parseOneFile(SUT, input); + + Assert.assertEquals(project.columnModel.columns.size(), 4); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://www.example.org/personal_details#GivenName"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://www.example.org/personal_details#hasEmail"); + Assert.assertEquals(project.columnModel.columns.get(3).getName(), "http://www.example.org/meeting_organization#attending"); + + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 4); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://www.example.org/people#fred"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "Fred"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "mailto:fred@example.com"); + Assert.assertEquals(project.rows.get(0).cells.get(3).value, "http://meetings.example.com/cal#m1"); + } + @Test public void canParseJsonld() throws UnsupportedEncodingException { String sampleJsonld = "{\n "+ From 94b52f6d55667358a734f2fed4587bdcdeac83f1 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Sat, 27 Oct 2018 19:13:35 +0100 Subject: [PATCH 3/3] Add n-triple and ttl formats and small rearrangement of controller.js --- .../webapp/modules/core/MOD-INF/controller.js | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js index e7ef96a65..009429d49 100644 --- a/main/webapp/modules/core/MOD-INF/controller.js +++ b/main/webapp/modules/core/MOD-INF/controller.js @@ -213,14 +213,18 @@ function registerImporting() { IM.registerFormat("text/line-based/fixed-width", "Fixed-width field text files", 
"FixedWidthParserUI", new Packages.com.google.refine.importers.FixedWidthImporter()); - IM.registerFormat("text/rdf+n3", "RDF/N3 files", "RdfTriplesParserUI", + IM.registerFormat("text/rdf/nt", "RDF/N-Triples files", "RdfTriplesParserUI", + new Packages.com.google.refine.importers.RdfTripleImporter(Packages.com.google.refine.importers.RdfTripleImporter.Mode.NT)); + IM.registerFormat("text/rdf/n3", "RDF/N3 files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfTripleImporter(Packages.com.google.refine.importers.RdfTripleImporter.Mode.N3)); + IM.registerFormat("text/rdf/ttl", "RDF/Turtle files", "RdfTriplesParserUI", + new Packages.com.google.refine.importers.RdfTripleImporter(Packages.com.google.refine.importers.RdfTripleImporter.Mode.TTL)); + IM.registerFormat("text/rdf/xml", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter()); + IM.registerFormat("text/rdf/ld+json", "JSON-LD files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfJsonldTripleImporter()); IM.registerFormat("text/xml", "XML files", "XmlParserUI", new Packages.com.google.refine.importers.XmlImporter()); IM.registerFormat("binary/text/xml/xls/xlsx", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter()); IM.registerFormat("text/xml/ods", "Open Document Format spreadsheets (.ods)", "ExcelParserUI", new Packages.com.google.refine.importers.OdsImporter()); - IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter()); - IM.registerFormat("text/ld+json", "JSON-LD files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfJsonldTripleImporter()); IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter()); IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter()); 
IM.registerFormat("text/wiki", "Wikitext", "WikitextParserUI", new Packages.com.google.refine.importers.WikitextImporter()); @@ -237,18 +241,20 @@ function registerImporting() { IM.registerExtension(".tsv", "text/line-based/*sv"); IM.registerExtension(".xml", "text/xml"); - IM.registerExtension(".rdf", "text/xml/rdf"); IM.registerExtension(".json", "text/json"); IM.registerExtension(".js", "text/json"); - IM.registerExtension(".jsonld", "text/ld+json"); IM.registerExtension(".xls", "binary/text/xml/xls/xlsx"); IM.registerExtension(".xlsx", "binary/text/xml/xls/xlsx"); IM.registerExtension(".ods", "text/xml/ods"); - IM.registerExtension(".n3", "text/rdf+n3"); + IM.registerExtension(".nt", "text/rdf/nt"); + IM.registerExtension(".n3", "text/rdf/n3"); + IM.registerExtension(".ttl", "text/rdf/ttl"); + IM.registerExtension(".jsonld", "text/rdf/ld+json"); + IM.registerExtension(".rdf", "text/rdf/xml"); IM.registerExtension(".marc", "text/marc"); IM.registerExtension(".mrc", "text/marc"); @@ -265,8 +271,12 @@ function registerImporting() { IM.registerMimeType("text/fixed-width", "text/line-based/fixed-width"); - IM.registerMimeType("text/rdf+n3", "text/rdf+n3"); - IM.registerMimeType("text/rdf+ttl", "text/rdf+ttl"); + IM.registerMimeType("application/n-triples", "text/rdf/nt"); + IM.registerMimeType("text/n3", "text/rdf/n3"); + IM.registerMimeType("text/rdf+n3", "text/rdf/n3"); + IM.registerMimeType("text/turtle", "text/rdf/ttl"); + IM.registerMimeType("application/rdf+xml", "text/rdf/xml"); + IM.registerMimeType("application/ld+json", "text/rdf/ld+json"); IM.registerMimeType("application/msexcel", "binary/text/xml/xls/xlsx"); IM.registerMimeType("application/x-msexcel", "binary/text/xml/xls/xlsx"); @@ -282,9 +292,6 @@ function registerImporting() { IM.registerMimeType("application/javascript", "text/json"); IM.registerMimeType("text/json", "text/json"); - IM.registerMimeType("application/rdf+xml", "text/xml/rdf"); - IM.registerMimeType("application/ld+json", 
"text/ld+json"); - IM.registerMimeType("application/marc", "text/marc"); IM.registerUrlRewriter(new Packages.com.google.refine.model.metadata.DataPackageUrlRewriter());