diff --git a/.classpath b/.classpath index b55cf5463..a20f2d5a8 100644 --- a/.classpath +++ b/.classpath @@ -17,7 +17,6 @@ - @@ -113,5 +112,20 @@ + + + + + + + + + + + + + + + diff --git a/main/src/com/google/refine/importers/RdfTripleImporter.java b/main/src/com/google/refine/importers/RdfTripleImporter.java index 6df68cc01..eb338c9d1 100644 --- a/main/src/com/google/refine/importers/RdfTripleImporter.java +++ b/main/src/com/google/refine/importers/RdfTripleImporter.java @@ -33,10 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.refine.importers; -import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE; -import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE; -import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE; - import java.io.InputStream; import java.util.ArrayList; import java.util.LinkedHashMap; @@ -44,10 +40,10 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; -import org.jrdf.graph.Graph; -import org.jrdf.graph.Triple; -import org.jrdf.parser.RdfReader; -import org.jrdf.util.ClosableIterable; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.rdf.model.Statement; +import org.apache.jena.rdf.model.StmtIterator; import org.json.JSONObject; import com.google.refine.expr.ExpressionUtils; @@ -60,13 +56,13 @@ import com.google.refine.model.Row; import com.google.refine.model.medadata.ProjectMetadata; public class RdfTripleImporter extends ImportingParserBase { - private RdfReader rdfReader; private Mode mode; public enum Mode { RDFXML, NT, - N3 + N3, + TTL } public RdfTripleImporter() { @@ -75,27 +71,25 @@ public class RdfTripleImporter extends ImportingParserBase { public RdfTripleImporter(Mode mode) { super(true); - rdfReader = new RdfReader(); this.mode = mode; } - - @Override - public void parseOneFile(Project project, ProjectMetadata metadata, - ImportingJob job, String fileSource, InputStream input, int limit, - JSONObject options, List exceptions) { - - Graph graph; + public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource, + InputStream input, int limit, JSONObject options, List exceptions) { + // create an empty model + Model model = ModelFactory.createDefaultModel(); + try { switch (mode) { case NT: - graph = rdfReader.parseNTriples(input); + model.read(input, null, "NT"); break; case N3: - graph = rdfReader.parseN3(input); + case TTL: + model.read(input, null, "TTL"); break; case RDFXML: - graph = rdfReader.parseRdfXml(input); + model.read(input, null); break; default: throw new IllegalArgumentException("Unknown parsing mode"); @@ -104,63 +98,63 @@ public class RdfTripleImporter extends ImportingParserBase { exceptions.add(e); return; } - - ClosableIterable triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE); - try { - Map> subjectToRows = new LinkedHashMap>(); - Column subjectColumn = new Column(project.columnModel.allocateNewCellIndex(), "subject"); - project.columnModel.addColumn(0, subjectColumn, false); - project.columnModel.setKeyColumnIndex(0); - - for (Triple triple : triples) { - String subject = triple.getSubject().toString(); - String predicate = triple.getPredicate().toString(); - String object = triple.getObject().toString(); - Column column = project.columnModel.getColumnByName(predicate); - if (column == null) { - column = new Column(project.columnModel.allocateNewCellIndex(), predicate); - project.columnModel.addColumn(-1, column, true); - } + StmtIterator triples = model.listStatements(); + + try { + Map> subjectToRows = new LinkedHashMap>(); + Column subjectColumn = new Column(project.columnModel.allocateNewCellIndex(), "subject"); + project.columnModel.addColumn(0, subjectColumn, false); + project.columnModel.setKeyColumnIndex(0); + + while (triples.hasNext()) { + Statement triple = triples.nextStatement(); + String subject = triple.getSubject().toString(); + String predicate = triple.getPredicate().toString(); + String object = triple.getObject().toString(); - int cellIndex = column.getCellIndex(); - if (subjectToRows.containsKey(subject)) { - List rows = subjectToRows.get(subject); - for (Row row : rows) { - if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) { - row.setCell(cellIndex, new Cell(object, null)); - object = null; - break; - } - } + Column column = project.columnModel.getColumnByName(predicate); + if (column == null) { + column = new Column(project.columnModel.allocateNewCellIndex(), predicate); + project.columnModel.addColumn(-1, column, true); + } - if (object != null) { - Row row = new Row(project.columnModel.getMaxCellIndex() + 1); - rows.add(row); + int cellIndex = column.getCellIndex(); + if (subjectToRows.containsKey(subject)) { + List rows = subjectToRows.get(subject); + for (Row row : rows) { + if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) { + row.setCell(cellIndex, new Cell(object, null)); + object = null; + break; + } + } - row.setCell(cellIndex, new Cell(object, null)); - } - } else { - List rows = new ArrayList(); - subjectToRows.put(subject, rows); + if (object != null) { + Row row = new Row(project.columnModel.getMaxCellIndex() + 1); + rows.add(row); - Row row = new Row(project.columnModel.getMaxCellIndex() + 1); - rows.add(row); + row.setCell(cellIndex, new Cell(object, null)); + } + } else { + List rows = new ArrayList(); + subjectToRows.put(subject, rows); - row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null)); - row.setCell(cellIndex, new Cell(object, null)); - } - } + Row row = new Row(project.columnModel.getMaxCellIndex() + 1); + rows.add(row); - for (Entry> entry : subjectToRows.entrySet()) { - project.rows.addAll(entry.getValue()); - } - } catch (ModelException e) { - exceptions.add(e); - } finally { - triples.iterator().close(); - } - - super.parseOneFile(project, metadata, job, fileSource, input, limit, options, exceptions); + row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null)); + row.setCell(cellIndex, new Cell(object, null)); + } + } + + for (Entry> entry : subjectToRows.entrySet()) { + project.rows.addAll(entry.getValue()); + } + } catch (ModelException e) { + exceptions.add(e); + } + + super.parseOneFile(project, metadata, job, fileSource, input, limit, options, exceptions); } } diff --git a/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java index 2138ab70d..315ea02d2 100644 --- a/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java +++ b/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java @@ -106,19 +106,19 @@ public class RdfTripleImporterTests extends ImporterTest { //row0 Assert.assertEquals(project.rows.get(0).cells.size(), 2); Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); //row1 Assert.assertEquals(project.rows.get(1).cells.size(), 2); Assert.assertNull(project.rows.get(1).cells.get(0)); - Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.under_the_red_sky"); + Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.under_the_red_sky"); Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].rowIndex, 0); Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].cellIndex, 0); //row2 Assert.assertEquals(project.rows.get(2).cells.size(), 2); Assert.assertNull(project.rows.get(2).cells.get(0)); - Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); + Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks"); Assert.assertEquals(project.recordModel.getRowDependency(2).cellDependencies[1].rowIndex, 0); Assert.assertEquals(project.recordModel.getRowDependency(2).cellDependencies[1].cellIndex, 0); } @@ -143,23 +143,23 @@ public class RdfTripleImporterTests extends ImporterTest { //row0 Assert.assertEquals(project.rows.get(0).cells.size(), 3); Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock"); //row1 Assert.assertEquals(project.rows.get(1).cells.size(), 2); Assert.assertNull(project.rows.get(1).cells.get(0)); - Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); + Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks"); Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].rowIndex, 0); Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].cellIndex, 0); } @Test public void canParseTripleWithValue() throws UnsupportedEncodingException { - String sampleRdf = " \"Robert Zimmerman\"@en."; + String sampleRdf = " \"Robert Zimmerman\"@en."; InputStream input = new ByteArrayInputStream(sampleRdf.getBytes("UTF-8")); - SUT = new RdfTripleImporter(RdfTripleImporter.Mode.NT); + SUT = new RdfTripleImporter(RdfTripleImporter.Mode.N3); parseOneFile(SUT, input); Assert.assertEquals(project.columnModel.columns.size(), 2); @@ -168,10 +168,11 @@ public class RdfTripleImporterTests extends ImporterTest { Assert.assertEquals(project.rows.size(), 1); Assert.assertEquals(project.rows.get(0).cells.size(), 2); Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "\"Robert Zimmerman\"@en"); - } @Test + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "Robert Zimmerman@en"); + } - public void parseRdfXml() throws UnsupportedEncodingException { + @Test + public void canParseRdfXml() throws UnsupportedEncodingException { // From W3C spec http://www.w3.org/TR/REC-rdf-syntax/#example8 String sampleRdf = "\n" + " .\n\n" + + "\n" + + "p:GivenName \"Fred\";\n" + + "p:hasEmail ;\n" + + "m:attending .\n"; + + InputStream input = new ByteArrayInputStream(sampleRdf.getBytes("UTF-8")); + + SUT = new RdfTripleImporter(RdfTripleImporter.Mode.N3); + parseOneFile(SUT, input); + + Assert.assertEquals(project.columnModel.columns.size(), 4); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://www.example.org/meeting_organization#attending"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://www.example.org/personal_details#hasEmail"); + Assert.assertEquals(project.columnModel.columns.get(3).getName(), "http://www.example.org/personal_details#GivenName"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 4); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://www.example.org/people#fred"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://meetings.example.com/cal#m1"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "mailto:fred@example.com"); + Assert.assertEquals(project.rows.get(0).cells.get(3).value, "Fred"); + } } diff --git a/main/webapp/WEB-INF/lib/jena-arq-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-arq-3.6.0.jar new file mode 100644 index 000000000..cb7ed3ebf Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-arq-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-base-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-base-3.6.0.jar new file mode 100644 index 000000000..0f4771682 Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-base-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-cmds-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-cmds-3.6.0.jar new file mode 100644 index 000000000..71302bcd7 Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-cmds-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-core-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-core-3.6.0.jar new file mode 100644 index 000000000..05a9e24df Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-core-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-dboe-base-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-dboe-base-3.6.0.jar new file mode 100644 index 000000000..f21d809aa Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-dboe-base-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-dboe-index-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-dboe-index-3.6.0.jar new file mode 100644 index 000000000..10b18f626 Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-dboe-index-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-dboe-trans-data-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-dboe-trans-data-3.6.0.jar new file mode 100644 index 000000000..d2a88dbc3 Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-dboe-trans-data-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-dboe-transaction-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-dboe-transaction-3.6.0.jar new file mode 100644 index 000000000..6d6dc7d45 Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-dboe-transaction-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-iri-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-iri-3.6.0.jar new file mode 100644 index 000000000..5ad1885ea Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-iri-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-rdfconnection-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-rdfconnection-3.6.0.jar new file mode 100644 index 000000000..c119366f3 Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-rdfconnection-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-shaded-guava-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-shaded-guava-3.6.0.jar new file mode 100644 index 000000000..9b183879d Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-shaded-guava-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-tdb-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-tdb-3.6.0.jar new file mode 100644 index 000000000..2eea0435b Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-tdb-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jena-tdb2-3.6.0.jar b/main/webapp/WEB-INF/lib/jena-tdb2-3.6.0.jar new file mode 100644 index 000000000..5f7c1b76f Binary files /dev/null and b/main/webapp/WEB-INF/lib/jena-tdb2-3.6.0.jar differ diff --git a/main/webapp/WEB-INF/lib/jrdf-0.5.6.jar b/main/webapp/WEB-INF/lib/jrdf-0.5.6.jar deleted file mode 100644 index 52dccaed8..000000000 Binary files a/main/webapp/WEB-INF/lib/jrdf-0.5.6.jar and /dev/null differ diff --git a/main/webapp/WEB-INF/lib/jsonld-java-0.11.1.jar b/main/webapp/WEB-INF/lib/jsonld-java-0.11.1.jar new file mode 100644 index 000000000..7530d7a2a Binary files /dev/null and b/main/webapp/WEB-INF/lib/jsonld-java-0.11.1.jar differ diff --git a/main/webapp/WEB-INF/lib/libthrift-0.10.0.jar b/main/webapp/WEB-INF/lib/libthrift-0.10.0.jar new file mode 100644 index 000000000..dae69e6f7 Binary files /dev/null and b/main/webapp/WEB-INF/lib/libthrift-0.10.0.jar differ