Merge pull request #1563 from OpenRefine/jena-migrate

Migrate from the JRDF library to the Apache Jena library
This commit is contained in:
Jacky 2018-04-16 23:09:00 -04:00 committed by GitHub
commit 1a0d1fabd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 124 additions and 87 deletions

View File

@ -17,7 +17,6 @@
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/clojure-1.5.1-slim.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/dom4j-1.6.1.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jcl-over-slf4j-1.5.6.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jrdf-0.5.6.jar" sourcepath="main/webapp/WEB-INF/lib-src/jrdf-0.5.6-sources.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/lessen-trunk-r8.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/log4j-1.2.15.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/marc4j-2.4.jar"/>
@ -113,5 +112,20 @@
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/commons-beanutils-1.9.3.jar"/>
<classpathentry kind="lib" path="extensions/gdata/module/MOD-INF/lib/google-auth-library-oauth2-http-0.9.0.jar"/>
<classpathentry kind="lib" path="extensions/gdata/module/MOD-INF/lib/google-oauth-client-jetty-1.23.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-arq-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-base-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-cmds-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-core-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-dboe-base-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-dboe-index-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-dboe-trans-data-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-dboe-transaction-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-iri-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-rdfconnection-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-shaded-guava-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-tdb-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-tdb2-3.6.0.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jsonld-java-0.11.1.jar"/>
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/libthrift-0.10.0.jar"/>
<classpathentry kind="output" path="main/webapp/WEB-INF/classes"/>
</classpath>

View File

@ -33,10 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers;
import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE;
import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE;
import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
@ -44,10 +40,10 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.jrdf.graph.Graph;
import org.jrdf.graph.Triple;
import org.jrdf.parser.RdfReader;
import org.jrdf.util.ClosableIterable;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.json.JSONObject;
import com.google.refine.expr.ExpressionUtils;
@ -60,13 +56,13 @@ import com.google.refine.model.Row;
import com.google.refine.model.medadata.ProjectMetadata;
public class RdfTripleImporter extends ImportingParserBase {
private RdfReader rdfReader;
private Mode mode;
public enum Mode {
RDFXML,
NT,
N3
N3,
TTL
}
public RdfTripleImporter() {
@ -75,27 +71,25 @@ public class RdfTripleImporter extends ImportingParserBase {
public RdfTripleImporter(Mode mode) {
super(true);
rdfReader = new RdfReader();
this.mode = mode;
}
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource,
InputStream input, int limit, JSONObject options, List<Exception> exceptions) {
// create an empty model
Model model = ModelFactory.createDefaultModel();
@Override
public void parseOneFile(Project project, ProjectMetadata metadata,
ImportingJob job, String fileSource, InputStream input, int limit,
JSONObject options, List<Exception> exceptions) {
Graph graph;
try {
switch (mode) {
case NT:
graph = rdfReader.parseNTriples(input);
model.read(input, null, "NT");
break;
case N3:
graph = rdfReader.parseN3(input);
case TTL:
model.read(input, null, "TTL");
break;
case RDFXML:
graph = rdfReader.parseRdfXml(input);
model.read(input, null);
break;
default:
throw new IllegalArgumentException("Unknown parsing mode");
@ -105,14 +99,16 @@ public class RdfTripleImporter extends ImportingParserBase {
return;
}
ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
StmtIterator triples = model.listStatements();
try {
Map<String, List<Row>> subjectToRows = new LinkedHashMap<String, List<Row>>();
Column subjectColumn = new Column(project.columnModel.allocateNewCellIndex(), "subject");
project.columnModel.addColumn(0, subjectColumn, false);
project.columnModel.setKeyColumnIndex(0);
for (Triple triple : triples) {
while (triples.hasNext()) {
Statement triple = triples.nextStatement();
String subject = triple.getSubject().toString();
String predicate = triple.getPredicate().toString();
String object = triple.getObject().toString();
@ -157,8 +153,6 @@ public class RdfTripleImporter extends ImportingParserBase {
}
} catch (ModelException e) {
exceptions.add(e);
} finally {
triples.iterator().close();
}
super.parseOneFile(project, metadata, job, fileSource, input, limit, options, exceptions);

View File

@ -106,7 +106,7 @@ public class RdfTripleImporterTests extends ImporterTest {
//row0
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
//row1
Assert.assertEquals(project.rows.get(1).cells.size(), 2);
@ -118,7 +118,7 @@ public class RdfTripleImporterTests extends ImporterTest {
//row2
Assert.assertEquals(project.rows.get(2).cells.size(), 2);
Assert.assertNull(project.rows.get(2).cells.get(0));
Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
Assert.assertEquals(project.recordModel.getRowDependency(2).cellDependencies[1].rowIndex, 0);
Assert.assertEquals(project.recordModel.getRowDependency(2).cellDependencies[1].cellIndex, 0);
}
@ -143,23 +143,23 @@ public class RdfTripleImporterTests extends ImporterTest {
//row0
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock");
//row1
Assert.assertEquals(project.rows.get(1).cells.size(), 2);
Assert.assertNull(project.rows.get(1).cells.get(0));
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].rowIndex, 0);
Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].cellIndex, 0);
}
@Test
public void canParseTripleWithValue() throws UnsupportedEncodingException {
String sampleRdf = "<http://rdf.freebase.com/ns/en.bob_dylan> <http://rdf.freebase.com/ns/common.topic.alias> \"Robert Zimmerman\"@en.";
String sampleRdf = "<http://rdf.freebase.com/ns/en.bob_dylan> <http://rdf.freebase.com/ns/common.topic.alias>\"Robert Zimmerman\"@en.";
InputStream input = new ByteArrayInputStream(sampleRdf.getBytes("UTF-8"));
SUT = new RdfTripleImporter(RdfTripleImporter.Mode.NT);
SUT = new RdfTripleImporter(RdfTripleImporter.Mode.N3);
parseOneFile(SUT, input);
Assert.assertEquals(project.columnModel.columns.size(), 2);
@ -168,10 +168,11 @@ public class RdfTripleImporterTests extends ImporterTest {
Assert.assertEquals(project.rows.size(), 1);
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "\"Robert Zimmerman\"@en");
} @Test
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "Robert Zimmerman@en");
}
public void parseRdfXml() throws UnsupportedEncodingException {
@Test
public void canParseRdfXml() throws UnsupportedEncodingException {
// From W3C spec http://www.w3.org/TR/REC-rdf-syntax/#example8
String sampleRdf = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+ "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n"
@ -200,10 +201,37 @@ public class RdfTripleImporterTests extends ImporterTest {
Assert.assertEquals(project.rows.size(), 5);
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://www.w3.org/TR/rdf-syntax-grammar");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "\"RDF/XML Syntax Specification (Revised)\"");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "RDF/XML Syntax Specification (Revised)@en-US");
Assert.assertEquals(project.rows.get(3).cells.size(), 3);
Assert.assertEquals(project.rows.get(3).cells.get(0).value, "http://example.org/buecher/baum");
Assert.assertEquals(project.rows.get(3).cells.get(1).value, "\"Der Baum\"@de");
Assert.assertEquals(project.rows.get(3).cells.get(2).value, "\"Das Buch ist außergewöhnlich\"@de");
Assert.assertEquals(project.rows.get(3).cells.get(1).value, "The Tree@en");
Assert.assertEquals(project.rows.get(3).cells.get(2).value, "Das Buch ist außergewöhnlich@de");
}
/**
 * Checks that an N3 document using {@code @prefix} declarations and the
 * {@code ;} predicate-list shorthand is imported as a single row: one
 * "subject" column plus one column per predicate, each named by the
 * predicate's full URI.
 */
@Test
public void canParseN3() throws UnsupportedEncodingException {
    // Fixture: one subject (fred) carrying three predicates.
    StringBuilder n3 = new StringBuilder();
    n3.append("@prefix p: <http://www.example.org/personal_details#> .\n");
    n3.append("@prefix m: <http://www.example.org/meeting_organization#> .\n\n");
    n3.append("<http://www.example.org/people#fred>\n");
    n3.append("p:GivenName \"Fred\";\n");
    n3.append("p:hasEmail <mailto:fred@example.com>;\n");
    n3.append("m:attending <http://meetings.example.com/cal#m1> .\n");

    byte[] encoded = n3.toString().getBytes("UTF-8");
    InputStream source = new ByteArrayInputStream(encoded);

    SUT = new RdfTripleImporter(RdfTripleImporter.Mode.N3);
    parseOneFile(SUT, source);

    // NOTE(review): the expected predicate order (attending, hasEmail, GivenName)
    // is not the order in which the predicates appear in the fixture; presumably it
    // mirrors the order in which the importer receives the statements -- confirm
    // that this order is stable before relying on it.
    String[] expectedColumnNames = {
        "subject",
        "http://www.example.org/meeting_organization#attending",
        "http://www.example.org/personal_details#hasEmail",
        "http://www.example.org/personal_details#GivenName"
    };
    // Cell values of the single row, index-aligned with the columns above.
    String[] expectedCellValues = {
        "http://www.example.org/people#fred",
        "http://meetings.example.com/cal#m1",
        "mailto:fred@example.com",
        "Fred"
    };

    Assert.assertEquals(project.columnModel.columns.size(), 4);
    for (int i = 0; i < expectedColumnNames.length; i++) {
        Assert.assertEquals(project.columnModel.columns.get(i).getName(), expectedColumnNames[i]);
    }

    Assert.assertEquals(project.rows.size(), 1);
    Assert.assertEquals(project.rows.get(0).cells.size(), 4);
    for (int i = 0; i < expectedCellValues.length; i++) {
        Assert.assertEquals(project.rows.get(0).cells.get(i).value, expectedCellValues[i]);
    }
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -210,7 +210,8 @@ function registerImporting() {
IM.registerFormat("text/line-based/fixed-width", "Fixed-width field text files", "FixedWidthParserUI",
new Packages.com.google.refine.importers.FixedWidthImporter());
IM.registerFormat("text/rdf+n3", "RDF/N3 files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfTripleImporter());
IM.registerFormat("text/rdf+n3", "RDF/N3 files", "RdfTriplesParserUI",
new Packages.com.google.refine.importers.RdfTripleImporter(Packages.com.google.refine.importers.RdfTripleImporter.Mode.N3));
IM.registerFormat("text/xml", "XML files", "XmlParserUI", new Packages.com.google.refine.importers.XmlImporter());
IM.registerFormat("binary/text/xml/xls/xlsx", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());