Migrate from the JRDF library to the Jena library
This commit is contained in:
parent
507c4a4c7a
commit
de42f4d85a
16
.classpath
16
.classpath
@ -17,7 +17,6 @@
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/clojure-1.5.1-slim.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/dom4j-1.6.1.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jcl-over-slf4j-1.5.6.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/jrdf-0.5.6.jar" sourcepath="main/webapp/WEB-INF/lib-src/jrdf-0.5.6-sources.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/lessen-trunk-r8.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/log4j-1.2.15.jar"/>
|
||||
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/marc4j-2.4.jar"/>
|
||||
@ -113,5 +112,20 @@
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/commons-beanutils-1.9.3.jar"/>
|
||||
<classpathentry kind="lib" path="extensions/gdata/module/MOD-INF/lib/google-auth-library-oauth2-http-0.9.0.jar"/>
|
||||
<classpathentry kind="lib" path="extensions/gdata/module/MOD-INF/lib/google-oauth-client-jetty-1.23.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-arq-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-base-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-cmds-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-core-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-dboe-base-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-dboe-index-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-dboe-trans-data-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-dboe-transaction-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-iri-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-rdfconnection-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-shaded-guava-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-tdb-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jena-tdb2-3.6.0.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/jsonld-java-0.11.1.jar"/>
|
||||
<classpathentry kind="lib" path="main/webapp/WEB-INF/lib/libthrift-0.10.0.jar"/>
|
||||
<classpathentry kind="output" path="main/webapp/WEB-INF/classes"/>
|
||||
</classpath>
|
||||
|
@ -33,10 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE;
|
||||
import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE;
|
||||
import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
@ -44,10 +40,10 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.jrdf.graph.Graph;
|
||||
import org.jrdf.graph.Triple;
|
||||
import org.jrdf.parser.RdfReader;
|
||||
import org.jrdf.util.ClosableIterable;
|
||||
import org.apache.jena.rdf.model.Model;
|
||||
import org.apache.jena.rdf.model.ModelFactory;
|
||||
import org.apache.jena.rdf.model.Statement;
|
||||
import org.apache.jena.rdf.model.StmtIterator;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.expr.ExpressionUtils;
|
||||
@ -60,13 +56,13 @@ import com.google.refine.model.Row;
|
||||
import com.google.refine.model.medadata.ProjectMetadata;
|
||||
|
||||
public class RdfTripleImporter extends ImportingParserBase {
|
||||
private RdfReader rdfReader;
|
||||
private Mode mode;
|
||||
|
||||
public enum Mode {
|
||||
RDFXML,
|
||||
NT,
|
||||
N3
|
||||
N3,
|
||||
TTL
|
||||
}
|
||||
|
||||
public RdfTripleImporter() {
|
||||
@ -75,27 +71,25 @@ public class RdfTripleImporter extends ImportingParserBase {
|
||||
|
||||
public RdfTripleImporter(Mode mode) {
|
||||
super(true);
|
||||
rdfReader = new RdfReader();
|
||||
this.mode = mode;
|
||||
}
|
||||
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata, ImportingJob job, String fileSource,
|
||||
InputStream input, int limit, JSONObject options, List<Exception> exceptions) {
|
||||
// create an empty model
|
||||
Model model = ModelFactory.createDefaultModel();
|
||||
|
||||
@Override
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||
ImportingJob job, String fileSource, InputStream input, int limit,
|
||||
JSONObject options, List<Exception> exceptions) {
|
||||
|
||||
Graph graph;
|
||||
try {
|
||||
switch (mode) {
|
||||
case NT:
|
||||
graph = rdfReader.parseNTriples(input);
|
||||
model.read(input, null, "NT");
|
||||
break;
|
||||
case N3:
|
||||
graph = rdfReader.parseN3(input);
|
||||
case TTL:
|
||||
model.read(input, null, "TTL");
|
||||
break;
|
||||
case RDFXML:
|
||||
graph = rdfReader.parseRdfXml(input);
|
||||
model.read(input, null);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unknown parsing mode");
|
||||
@ -105,62 +99,62 @@ public class RdfTripleImporter extends ImportingParserBase {
|
||||
return;
|
||||
}
|
||||
|
||||
ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
|
||||
try {
|
||||
Map<String, List<Row>> subjectToRows = new LinkedHashMap<String, List<Row>>();
|
||||
Column subjectColumn = new Column(project.columnModel.allocateNewCellIndex(), "subject");
|
||||
project.columnModel.addColumn(0, subjectColumn, false);
|
||||
project.columnModel.setKeyColumnIndex(0);
|
||||
StmtIterator triples = model.listStatements();
|
||||
|
||||
for (Triple triple : triples) {
|
||||
String subject = triple.getSubject().toString();
|
||||
String predicate = triple.getPredicate().toString();
|
||||
String object = triple.getObject().toString();
|
||||
try {
|
||||
Map<String, List<Row>> subjectToRows = new LinkedHashMap<String, List<Row>>();
|
||||
Column subjectColumn = new Column(project.columnModel.allocateNewCellIndex(), "subject");
|
||||
project.columnModel.addColumn(0, subjectColumn, false);
|
||||
project.columnModel.setKeyColumnIndex(0);
|
||||
|
||||
Column column = project.columnModel.getColumnByName(predicate);
|
||||
if (column == null) {
|
||||
column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
|
||||
project.columnModel.addColumn(-1, column, true);
|
||||
}
|
||||
while (triples.hasNext()) {
|
||||
Statement triple = triples.nextStatement();
|
||||
String subject = triple.getSubject().toString();
|
||||
String predicate = triple.getPredicate().toString();
|
||||
String object = triple.getObject().toString();
|
||||
|
||||
int cellIndex = column.getCellIndex();
|
||||
if (subjectToRows.containsKey(subject)) {
|
||||
List<Row> rows = subjectToRows.get(subject);
|
||||
for (Row row : rows) {
|
||||
if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) {
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
object = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Column column = project.columnModel.getColumnByName(predicate);
|
||||
if (column == null) {
|
||||
column = new Column(project.columnModel.allocateNewCellIndex(), predicate);
|
||||
project.columnModel.addColumn(-1, column, true);
|
||||
}
|
||||
|
||||
if (object != null) {
|
||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||
rows.add(row);
|
||||
int cellIndex = column.getCellIndex();
|
||||
if (subjectToRows.containsKey(subject)) {
|
||||
List<Row> rows = subjectToRows.get(subject);
|
||||
for (Row row : rows) {
|
||||
if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) {
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
object = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
}
|
||||
} else {
|
||||
List<Row> rows = new ArrayList<Row>();
|
||||
subjectToRows.put(subject, rows);
|
||||
if (object != null) {
|
||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||
rows.add(row);
|
||||
|
||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||
rows.add(row);
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
}
|
||||
} else {
|
||||
List<Row> rows = new ArrayList<Row>();
|
||||
subjectToRows.put(subject, rows);
|
||||
|
||||
row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
}
|
||||
}
|
||||
Row row = new Row(project.columnModel.getMaxCellIndex() + 1);
|
||||
rows.add(row);
|
||||
|
||||
for (Entry<String, List<Row>> entry : subjectToRows.entrySet()) {
|
||||
project.rows.addAll(entry.getValue());
|
||||
}
|
||||
} catch (ModelException e) {
|
||||
exceptions.add(e);
|
||||
} finally {
|
||||
triples.iterator().close();
|
||||
}
|
||||
row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null));
|
||||
row.setCell(cellIndex, new Cell(object, null));
|
||||
}
|
||||
}
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, input, limit, options, exceptions);
|
||||
for (Entry<String, List<Row>> entry : subjectToRows.entrySet()) {
|
||||
project.rows.addAll(entry.getValue());
|
||||
}
|
||||
} catch (ModelException e) {
|
||||
exceptions.add(e);
|
||||
}
|
||||
|
||||
super.parseOneFile(project, metadata, job, fileSource, input, limit, options, exceptions);
|
||||
}
|
||||
}
|
||||
|
@ -106,7 +106,7 @@ public class RdfTripleImporterTests extends ImporterTest {
|
||||
//row0
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
|
||||
|
||||
//row1
|
||||
Assert.assertEquals(project.rows.get(1).cells.size(), 2);
|
||||
@ -118,7 +118,7 @@ public class RdfTripleImporterTests extends ImporterTest {
|
||||
//row2
|
||||
Assert.assertEquals(project.rows.get(2).cells.size(), 2);
|
||||
Assert.assertNull(project.rows.get(2).cells.get(0));
|
||||
Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
|
||||
Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
|
||||
Assert.assertEquals(project.recordModel.getRowDependency(2).cellDependencies[1].rowIndex, 0);
|
||||
Assert.assertEquals(project.recordModel.getRowDependency(2).cellDependencies[1].cellIndex, 0);
|
||||
}
|
||||
@ -143,23 +143,23 @@ public class RdfTripleImporterTests extends ImporterTest {
|
||||
//row0
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock");
|
||||
|
||||
//row1
|
||||
Assert.assertEquals(project.rows.get(1).cells.size(), 2);
|
||||
Assert.assertNull(project.rows.get(1).cells.get(0));
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
|
||||
Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].rowIndex, 0);
|
||||
Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].cellIndex, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void canParseTripleWithValue() throws UnsupportedEncodingException {
|
||||
String sampleRdf = "<http://rdf.freebase.com/ns/en.bob_dylan> <http://rdf.freebase.com/ns/common.topic.alias> \"Robert Zimmerman\"@en.";
|
||||
String sampleRdf = "<http://rdf.freebase.com/ns/en.bob_dylan> <http://rdf.freebase.com/ns/common.topic.alias>\"Robert Zimmerman\"@en.";
|
||||
InputStream input = new ByteArrayInputStream(sampleRdf.getBytes("UTF-8"));
|
||||
|
||||
SUT = new RdfTripleImporter(RdfTripleImporter.Mode.NT);
|
||||
SUT = new RdfTripleImporter(RdfTripleImporter.Mode.N3);
|
||||
parseOneFile(SUT, input);
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 2);
|
||||
@ -168,10 +168,11 @@ public class RdfTripleImporterTests extends ImporterTest {
|
||||
Assert.assertEquals(project.rows.size(), 1);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "\"Robert Zimmerman\"@en");
|
||||
} @Test
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "Robert Zimmerman@en");
|
||||
}
|
||||
|
||||
public void parseRdfXml() throws UnsupportedEncodingException {
|
||||
@Test
|
||||
public void canParseRdfXml() throws UnsupportedEncodingException {
|
||||
// From W3C spec http://www.w3.org/TR/REC-rdf-syntax/#example8
|
||||
String sampleRdf = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
|
||||
+ "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n"
|
||||
@ -200,10 +201,37 @@ public class RdfTripleImporterTests extends ImporterTest {
|
||||
Assert.assertEquals(project.rows.size(), 5);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://www.w3.org/TR/rdf-syntax-grammar");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "\"RDF/XML Syntax Specification (Revised)\"");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "RDF/XML Syntax Specification (Revised)@en-US");
|
||||
Assert.assertEquals(project.rows.get(3).cells.size(), 3);
|
||||
Assert.assertEquals(project.rows.get(3).cells.get(0).value, "http://example.org/buecher/baum");
|
||||
Assert.assertEquals(project.rows.get(3).cells.get(1).value, "\"Der Baum\"@de");
|
||||
Assert.assertEquals(project.rows.get(3).cells.get(2).value, "\"Das Buch ist außergewöhnlich\"@de");
|
||||
Assert.assertEquals(project.rows.get(3).cells.get(1).value, "The Tree@en");
|
||||
Assert.assertEquals(project.rows.get(3).cells.get(2).value, "Das Buch ist außergewöhnlich@de");
|
||||
}
|
||||
|
||||
/**
 * Checks that an N3 document using two prefix declarations and a single
 * subject with three predicates is imported as one row with four columns:
 * the "subject" column plus one column per fully expanded predicate IRI.
 *
 * @throws UnsupportedEncodingException declared because the sample is encoded via getBytes("UTF-8")
 */
@Test
|
||||
public void canParseN3() throws UnsupportedEncodingException {
|
||||
// Sample: subject "fred" with GivenName, hasEmail and attending, written with the p: and m: prefixes.
String sampleRdf = "@prefix p: <http://www.example.org/personal_details#> .\n" +
|
||||
"@prefix m: <http://www.example.org/meeting_organization#> .\n\n" +
|
||||
"<http://www.example.org/people#fred>\n" +
|
||||
"p:GivenName \"Fred\";\n" +
|
||||
"p:hasEmail <mailto:fred@example.com>;\n" +
|
||||
"m:attending <http://meetings.example.com/cal#m1> .\n";
|
||||

||||
InputStream input = new ByteArrayInputStream(sampleRdf.getBytes("UTF-8"));
|
||||

||||
SUT = new RdfTripleImporter(RdfTripleImporter.Mode.N3);
|
||||
parseOneFile(SUT, input);
|
||||

||||
// Column names are expected as full IRIs (prefixes expanded). Note the expected
// predicate order (attending, hasEmail, GivenName) is the reverse of the order
// in which the predicates are written in the sample document.
Assert.assertEquals(project.columnModel.columns.size(), 4);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://www.example.org/meeting_organization#attending");
|
||||
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://www.example.org/personal_details#hasEmail");
|
||||
Assert.assertEquals(project.columnModel.columns.get(3).getName(), "http://www.example.org/personal_details#GivenName");
|
||||
// All three statements share one subject, so a single row is expected.
Assert.assertEquals(project.rows.size(), 1);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://www.example.org/people#fred");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://meetings.example.com/cal#m1");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "mailto:fred@example.com");
|
||||
// The plain literal is expected as its bare text, without surrounding quotes.
Assert.assertEquals(project.rows.get(0).cells.get(3).value, "Fred");
|
||||
}
|
||||
}
|
||||
|
BIN
main/webapp/WEB-INF/lib/jena-arq-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-arq-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-base-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-base-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-cmds-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-cmds-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-core-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-core-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-dboe-base-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-dboe-base-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-dboe-index-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-dboe-index-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-dboe-trans-data-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-dboe-trans-data-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-dboe-transaction-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-dboe-transaction-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-iri-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-iri-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-rdfconnection-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-rdfconnection-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-shaded-guava-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-shaded-guava-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-tdb-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-tdb-3.6.0.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jena-tdb2-3.6.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jena-tdb2-3.6.0.jar
Normal file
Binary file not shown.
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/jsonld-java-0.11.1.jar
Normal file
BIN
main/webapp/WEB-INF/lib/jsonld-java-0.11.1.jar
Normal file
Binary file not shown.
BIN
main/webapp/WEB-INF/lib/libthrift-0.10.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/libthrift-0.10.0.jar
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user