2010-05-18 14:41:40 +02:00
|
|
|
package com.metaweb.gridworks.importers;
|
|
|
|
|
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.Reader;
|
|
|
|
import java.util.Properties;
|
|
|
|
|
|
|
|
import org.jrdf.JRDFFactory;
|
|
|
|
import org.jrdf.SortedMemoryJRDFFactory;
|
|
|
|
import org.jrdf.collection.MemMapFactory;
|
|
|
|
import org.jrdf.graph.Graph;
|
|
|
|
import org.jrdf.graph.Triple;
|
|
|
|
import org.jrdf.parser.line.GraphLineParser;
|
|
|
|
import org.jrdf.parser.line.LineHandler;
|
|
|
|
import org.jrdf.parser.ntriples.NTriplesParserFactory;
|
|
|
|
import org.jrdf.util.ClosableIterable;
|
|
|
|
import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE;
|
|
|
|
import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE;
|
|
|
|
import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE;
|
|
|
|
|
|
|
|
import com.metaweb.gridworks.model.Cell;
|
|
|
|
import com.metaweb.gridworks.model.Column;
|
|
|
|
import com.metaweb.gridworks.model.Project;
|
|
|
|
import com.metaweb.gridworks.model.Row;
|
|
|
|
|
|
|
|
public class RdfTripleImporter implements Importer{
|
|
|
|
JRDFFactory JrdfFactory;
|
|
|
|
NTriplesParserFactory nTriplesParserFactory;
|
|
|
|
MemMapFactory newMapFactory;
|
|
|
|
|
|
|
|
public RdfTripleImporter(){
|
|
|
|
JrdfFactory = SortedMemoryJRDFFactory.getFactory();
|
|
|
|
nTriplesParserFactory = new NTriplesParserFactory();
|
|
|
|
newMapFactory = new MemMapFactory();
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void read(Reader reader, Project project, Properties options) throws Exception {
|
|
|
|
String baseUrl = options.getProperty("base-url");
|
|
|
|
|
|
|
|
Graph graph = JrdfFactory.getNewGraph();
|
|
|
|
LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory);
|
|
|
|
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
|
|
|
parser.parse(reader, baseUrl); //fills JRDF graph
|
2010-05-18 23:08:37 +02:00
|
|
|
|
2010-05-18 14:41:40 +02:00
|
|
|
//first column is subject
|
|
|
|
project.columnModel.columns.add(0, new Column(0, "subject"));
|
2010-05-18 23:08:37 +02:00
|
|
|
project.columnModel.setKeyColumnIndex(0); //the subject will be the key column
|
2010-05-18 14:41:40 +02:00
|
|
|
project.columnModel.update();
|
|
|
|
|
|
|
|
ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
|
|
|
|
try {
|
|
|
|
for (Triple triple : triples) {
|
|
|
|
String subject = triple.getSubject().toString();
|
|
|
|
String predicate = triple.getPredicate().toString();
|
|
|
|
String object = triple.getObject().toString();
|
2010-05-18 23:08:37 +02:00
|
|
|
|
2010-05-18 14:41:40 +02:00
|
|
|
//creates new column for every predicate
|
2010-05-18 15:48:52 +02:00
|
|
|
int columnIndex = project.columnModel.getColumnIndexByName(predicate);
|
|
|
|
if(columnIndex == -1){
|
2010-05-18 23:08:37 +02:00
|
|
|
AddNewColumn(project, predicate, subject);
|
2010-05-18 14:41:40 +02:00
|
|
|
}
|
|
|
|
|
2010-05-18 23:08:37 +02:00
|
|
|
//now find row to match with
|
|
|
|
int candidateMergeRowIndex = -1;
|
|
|
|
for(int i = 0; i < project.rows.size(); i++){
|
|
|
|
//check to see if the subjects are the same (merge if they are)
|
|
|
|
Cell cell = project.rows.get(i).cells.get(0);
|
|
|
|
if(cell != null){
|
|
|
|
if(project.rows.get(i).cells.get(0).value == subject){
|
|
|
|
candidateMergeRowIndex = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
columnIndex = project.columnModel.getColumnIndexByName(predicate);
|
|
|
|
|
2010-05-18 15:48:52 +02:00
|
|
|
if(candidateMergeRowIndex > -1){
|
2010-05-18 23:08:37 +02:00
|
|
|
Cell cell = project.rows.get(candidateMergeRowIndex).cells.get(columnIndex);
|
|
|
|
if(cell == null){
|
2010-05-18 15:48:52 +02:00
|
|
|
//empty, so merge in this value
|
|
|
|
MergeWithRow(project, candidateMergeRowIndex, columnIndex, object);
|
|
|
|
}else{
|
2010-05-18 23:08:37 +02:00
|
|
|
//can't overwrite existing, so add new dependent row
|
|
|
|
AddNewDependentRow(project, subject, candidateMergeRowIndex, columnIndex, object); //TODO group to original row.
|
2010-05-18 15:48:52 +02:00
|
|
|
}
|
|
|
|
}else{
|
2010-05-18 23:08:37 +02:00
|
|
|
AddNewRow(project, subject, columnIndex, object);
|
2010-05-18 15:48:52 +02:00
|
|
|
}
|
2010-05-18 14:41:40 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
triples.iterator().close();
|
|
|
|
}
|
|
|
|
}
|
2010-05-18 15:48:52 +02:00
|
|
|
|
2010-05-18 23:08:37 +02:00
|
|
|
protected void AddNewColumn(Project project, String predicate, String subject){
|
2010-05-18 15:04:51 +02:00
|
|
|
int numberOfColumns = project.columnModel.columns.size();
|
2010-05-18 15:48:52 +02:00
|
|
|
|
2010-05-18 15:04:51 +02:00
|
|
|
project.columnModel.columns.add(numberOfColumns, new Column(numberOfColumns, predicate));
|
2010-05-18 23:08:37 +02:00
|
|
|
project.columnModel.setMaxCellIndex(numberOfColumns);
|
2010-05-18 15:04:51 +02:00
|
|
|
project.columnModel.update();
|
2010-05-18 15:48:52 +02:00
|
|
|
|
2010-05-18 15:04:51 +02:00
|
|
|
//update existing rows with new column
|
2010-05-18 15:48:52 +02:00
|
|
|
for(int i = 0; i < project.rows.size(); i++){
|
|
|
|
project.rows.get(i).cells.add(numberOfColumns, null);
|
2010-05-18 15:04:51 +02:00
|
|
|
}
|
2010-05-18 15:48:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
protected void MergeWithRow(Project project, int candidateMergeRowIndex, int columnIndex, String object){
|
|
|
|
project.rows.get(candidateMergeRowIndex).setCell(columnIndex, new Cell(object, null));
|
2010-05-18 15:04:51 +02:00
|
|
|
}
|
2010-05-18 14:41:40 +02:00
|
|
|
|
2010-05-18 23:08:37 +02:00
|
|
|
protected void AddNewDependentRow(Project project, String subject, int candidateMergeRowIndex, int columnIndex, String object){
|
|
|
|
Row row = AddNewRow(project, subject, columnIndex, object);
|
|
|
|
|
|
|
|
Project.setRowDependency(project, row, columnIndex, candidateMergeRowIndex, project.columnModel.getKeyColumnIndex());
|
|
|
|
|
|
|
|
row.cells.set(project.columnModel.getKeyColumnIndex(), null); //the subject can now be null, as the dependencies are set
|
|
|
|
}
|
2010-05-18 14:41:40 +02:00
|
|
|
|
2010-05-18 23:08:37 +02:00
|
|
|
protected Row AddNewRow(Project project, String subject, int columnIndex, String object){
|
|
|
|
int numberOfColumns = project.columnModel.columns.size();
|
|
|
|
|
2010-05-18 14:41:40 +02:00
|
|
|
//add subject
|
|
|
|
Row row = new Row(numberOfColumns);
|
|
|
|
row.setCell(0, new Cell(subject, null));
|
|
|
|
|
|
|
|
//add object to a row
|
|
|
|
row.setCell(columnIndex, new Cell(object, null));
|
|
|
|
project.rows.add(row);
|
2010-05-18 23:08:37 +02:00
|
|
|
return row;
|
2010-05-18 14:41:40 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void read(InputStream inputStream, Project project, Properties options) throws Exception {
|
|
|
|
// TODO
|
|
|
|
throw new UnsupportedOperationException();
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public boolean takesReader() {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|