RdfTripleImporter handles row dependencies.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@818 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
cfd0f2219e
commit
0e4682f453
@ -38,50 +38,53 @@ public class RdfTripleImporter implements Importer{
|
||||
String baseUrl = options.getProperty("base-url");
|
||||
|
||||
Graph graph = JrdfFactory.getNewGraph();
|
||||
//System.out.println("--------------------------------");
|
||||
//System.out.println("initial number of triples before parsing is : " + graph.getNumberOfTriples());
|
||||
LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory);
|
||||
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
||||
parser.parse(reader, baseUrl); //fills JRDF graph
|
||||
//System.out.println("number of triples parsed is : " + graph.getNumberOfTriples());
|
||||
|
||||
//first column is subject
|
||||
project.columnModel.columns.add(0, new Column(0, "subject"));
|
||||
project.columnModel.setKeyColumnIndex(0); //the subject will be the key column
|
||||
project.columnModel.update();
|
||||
|
||||
ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
|
||||
try {
|
||||
for (Triple triple : triples) {
|
||||
|
||||
//System.out.println("Triple : " + triple);
|
||||
String subject = triple.getSubject().toString();
|
||||
String predicate = triple.getPredicate().toString();
|
||||
String object = triple.getObject().toString();
|
||||
|
||||
//System.out.println("subject : " + subject);
|
||||
//System.out.println("predicate : " + predicate);
|
||||
//System.out.println("object : " + object);
|
||||
//System.out.println("predicate relates to column : " + project.columnModel.getColumnByName(predicate));
|
||||
|
||||
int candidateMergeRowIndex = -1;
|
||||
|
||||
//creates new column for every predicate
|
||||
int columnIndex = project.columnModel.getColumnIndexByName(predicate);
|
||||
if(columnIndex == -1){
|
||||
candidateMergeRowIndex = AddNewColumn(project, predicate, subject);
|
||||
AddNewColumn(project, predicate, subject);
|
||||
}
|
||||
|
||||
//now find row to match with
|
||||
int candidateMergeRowIndex = -1;
|
||||
for(int i = 0; i < project.rows.size(); i++){
|
||||
//check to see if the subjects are the same (merge if they are)
|
||||
Cell cell = project.rows.get(i).cells.get(0);
|
||||
if(cell != null){
|
||||
if(project.rows.get(i).cells.get(0).value == subject){
|
||||
candidateMergeRowIndex = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
columnIndex = project.columnModel.getColumnIndexByName(predicate);
|
||||
|
||||
if(candidateMergeRowIndex > -1){
|
||||
if(project.rows.get(candidateMergeRowIndex).cells.get(columnIndex) == null){
|
||||
Cell cell = project.rows.get(candidateMergeRowIndex).cells.get(columnIndex);
|
||||
if(cell == null){
|
||||
//empty, so merge in this value
|
||||
MergeWithRow(project, candidateMergeRowIndex, columnIndex, object);
|
||||
}else{
|
||||
//can't overwrite existing, so add new row
|
||||
AddNewRow(project, subject, predicate, object); //TODO group to original row.
|
||||
//can't overwrite existing, so add new dependent row
|
||||
AddNewDependentRow(project, subject, candidateMergeRowIndex, columnIndex, object); //TODO group to original row.
|
||||
}
|
||||
}else{
|
||||
AddNewRow(project, subject, predicate, object);
|
||||
AddNewRow(project, subject, columnIndex, object);
|
||||
}
|
||||
}
|
||||
|
||||
@ -90,33 +93,32 @@ public class RdfTripleImporter implements Importer{
|
||||
}
|
||||
}
|
||||
|
||||
protected int AddNewColumn(Project project, String predicate, String subject){
|
||||
//System.out.println("adding new column");
|
||||
protected void AddNewColumn(Project project, String predicate, String subject){
|
||||
int numberOfColumns = project.columnModel.columns.size();
|
||||
|
||||
project.columnModel.columns.add(numberOfColumns, new Column(numberOfColumns, predicate));
|
||||
project.columnModel.setMaxCellIndex(numberOfColumns);
|
||||
project.columnModel.update();
|
||||
|
||||
int candidateMergeRowIndex = -1;
|
||||
//update existing rows with new column
|
||||
for(int i = 0; i < project.rows.size(); i++){
|
||||
project.rows.get(i).cells.add(numberOfColumns, null);
|
||||
if(project.rows.get(i).cells.get(0).value == subject){
|
||||
candidateMergeRowIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
//numberOfColumns = project.columnModel.columns.size();
|
||||
//System.out.println("New total number of columns : " + numberOfColumns);
|
||||
|
||||
return candidateMergeRowIndex;
|
||||
}
|
||||
|
||||
protected void MergeWithRow(Project project, int candidateMergeRowIndex, int columnIndex, String object){
|
||||
project.rows.get(candidateMergeRowIndex).setCell(columnIndex, new Cell(object, null));
|
||||
}
|
||||
|
||||
protected void AddNewRow(Project project, String subject, String predicate, String object){
|
||||
protected void AddNewDependentRow(Project project, String subject, int candidateMergeRowIndex, int columnIndex, String object){
|
||||
Row row = AddNewRow(project, subject, columnIndex, object);
|
||||
|
||||
Project.setRowDependency(project, row, columnIndex, candidateMergeRowIndex, project.columnModel.getKeyColumnIndex());
|
||||
|
||||
row.cells.set(project.columnModel.getKeyColumnIndex(), null); //the subject can now be null, as the dependencies are set
|
||||
}
|
||||
|
||||
protected Row AddNewRow(Project project, String subject, int columnIndex, String object){
|
||||
int numberOfColumns = project.columnModel.columns.size();
|
||||
|
||||
//add subject
|
||||
@ -124,12 +126,9 @@ public class RdfTripleImporter implements Importer{
|
||||
row.setCell(0, new Cell(subject, null));
|
||||
|
||||
//add object to a row
|
||||
int columnIndex = project.columnModel.getColumnIndexByName(predicate);
|
||||
//System.out.println("predicate relates to columnIndex : " + columnIndex);
|
||||
row.setCell(columnIndex, new Cell(object, null));
|
||||
//System.out.println("Number of cells in new row : " + row.cells.size());
|
||||
project.rows.add(row);
|
||||
//System.out.println("New total number of rows : " + project.rows.size());
|
||||
return row;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -311,6 +311,7 @@ public class Project {
|
||||
for (int dependentCellIndex : group.cellIndices) {
|
||||
if (ExpressionUtils.isNonBlankData(row.getCellValue(dependentCellIndex))) {
|
||||
setRowDependency(
|
||||
this,
|
||||
row,
|
||||
dependentCellIndex,
|
||||
contextRowIndex,
|
||||
@ -360,8 +361,8 @@ public class Project {
|
||||
}
|
||||
}
|
||||
|
||||
protected void setRowDependency(Row row, int cellIndex, int contextRowIndex, int contextCellIndex) {
|
||||
int count = columnModel.getMaxCellIndex() + 1;
|
||||
public static void setRowDependency(Project project, Row row, int cellIndex, int contextRowIndex, int contextCellIndex) {
|
||||
int count = project.columnModel.getMaxCellIndex() + 1;
|
||||
if (row.contextRowSlots == null || row.contextCellSlots == null) {
|
||||
row.contextRowSlots = new int[count];
|
||||
row.contextCellSlots = new int[count];
|
||||
|
@ -63,14 +63,31 @@ public class RdfTripleImporterTests {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
//columns
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 2);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album");
|
||||
|
||||
//rows
|
||||
Assert.assertEquals(project.rows.size(), 3);
|
||||
|
||||
//row0
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
|
||||
|
||||
//row1
|
||||
Assert.assertEquals(project.rows.get(2).cells.size(), 2);
|
||||
Assert.assertNull(project.rows.get(1).cells.get(0));
|
||||
Assert.assertEquals(project.rows.get(1).contextRowSlots[1], 0);
|
||||
Assert.assertEquals(project.rows.get(1).contextCellSlots[1], 0);
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); //NB triples aren't created in order they were input
|
||||
|
||||
//row2
|
||||
Assert.assertEquals(project.rows.get(2).cells.size(), 2);
|
||||
Assert.assertEquals(project.rows.get(2).contextRowSlots[1], 0);
|
||||
Assert.assertEquals(project.rows.get(2).contextCellSlots[1], 0);
|
||||
Assert.assertNull(project.rows.get(2).cells.get(0));
|
||||
Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.under_the_red_sky"); //NB triples aren't created in order they were input
|
||||
}
|
||||
|
||||
@ -87,18 +104,46 @@ public class RdfTripleImporterTests {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
//columns
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album");
|
||||
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://rdf.freebase.com/ns/music.artist.genre");
|
||||
|
||||
//rows
|
||||
Assert.assertEquals(project.rows.size(), 2);
|
||||
|
||||
//row0
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
|
||||
Assert.assertNull(project.rows.get(0).cells.get(2));
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock");
|
||||
|
||||
//row1
|
||||
Assert.assertEquals(project.rows.get(1).cells.size(), 3);
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
||||
Assert.assertEquals(project.rows.get(1).contextRowSlots[1], 0);
|
||||
Assert.assertEquals(project.rows.get(1).contextCellSlots[1], 0);
|
||||
Assert.assertNull(project.rows.get(1).cells.get(0));
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
|
||||
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock");
|
||||
Assert.assertNull(project.rows.get(1).cells.get(2));
|
||||
}
|
||||
@Test
|
||||
public void CanParseTripleWithValue(){
|
||||
String sampleRdf = "<http://rdf.freebase.com/ns/en.bob_dylan> <http://rdf.freebase.com/ns/common.topic.alias> \"Robert Zimmerman\"@en.";
|
||||
StringReader reader = new StringReader(sampleRdf);
|
||||
|
||||
try {
|
||||
SUT.read(reader, project, options);
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 2);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/common.topic.alias");
|
||||
Assert.assertEquals(project.rows.size(), 1);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "\"Robert Zimmerman\"@en");
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user