RdfTripleImporter handles row dependencies.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@818 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Iain Sproat 2010-05-18 21:08:37 +00:00
parent cfd0f2219e
commit 0e4682f453
3 changed files with 151 additions and 106 deletions

View File

@ -38,50 +38,53 @@ public class RdfTripleImporter implements Importer{
String baseUrl = options.getProperty("base-url"); String baseUrl = options.getProperty("base-url");
Graph graph = JrdfFactory.getNewGraph(); Graph graph = JrdfFactory.getNewGraph();
//System.out.println("--------------------------------");
//System.out.println("initial number of triples before parsing is : " + graph.getNumberOfTriples());
LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory); LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory);
GraphLineParser parser = new GraphLineParser(graph, lineHandler); GraphLineParser parser = new GraphLineParser(graph, lineHandler);
parser.parse(reader, baseUrl); //fills JRDF graph parser.parse(reader, baseUrl); //fills JRDF graph
//System.out.println("number of triples parsed is : " + graph.getNumberOfTriples());
//first column is subject //first column is subject
project.columnModel.columns.add(0, new Column(0, "subject")); project.columnModel.columns.add(0, new Column(0, "subject"));
project.columnModel.setKeyColumnIndex(0); //the subject will be the key column
project.columnModel.update(); project.columnModel.update();
ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE); ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
try { try {
for (Triple triple : triples) { for (Triple triple : triples) {
//System.out.println("Triple : " + triple);
String subject = triple.getSubject().toString(); String subject = triple.getSubject().toString();
String predicate = triple.getPredicate().toString(); String predicate = triple.getPredicate().toString();
String object = triple.getObject().toString(); String object = triple.getObject().toString();
//System.out.println("subject : " + subject);
//System.out.println("predicate : " + predicate);
//System.out.println("object : " + object);
//System.out.println("predicate relates to column : " + project.columnModel.getColumnByName(predicate));
int candidateMergeRowIndex = -1;
//creates new column for every predicate //creates new column for every predicate
int columnIndex = project.columnModel.getColumnIndexByName(predicate); int columnIndex = project.columnModel.getColumnIndexByName(predicate);
if(columnIndex == -1){ if(columnIndex == -1){
candidateMergeRowIndex = AddNewColumn(project, predicate, subject); AddNewColumn(project, predicate, subject);
} }
columnIndex = project.columnModel.getColumnIndexByName(predicate);
//now find row to match with
int candidateMergeRowIndex = -1;
for(int i = 0; i < project.rows.size(); i++){
//check to see if the subjects are the same (merge if they are)
Cell cell = project.rows.get(i).cells.get(0);
if(cell != null){
if(project.rows.get(i).cells.get(0).value == subject){
candidateMergeRowIndex = i;
}
}
}
columnIndex = project.columnModel.getColumnIndexByName(predicate);
if(candidateMergeRowIndex > -1){ if(candidateMergeRowIndex > -1){
if(project.rows.get(candidateMergeRowIndex).cells.get(columnIndex) == null){ Cell cell = project.rows.get(candidateMergeRowIndex).cells.get(columnIndex);
if(cell == null){
//empty, so merge in this value //empty, so merge in this value
MergeWithRow(project, candidateMergeRowIndex, columnIndex, object); MergeWithRow(project, candidateMergeRowIndex, columnIndex, object);
}else{ }else{
//can't overwrite existing, so add new row //can't overwrite existing, so add new dependent row
AddNewRow(project, subject, predicate, object); //TODO group to original row. AddNewDependentRow(project, subject, candidateMergeRowIndex, columnIndex, object); //TODO group to original row.
} }
}else{ }else{
AddNewRow(project, subject, predicate, object); AddNewRow(project, subject, columnIndex, object);
} }
} }
@ -90,46 +93,42 @@ public class RdfTripleImporter implements Importer{
} }
} }
protected int AddNewColumn(Project project, String predicate, String subject){ protected void AddNewColumn(Project project, String predicate, String subject){
//System.out.println("adding new column");
int numberOfColumns = project.columnModel.columns.size(); int numberOfColumns = project.columnModel.columns.size();
project.columnModel.columns.add(numberOfColumns, new Column(numberOfColumns, predicate)); project.columnModel.columns.add(numberOfColumns, new Column(numberOfColumns, predicate));
project.columnModel.setMaxCellIndex(numberOfColumns);
project.columnModel.update(); project.columnModel.update();
int candidateMergeRowIndex = -1;
//update existing rows with new column //update existing rows with new column
for(int i = 0; i < project.rows.size(); i++){ for(int i = 0; i < project.rows.size(); i++){
project.rows.get(i).cells.add(numberOfColumns, null); project.rows.get(i).cells.add(numberOfColumns, null);
if(project.rows.get(i).cells.get(0).value == subject){
candidateMergeRowIndex = i;
}
} }
//numberOfColumns = project.columnModel.columns.size();
//System.out.println("New total number of columns : " + numberOfColumns);
return candidateMergeRowIndex;
} }
protected void MergeWithRow(Project project, int candidateMergeRowIndex, int columnIndex, String object){ protected void MergeWithRow(Project project, int candidateMergeRowIndex, int columnIndex, String object){
project.rows.get(candidateMergeRowIndex).setCell(columnIndex, new Cell(object, null)); project.rows.get(candidateMergeRowIndex).setCell(columnIndex, new Cell(object, null));
} }
protected void AddNewRow(Project project, String subject, String predicate, String object){ protected void AddNewDependentRow(Project project, String subject, int candidateMergeRowIndex, int columnIndex, String object){
int numberOfColumns = project.columnModel.columns.size(); Row row = AddNewRow(project, subject, columnIndex, object);
Project.setRowDependency(project, row, columnIndex, candidateMergeRowIndex, project.columnModel.getKeyColumnIndex());
row.cells.set(project.columnModel.getKeyColumnIndex(), null); //the subject can now be null, as the dependencies are set
}
protected Row AddNewRow(Project project, String subject, int columnIndex, String object){
int numberOfColumns = project.columnModel.columns.size();
//add subject //add subject
Row row = new Row(numberOfColumns); Row row = new Row(numberOfColumns);
row.setCell(0, new Cell(subject, null)); row.setCell(0, new Cell(subject, null));
//add object to a row //add object to a row
int columnIndex = project.columnModel.getColumnIndexByName(predicate);
//System.out.println("predicate relates to columnIndex : " + columnIndex);
row.setCell(columnIndex, new Cell(object, null)); row.setCell(columnIndex, new Cell(object, null));
//System.out.println("Number of cells in new row : " + row.cells.size());
project.rows.add(row); project.rows.add(row);
//System.out.println("New total number of rows : " + project.rows.size()); return row;
} }
@Override @Override

View File

@ -32,80 +32,80 @@ import com.metaweb.gridworks.util.Pool;
public class Project { public class Project {
final public long id; final public long id;
final public ColumnModel columnModel = new ColumnModel(); final public ColumnModel columnModel = new ColumnModel();
final public List<Row> rows = new ArrayList<Row>(); final public List<Row> rows = new ArrayList<Row>();
final public History history; final public History history;
public Protograph protograph; public Protograph protograph;
transient public ProcessManager processManager = new ProcessManager(); transient public ProcessManager processManager = new ProcessManager();
transient public Date lastSave = new Date(); transient public Date lastSave = new Date();
final static Logger logger = LoggerFactory.getLogger("project"); final static Logger logger = LoggerFactory.getLogger("project");
static public long generateID() { static public long generateID() {
return System.currentTimeMillis() + Math.round(Math.random() * 1000000000000L); return System.currentTimeMillis() + Math.round(Math.random() * 1000000000000L);
} }
public Project() { public Project() {
id = generateID(); id = generateID();
history = new History(this); history = new History(this);
} }
protected Project(long id) { protected Project(long id) {
this.id = id; this.id = id;
this.history = new History(this); this.history = new History(this);
} }
public ProjectMetadata getMetadata() { public ProjectMetadata getMetadata() {
return ProjectManager.singleton.getProjectMetadata(id); return ProjectManager.singleton.getProjectMetadata(id);
} }
synchronized public void save() { synchronized public void save() {
synchronized (this) { synchronized (this) {
File dir = ProjectManager.singleton.getProjectDir(id); File dir = ProjectManager.singleton.getProjectDir(id);
File tempFile = new File(dir, "data.temp.zip"); File tempFile = new File(dir, "data.temp.zip");
try { try {
saveToFile(tempFile); saveToFile(tempFile);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
logger.warn("Failed to save project {}", id); logger.warn("Failed to save project {}", id);
return; return;
} }
File file = new File(dir, "data.zip"); File file = new File(dir, "data.zip");
File oldFile = new File(dir, "data.old.zip"); File oldFile = new File(dir, "data.old.zip");
if (file.exists()) { if (file.exists()) {
file.renameTo(oldFile); file.renameTo(oldFile);
} }
tempFile.renameTo(file); tempFile.renameTo(file);
if (oldFile.exists()) { if (oldFile.exists()) {
oldFile.delete(); oldFile.delete();
} }
lastSave = new Date(); lastSave = new Date();
logger.info("Saved project '{}'",id); logger.info("Saved project '{}'",id);
} }
} }
protected void saveToFile(File file) throws Exception { protected void saveToFile(File file) throws Exception {
ZipOutputStream out = new ZipOutputStream(new FileOutputStream(file)); ZipOutputStream out = new ZipOutputStream(new FileOutputStream(file));
try { try {
Pool pool = new Pool(); Pool pool = new Pool();
out.putNextEntry(new ZipEntry("data.txt")); out.putNextEntry(new ZipEntry("data.txt"));
try { try {
saveToOutputStream(out, pool); saveToOutputStream(out, pool);
} finally { } finally {
out.closeEntry(); out.closeEntry();
} }
out.putNextEntry(new ZipEntry("pool.txt")); out.putNextEntry(new ZipEntry("pool.txt"));
try { try {
pool.save(out); pool.save(out);
@ -116,35 +116,35 @@ public class Project {
out.close(); out.close();
} }
} }
protected void saveToOutputStream(OutputStream out, Pool pool) throws IOException { protected void saveToOutputStream(OutputStream out, Pool pool) throws IOException {
Writer writer = new OutputStreamWriter(out); Writer writer = new OutputStreamWriter(out);
try { try {
Properties options = new Properties(); Properties options = new Properties();
options.setProperty("mode", "save"); options.setProperty("mode", "save");
options.put("pool", pool); options.put("pool", pool);
saveToWriter(writer, options); saveToWriter(writer, options);
} finally { } finally {
writer.flush(); writer.flush();
} }
} }
protected void saveToWriter(Writer writer, Properties options) throws IOException { protected void saveToWriter(Writer writer, Properties options) throws IOException {
writer.write(Gridworks.getVersion()); writer.write('\n'); writer.write(Gridworks.getVersion()); writer.write('\n');
writer.write("columnModel=\n"); columnModel.save(writer, options); writer.write("columnModel=\n"); columnModel.save(writer, options);
writer.write("history=\n"); history.save(writer, options); writer.write("history=\n"); history.save(writer, options);
if (protograph != null) { if (protograph != null) {
writer.write("protograph="); protograph.save(writer, options); writer.write('\n'); writer.write("protograph="); protograph.save(writer, options); writer.write('\n');
} }
writer.write("rowCount="); writer.write(Integer.toString(rows.size())); writer.write('\n'); writer.write("rowCount="); writer.write(Integer.toString(rows.size())); writer.write('\n');
for (Row row : rows) { for (Row row : rows) {
row.save(writer, options); writer.write('\n'); row.save(writer, options); writer.write('\n');
} }
} }
static public Project load(File dir, long id) { static public Project load(File dir, long id) {
try { try {
File file = new File(dir, "data.zip"); File file = new File(dir, "data.zip");
@ -154,7 +154,7 @@ public class Project {
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
try { try {
File file = new File(dir, "data.temp.zip"); File file = new File(dir, "data.temp.zip");
if (file.exists()) { if (file.exists()) {
@ -163,7 +163,7 @@ public class Project {
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
try { try {
File file = new File(dir, "data.old.zip"); File file = new File(dir, "data.old.zip");
if (file.exists()) { if (file.exists()) {
@ -172,12 +172,12 @@ public class Project {
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
return null; return null;
} }
static protected Project loadFromFile( static protected Project loadFromFile(
File file, File file,
long id long id
) throws Exception { ) throws Exception {
ZipFile zipFile = new ZipFile(file); ZipFile zipFile = new ZipFile(file);
@ -188,7 +188,7 @@ public class Project {
pool.load(new InputStreamReader( pool.load(new InputStreamReader(
zipFile.getInputStream(poolEntry))); zipFile.getInputStream(poolEntry)));
} // else, it's a legacy project file } // else, it's a legacy project file
return loadFromReader( return loadFromReader(
new LineNumberReader( new LineNumberReader(
new InputStreamReader( new InputStreamReader(
@ -201,25 +201,25 @@ public class Project {
zipFile.close(); zipFile.close();
} }
} }
static protected Project loadFromReader( static protected Project loadFromReader(
LineNumberReader reader, LineNumberReader reader,
long id, long id,
Pool pool Pool pool
) throws Exception { ) throws Exception {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
/* String version = */ reader.readLine(); /* String version = */ reader.readLine();
Project project = new Project(id); Project project = new Project(id);
int maxCellCount = 0; int maxCellCount = 0;
String line; String line;
while ((line = reader.readLine()) != null) { while ((line = reader.readLine()) != null) {
int equal = line.indexOf('='); int equal = line.indexOf('=');
CharSequence field = line.subSequence(0, equal); CharSequence field = line.subSequence(0, equal);
String value = line.substring(equal + 1); String value = line.substring(equal + 1);
if ("columnModel".equals(field)) { if ("columnModel".equals(field)) {
project.columnModel.load(reader); project.columnModel.load(reader);
} else if ("history".equals(field)) { } else if ("history".equals(field)) {
@ -228,7 +228,7 @@ public class Project {
project.protograph = Protograph.load(project, value); project.protograph = Protograph.load(project, value);
} else if ("rowCount".equals(field)) { } else if ("rowCount".equals(field)) {
int count = Integer.parseInt(value); int count = Integer.parseInt(value);
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
line = reader.readLine(); line = reader.readLine();
if (line != null) { if (line != null) {
@ -239,35 +239,35 @@ public class Project {
} }
} }
} }
project.columnModel.setMaxCellIndex(maxCellCount - 1); project.columnModel.setMaxCellIndex(maxCellCount - 1);
logger.info( logger.info(
"Loaded project {} from disk in {} sec(s)",id,Long.toString((System.currentTimeMillis() - start) / 1000) "Loaded project {} from disk in {} sec(s)",id,Long.toString((System.currentTimeMillis() - start) / 1000)
); );
project.recomputeRowContextDependencies(); project.recomputeRowContextDependencies();
return project; return project;
} }
static protected class Group { static protected class Group {
int[] cellIndices; int[] cellIndices;
int keyCellIndex; int keyCellIndex;
} }
synchronized public void recomputeRowContextDependencies() { synchronized public void recomputeRowContextDependencies() {
List<Group> keyedGroups = new ArrayList<Group>(); List<Group> keyedGroups = new ArrayList<Group>();
addRootKeyedGroup(keyedGroups); addRootKeyedGroup(keyedGroups);
for (ColumnGroup group : columnModel.columnGroups) { for (ColumnGroup group : columnModel.columnGroups) {
if (group.keyColumnIndex >= 0) { if (group.keyColumnIndex >= 0) {
Group keyedGroup = new Group(); Group keyedGroup = new Group();
keyedGroup.keyCellIndex = columnModel.columns.get(group.keyColumnIndex).getCellIndex(); keyedGroup.keyCellIndex = columnModel.columns.get(group.keyColumnIndex).getCellIndex();
keyedGroup.cellIndices = new int[group.columnSpan - 1]; keyedGroup.cellIndices = new int[group.columnSpan - 1];
int c = 0; int c = 0;
for (int i = 0; i < group.columnSpan; i++) { for (int i = 0; i < group.columnSpan; i++) {
int columnIndex = group.startColumnIndex + i; int columnIndex = group.startColumnIndex + i;
@ -276,44 +276,45 @@ public class Project {
keyedGroup.cellIndices[c++] = cellIndex; keyedGroup.cellIndices[c++] = cellIndex;
} }
} }
keyedGroups.add(keyedGroup); keyedGroups.add(keyedGroup);
} }
} }
Collections.sort(keyedGroups, new Comparator<Group>() { Collections.sort(keyedGroups, new Comparator<Group>() {
public int compare(Group o1, Group o2) { public int compare(Group o1, Group o2) {
return o2.cellIndices.length - o1.cellIndices.length; // larger groups first return o2.cellIndices.length - o1.cellIndices.length; // larger groups first
} }
}); });
int[] lastNonBlankRowsByGroup = new int[keyedGroups.size()]; int[] lastNonBlankRowsByGroup = new int[keyedGroups.size()];
for (int i = 0; i < lastNonBlankRowsByGroup.length; i++) { for (int i = 0; i < lastNonBlankRowsByGroup.length; i++) {
lastNonBlankRowsByGroup[i] = -1; lastNonBlankRowsByGroup[i] = -1;
} }
int rowCount = rows.size(); int rowCount = rows.size();
int groupCount = keyedGroups.size(); int groupCount = keyedGroups.size();
int recordIndex = 0; int recordIndex = 0;
for (int r = 0; r < rowCount; r++) { for (int r = 0; r < rowCount; r++) {
Row row = rows.get(r); Row row = rows.get(r);
row.contextRows = null; row.contextRows = null;
row.contextRowSlots = null; row.contextRowSlots = null;
row.contextCellSlots = null; row.contextCellSlots = null;
for (int g = 0; g < groupCount; g++) { for (int g = 0; g < groupCount; g++) {
Group group = keyedGroups.get(g); Group group = keyedGroups.get(g);
if (!ExpressionUtils.isNonBlankData(row.getCellValue(group.keyCellIndex))) { if (!ExpressionUtils.isNonBlankData(row.getCellValue(group.keyCellIndex))) {
int contextRowIndex = lastNonBlankRowsByGroup[g]; int contextRowIndex = lastNonBlankRowsByGroup[g];
if (contextRowIndex >= 0) { if (contextRowIndex >= 0) {
for (int dependentCellIndex : group.cellIndices) { for (int dependentCellIndex : group.cellIndices) {
if (ExpressionUtils.isNonBlankData(row.getCellValue(dependentCellIndex))) { if (ExpressionUtils.isNonBlankData(row.getCellValue(dependentCellIndex))) {
setRowDependency( setRowDependency(
row, this,
dependentCellIndex, row,
contextRowIndex, dependentCellIndex,
contextRowIndex,
group.keyCellIndex group.keyCellIndex
); );
} }
@ -323,7 +324,7 @@ public class Project {
lastNonBlankRowsByGroup[g] = r; lastNonBlankRowsByGroup[g] = r;
} }
} }
if (row.contextRowSlots != null && row.contextRowSlots.length > 0) { if (row.contextRowSlots != null && row.contextRowSlots.length > 0) {
row.recordIndex = -1; row.recordIndex = -1;
row.contextRows = new ArrayList<Integer>(); row.contextRows = new ArrayList<Integer>();
@ -333,19 +334,19 @@ public class Project {
} }
} }
Collections.sort(row.contextRows); Collections.sort(row.contextRows);
columnModel._hasDependentRows = true; columnModel._hasDependentRows = true;
} else { } else {
row.recordIndex = recordIndex++; row.recordIndex = recordIndex++;
} }
} }
} }
protected void addRootKeyedGroup(List<Group> keyedGroups) { protected void addRootKeyedGroup(List<Group> keyedGroups) {
int count = columnModel.getMaxCellIndex() + 1; int count = columnModel.getMaxCellIndex() + 1;
if (count > 0 && columnModel.getKeyColumnIndex() < columnModel.columns.size()) { if (count > 0 && columnModel.getKeyColumnIndex() < columnModel.columns.size()) {
Group rootKeyedGroup = new Group(); Group rootKeyedGroup = new Group();
rootKeyedGroup.cellIndices = new int[count - 1]; rootKeyedGroup.cellIndices = new int[count - 1];
rootKeyedGroup.keyCellIndex = columnModel.columns.get(columnModel.getKeyColumnIndex()).getCellIndex(); rootKeyedGroup.keyCellIndex = columnModel.columns.get(columnModel.getKeyColumnIndex()).getCellIndex();
@ -359,19 +360,19 @@ public class Project {
keyedGroups.add(rootKeyedGroup); keyedGroups.add(rootKeyedGroup);
} }
} }
protected void setRowDependency(Row row, int cellIndex, int contextRowIndex, int contextCellIndex) { public static void setRowDependency(Project project, Row row, int cellIndex, int contextRowIndex, int contextCellIndex) {
int count = columnModel.getMaxCellIndex() + 1; int count = project.columnModel.getMaxCellIndex() + 1;
if (row.contextRowSlots == null || row.contextCellSlots == null) { if (row.contextRowSlots == null || row.contextCellSlots == null) {
row.contextRowSlots = new int[count]; row.contextRowSlots = new int[count];
row.contextCellSlots = new int[count]; row.contextCellSlots = new int[count];
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
row.contextRowSlots[i] = -1; row.contextRowSlots[i] = -1;
row.contextCellSlots[i] = -1; row.contextCellSlots[i] = -1;
} }
} }
row.contextRowSlots[cellIndex] = contextRowIndex; row.contextRowSlots[cellIndex] = contextRowIndex;
row.contextCellSlots[cellIndex] = contextCellIndex; row.contextCellSlots[cellIndex] = contextCellIndex;
} }

View File

@ -63,14 +63,31 @@ public class RdfTripleImporterTests {
Assert.fail(); Assert.fail();
} }
//columns
Assert.assertEquals(project.columnModel.columns.size(), 2); Assert.assertEquals(project.columnModel.columns.size(), 2);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album"); Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album");
//rows
Assert.assertEquals(project.rows.size(), 3); Assert.assertEquals(project.rows.size(), 3);
//row0
Assert.assertEquals(project.rows.get(0).cells.size(), 2); Assert.assertEquals(project.rows.get(0).cells.size(), 2);
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks"); Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
//row1
Assert.assertEquals(project.rows.get(2).cells.size(), 2);
Assert.assertNull(project.rows.get(1).cells.get(0));
Assert.assertEquals(project.rows.get(1).contextRowSlots[1], 0);
Assert.assertEquals(project.rows.get(1).contextCellSlots[1], 0);
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); //NB triples aren't created in order they were input Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); //NB triples aren't created in order they were input
//row2
Assert.assertEquals(project.rows.get(2).cells.size(), 2);
Assert.assertEquals(project.rows.get(2).contextRowSlots[1], 0);
Assert.assertEquals(project.rows.get(2).contextCellSlots[1], 0);
Assert.assertNull(project.rows.get(2).cells.get(0));
Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.under_the_red_sky"); //NB triples aren't created in order they were input Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.under_the_red_sky"); //NB triples aren't created in order they were input
} }
@ -87,18 +104,46 @@ public class RdfTripleImporterTests {
Assert.fail(); Assert.fail();
} }
//columns
Assert.assertEquals(project.columnModel.columns.size(), 3); Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album"); Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album");
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://rdf.freebase.com/ns/music.artist.genre"); Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://rdf.freebase.com/ns/music.artist.genre");
//rows
Assert.assertEquals(project.rows.size(), 2); Assert.assertEquals(project.rows.size(), 2);
//row0
Assert.assertEquals(project.rows.get(0).cells.size(), 3); Assert.assertEquals(project.rows.get(0).cells.size(), 3);
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks"); Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
Assert.assertNull(project.rows.get(0).cells.get(2)); Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock");
//row1
Assert.assertEquals(project.rows.get(1).cells.size(), 3); Assert.assertEquals(project.rows.get(1).cells.size(), 3);
Assert.assertEquals(project.rows.get(1).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); Assert.assertEquals(project.rows.get(1).contextRowSlots[1], 0);
Assert.assertEquals(project.rows.get(1).contextCellSlots[1], 0);
Assert.assertNull(project.rows.get(1).cells.get(0));
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock"); Assert.assertNull(project.rows.get(1).cells.get(2));
} }
/**
 * Checks that a single triple whose object is a language-tagged literal
 * (rather than a URI) is imported as one row with two columns.
 *
 * Expected outcome, as asserted below: column 0 is named "subject",
 * column 1 is named after the predicate URI, and the single row holds the
 * subject URI in cell 0 and the literal text in cell 1.
 */
@Test
public void CanParseTripleWithValue(){
// One statement: <subject> <predicate> "literal"@lang, terminated by '.'.
String sampleRdf = "<http://rdf.freebase.com/ns/en.bob_dylan> <http://rdf.freebase.com/ns/common.topic.alias> \"Robert Zimmerman\"@en.";
StringReader reader = new StringReader(sampleRdf);
try {
// NOTE(review): SUT, project and options are fixtures declared outside this
// view -- presumably SUT is the RdfTripleImporter under test; confirm.
SUT.read(reader, project, options);
} catch (Exception e) {
// Any exception raised by the import is reported as a plain test failure.
Assert.fail();
}
// Columns: the fixed "subject" column plus one column for the predicate.
Assert.assertEquals(project.columnModel.columns.size(), 2);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/common.topic.alias");
// Rows: exactly one, with one cell per column.
Assert.assertEquals(project.rows.size(), 1);
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
// The subject is stored without the surrounding angle brackets.
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
// The literal keeps its quotes and "@en" tag; the statement's trailing '.' is dropped.
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "\"Robert Zimmerman\"@en");
}
} }