RdfTripleImporter handles row dependencies.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@818 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
cfd0f2219e
commit
0e4682f453
@ -38,50 +38,53 @@ public class RdfTripleImporter implements Importer{
|
|||||||
String baseUrl = options.getProperty("base-url");
|
String baseUrl = options.getProperty("base-url");
|
||||||
|
|
||||||
Graph graph = JrdfFactory.getNewGraph();
|
Graph graph = JrdfFactory.getNewGraph();
|
||||||
//System.out.println("--------------------------------");
|
|
||||||
//System.out.println("initial number of triples before parsing is : " + graph.getNumberOfTriples());
|
|
||||||
LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory);
|
LineHandler lineHandler = nTriplesParserFactory.createParser(graph, newMapFactory);
|
||||||
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
||||||
parser.parse(reader, baseUrl); //fills JRDF graph
|
parser.parse(reader, baseUrl); //fills JRDF graph
|
||||||
//System.out.println("number of triples parsed is : " + graph.getNumberOfTriples());
|
|
||||||
|
|
||||||
//first column is subject
|
//first column is subject
|
||||||
project.columnModel.columns.add(0, new Column(0, "subject"));
|
project.columnModel.columns.add(0, new Column(0, "subject"));
|
||||||
|
project.columnModel.setKeyColumnIndex(0); //the subject will be the key column
|
||||||
project.columnModel.update();
|
project.columnModel.update();
|
||||||
|
|
||||||
ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
|
ClosableIterable<Triple> triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE);
|
||||||
try {
|
try {
|
||||||
for (Triple triple : triples) {
|
for (Triple triple : triples) {
|
||||||
|
|
||||||
//System.out.println("Triple : " + triple);
|
|
||||||
String subject = triple.getSubject().toString();
|
String subject = triple.getSubject().toString();
|
||||||
String predicate = triple.getPredicate().toString();
|
String predicate = triple.getPredicate().toString();
|
||||||
String object = triple.getObject().toString();
|
String object = triple.getObject().toString();
|
||||||
|
|
||||||
//System.out.println("subject : " + subject);
|
|
||||||
//System.out.println("predicate : " + predicate);
|
|
||||||
//System.out.println("object : " + object);
|
|
||||||
//System.out.println("predicate relates to column : " + project.columnModel.getColumnByName(predicate));
|
|
||||||
|
|
||||||
int candidateMergeRowIndex = -1;
|
|
||||||
|
|
||||||
//creates new column for every predicate
|
//creates new column for every predicate
|
||||||
int columnIndex = project.columnModel.getColumnIndexByName(predicate);
|
int columnIndex = project.columnModel.getColumnIndexByName(predicate);
|
||||||
if(columnIndex == -1){
|
if(columnIndex == -1){
|
||||||
candidateMergeRowIndex = AddNewColumn(project, predicate, subject);
|
AddNewColumn(project, predicate, subject);
|
||||||
}
|
}
|
||||||
columnIndex = project.columnModel.getColumnIndexByName(predicate);
|
|
||||||
|
|
||||||
|
//now find row to match with
|
||||||
|
int candidateMergeRowIndex = -1;
|
||||||
|
for(int i = 0; i < project.rows.size(); i++){
|
||||||
|
//check to see if the subjects are the same (merge if they are)
|
||||||
|
Cell cell = project.rows.get(i).cells.get(0);
|
||||||
|
if(cell != null){
|
||||||
|
if(project.rows.get(i).cells.get(0).value == subject){
|
||||||
|
candidateMergeRowIndex = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
columnIndex = project.columnModel.getColumnIndexByName(predicate);
|
||||||
|
|
||||||
if(candidateMergeRowIndex > -1){
|
if(candidateMergeRowIndex > -1){
|
||||||
if(project.rows.get(candidateMergeRowIndex).cells.get(columnIndex) == null){
|
Cell cell = project.rows.get(candidateMergeRowIndex).cells.get(columnIndex);
|
||||||
|
if(cell == null){
|
||||||
//empty, so merge in this value
|
//empty, so merge in this value
|
||||||
MergeWithRow(project, candidateMergeRowIndex, columnIndex, object);
|
MergeWithRow(project, candidateMergeRowIndex, columnIndex, object);
|
||||||
}else{
|
}else{
|
||||||
//can't overwrite existing, so add new row
|
//can't overwrite existing, so add new dependent row
|
||||||
AddNewRow(project, subject, predicate, object); //TODO group to original row.
|
AddNewDependentRow(project, subject, candidateMergeRowIndex, columnIndex, object); //TODO group to original row.
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
AddNewRow(project, subject, predicate, object);
|
AddNewRow(project, subject, columnIndex, object);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,46 +93,42 @@ public class RdfTripleImporter implements Importer{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected int AddNewColumn(Project project, String predicate, String subject){
|
protected void AddNewColumn(Project project, String predicate, String subject){
|
||||||
//System.out.println("adding new column");
|
|
||||||
int numberOfColumns = project.columnModel.columns.size();
|
int numberOfColumns = project.columnModel.columns.size();
|
||||||
|
|
||||||
project.columnModel.columns.add(numberOfColumns, new Column(numberOfColumns, predicate));
|
project.columnModel.columns.add(numberOfColumns, new Column(numberOfColumns, predicate));
|
||||||
|
project.columnModel.setMaxCellIndex(numberOfColumns);
|
||||||
project.columnModel.update();
|
project.columnModel.update();
|
||||||
|
|
||||||
int candidateMergeRowIndex = -1;
|
|
||||||
//update existing rows with new column
|
//update existing rows with new column
|
||||||
for(int i = 0; i < project.rows.size(); i++){
|
for(int i = 0; i < project.rows.size(); i++){
|
||||||
project.rows.get(i).cells.add(numberOfColumns, null);
|
project.rows.get(i).cells.add(numberOfColumns, null);
|
||||||
if(project.rows.get(i).cells.get(0).value == subject){
|
|
||||||
candidateMergeRowIndex = i;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//numberOfColumns = project.columnModel.columns.size();
|
|
||||||
//System.out.println("New total number of columns : " + numberOfColumns);
|
|
||||||
|
|
||||||
return candidateMergeRowIndex;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void MergeWithRow(Project project, int candidateMergeRowIndex, int columnIndex, String object){
|
protected void MergeWithRow(Project project, int candidateMergeRowIndex, int columnIndex, String object){
|
||||||
project.rows.get(candidateMergeRowIndex).setCell(columnIndex, new Cell(object, null));
|
project.rows.get(candidateMergeRowIndex).setCell(columnIndex, new Cell(object, null));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void AddNewRow(Project project, String subject, String predicate, String object){
|
protected void AddNewDependentRow(Project project, String subject, int candidateMergeRowIndex, int columnIndex, String object){
|
||||||
int numberOfColumns = project.columnModel.columns.size();
|
Row row = AddNewRow(project, subject, columnIndex, object);
|
||||||
|
|
||||||
|
Project.setRowDependency(project, row, columnIndex, candidateMergeRowIndex, project.columnModel.getKeyColumnIndex());
|
||||||
|
|
||||||
|
row.cells.set(project.columnModel.getKeyColumnIndex(), null); //the subject can now be null, as the dependencies are set
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Row AddNewRow(Project project, String subject, int columnIndex, String object){
|
||||||
|
int numberOfColumns = project.columnModel.columns.size();
|
||||||
|
|
||||||
//add subject
|
//add subject
|
||||||
Row row = new Row(numberOfColumns);
|
Row row = new Row(numberOfColumns);
|
||||||
row.setCell(0, new Cell(subject, null));
|
row.setCell(0, new Cell(subject, null));
|
||||||
|
|
||||||
//add object to a row
|
//add object to a row
|
||||||
int columnIndex = project.columnModel.getColumnIndexByName(predicate);
|
|
||||||
//System.out.println("predicate relates to columnIndex : " + columnIndex);
|
|
||||||
row.setCell(columnIndex, new Cell(object, null));
|
row.setCell(columnIndex, new Cell(object, null));
|
||||||
//System.out.println("Number of cells in new row : " + row.cells.size());
|
|
||||||
project.rows.add(row);
|
project.rows.add(row);
|
||||||
//System.out.println("New total number of rows : " + project.rows.size());
|
return row;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -32,80 +32,80 @@ import com.metaweb.gridworks.util.Pool;
|
|||||||
|
|
||||||
public class Project {
|
public class Project {
|
||||||
final public long id;
|
final public long id;
|
||||||
|
|
||||||
final public ColumnModel columnModel = new ColumnModel();
|
final public ColumnModel columnModel = new ColumnModel();
|
||||||
final public List<Row> rows = new ArrayList<Row>();
|
final public List<Row> rows = new ArrayList<Row>();
|
||||||
final public History history;
|
final public History history;
|
||||||
|
|
||||||
public Protograph protograph;
|
public Protograph protograph;
|
||||||
|
|
||||||
transient public ProcessManager processManager = new ProcessManager();
|
transient public ProcessManager processManager = new ProcessManager();
|
||||||
transient public Date lastSave = new Date();
|
transient public Date lastSave = new Date();
|
||||||
|
|
||||||
final static Logger logger = LoggerFactory.getLogger("project");
|
final static Logger logger = LoggerFactory.getLogger("project");
|
||||||
|
|
||||||
static public long generateID() {
|
static public long generateID() {
|
||||||
return System.currentTimeMillis() + Math.round(Math.random() * 1000000000000L);
|
return System.currentTimeMillis() + Math.round(Math.random() * 1000000000000L);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Project() {
|
public Project() {
|
||||||
id = generateID();
|
id = generateID();
|
||||||
history = new History(this);
|
history = new History(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Project(long id) {
|
protected Project(long id) {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
this.history = new History(this);
|
this.history = new History(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
public ProjectMetadata getMetadata() {
|
public ProjectMetadata getMetadata() {
|
||||||
return ProjectManager.singleton.getProjectMetadata(id);
|
return ProjectManager.singleton.getProjectMetadata(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized public void save() {
|
synchronized public void save() {
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
File dir = ProjectManager.singleton.getProjectDir(id);
|
File dir = ProjectManager.singleton.getProjectDir(id);
|
||||||
|
|
||||||
File tempFile = new File(dir, "data.temp.zip");
|
File tempFile = new File(dir, "data.temp.zip");
|
||||||
try {
|
try {
|
||||||
saveToFile(tempFile);
|
saveToFile(tempFile);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
|
|
||||||
logger.warn("Failed to save project {}", id);
|
logger.warn("Failed to save project {}", id);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
File file = new File(dir, "data.zip");
|
File file = new File(dir, "data.zip");
|
||||||
File oldFile = new File(dir, "data.old.zip");
|
File oldFile = new File(dir, "data.old.zip");
|
||||||
|
|
||||||
if (file.exists()) {
|
if (file.exists()) {
|
||||||
file.renameTo(oldFile);
|
file.renameTo(oldFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
tempFile.renameTo(file);
|
tempFile.renameTo(file);
|
||||||
if (oldFile.exists()) {
|
if (oldFile.exists()) {
|
||||||
oldFile.delete();
|
oldFile.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
lastSave = new Date();
|
lastSave = new Date();
|
||||||
|
|
||||||
logger.info("Saved project '{}'",id);
|
logger.info("Saved project '{}'",id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void saveToFile(File file) throws Exception {
|
protected void saveToFile(File file) throws Exception {
|
||||||
ZipOutputStream out = new ZipOutputStream(new FileOutputStream(file));
|
ZipOutputStream out = new ZipOutputStream(new FileOutputStream(file));
|
||||||
try {
|
try {
|
||||||
Pool pool = new Pool();
|
Pool pool = new Pool();
|
||||||
|
|
||||||
out.putNextEntry(new ZipEntry("data.txt"));
|
out.putNextEntry(new ZipEntry("data.txt"));
|
||||||
try {
|
try {
|
||||||
saveToOutputStream(out, pool);
|
saveToOutputStream(out, pool);
|
||||||
} finally {
|
} finally {
|
||||||
out.closeEntry();
|
out.closeEntry();
|
||||||
}
|
}
|
||||||
|
|
||||||
out.putNextEntry(new ZipEntry("pool.txt"));
|
out.putNextEntry(new ZipEntry("pool.txt"));
|
||||||
try {
|
try {
|
||||||
pool.save(out);
|
pool.save(out);
|
||||||
@ -116,35 +116,35 @@ public class Project {
|
|||||||
out.close();
|
out.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void saveToOutputStream(OutputStream out, Pool pool) throws IOException {
|
protected void saveToOutputStream(OutputStream out, Pool pool) throws IOException {
|
||||||
Writer writer = new OutputStreamWriter(out);
|
Writer writer = new OutputStreamWriter(out);
|
||||||
try {
|
try {
|
||||||
Properties options = new Properties();
|
Properties options = new Properties();
|
||||||
options.setProperty("mode", "save");
|
options.setProperty("mode", "save");
|
||||||
options.put("pool", pool);
|
options.put("pool", pool);
|
||||||
|
|
||||||
saveToWriter(writer, options);
|
saveToWriter(writer, options);
|
||||||
} finally {
|
} finally {
|
||||||
writer.flush();
|
writer.flush();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void saveToWriter(Writer writer, Properties options) throws IOException {
|
protected void saveToWriter(Writer writer, Properties options) throws IOException {
|
||||||
writer.write(Gridworks.getVersion()); writer.write('\n');
|
writer.write(Gridworks.getVersion()); writer.write('\n');
|
||||||
|
|
||||||
writer.write("columnModel=\n"); columnModel.save(writer, options);
|
writer.write("columnModel=\n"); columnModel.save(writer, options);
|
||||||
writer.write("history=\n"); history.save(writer, options);
|
writer.write("history=\n"); history.save(writer, options);
|
||||||
if (protograph != null) {
|
if (protograph != null) {
|
||||||
writer.write("protograph="); protograph.save(writer, options); writer.write('\n');
|
writer.write("protograph="); protograph.save(writer, options); writer.write('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
writer.write("rowCount="); writer.write(Integer.toString(rows.size())); writer.write('\n');
|
writer.write("rowCount="); writer.write(Integer.toString(rows.size())); writer.write('\n');
|
||||||
for (Row row : rows) {
|
for (Row row : rows) {
|
||||||
row.save(writer, options); writer.write('\n');
|
row.save(writer, options); writer.write('\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static public Project load(File dir, long id) {
|
static public Project load(File dir, long id) {
|
||||||
try {
|
try {
|
||||||
File file = new File(dir, "data.zip");
|
File file = new File(dir, "data.zip");
|
||||||
@ -154,7 +154,7 @@ public class Project {
|
|||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
File file = new File(dir, "data.temp.zip");
|
File file = new File(dir, "data.temp.zip");
|
||||||
if (file.exists()) {
|
if (file.exists()) {
|
||||||
@ -163,7 +163,7 @@ public class Project {
|
|||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
File file = new File(dir, "data.old.zip");
|
File file = new File(dir, "data.old.zip");
|
||||||
if (file.exists()) {
|
if (file.exists()) {
|
||||||
@ -172,12 +172,12 @@ public class Project {
|
|||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected Project loadFromFile(
|
static protected Project loadFromFile(
|
||||||
File file,
|
File file,
|
||||||
long id
|
long id
|
||||||
) throws Exception {
|
) throws Exception {
|
||||||
ZipFile zipFile = new ZipFile(file);
|
ZipFile zipFile = new ZipFile(file);
|
||||||
@ -188,7 +188,7 @@ public class Project {
|
|||||||
pool.load(new InputStreamReader(
|
pool.load(new InputStreamReader(
|
||||||
zipFile.getInputStream(poolEntry)));
|
zipFile.getInputStream(poolEntry)));
|
||||||
} // else, it's a legacy project file
|
} // else, it's a legacy project file
|
||||||
|
|
||||||
return loadFromReader(
|
return loadFromReader(
|
||||||
new LineNumberReader(
|
new LineNumberReader(
|
||||||
new InputStreamReader(
|
new InputStreamReader(
|
||||||
@ -201,25 +201,25 @@ public class Project {
|
|||||||
zipFile.close();
|
zipFile.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected Project loadFromReader(
|
static protected Project loadFromReader(
|
||||||
LineNumberReader reader,
|
LineNumberReader reader,
|
||||||
long id,
|
long id,
|
||||||
Pool pool
|
Pool pool
|
||||||
) throws Exception {
|
) throws Exception {
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
|
|
||||||
/* String version = */ reader.readLine();
|
/* String version = */ reader.readLine();
|
||||||
|
|
||||||
Project project = new Project(id);
|
Project project = new Project(id);
|
||||||
int maxCellCount = 0;
|
int maxCellCount = 0;
|
||||||
|
|
||||||
String line;
|
String line;
|
||||||
while ((line = reader.readLine()) != null) {
|
while ((line = reader.readLine()) != null) {
|
||||||
int equal = line.indexOf('=');
|
int equal = line.indexOf('=');
|
||||||
CharSequence field = line.subSequence(0, equal);
|
CharSequence field = line.subSequence(0, equal);
|
||||||
String value = line.substring(equal + 1);
|
String value = line.substring(equal + 1);
|
||||||
|
|
||||||
if ("columnModel".equals(field)) {
|
if ("columnModel".equals(field)) {
|
||||||
project.columnModel.load(reader);
|
project.columnModel.load(reader);
|
||||||
} else if ("history".equals(field)) {
|
} else if ("history".equals(field)) {
|
||||||
@ -228,7 +228,7 @@ public class Project {
|
|||||||
project.protograph = Protograph.load(project, value);
|
project.protograph = Protograph.load(project, value);
|
||||||
} else if ("rowCount".equals(field)) {
|
} else if ("rowCount".equals(field)) {
|
||||||
int count = Integer.parseInt(value);
|
int count = Integer.parseInt(value);
|
||||||
|
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
line = reader.readLine();
|
line = reader.readLine();
|
||||||
if (line != null) {
|
if (line != null) {
|
||||||
@ -239,35 +239,35 @@ public class Project {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
project.columnModel.setMaxCellIndex(maxCellCount - 1);
|
project.columnModel.setMaxCellIndex(maxCellCount - 1);
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Loaded project {} from disk in {} sec(s)",id,Long.toString((System.currentTimeMillis() - start) / 1000)
|
"Loaded project {} from disk in {} sec(s)",id,Long.toString((System.currentTimeMillis() - start) / 1000)
|
||||||
);
|
);
|
||||||
|
|
||||||
project.recomputeRowContextDependencies();
|
project.recomputeRowContextDependencies();
|
||||||
|
|
||||||
return project;
|
return project;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static protected class Group {
|
static protected class Group {
|
||||||
int[] cellIndices;
|
int[] cellIndices;
|
||||||
int keyCellIndex;
|
int keyCellIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized public void recomputeRowContextDependencies() {
|
synchronized public void recomputeRowContextDependencies() {
|
||||||
List<Group> keyedGroups = new ArrayList<Group>();
|
List<Group> keyedGroups = new ArrayList<Group>();
|
||||||
|
|
||||||
addRootKeyedGroup(keyedGroups);
|
addRootKeyedGroup(keyedGroups);
|
||||||
|
|
||||||
for (ColumnGroup group : columnModel.columnGroups) {
|
for (ColumnGroup group : columnModel.columnGroups) {
|
||||||
if (group.keyColumnIndex >= 0) {
|
if (group.keyColumnIndex >= 0) {
|
||||||
Group keyedGroup = new Group();
|
Group keyedGroup = new Group();
|
||||||
keyedGroup.keyCellIndex = columnModel.columns.get(group.keyColumnIndex).getCellIndex();
|
keyedGroup.keyCellIndex = columnModel.columns.get(group.keyColumnIndex).getCellIndex();
|
||||||
keyedGroup.cellIndices = new int[group.columnSpan - 1];
|
keyedGroup.cellIndices = new int[group.columnSpan - 1];
|
||||||
|
|
||||||
int c = 0;
|
int c = 0;
|
||||||
for (int i = 0; i < group.columnSpan; i++) {
|
for (int i = 0; i < group.columnSpan; i++) {
|
||||||
int columnIndex = group.startColumnIndex + i;
|
int columnIndex = group.startColumnIndex + i;
|
||||||
@ -276,44 +276,45 @@ public class Project {
|
|||||||
keyedGroup.cellIndices[c++] = cellIndex;
|
keyedGroup.cellIndices[c++] = cellIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
keyedGroups.add(keyedGroup);
|
keyedGroups.add(keyedGroup);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Collections.sort(keyedGroups, new Comparator<Group>() {
|
Collections.sort(keyedGroups, new Comparator<Group>() {
|
||||||
public int compare(Group o1, Group o2) {
|
public int compare(Group o1, Group o2) {
|
||||||
return o2.cellIndices.length - o1.cellIndices.length; // larger groups first
|
return o2.cellIndices.length - o1.cellIndices.length; // larger groups first
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
int[] lastNonBlankRowsByGroup = new int[keyedGroups.size()];
|
int[] lastNonBlankRowsByGroup = new int[keyedGroups.size()];
|
||||||
for (int i = 0; i < lastNonBlankRowsByGroup.length; i++) {
|
for (int i = 0; i < lastNonBlankRowsByGroup.length; i++) {
|
||||||
lastNonBlankRowsByGroup[i] = -1;
|
lastNonBlankRowsByGroup[i] = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int rowCount = rows.size();
|
int rowCount = rows.size();
|
||||||
int groupCount = keyedGroups.size();
|
int groupCount = keyedGroups.size();
|
||||||
|
|
||||||
int recordIndex = 0;
|
int recordIndex = 0;
|
||||||
for (int r = 0; r < rowCount; r++) {
|
for (int r = 0; r < rowCount; r++) {
|
||||||
Row row = rows.get(r);
|
Row row = rows.get(r);
|
||||||
row.contextRows = null;
|
row.contextRows = null;
|
||||||
row.contextRowSlots = null;
|
row.contextRowSlots = null;
|
||||||
row.contextCellSlots = null;
|
row.contextCellSlots = null;
|
||||||
|
|
||||||
for (int g = 0; g < groupCount; g++) {
|
for (int g = 0; g < groupCount; g++) {
|
||||||
Group group = keyedGroups.get(g);
|
Group group = keyedGroups.get(g);
|
||||||
|
|
||||||
if (!ExpressionUtils.isNonBlankData(row.getCellValue(group.keyCellIndex))) {
|
if (!ExpressionUtils.isNonBlankData(row.getCellValue(group.keyCellIndex))) {
|
||||||
int contextRowIndex = lastNonBlankRowsByGroup[g];
|
int contextRowIndex = lastNonBlankRowsByGroup[g];
|
||||||
if (contextRowIndex >= 0) {
|
if (contextRowIndex >= 0) {
|
||||||
for (int dependentCellIndex : group.cellIndices) {
|
for (int dependentCellIndex : group.cellIndices) {
|
||||||
if (ExpressionUtils.isNonBlankData(row.getCellValue(dependentCellIndex))) {
|
if (ExpressionUtils.isNonBlankData(row.getCellValue(dependentCellIndex))) {
|
||||||
setRowDependency(
|
setRowDependency(
|
||||||
row,
|
this,
|
||||||
dependentCellIndex,
|
row,
|
||||||
contextRowIndex,
|
dependentCellIndex,
|
||||||
|
contextRowIndex,
|
||||||
group.keyCellIndex
|
group.keyCellIndex
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -323,7 +324,7 @@ public class Project {
|
|||||||
lastNonBlankRowsByGroup[g] = r;
|
lastNonBlankRowsByGroup[g] = r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (row.contextRowSlots != null && row.contextRowSlots.length > 0) {
|
if (row.contextRowSlots != null && row.contextRowSlots.length > 0) {
|
||||||
row.recordIndex = -1;
|
row.recordIndex = -1;
|
||||||
row.contextRows = new ArrayList<Integer>();
|
row.contextRows = new ArrayList<Integer>();
|
||||||
@ -333,19 +334,19 @@ public class Project {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Collections.sort(row.contextRows);
|
Collections.sort(row.contextRows);
|
||||||
|
|
||||||
columnModel._hasDependentRows = true;
|
columnModel._hasDependentRows = true;
|
||||||
} else {
|
} else {
|
||||||
row.recordIndex = recordIndex++;
|
row.recordIndex = recordIndex++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void addRootKeyedGroup(List<Group> keyedGroups) {
|
protected void addRootKeyedGroup(List<Group> keyedGroups) {
|
||||||
int count = columnModel.getMaxCellIndex() + 1;
|
int count = columnModel.getMaxCellIndex() + 1;
|
||||||
if (count > 0 && columnModel.getKeyColumnIndex() < columnModel.columns.size()) {
|
if (count > 0 && columnModel.getKeyColumnIndex() < columnModel.columns.size()) {
|
||||||
Group rootKeyedGroup = new Group();
|
Group rootKeyedGroup = new Group();
|
||||||
|
|
||||||
rootKeyedGroup.cellIndices = new int[count - 1];
|
rootKeyedGroup.cellIndices = new int[count - 1];
|
||||||
rootKeyedGroup.keyCellIndex = columnModel.columns.get(columnModel.getKeyColumnIndex()).getCellIndex();
|
rootKeyedGroup.keyCellIndex = columnModel.columns.get(columnModel.getKeyColumnIndex()).getCellIndex();
|
||||||
|
|
||||||
@ -359,19 +360,19 @@ public class Project {
|
|||||||
keyedGroups.add(rootKeyedGroup);
|
keyedGroups.add(rootKeyedGroup);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void setRowDependency(Row row, int cellIndex, int contextRowIndex, int contextCellIndex) {
|
public static void setRowDependency(Project project, Row row, int cellIndex, int contextRowIndex, int contextCellIndex) {
|
||||||
int count = columnModel.getMaxCellIndex() + 1;
|
int count = project.columnModel.getMaxCellIndex() + 1;
|
||||||
if (row.contextRowSlots == null || row.contextCellSlots == null) {
|
if (row.contextRowSlots == null || row.contextCellSlots == null) {
|
||||||
row.contextRowSlots = new int[count];
|
row.contextRowSlots = new int[count];
|
||||||
row.contextCellSlots = new int[count];
|
row.contextCellSlots = new int[count];
|
||||||
|
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
row.contextRowSlots[i] = -1;
|
row.contextRowSlots[i] = -1;
|
||||||
row.contextCellSlots[i] = -1;
|
row.contextCellSlots[i] = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
row.contextRowSlots[cellIndex] = contextRowIndex;
|
row.contextRowSlots[cellIndex] = contextRowIndex;
|
||||||
row.contextCellSlots[cellIndex] = contextCellIndex;
|
row.contextCellSlots[cellIndex] = contextCellIndex;
|
||||||
}
|
}
|
||||||
|
@ -63,14 +63,31 @@ public class RdfTripleImporterTests {
|
|||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//columns
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 2);
|
Assert.assertEquals(project.columnModel.columns.size(), 2);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album");
|
||||||
|
|
||||||
|
//rows
|
||||||
Assert.assertEquals(project.rows.size(), 3);
|
Assert.assertEquals(project.rows.size(), 3);
|
||||||
|
|
||||||
|
//row0
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
|
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
|
||||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
||||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
|
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
|
||||||
|
|
||||||
|
//row1
|
||||||
|
Assert.assertEquals(project.rows.get(2).cells.size(), 2);
|
||||||
|
Assert.assertNull(project.rows.get(1).cells.get(0));
|
||||||
|
Assert.assertEquals(project.rows.get(1).contextRowSlots[1], 0);
|
||||||
|
Assert.assertEquals(project.rows.get(1).contextCellSlots[1], 0);
|
||||||
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); //NB triples aren't created in order they were input
|
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); //NB triples aren't created in order they were input
|
||||||
|
|
||||||
|
//row2
|
||||||
|
Assert.assertEquals(project.rows.get(2).cells.size(), 2);
|
||||||
|
Assert.assertEquals(project.rows.get(2).contextRowSlots[1], 0);
|
||||||
|
Assert.assertEquals(project.rows.get(2).contextCellSlots[1], 0);
|
||||||
|
Assert.assertNull(project.rows.get(2).cells.get(0));
|
||||||
Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.under_the_red_sky"); //NB triples aren't created in order they were input
|
Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.under_the_red_sky"); //NB triples aren't created in order they were input
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,18 +104,46 @@ public class RdfTripleImporterTests {
|
|||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//columns
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://rdf.freebase.com/ns/music.artist.genre");
|
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://rdf.freebase.com/ns/music.artist.genre");
|
||||||
|
|
||||||
|
//rows
|
||||||
Assert.assertEquals(project.rows.size(), 2);
|
Assert.assertEquals(project.rows.size(), 2);
|
||||||
|
|
||||||
|
//row0
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||||
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
||||||
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
|
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks");
|
||||||
Assert.assertNull(project.rows.get(0).cells.get(2));
|
Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock");
|
||||||
|
|
||||||
|
//row1
|
||||||
Assert.assertEquals(project.rows.get(1).cells.size(), 3);
|
Assert.assertEquals(project.rows.get(1).cells.size(), 3);
|
||||||
Assert.assertEquals(project.rows.get(1).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
Assert.assertEquals(project.rows.get(1).contextRowSlots[1], 0);
|
||||||
|
Assert.assertEquals(project.rows.get(1).contextCellSlots[1], 0);
|
||||||
|
Assert.assertNull(project.rows.get(1).cells.get(0));
|
||||||
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
|
Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home");
|
||||||
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock");
|
Assert.assertNull(project.rows.get(1).cells.get(2));
|
||||||
}
|
}
|
||||||
|
@Test
|
||||||
|
public void CanParseTripleWithValue(){
|
||||||
|
String sampleRdf = "<http://rdf.freebase.com/ns/en.bob_dylan> <http://rdf.freebase.com/ns/common.topic.alias> \"Robert Zimmerman\"@en.";
|
||||||
|
StringReader reader = new StringReader(sampleRdf);
|
||||||
|
|
||||||
|
try {
|
||||||
|
SUT.read(reader, project, options);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail();
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert.assertEquals(project.columnModel.columns.size(), 2);
|
||||||
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject");
|
||||||
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/common.topic.alias");
|
||||||
|
Assert.assertEquals(project.rows.size(), 1);
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.size(), 2);
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan");
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "\"Robert Zimmerman\"@en");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user