When transposing data to triple loader output, pass row indices and cell indices down through the transposed-node factory and its write methods, so that we can later generate more context information for reconciliation (recon) records.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1051 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-06-29 17:30:16 +00:00
parent 7a914f01e8
commit 2e3984d54a
6 changed files with 212 additions and 106 deletions

View File

@ -41,7 +41,7 @@ public class PreviewProtographCommand extends Command {
{
StringWriter stringWriter = new StringWriter();
TripleLoaderTransposedNodeFactory nodeFactory = new TripleLoaderTransposedNodeFactory(stringWriter);
TripleLoaderTransposedNodeFactory nodeFactory = new TripleLoaderTransposedNodeFactory(project, stringWriter);
Transposer.transpose(project, filteredRows, protograph, protograph.getRootNode(0), nodeFactory);
nodeFactory.flush();

View File

@ -39,7 +39,7 @@ abstract public class ProtographTransposeExporter implements Exporter {
if (project.protograph != null) {
Protograph protograph = project.protograph;
TransposedNodeFactory nodeFactory = createNodeFactory(writer);
TransposedNodeFactory nodeFactory = createNodeFactory(project, writer);
Transposer.transpose(project, engine.getAllFilteredRows(), protograph, protograph.getRootNode(0), nodeFactory, -1);
@ -47,7 +47,7 @@ abstract public class ProtographTransposeExporter implements Exporter {
}
}
abstract protected TransposedNodeFactory createNodeFactory(Writer writer);
abstract protected TransposedNodeFactory createNodeFactory(Project project, Writer writer);
static public class TripleLoaderExporter extends ProtographTransposeExporter {
public TripleLoaderExporter() {
@ -55,8 +55,8 @@ abstract public class ProtographTransposeExporter implements Exporter {
}
@Override
protected TransposedNodeFactory createNodeFactory(Writer writer) {
return new TripleLoaderTransposedNodeFactory(writer);
protected TransposedNodeFactory createNodeFactory(Project project, Writer writer) {
return new TripleLoaderTransposedNodeFactory(project, writer);
}
}
@ -66,7 +66,7 @@ abstract public class ProtographTransposeExporter implements Exporter {
}
@Override
protected TransposedNodeFactory createNodeFactory(Writer writer) {
protected TransposedNodeFactory createNodeFactory(Project project, Writer writer) {
return new MqlwriteLikeTransposedNodeFactory(writer);
}
}

View File

@ -247,7 +247,7 @@ public class MqlwriteLikeTransposedNodeFactory implements TransposedNodeFactory
public TransposedNode transposeAnonymousNode(
TransposedNode parentNode,
FreebaseProperty property,
AnonymousNode node) {
AnonymousNode node, int rowIndex) {
return new AnonymousTransposedNode(
parentNode instanceof JsonObjectTransposedNode ? (JsonObjectTransposedNode) parentNode : null,
@ -260,7 +260,7 @@ public class MqlwriteLikeTransposedNodeFactory implements TransposedNodeFactory
TransposedNode parentNode,
FreebaseProperty property,
CellNode node,
Cell cell) {
int rowIndex, Cell cell) {
JsonTransposedNode tnode = null;
if (node instanceof CellTopicNode) {
@ -280,7 +280,7 @@ public class MqlwriteLikeTransposedNodeFactory implements TransposedNodeFactory
public TransposedNode transposeTopicNode(
TransposedNode parentNode,
FreebaseProperty property,
FreebaseTopicNode node) {
FreebaseTopicNode node, int rowIndex) {
JsonTransposedNode tnode = new TopicTransposedNode(node);
@ -292,7 +292,7 @@ public class MqlwriteLikeTransposedNodeFactory implements TransposedNodeFactory
public TransposedNode transposeValueNode(
TransposedNode parentNode,
FreebaseProperty property,
ValueNode node) {
ValueNode node, int rowIndex) {
JsonTransposedNode tnode = new ValueTransposedNode(node);

View File

@ -13,26 +13,29 @@ public interface TransposedNodeFactory {
public TransposedNode transposeAnonymousNode(
TransposedNode parentNode,
FreebaseProperty property,
AnonymousNode node
AnonymousNode node, int rowIndex
);
public TransposedNode transposeCellNode(
TransposedNode parentNode,
FreebaseProperty property,
CellNode node,
int rowIndex,
Cell cell
);
public TransposedNode transposeValueNode(
TransposedNode parentNode,
FreebaseProperty property,
ValueNode node
ValueNode node,
int rowIndex
);
public TransposedNode transposeTopicNode(
TransposedNode parentNode,
FreebaseProperty property,
FreebaseTopicNode node
FreebaseTopicNode node,
int rowIndex
);
public void flush() throws IOException;

View File

@ -52,7 +52,7 @@ public class Transposer {
@Override
public boolean visit(Project project, int rowIndex, Row row) {
if (rootContext.limit <= 0 || rootContext.count < rootContext.limit) {
descend(project, protograph, nodeFactory, row, rootNode, rootContext);
descend(project, protograph, nodeFactory, rowIndex, row, rootNode, rootContext);
}
if (rootContext.limit > 0 && rootContext.count > rootContext.limit) {
@ -93,6 +93,7 @@ public class Transposer {
Project project,
Protograph protograph,
TransposedNodeFactory nodeFactory,
int rowIndex,
Row row,
Node node,
Context context
@ -122,6 +123,7 @@ public class Transposer {
parentNode,
property,
node2,
rowIndex,
cell
);
}
@ -130,19 +132,22 @@ public class Transposer {
tnode = nodeFactory.transposeAnonymousNode(
parentNode,
property,
(AnonymousNode) node
(AnonymousNode) node,
rowIndex
);
} else if (node instanceof FreebaseTopicNode) {
tnode = nodeFactory.transposeTopicNode(
parentNode,
property,
(FreebaseTopicNode) node
(FreebaseTopicNode) node,
rowIndex
);
} else if (node instanceof ValueNode) {
tnode = nodeFactory.transposeValueNode(
parentNode,
property,
(ValueNode) node
(ValueNode) node,
rowIndex
);
}
}
@ -165,6 +170,7 @@ public class Transposer {
project,
protograph,
nodeFactory,
rowIndex,
row,
node2.getLink(i).getTarget(),
context.subContexts.get(i)

View File

@ -11,9 +11,14 @@ import java.util.Set;
import org.json.JSONObject;
import com.metaweb.gridworks.expr.ExpressionUtils;
import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Column;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.Recon;
import com.metaweb.gridworks.model.Row;
import com.metaweb.gridworks.model.Recon.Judgment;
import com.metaweb.gridworks.model.RecordModel.RowDependency;
import com.metaweb.gridworks.protograph.AnonymousNode;
import com.metaweb.gridworks.protograph.CellKeyNode;
import com.metaweb.gridworks.protograph.CellNode;
@ -24,6 +29,8 @@ import com.metaweb.gridworks.protograph.FreebaseTopicNode;
import com.metaweb.gridworks.protograph.ValueNode;
public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory {
protected Project project;
protected boolean start = true;
protected Writer writer;
protected WritingTransposedNode lastRootNode;
@ -31,14 +38,15 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
protected Map<Long, String> newTopicVars = new HashMap<Long, String>();
protected Set<Long> serializedRecons = new HashSet<Long>();
public TripleLoaderTransposedNodeFactory(Writer writer) {
public TripleLoaderTransposedNodeFactory(Project project, Writer writer) {
this.project = project;
this.writer = writer;
}
@Override
public void flush() throws IOException {
if (lastRootNode != null) {
lastRootNode.write(null, null, null);
lastRootNode.write(null, null, project, -1, -1, null);
lastRootNode = null;
}
}
@ -56,28 +64,81 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
}
}
protected void writeRecon(StringBuffer sb, Cell cell) {
protected void writeRecon(StringBuffer sb, Project project, int rowIndex, int cellIndex, Cell cell) {
Recon recon = cell.recon;
if (serializedRecons.contains(recon.id)) {
sb.append(Long.toString(recon.id));
} else {
Column column = project.columnModel.getColumnByCellIndex(cellIndex);
sb.append("{ ");
sb.append("\"id\" : "); sb.append(Long.toString(recon.id));
if (!serializedRecons.contains(recon.id)) {
serializedRecons.add(recon.id);
String s = cell.value instanceof String ? (String) cell.value : cell.value.toString();
sb.append("{ ");
sb.append("\"id\" : "); sb.append(Long.toString(recon.id));
sb.append(", \"history_entry\" : "); sb.append(Long.toString(recon.judgmentHistoryEntry));
sb.append(", \"text\" : "); sb.append(JSONObject.quote(s));
sb.append(", \"column\" : "); sb.append(JSONObject.quote(column.getName()));
sb.append(", \"service\" : "); sb.append(JSONObject.quote(recon.service));
sb.append(", \"action\" : "); sb.append(JSONObject.quote(recon.judgmentAction));
sb.append(", \"batch\" : "); sb.append(Integer.toString(recon.judgmentBatchSize));
sb.append(", \"matchRank\" : "); sb.append(Integer.toString(recon.matchRank));
sb.append(" }");
}
/*
sb.append(", \"row\" : [");
{
boolean first = true;
Row row = project.rows.get(rowIndex);
RowDependency rowDependency = project.recordModel.getRowDependency(rowIndex);
List<Integer> contextRows = rowDependency.contextRows;
int maxColumns = project.columnModel.columns.size();
for (int c = 0; c < maxColumns; c++) {
Column column2 = project.columnModel.columns.get(c);
int cellIndex2 = column2.getCellIndex();
if (cellIndex2 != cellIndex) {
Object value = row.getCellValue(cellIndex2);
if (!ExpressionUtils.isNonBlankData(value) && contextRows != null) {
for (int i = contextRows.size() - 1; i >= 0; i--) {
int rowIndex2 = contextRows.get(i);
Row row2 = project.rows.get(rowIndex2);
value = row2.getCellValue(cellIndex2);
if (ExpressionUtils.isNonBlankData(value)) {
break;
}
}
}
protected void writeLine(String subject, String predicate, Object object, Cell subjectCell, Cell objectCell) {
if (ExpressionUtils.isNonBlankData(value)) {
if (first) {
first = false;
} else {
sb.append(",");
}
String s2 = value instanceof String ? (String) value : value.toString();
sb.append("{\"c\":"); sb.append(JSONObject.quote(column2.getName()));
sb.append(",\"v\":"); sb.append(JSONObject.quote(s2));
sb.append("}");
}
}
}
}
sb.append("]");
*/
sb.append(" }");
}
protected void writeLine(
String subject, String predicate, Object object,
Project project,
int subjectRowIndex, int subjectCellIndex, Cell subjectCell,
int objectRowIndex, int objectCellIndex, Cell objectCell
) {
if (subject != null && object != null) {
String s = object instanceof String ?
JSONObject.quote((String) object) : object.toString();
@ -91,14 +152,14 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
if (subjectCell != null) {
sb.append("\"srecon\" : ");
writeRecon(sb, subjectCell);
writeRecon(sb, project, subjectRowIndex, subjectCellIndex, subjectCell);
}
if (objectCell != null) {
if (subjectCell != null) {
sb.append(", ");
}
sb.append("\"orecon\" : ");
writeRecon(sb, objectCell);
writeRecon(sb, project, objectRowIndex, objectCellIndex, objectCell);
}
sb.append(" }");
@ -109,7 +170,10 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
}
}
protected void writeLine(String subject, String predicate, Object object, String lang, Cell subjectCell) {
protected void writeLine(
String subject, String predicate, Object object, String lang,
Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell
) {
if (subject != null && object != null) {
String s = object instanceof String ?
JSONObject.quote((String) object) : object.toString();
@ -123,7 +187,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
if (subjectCell != null) {
sb.append(", \"meta\" : { ");
sb.append("\"srecon\" : ");
writeRecon(sb, subjectCell);
writeRecon(sb, project, subjectRowIndex, subjectCellIndex, subjectCell);
sb.append(" }");
}
sb.append(" }");
@ -133,19 +197,19 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
}
protected interface WritingTransposedNode extends TransposedNode {
public Object write(String subject, String predicate, Cell subjectCell);
public Object write(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell);
}
abstract protected class TransposedNodeWithChildren implements WritingTransposedNode {
public List<FreebaseProperty> properties = new LinkedList<FreebaseProperty>();
public List<WritingTransposedNode> children = new LinkedList<WritingTransposedNode>();
protected void writeChildren(String subject, Cell subjectCell) {
protected void writeChildren(String subject, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
for (int i = 0; i < children.size(); i++) {
WritingTransposedNode child = children.get(i);
String predicate = properties.get(i).id;
child.write(subject, predicate, subjectCell);
child.write(subject, predicate, project, subjectRowIndex, subjectCellIndex, subjectCell);
}
}
}
@ -154,7 +218,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
//protected AnonymousTransposedNode(AnonymousNode node) { }
public Object write(String subject, String predicate, Cell subjectCell) {
public Object write(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
if (children.size() == 0 || subject == null) {
return null;
}
@ -164,7 +228,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
boolean first = true;
for (int i = 0; i < children.size(); i++) {
Object c = children.get(i).write(null, null, null);
Object c = children.get(i).write(null, null, project, subjectRowIndex, subjectCellIndex, null);
if (c != null) {
if (first) {
first = false;
@ -177,7 +241,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
}
sb.append(" }");
writeLine(subject, predicate, sb, subjectCell, null);
writeLine(subject, predicate, sb, project, subjectRowIndex, subjectCellIndex, subjectCell, -1, -1, null);
return null;
}
@ -185,21 +249,29 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
protected class CellTopicTransposedNode extends TransposedNodeWithChildren {
protected CellTopicNode node;
protected int rowIndex;
protected int cellIndex;
protected Cell cell;
public CellTopicTransposedNode(CellTopicNode node, Cell cell) {
public CellTopicTransposedNode(CellTopicNode node, int rowIndex, int cellIndex, Cell cell) {
this.node = node;
this.rowIndex = rowIndex;
this.cellIndex = cellIndex;
this.cell = cell;
}
public Object write(String subject, String predicate, Cell subjectCell) {
public Object write(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
String id = null;
int objectRowIndex = -1;
int objectCellIndex = -1;
Cell objectCell = null;
if (cell.recon != null &&
cell.recon.judgment == Recon.Judgment.Matched &&
cell.recon.match != null) {
objectRowIndex = rowIndex;
objectCellIndex = cellIndex;
objectCell = cell;
id = cell.recon.match.id;
} else if (node.createForNoReconMatch ||
@ -215,8 +287,8 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
id = "$" + node.columnName.replaceAll("\\W+", "_") + "_" + var;
writeLine(id, "type", node.type.id, (Cell) null, (Cell) null);
writeLine(id, "name", cell.value, (Cell) null, (Cell) null);
writeLine(id, "type", node.type.id, project, -1, -1, (Cell) null, -1, -1, (Cell) null);
writeLine(id, "name", cell.value, project, -1, -1, (Cell) null, -1, -1, (Cell) null);
if (cell.recon != null) {
newTopicVars.put(cell.recon.id, id);
@ -227,10 +299,12 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
}
if (subject != null) {
writeLine(subject, predicate, id, subjectCell, objectCell);
writeLine(subject, predicate, id, project,
subjectRowIndex, subjectCellIndex, subjectCell,
objectRowIndex, objectCellIndex, objectCell);
}
writeChildren(id, objectCell);
writeChildren(id, project, objectRowIndex, objectCellIndex, objectCell);
return id;
}
@ -239,19 +313,26 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
protected class CellValueTransposedNode implements WritingTransposedNode {
protected JSONObject obj;
protected CellValueNode node;
protected int rowIndex;
protected int cellIndex;
protected Cell cell;
public CellValueTransposedNode(CellValueNode node, Cell cell) {
public CellValueTransposedNode(CellValueNode node, int rowIndex, int cellIndex, Cell cell) {
this.node = node;
this.rowIndex = rowIndex;
this.cellIndex = cellIndex;
this.cell = cell;
}
public Object write(String subject, String predicate, Cell subjectCell) {
public Object write(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
if (subject != null) {
if ("/type/text".equals(node.lang)) {
writeLine(subject, predicate, cell.value, node.lang, subjectCell);
writeLine(subject, predicate, cell.value, node.lang, project,
subjectRowIndex, subjectCellIndex, subjectCell);
} else {
writeLine(subject, predicate, cell.value, subjectCell, null);
writeLine(subject, predicate, cell.value, project,
subjectRowIndex, subjectCellIndex, subjectCell,
-1, -1, null);
}
}
@ -261,15 +342,21 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
protected class CellKeyTransposedNode implements WritingTransposedNode {
protected CellKeyNode node;
protected int rowIndex;
protected int cellIndex;
protected Cell cell;
public CellKeyTransposedNode(CellKeyNode node, Cell cell) {
public CellKeyTransposedNode(CellKeyNode node, int rowIndex, int cellIndex, Cell cell) {
this.node = node;
this.rowIndex = rowIndex;
this.cellIndex = cellIndex;
this.cell = cell;
}
public Object write(String subject, String predicate, Cell subjectCell) {
writeLine(subject, "key", node.namespace.id + "/" + cell.value, subjectCell, null);
public Object write(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
writeLine(subject, "key", node.namespace.id + "/" + cell.value, project,
subjectRowIndex, subjectCellIndex, subjectCell,
-1, -1, null);
return null;
}
@ -282,9 +369,12 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
this.node = node;
}
public Object write(String subject, String predicate, Cell subjectCell) {
writeLine(subject, predicate, node.topic.id, subjectCell, null);
writeChildren(node.topic.id, null);
public Object write(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
writeLine(subject, predicate, node.topic.id, project,
subjectRowIndex, subjectCellIndex, subjectCell,
-1, -1, null);
writeChildren(node.topic.id, project, -1, -1, null);
return node.topic.id;
}
@ -297,11 +387,14 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
this.node = node;
}
public Object write(String subject, String predicate, Cell subjectCell) {
public Object write(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
if ("/type/text".equals(node.lang)) {
writeLine(subject, predicate, node.value, node.lang, subjectCell);
writeLine(subject, predicate, node.value, node.lang, project,
subjectRowIndex, subjectCellIndex, subjectCell);
} else {
writeLine(subject, predicate, node.value, subjectCell, null);
writeLine(subject, predicate, node.value, project,
subjectRowIndex, subjectCellIndex, subjectCell,
-1, -1, null);
}
return node.value;
@ -311,7 +404,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
public TransposedNode transposeAnonymousNode(
TransposedNode parentNode,
FreebaseProperty property,
AnonymousNode node) {
AnonymousNode node, int rowIndex) {
WritingTransposedNode tnode = new AnonymousTransposedNode();
@ -324,15 +417,19 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
TransposedNode parentNode,
FreebaseProperty property,
CellNode node,
int rowIndex,
Cell cell) {
Column column = project.columnModel.getColumnByName(node.columnName);
int cellIndex = column != null ? column.getCellIndex() : -1;
WritingTransposedNode tnode = null;
if (node instanceof CellTopicNode) {
tnode = new CellTopicTransposedNode((CellTopicNode) node, cell);
tnode = new CellTopicTransposedNode((CellTopicNode) node, rowIndex, cellIndex, cell);
} else if (node instanceof CellValueNode) {
tnode = new CellValueTransposedNode((CellValueNode) node, cell);
tnode = new CellValueTransposedNode((CellValueNode) node, rowIndex, cellIndex, cell);
} else if (node instanceof CellKeyNode) {
tnode = new CellKeyTransposedNode((CellKeyNode) node, cell);
tnode = new CellKeyTransposedNode((CellKeyNode) node, rowIndex, cellIndex, cell);
}
if (tnode != null) {
@ -344,7 +441,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
public TransposedNode transposeTopicNode(
TransposedNode parentNode,
FreebaseProperty property,
FreebaseTopicNode node) {
FreebaseTopicNode node, int rowIndex) {
WritingTransposedNode tnode = new TopicTransposedNode(node);
@ -356,7 +453,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
public TransposedNode transposeValueNode(
TransposedNode parentNode,
FreebaseProperty property,
ValueNode node) {
ValueNode node, int rowIndex) {
WritingTransposedNode tnode = new ValueTransposedNode(node);
@ -383,7 +480,7 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
protected void addRootNode(WritingTransposedNode tnode) {
if (lastRootNode != null) {
lastRootNode.write(null, null, null);
lastRootNode.write(null, null, project, -1, -1, null);
}
lastRootNode = tnode;
}