When generating triple loader payload, assert included, schema, and expected types for existing as well as new topics.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@1963 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-12-27 22:57:04 +00:00
parent 53442c5ef2
commit ca8f64ddc4
2 changed files with 178 additions and 11 deletions

View File

@ -0,0 +1,103 @@
package com.google.refine.freebase.protograph.transpose;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import org.json.JSONException;
import org.json.JSONObject;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
/**
 * Looks up schema details (the "from" and "to" types of a property, and the
 * types included by a type) from the remote refine-helper service and caches
 * every answer in memory for the lifetime of this instance.
 *
 * <p>An entry is stored in the cache before the remote call is made, so a
 * failed lookup is remembered as "nothing known" and is not retried. No
 * synchronization is performed by this class.
 */
public class SchemaHelper {
    private static final String helperURL = "http://2.refine-helper.stefanomazzocchi.user.dev.freebaseapps.com/";

    /** Cached data for one property; a field stays null when the helper supplied no value. */
    private static class PropertyInfo {
        String fromTypeID;
        String toTypeID;
    }

    /** Cached data for one type; includedTypes stays null when the helper supplied none. */
    private static class TypeInfo {
        String[] includedTypes;
    }

    /** Property ID to cached property data. */
    protected Map<String, PropertyInfo> properties = new HashMap<String, PropertyInfo>();

    /** Type ID to cached type data. */
    protected Map<String, TypeInfo> types = new HashMap<String, TypeInfo>();

    /**
     * Returns the "from" type of a property, fetching it on first use.
     *
     * @param propertyID ID of the property; appended verbatim to the helper URL
     * @return the "from" type ID, or null when none could be obtained
     */
    public String getPropertyFromType(String propertyID) {
        ensureProperty(propertyID);
        return properties.get(propertyID).fromTypeID;
    }

    /**
     * Returns the "to" type of a property, fetching it on first use.
     *
     * @param propertyID ID of the property; appended verbatim to the helper URL
     * @return the "to" type ID, or null when none could be obtained
     */
    public String getPropertyToType(String propertyID) {
        ensureProperty(propertyID);
        return properties.get(propertyID).toTypeID;
    }

    /**
     * Returns the IDs of the types included by a type, fetching them on first use.
     *
     * @param typeID ID of the type; appended verbatim to the helper URL
     * @return the cached array of included type IDs, or null when none could be obtained
     */
    public String[] getIncludedTypeIDs(String typeID) {
        ensureType(typeID);
        return types.get(typeID).includedTypes;
    }

    /** Populates the cache entry for a property unless one already exists. */
    private void ensureProperty(String propertyID) {
        if (properties.containsKey(propertyID)) {
            return;
        }

        // Register the entry first so that a failed fetch is not attempted again.
        PropertyInfo info = new PropertyInfo();
        properties.put(propertyID, info);

        JSONObject obj = getJson(helperURL + "get_property_data" + propertyID);
        if (obj != null) {
            info.fromTypeID = readOptionalString(obj, "from");
            info.toTypeID = readOptionalString(obj, "to");
        }
    }

    /** Populates the cache entry for a type unless one already exists. */
    private void ensureType(String typeID) {
        if (types.containsKey(typeID)) {
            return;
        }

        // Register the entry first so that a failed fetch is not attempted again.
        TypeInfo info = new TypeInfo();
        types.put(typeID, info);

        JSONObject obj = getJson(helperURL + "get_type_data" + typeID);
        if (obj != null && !obj.isNull("includes")) {
            info.includedTypes = JSONUtilities.getStringArray(obj, "includes");
        }
    }

    /**
     * Reads a string member of a JSON object.
     *
     * @return the member's string value, or null when the member is absent,
     *         JSON null, or cannot be read as a string
     */
    private static String readOptionalString(JSONObject obj, String key) {
        try {
            if (!obj.isNull(key)) {
                return obj.getString(key);
            }
        } catch (JSONException e) {
            // Report the problem the same way getJson does instead of dropping it silently.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Downloads the given URL and parses the response as a JSON object.
     *
     * @return the parsed object, or null when the download or the parsing failed
     */
    private JSONObject getJson(String urlString) {
        try {
            URL url = new URL(urlString);
            InputStream is = url.openStream();
            try {
                String s = ParsingUtilities.inputStreamToString(is);
                return ParsingUtilities.evaluateJsonStringToObject(s);
            } finally {
                is.close();
            }
        } catch (IOException e) {
            // Best effort: the caller treats null as "no data available".
            e.printStackTrace();
        } catch (JSONException e) {
            // Best effort: the caller treats null as "no data available".
            e.printStackTrace();
        }
        return null;
    }
}

View File

@ -79,6 +79,58 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
protected int contextRefCount = 0; protected int contextRefCount = 0;
protected JSONObject contextTreeRoot; protected JSONObject contextTreeRoot;
// Source of the property from/to types and included types used by the type assertions below.
protected SchemaHelper schemaHelper = new SchemaHelper();
// For each type ID, the IDs of the recons for which that type has already been recorded as asserted.
protected Map<String, Set<Long>> typeIDToAssertedReconIDs = new HashMap<String, Set<Long>>();
/**
 * Returns the set of recon IDs recorded for the given type ID, creating and
 * registering an empty set the first time the type ID is seen.
 */
protected Set<Long> getAssertedReconIDSet(String typeID) {
    Set<Long> reconIDs = typeIDToAssertedReconIDs.get(typeID);
    if (reconIDs != null) {
        return reconIDs;
    }

    reconIDs = new HashSet<Long>();
    typeIDToAssertedReconIDs.put(typeID, reconIDs);
    return reconIDs;
}
/**
 * Writes a "type" triple for the recon's topic unless one has already been
 * recorded for this recon and type ID.
 *
 * The subject is the new-topic variable registered for the recon when its
 * judgment is New, and the matched topic's ID otherwise.
 */
protected void ensureOneTypeAsserted(Recon recon, String typeID) {
    // add() returns false when the recon ID was already present.
    if (!getAssertedReconIDSet(typeID).add(recon.id)) {
        return;
    }

    String subject;
    if (recon.judgment == Judgment.New) {
        subject = newTopicVars.get(recon.id);
    } else {
        subject = recon.match.id;
    }

    StringBuilder line = new StringBuilder();
    line.append("{ \"s\" : \"").append(subject).append('"');
    line.append(", \"p\" : \"type\"");
    line.append(", \"o\" : \"").append(typeID).append('"');
    line.append(" }");

    writeLine(line.toString());
}
/**
 * Asserts the given type for the recon, then every type the schema helper
 * reports as included by it, skipping "/type/object".
 */
protected void ensureAllIncludedTypesAsserted(Recon recon, String typeID) {
    ensureOneTypeAsserted(recon, typeID);

    String[] included = schemaHelper.getIncludedTypeIDs(typeID);
    if (included == null) {
        return;
    }

    for (int i = 0; i < included.length; i++) {
        String includedTypeID = included[i];
        if ("/type/object".equals(includedTypeID)) {
            continue;
        }
        ensureOneTypeAsserted(recon, includedTypeID);
    }
}
/**
 * Asserts, for the recon, the "from" type of the given property together
 * with its included types; does nothing when no "from" type is known.
 */
protected void ensureFromTypesAsserted(Recon recon, String propertyID) {
    String sourceTypeID = schemaHelper.getPropertyFromType(propertyID);
    if (sourceTypeID == null) {
        return;
    }
    ensureAllIncludedTypesAsserted(recon, sourceTypeID);
}
/**
 * Asserts, for the recon, the "to" type of the given property together
 * with its included types; does nothing when no "to" type is known.
 */
protected void ensureToTypesAsserted(Recon recon, String propertyID) {
    String targetTypeID = schemaHelper.getPropertyToType(propertyID);
    if (targetTypeID == null) {
        return;
    }
    ensureAllIncludedTypesAsserted(recon, targetTypeID);
}
public TripleLoaderTransposedNodeFactory(Project project, Writer writer) { public TripleLoaderTransposedNodeFactory(Project project, Writer writer) {
this.project = project; this.project = project;
this.writer = writer; this.writer = writer;
@ -274,11 +326,19 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
String subject, Project project, String subject, Project project,
int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
Recon recon = subjectCell.recon != null &&
(subjectCell.recon.judgment == Judgment.Matched || subjectCell.recon.judgment == Judgment.New)
? subjectCell.recon : null;
for (int i = 0; i < children.size(); i++) { for (int i = 0; i < children.size(); i++) {
WritingTransposedNode child = children.get(i); WritingTransposedNode child = children.get(i);
Link link = links.get(i); Link link = links.get(i);
String predicate = link.property.id; String predicate = link.property.id;
if (recon != null) {
ensureFromTypesAsserted(recon, predicate);
}
child.write(subject, predicate, project, child.write(subject, predicate, project,
subjectRowIndex, subjectCellIndex, subjectCell); subjectRowIndex, subjectCellIndex, subjectCell);
} }
@ -378,6 +438,14 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
int objectCellIndex = cellIndex; int objectCellIndex = cellIndex;
Cell objectCell = cell; Cell objectCell = cell;
String typeID = node.type.id;
Column column = project.columnModel.getColumnByCellIndex(cellIndex);
ReconConfig reconConfig = column.getReconConfig();
if (reconConfig instanceof StandardReconConfig) {
typeID = ((StandardReconConfig) reconConfig).typeID;
}
if (cell.recon.judgment == Recon.Judgment.Matched) { if (cell.recon.judgment == Recon.Judgment.Matched) {
id = cell.recon.match.id; id = cell.recon.match.id;
@ -385,7 +453,6 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
if (newTopicVars.containsKey(cell.recon.id)) { if (newTopicVars.containsKey(cell.recon.id)) {
id = newTopicVars.get(cell.recon.id); id = newTopicVars.get(cell.recon.id);
} else { } else {
Column column = project.columnModel.getColumnByCellIndex(cellIndex);
String columnName = column.getName(); String columnName = column.getName();
long var = 0; long var = 0;
@ -396,25 +463,22 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
id = "$" + columnName.replaceAll("\\W+", "_") + "_" + var; id = "$" + columnName.replaceAll("\\W+", "_") + "_" + var;
String typeID = node.type.id;
ReconConfig reconConfig = column.getReconConfig();
if (reconConfig instanceof StandardReconConfig) {
typeID = ((StandardReconConfig) reconConfig).typeID;
}
writeLine(id, "type", typeID, project, rowIndex, cellIndex, cell, -1, -1, (Cell) null, !load); writeLine(id, "type", typeID, project, rowIndex, cellIndex, cell, -1, -1, (Cell) null, !load);
writeLine(id, "name", cell.value, project, -1, -1, (Cell) null, -1, -1, (Cell) null, !load); writeLine(id, "name", cell.value, project, -1, -1, (Cell) null, -1, -1, (Cell) null, !load);
if (cell.recon != null) { getAssertedReconIDSet(typeID).add(cell.recon.id);
newTopicVars.put(cell.recon.id, id);
} newTopicVars.put(cell.recon.id, id);
} }
} else { } else {
return null; return null;
} }
ensureAllIncludedTypesAsserted(cell.recon, typeID);
if (subject != null) { if (subject != null) {
ensureToTypesAsserted(cell.recon, predicate);
writeLine(subject, predicate, id, project, writeLine(subject, predicate, id, project,
subjectRowIndex, subjectCellIndex, subjectCell, subjectRowIndex, subjectCellIndex, subjectCell,
objectRowIndex, objectCellIndex, objectCell, !load); objectRowIndex, objectCellIndex, objectCell, !load);