When generating triple loader payload, assert included, schema, and expected types for existing as well as new topics.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1963 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
53442c5ef2
commit
ca8f64ddc4
@ -0,0 +1,103 @@
|
|||||||
|
package com.google.refine.freebase.protograph.transpose;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
|
public class SchemaHelper {
|
||||||
|
static private final String helperURL = "http://2.refine-helper.stefanomazzocchi.user.dev.freebaseapps.com/";
|
||||||
|
|
||||||
|
static private class PropertyInfo {
|
||||||
|
String fromTypeID;
|
||||||
|
String toTypeID;
|
||||||
|
}
|
||||||
|
|
||||||
|
static private class TypeInfo {
|
||||||
|
String[] includedTypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Map<String, PropertyInfo> properties = new HashMap<String, PropertyInfo>();
|
||||||
|
protected Map<String, TypeInfo> types = new HashMap<String, TypeInfo>();
|
||||||
|
|
||||||
|
public String getPropertyFromType(String propertyID) {
|
||||||
|
ensureProperty(propertyID);
|
||||||
|
return properties.get(propertyID).fromTypeID;
|
||||||
|
}
|
||||||
|
public String getPropertyToType(String propertyID) {
|
||||||
|
ensureProperty(propertyID);
|
||||||
|
return properties.get(propertyID).toTypeID;
|
||||||
|
}
|
||||||
|
public String[] getIncludedTypeIDs(String typeID) {
|
||||||
|
ensureType(typeID);
|
||||||
|
return types.get(typeID).includedTypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ensureProperty(String propertyID) {
|
||||||
|
if (properties.containsKey(propertyID)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
PropertyInfo info = new PropertyInfo();
|
||||||
|
properties.put(propertyID, info);
|
||||||
|
|
||||||
|
JSONObject obj = getJson(helperURL + "get_property_data" + propertyID);
|
||||||
|
if (obj != null) {
|
||||||
|
try {
|
||||||
|
if (!obj.isNull("from")) {
|
||||||
|
info.fromTypeID = obj.getString("from");
|
||||||
|
}
|
||||||
|
} catch (JSONException e) {
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
if (!obj.isNull("to")) {
|
||||||
|
info.toTypeID = obj.getString("to");
|
||||||
|
}
|
||||||
|
} catch (JSONException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ensureType(String typeID) {
|
||||||
|
if (types.containsKey(typeID)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
TypeInfo info = new TypeInfo();
|
||||||
|
types.put(typeID, info);
|
||||||
|
|
||||||
|
JSONObject obj = getJson(helperURL + "get_type_data" + typeID);
|
||||||
|
if (obj != null) {
|
||||||
|
if (!obj.isNull("includes")) {
|
||||||
|
info.includedTypes = JSONUtilities.getStringArray(obj, "includes");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private JSONObject getJson(String urlString) {
|
||||||
|
try {
|
||||||
|
URL url = new URL(urlString);
|
||||||
|
InputStream is = url.openStream();
|
||||||
|
try {
|
||||||
|
String s = ParsingUtilities.inputStreamToString(is);
|
||||||
|
return ParsingUtilities.evaluateJsonStringToObject(s);
|
||||||
|
} finally {
|
||||||
|
is.close();
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
@ -79,6 +79,58 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
|||||||
protected int contextRefCount = 0;
|
protected int contextRefCount = 0;
|
||||||
protected JSONObject contextTreeRoot;
|
protected JSONObject contextTreeRoot;
|
||||||
|
|
||||||
|
protected SchemaHelper schemaHelper = new SchemaHelper();
|
||||||
|
|
||||||
|
protected Map<String, Set<Long>> typeIDToAssertedReconIDs = new HashMap<String, Set<Long>>();
|
||||||
|
protected Set<Long> getAssertedReconIDSet(String typeID) {
|
||||||
|
Set<Long> assertedReconIDSet = typeIDToAssertedReconIDs.get(typeID);
|
||||||
|
if (assertedReconIDSet == null) {
|
||||||
|
assertedReconIDSet = new HashSet<Long>();
|
||||||
|
typeIDToAssertedReconIDs.put(typeID, assertedReconIDSet);
|
||||||
|
}
|
||||||
|
return assertedReconIDSet;
|
||||||
|
}
|
||||||
|
protected void ensureOneTypeAsserted(Recon recon, String typeID) {
|
||||||
|
Set<Long> assertedReconIDSet = getAssertedReconIDSet(typeID);
|
||||||
|
if (!assertedReconIDSet.contains(recon.id)) {
|
||||||
|
assertedReconIDSet.add(recon.id);
|
||||||
|
|
||||||
|
String subject = recon.judgment == Judgment.New ? newTopicVars.get(recon.id) : recon.match.id;
|
||||||
|
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
sb.append("{ \"s\" : \""); sb.append(subject); sb.append('"');
|
||||||
|
sb.append(", \"p\" : \"type\"");
|
||||||
|
sb.append(", \"o\" : \""); sb.append(typeID); sb.append('"');
|
||||||
|
sb.append(" }");
|
||||||
|
|
||||||
|
writeLine(sb.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
protected void ensureAllIncludedTypesAsserted(Recon recon, String typeID) {
|
||||||
|
ensureOneTypeAsserted(recon, typeID);
|
||||||
|
|
||||||
|
String[] includedTypeIDs = schemaHelper.getIncludedTypeIDs(typeID);
|
||||||
|
if (includedTypeIDs != null) {
|
||||||
|
for (String typeID2 : includedTypeIDs) {
|
||||||
|
if (!"/type/object".equals(typeID2)) {
|
||||||
|
ensureOneTypeAsserted(recon, typeID2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
protected void ensureFromTypesAsserted(Recon recon, String propertyID) {
|
||||||
|
String fromTypeID = schemaHelper.getPropertyFromType(propertyID);
|
||||||
|
if (fromTypeID != null) {
|
||||||
|
ensureAllIncludedTypesAsserted(recon, fromTypeID);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
protected void ensureToTypesAsserted(Recon recon, String propertyID) {
|
||||||
|
String toTypeID = schemaHelper.getPropertyToType(propertyID);
|
||||||
|
if (toTypeID != null) {
|
||||||
|
ensureAllIncludedTypesAsserted(recon, toTypeID);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public TripleLoaderTransposedNodeFactory(Project project, Writer writer) {
|
public TripleLoaderTransposedNodeFactory(Project project, Writer writer) {
|
||||||
this.project = project;
|
this.project = project;
|
||||||
this.writer = writer;
|
this.writer = writer;
|
||||||
@ -274,11 +326,19 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
|||||||
String subject, Project project,
|
String subject, Project project,
|
||||||
int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
|
int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
|
||||||
|
|
||||||
|
Recon recon = subjectCell.recon != null &&
|
||||||
|
(subjectCell.recon.judgment == Judgment.Matched || subjectCell.recon.judgment == Judgment.New)
|
||||||
|
? subjectCell.recon : null;
|
||||||
|
|
||||||
for (int i = 0; i < children.size(); i++) {
|
for (int i = 0; i < children.size(); i++) {
|
||||||
WritingTransposedNode child = children.get(i);
|
WritingTransposedNode child = children.get(i);
|
||||||
Link link = links.get(i);
|
Link link = links.get(i);
|
||||||
String predicate = link.property.id;
|
String predicate = link.property.id;
|
||||||
|
|
||||||
|
if (recon != null) {
|
||||||
|
ensureFromTypesAsserted(recon, predicate);
|
||||||
|
}
|
||||||
|
|
||||||
child.write(subject, predicate, project,
|
child.write(subject, predicate, project,
|
||||||
subjectRowIndex, subjectCellIndex, subjectCell);
|
subjectRowIndex, subjectCellIndex, subjectCell);
|
||||||
}
|
}
|
||||||
@ -378,6 +438,14 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
|||||||
int objectCellIndex = cellIndex;
|
int objectCellIndex = cellIndex;
|
||||||
Cell objectCell = cell;
|
Cell objectCell = cell;
|
||||||
|
|
||||||
|
String typeID = node.type.id;
|
||||||
|
|
||||||
|
Column column = project.columnModel.getColumnByCellIndex(cellIndex);
|
||||||
|
ReconConfig reconConfig = column.getReconConfig();
|
||||||
|
if (reconConfig instanceof StandardReconConfig) {
|
||||||
|
typeID = ((StandardReconConfig) reconConfig).typeID;
|
||||||
|
}
|
||||||
|
|
||||||
if (cell.recon.judgment == Recon.Judgment.Matched) {
|
if (cell.recon.judgment == Recon.Judgment.Matched) {
|
||||||
id = cell.recon.match.id;
|
id = cell.recon.match.id;
|
||||||
|
|
||||||
@ -385,7 +453,6 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
|||||||
if (newTopicVars.containsKey(cell.recon.id)) {
|
if (newTopicVars.containsKey(cell.recon.id)) {
|
||||||
id = newTopicVars.get(cell.recon.id);
|
id = newTopicVars.get(cell.recon.id);
|
||||||
} else {
|
} else {
|
||||||
Column column = project.columnModel.getColumnByCellIndex(cellIndex);
|
|
||||||
String columnName = column.getName();
|
String columnName = column.getName();
|
||||||
|
|
||||||
long var = 0;
|
long var = 0;
|
||||||
@ -396,25 +463,22 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
|||||||
|
|
||||||
id = "$" + columnName.replaceAll("\\W+", "_") + "_" + var;
|
id = "$" + columnName.replaceAll("\\W+", "_") + "_" + var;
|
||||||
|
|
||||||
String typeID = node.type.id;
|
|
||||||
|
|
||||||
ReconConfig reconConfig = column.getReconConfig();
|
|
||||||
if (reconConfig instanceof StandardReconConfig) {
|
|
||||||
typeID = ((StandardReconConfig) reconConfig).typeID;
|
|
||||||
}
|
|
||||||
|
|
||||||
writeLine(id, "type", typeID, project, rowIndex, cellIndex, cell, -1, -1, (Cell) null, !load);
|
writeLine(id, "type", typeID, project, rowIndex, cellIndex, cell, -1, -1, (Cell) null, !load);
|
||||||
writeLine(id, "name", cell.value, project, -1, -1, (Cell) null, -1, -1, (Cell) null, !load);
|
writeLine(id, "name", cell.value, project, -1, -1, (Cell) null, -1, -1, (Cell) null, !load);
|
||||||
|
|
||||||
if (cell.recon != null) {
|
getAssertedReconIDSet(typeID).add(cell.recon.id);
|
||||||
newTopicVars.put(cell.recon.id, id);
|
|
||||||
}
|
newTopicVars.put(cell.recon.id, id);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ensureAllIncludedTypesAsserted(cell.recon, typeID);
|
||||||
|
|
||||||
if (subject != null) {
|
if (subject != null) {
|
||||||
|
ensureToTypesAsserted(cell.recon, predicate);
|
||||||
|
|
||||||
writeLine(subject, predicate, id, project,
|
writeLine(subject, predicate, id, project,
|
||||||
subjectRowIndex, subjectCellIndex, subjectCell,
|
subjectRowIndex, subjectCellIndex, subjectCell,
|
||||||
objectRowIndex, objectCellIndex, objectCell, !load);
|
objectRowIndex, objectCellIndex, objectCell, !load);
|
||||||
|
Loading…
Reference in New Issue
Block a user