When generating triple loader payload, assert included, schema, and expected types for existing as well as new topics.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1963 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
53442c5ef2
commit
ca8f64ddc4
@ -0,0 +1,103 @@
|
||||
package com.google.refine.freebase.protograph.transpose;
|
||||
|
||||
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.json.JSONException;
import org.json.JSONObject;

import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class SchemaHelper {
|
||||
static private final String helperURL = "http://2.refine-helper.stefanomazzocchi.user.dev.freebaseapps.com/";
|
||||
|
||||
static private class PropertyInfo {
|
||||
String fromTypeID;
|
||||
String toTypeID;
|
||||
}
|
||||
|
||||
static private class TypeInfo {
|
||||
String[] includedTypes;
|
||||
}
|
||||
|
||||
protected Map<String, PropertyInfo> properties = new HashMap<String, PropertyInfo>();
|
||||
protected Map<String, TypeInfo> types = new HashMap<String, TypeInfo>();
|
||||
|
||||
public String getPropertyFromType(String propertyID) {
|
||||
ensureProperty(propertyID);
|
||||
return properties.get(propertyID).fromTypeID;
|
||||
}
|
||||
public String getPropertyToType(String propertyID) {
|
||||
ensureProperty(propertyID);
|
||||
return properties.get(propertyID).toTypeID;
|
||||
}
|
||||
public String[] getIncludedTypeIDs(String typeID) {
|
||||
ensureType(typeID);
|
||||
return types.get(typeID).includedTypes;
|
||||
}
|
||||
|
||||
private void ensureProperty(String propertyID) {
|
||||
if (properties.containsKey(propertyID)) {
|
||||
return;
|
||||
}
|
||||
|
||||
PropertyInfo info = new PropertyInfo();
|
||||
properties.put(propertyID, info);
|
||||
|
||||
JSONObject obj = getJson(helperURL + "get_property_data" + propertyID);
|
||||
if (obj != null) {
|
||||
try {
|
||||
if (!obj.isNull("from")) {
|
||||
info.fromTypeID = obj.getString("from");
|
||||
}
|
||||
} catch (JSONException e) {
|
||||
}
|
||||
try {
|
||||
if (!obj.isNull("to")) {
|
||||
info.toTypeID = obj.getString("to");
|
||||
}
|
||||
} catch (JSONException e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void ensureType(String typeID) {
|
||||
if (types.containsKey(typeID)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TypeInfo info = new TypeInfo();
|
||||
types.put(typeID, info);
|
||||
|
||||
JSONObject obj = getJson(helperURL + "get_type_data" + typeID);
|
||||
if (obj != null) {
|
||||
if (!obj.isNull("includes")) {
|
||||
info.includedTypes = JSONUtilities.getStringArray(obj, "includes");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private JSONObject getJson(String urlString) {
|
||||
try {
|
||||
URL url = new URL(urlString);
|
||||
InputStream is = url.openStream();
|
||||
try {
|
||||
String s = ParsingUtilities.inputStreamToString(is);
|
||||
return ParsingUtilities.evaluateJsonStringToObject(s);
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
} catch (JSONException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@ -79,6 +79,58 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
||||
protected int contextRefCount = 0;
|
||||
protected JSONObject contextTreeRoot;
|
||||
|
||||
// Source of schema lookups (included type IDs, property from/to type IDs) used by the ensure*Asserted methods.
protected SchemaHelper schemaHelper = new SchemaHelper();
|
||||
|
||||
// For each type ID, the IDs of the recons already recorded as having that type asserted; consulted so a "type" line is written at most once per recon and type.
protected Map<String, Set<Long>> typeIDToAssertedReconIDs = new HashMap<String, Set<Long>>();
|
||||
protected Set<Long> getAssertedReconIDSet(String typeID) {
|
||||
Set<Long> assertedReconIDSet = typeIDToAssertedReconIDs.get(typeID);
|
||||
if (assertedReconIDSet == null) {
|
||||
assertedReconIDSet = new HashSet<Long>();
|
||||
typeIDToAssertedReconIDs.put(typeID, assertedReconIDSet);
|
||||
}
|
||||
return assertedReconIDSet;
|
||||
}
|
||||
protected void ensureOneTypeAsserted(Recon recon, String typeID) {
|
||||
Set<Long> assertedReconIDSet = getAssertedReconIDSet(typeID);
|
||||
if (!assertedReconIDSet.contains(recon.id)) {
|
||||
assertedReconIDSet.add(recon.id);
|
||||
|
||||
String subject = recon.judgment == Judgment.New ? newTopicVars.get(recon.id) : recon.match.id;
|
||||
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("{ \"s\" : \""); sb.append(subject); sb.append('"');
|
||||
sb.append(", \"p\" : \"type\"");
|
||||
sb.append(", \"o\" : \""); sb.append(typeID); sb.append('"');
|
||||
sb.append(" }");
|
||||
|
||||
writeLine(sb.toString());
|
||||
}
|
||||
}
|
||||
protected void ensureAllIncludedTypesAsserted(Recon recon, String typeID) {
|
||||
ensureOneTypeAsserted(recon, typeID);
|
||||
|
||||
String[] includedTypeIDs = schemaHelper.getIncludedTypeIDs(typeID);
|
||||
if (includedTypeIDs != null) {
|
||||
for (String typeID2 : includedTypeIDs) {
|
||||
if (!"/type/object".equals(typeID2)) {
|
||||
ensureOneTypeAsserted(recon, typeID2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
protected void ensureFromTypesAsserted(Recon recon, String propertyID) {
|
||||
String fromTypeID = schemaHelper.getPropertyFromType(propertyID);
|
||||
if (fromTypeID != null) {
|
||||
ensureAllIncludedTypesAsserted(recon, fromTypeID);
|
||||
}
|
||||
}
|
||||
protected void ensureToTypesAsserted(Recon recon, String propertyID) {
|
||||
String toTypeID = schemaHelper.getPropertyToType(propertyID);
|
||||
if (toTypeID != null) {
|
||||
ensureAllIncludedTypesAsserted(recon, toTypeID);
|
||||
}
|
||||
}
|
||||
|
||||
public TripleLoaderTransposedNodeFactory(Project project, Writer writer) {
|
||||
this.project = project;
|
||||
this.writer = writer;
|
||||
@ -274,11 +326,19 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
||||
String subject, Project project,
|
||||
int subjectRowIndex, int subjectCellIndex, Cell subjectCell) {
|
||||
|
||||
Recon recon = subjectCell.recon != null &&
|
||||
(subjectCell.recon.judgment == Judgment.Matched || subjectCell.recon.judgment == Judgment.New)
|
||||
? subjectCell.recon : null;
|
||||
|
||||
for (int i = 0; i < children.size(); i++) {
|
||||
WritingTransposedNode child = children.get(i);
|
||||
Link link = links.get(i);
|
||||
String predicate = link.property.id;
|
||||
|
||||
if (recon != null) {
|
||||
ensureFromTypesAsserted(recon, predicate);
|
||||
}
|
||||
|
||||
child.write(subject, predicate, project,
|
||||
subjectRowIndex, subjectCellIndex, subjectCell);
|
||||
}
|
||||
@ -378,6 +438,14 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
||||
int objectCellIndex = cellIndex;
|
||||
Cell objectCell = cell;
|
||||
|
||||
String typeID = node.type.id;
|
||||
|
||||
Column column = project.columnModel.getColumnByCellIndex(cellIndex);
|
||||
ReconConfig reconConfig = column.getReconConfig();
|
||||
if (reconConfig instanceof StandardReconConfig) {
|
||||
typeID = ((StandardReconConfig) reconConfig).typeID;
|
||||
}
|
||||
|
||||
if (cell.recon.judgment == Recon.Judgment.Matched) {
|
||||
id = cell.recon.match.id;
|
||||
|
||||
@ -385,7 +453,6 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
||||
if (newTopicVars.containsKey(cell.recon.id)) {
|
||||
id = newTopicVars.get(cell.recon.id);
|
||||
} else {
|
||||
Column column = project.columnModel.getColumnByCellIndex(cellIndex);
|
||||
String columnName = column.getName();
|
||||
|
||||
long var = 0;
|
||||
@ -396,25 +463,22 @@ public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory
|
||||
|
||||
id = "$" + columnName.replaceAll("\\W+", "_") + "_" + var;
|
||||
|
||||
String typeID = node.type.id;
|
||||
|
||||
ReconConfig reconConfig = column.getReconConfig();
|
||||
if (reconConfig instanceof StandardReconConfig) {
|
||||
typeID = ((StandardReconConfig) reconConfig).typeID;
|
||||
}
|
||||
|
||||
writeLine(id, "type", typeID, project, rowIndex, cellIndex, cell, -1, -1, (Cell) null, !load);
|
||||
writeLine(id, "name", cell.value, project, -1, -1, (Cell) null, -1, -1, (Cell) null, !load);
|
||||
|
||||
if (cell.recon != null) {
|
||||
newTopicVars.put(cell.recon.id, id);
|
||||
}
|
||||
getAssertedReconIDSet(typeID).add(cell.recon.id);
|
||||
|
||||
newTopicVars.put(cell.recon.id, id);
|
||||
}
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
||||
ensureAllIncludedTypesAsserted(cell.recon, typeID);
|
||||
|
||||
if (subject != null) {
|
||||
ensureToTypesAsserted(cell.recon, predicate);
|
||||
|
||||
writeLine(subject, predicate, id, project,
|
||||
subjectRowIndex, subjectCellIndex, subjectCell,
|
||||
objectRowIndex, objectCellIndex, objectCell, !load);
|
||||
|
Loading…
Reference in New Issue
Block a user