2010-03-13 08:13:18 +01:00
|
|
|
/**
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
package com.metaweb.gridworks.util;
|
|
|
|
|
|
|
|
import java.io.DataOutputStream;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.Serializable;
|
|
|
|
import java.io.StringWriter;
|
|
|
|
import java.io.Writer;
|
|
|
|
import java.net.URL;
|
|
|
|
import java.net.URLConnection;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Set;
|
|
|
|
|
|
|
|
import org.json.JSONArray;
|
|
|
|
import org.json.JSONException;
|
|
|
|
import org.json.JSONObject;
|
|
|
|
import org.json.JSONWriter;
|
|
|
|
|
|
|
|
import com.metaweb.gridworks.model.ReconCandidate;
|
|
|
|
|
|
|
|
public class FreebaseDataExtensionJob {
|
|
|
|
static public class DataExtension {
|
|
|
|
final public Object[][] data;
|
|
|
|
|
2010-03-16 01:24:20 +01:00
|
|
|
public DataExtension(Object[][] data) {
|
2010-03-13 08:13:18 +01:00
|
|
|
this.data = data;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-03-15 20:44:33 +01:00
|
|
|
static public class ColumnInfo {
|
|
|
|
final public List<String> names;
|
|
|
|
final public List<String> path;
|
|
|
|
|
|
|
|
protected ColumnInfo(List<String> names, List<String> path) {
|
|
|
|
this.names = names;
|
|
|
|
this.path = path;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
final public JSONObject extension;
|
|
|
|
final public int columnCount;
|
|
|
|
final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
|
2010-03-13 08:13:18 +01:00
|
|
|
|
|
|
|
/**
 * Creates a job for the given extension specification and works out its
 * column layout up front.
 *
 * @param obj extension specification; its "properties" array, when present
 *            and non-null, is walked to count and describe the columns
 * @throws JSONException if the specification cannot be read as expected
 */
public FreebaseDataExtensionJob(JSONObject obj) throws JSONException {
    this.extension = obj;

    if (obj.has("properties") && !obj.isNull("properties")) {
        this.columnCount = countColumns(
            obj.getJSONArray("properties"),
            columns,
            new ArrayList<String>(),
            new ArrayList<String>()
        );
    } else {
        // Nothing to extend with: no columns at all.
        this.columnCount = 0;
    }
}
|
|
|
|
|
|
|
|
public Map<String, FreebaseDataExtensionJob.DataExtension> extend(Set<String> guids) throws Exception {
|
|
|
|
StringWriter writer = new StringWriter();
|
|
|
|
formulateQuery(guids, extension, writer);
|
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
InputStream is = doMqlRead(writer.toString());
|
2010-03-13 08:13:18 +01:00
|
|
|
try {
|
|
|
|
String s = ParsingUtilities.inputStreamToString(is);
|
|
|
|
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s);
|
|
|
|
|
|
|
|
Map<String, FreebaseDataExtensionJob.DataExtension> map = new HashMap<String, FreebaseDataExtensionJob.DataExtension>();
|
|
|
|
JSONArray a = o.getJSONArray("result");
|
|
|
|
int l = a.length();
|
|
|
|
|
|
|
|
for (int i = 0; i < l; i++) {
|
|
|
|
JSONObject o2 = a.getJSONObject(i);
|
|
|
|
String guid = o2.getString("guid");
|
|
|
|
FreebaseDataExtensionJob.DataExtension ext = collectResult(o2);
|
|
|
|
|
|
|
|
if (ext != null) {
|
|
|
|
map.put(guid, ext);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return map;
|
|
|
|
} finally {
|
|
|
|
is.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
protected FreebaseDataExtensionJob.DataExtension collectResult(JSONObject obj) throws JSONException {
|
|
|
|
List<Object[]> rows = new ArrayList<Object[]>();
|
|
|
|
|
2010-03-13 09:08:25 +01:00
|
|
|
collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0);
|
2010-03-13 08:13:18 +01:00
|
|
|
|
|
|
|
Object[][] data = new Object[rows.size()][columnCount];
|
|
|
|
rows.toArray(data);
|
|
|
|
|
|
|
|
return new DataExtension(data);
|
|
|
|
}
|
|
|
|
|
|
|
|
protected void storeCell(
|
|
|
|
List<Object[]> rows,
|
|
|
|
int row,
|
|
|
|
int col,
|
|
|
|
Object value
|
|
|
|
) {
|
|
|
|
while (row >= rows.size()) {
|
|
|
|
rows.add(new Object[columnCount]);
|
|
|
|
}
|
|
|
|
rows.get(row)[col] = value;
|
|
|
|
}
|
|
|
|
|
|
|
|
protected void storeCell(
|
|
|
|
List<Object[]> rows,
|
|
|
|
int row,
|
|
|
|
int col,
|
|
|
|
JSONObject obj
|
|
|
|
) throws JSONException {
|
|
|
|
storeCell(rows, row, col,
|
|
|
|
new ReconCandidate(
|
|
|
|
obj.getString("id"),
|
|
|
|
obj.getString("guid"),
|
|
|
|
obj.getString("name"),
|
|
|
|
JSONUtilities.getStringArray(obj, "type"),
|
|
|
|
100
|
|
|
|
)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
protected int[] collectResult(
|
|
|
|
List<Object[]> rows,
|
|
|
|
JSONObject extNode,
|
|
|
|
JSONObject resultNode,
|
|
|
|
int startRowIndex,
|
|
|
|
int startColumnIndex
|
|
|
|
) throws JSONException {
|
|
|
|
String propertyID = extNode.getString("id");
|
|
|
|
String expectedTypeID = extNode.getString("expected");
|
|
|
|
|
|
|
|
JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ?
|
|
|
|
resultNode.getJSONArray(propertyID) : null;
|
|
|
|
|
|
|
|
if (expectedTypeID.startsWith("/type/")) {
|
|
|
|
if (a != null) {
|
|
|
|
int l = a.length();
|
|
|
|
for (int r = 0; r < l; r++) {
|
|
|
|
Object o = a.isNull(r) ? null : a.get(r);
|
|
|
|
if (o instanceof Serializable) {
|
|
|
|
storeCell(rows, startRowIndex++, startColumnIndex, o);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// note that we still take up a column even if we don't have any data
|
|
|
|
return new int[] { startRowIndex, startColumnIndex + 1 };
|
|
|
|
} else {
|
|
|
|
boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties"));
|
|
|
|
boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included"));
|
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
if (a != null && a.length() > 0) {
|
2010-03-13 08:13:18 +01:00
|
|
|
int maxColIndex = startColumnIndex;
|
|
|
|
|
|
|
|
int l = a.length();
|
|
|
|
for (int r = 0; r < l; r++) {
|
|
|
|
JSONObject o = a.isNull(r) ? null : a.getJSONObject(r);
|
|
|
|
|
|
|
|
int startColumnIndex2 = startColumnIndex;
|
|
|
|
int startRowIndex2 = startRowIndex;
|
|
|
|
|
|
|
|
if (isOwnColumn) {
|
|
|
|
storeCell(rows, startRowIndex2++, startColumnIndex2++, o);
|
|
|
|
}
|
|
|
|
|
2010-03-13 09:08:25 +01:00
|
|
|
if (hasSubProperties) {
|
|
|
|
int[] rowcol = collectResult(
|
|
|
|
rows,
|
|
|
|
extNode.getJSONArray("properties"),
|
|
|
|
o,
|
|
|
|
startRowIndex,
|
|
|
|
startColumnIndex2
|
|
|
|
);
|
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
startRowIndex2 = rowcol[0];
|
|
|
|
startColumnIndex2 = rowcol[1];
|
2010-03-13 09:08:25 +01:00
|
|
|
}
|
2010-03-14 21:55:57 +01:00
|
|
|
|
|
|
|
startRowIndex = startRowIndex2;
|
|
|
|
maxColIndex = Math.max(maxColIndex, startColumnIndex2);
|
2010-03-13 08:13:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return new int[] { startRowIndex, maxColIndex };
|
|
|
|
} else {
|
|
|
|
return new int[] {
|
|
|
|
startRowIndex,
|
2010-03-15 20:44:33 +01:00
|
|
|
startColumnIndex + countColumns(extNode, null, new ArrayList<String>(), new ArrayList<String>())
|
2010-03-13 08:13:18 +01:00
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
protected int[] collectResult(
|
|
|
|
List<Object[]> rows,
|
|
|
|
JSONArray subProperties,
|
|
|
|
JSONObject resultNode,
|
|
|
|
int startRowIndex,
|
|
|
|
int startColumnIndex
|
|
|
|
) throws JSONException {
|
|
|
|
int maxStartRowIndex = startRowIndex;
|
|
|
|
|
|
|
|
int k = subProperties.length();
|
|
|
|
for (int c = 0; c < k; c++) {
|
|
|
|
int[] rowcol = collectResult(
|
|
|
|
rows,
|
|
|
|
subProperties.getJSONObject(c),
|
|
|
|
resultNode,
|
|
|
|
startRowIndex,
|
|
|
|
startColumnIndex
|
|
|
|
);
|
|
|
|
|
|
|
|
maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]);
|
|
|
|
startColumnIndex = rowcol[1];
|
|
|
|
}
|
|
|
|
|
|
|
|
return new int[] { maxStartRowIndex, startColumnIndex };
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
static protected InputStream doMqlRead(String query) throws IOException {
|
|
|
|
URL url = new URL("http://api.freebase.com/api/service/mqlread");
|
|
|
|
|
|
|
|
URLConnection connection = url.openConnection();
|
|
|
|
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
|
|
|
|
connection.setConnectTimeout(5000);
|
|
|
|
connection.setDoOutput(true);
|
|
|
|
|
|
|
|
DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
|
|
|
|
try {
|
|
|
|
String body = "extended=1&query=" + ParsingUtilities.encode(query);
|
|
|
|
|
|
|
|
dos.writeBytes(body);
|
|
|
|
} finally {
|
|
|
|
dos.flush();
|
|
|
|
dos.close();
|
|
|
|
}
|
|
|
|
|
|
|
|
connection.connect();
|
2010-03-13 08:13:18 +01:00
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
return connection.getInputStream();
|
2010-03-13 08:13:18 +01:00
|
|
|
}
|
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
static protected void formulateQuery(Set<String> guids, JSONObject node, Writer writer) throws JSONException {
|
|
|
|
JSONWriter jsonWriter = new JSONWriter(writer);
|
2010-03-13 08:13:18 +01:00
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
jsonWriter.object();
|
|
|
|
jsonWriter.key("query");
|
|
|
|
jsonWriter.array();
|
|
|
|
jsonWriter.object();
|
|
|
|
|
|
|
|
jsonWriter.key("guid"); jsonWriter.value(null);
|
|
|
|
jsonWriter.key("guid|=");
|
|
|
|
jsonWriter.array();
|
|
|
|
for (String guid : guids) {
|
|
|
|
jsonWriter.value(guid);
|
|
|
|
}
|
|
|
|
jsonWriter.endArray();
|
|
|
|
|
|
|
|
formulateQueryNode(node.getJSONArray("properties"), jsonWriter);
|
|
|
|
|
|
|
|
jsonWriter.endObject();
|
|
|
|
jsonWriter.endArray();
|
2010-03-13 08:13:18 +01:00
|
|
|
jsonWriter.endObject();
|
2010-03-14 21:55:57 +01:00
|
|
|
}
|
2010-03-13 08:13:18 +01:00
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
static protected void formulateQueryNode(JSONObject node, JSONWriter writer) throws JSONException {
|
|
|
|
String propertyID = node.getString("id");
|
|
|
|
String expectedTypeID = node.getString("expected");
|
|
|
|
|
|
|
|
writer.key(propertyID);
|
|
|
|
writer.array();
|
|
|
|
{
|
|
|
|
if (!expectedTypeID.startsWith("/type/")) { // not literal
|
|
|
|
writer.object();
|
|
|
|
writer.key("limit"); writer.value(10);
|
|
|
|
writer.key("optional"); writer.value(true);
|
|
|
|
{
|
|
|
|
boolean hasSubProperties = (node.has("properties") && !node.isNull("properties"));
|
|
|
|
|
|
|
|
if (!hasSubProperties || (node.has("included") && node.getBoolean("included"))) {
|
|
|
|
writer.key("name"); writer.value(null);
|
|
|
|
writer.key("id"); writer.value(null);
|
|
|
|
writer.key("guid"); writer.value(null);
|
|
|
|
writer.key("type"); writer.array(); writer.endArray();
|
|
|
|
}
|
2010-03-13 08:13:18 +01:00
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
if (hasSubProperties) {
|
|
|
|
formulateQueryNode(node.getJSONArray("properties"), writer);
|
|
|
|
}
|
2010-03-13 08:13:18 +01:00
|
|
|
}
|
2010-03-14 21:55:57 +01:00
|
|
|
writer.endObject();
|
2010-03-13 08:13:18 +01:00
|
|
|
}
|
|
|
|
}
|
2010-03-14 21:55:57 +01:00
|
|
|
writer.endArray();
|
2010-03-13 08:13:18 +01:00
|
|
|
}
|
2010-03-13 09:08:25 +01:00
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
static protected void formulateQueryNode(JSONArray propertiesA, JSONWriter writer) throws JSONException {
|
|
|
|
int l = propertiesA.length();
|
|
|
|
|
|
|
|
for (int i = 0; i < l; i++) {
|
|
|
|
formulateQueryNode(propertiesA.getJSONObject(i), writer);
|
2010-03-13 08:13:18 +01:00
|
|
|
}
|
2010-03-14 21:55:57 +01:00
|
|
|
}
|
|
|
|
|
2010-03-15 20:44:33 +01:00
|
|
|
static protected int countColumns(JSONObject obj, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
|
|
|
|
String name = obj.getString("name");
|
|
|
|
|
|
|
|
List<String> names2 = null;
|
|
|
|
List<String> path2 = null;
|
|
|
|
if (columns != null) {
|
|
|
|
names2 = new ArrayList<String>(names);
|
|
|
|
names2.add(name);
|
|
|
|
|
|
|
|
path2 = new ArrayList<String>(path);
|
|
|
|
path2.add(obj.getString("id"));
|
|
|
|
}
|
|
|
|
|
2010-03-14 21:55:57 +01:00
|
|
|
if (obj.has("properties") && !obj.isNull("properties")) {
|
|
|
|
boolean included = (obj.has("included") && obj.getBoolean("included"));
|
2010-03-15 20:44:33 +01:00
|
|
|
if (included && columns != null) {
|
|
|
|
columns.add(new ColumnInfo(names2, path2));
|
2010-03-14 21:55:57 +01:00
|
|
|
}
|
2010-03-15 20:44:33 +01:00
|
|
|
|
|
|
|
return (included ? 1 : 0) +
|
|
|
|
countColumns(obj.getJSONArray("properties"), columns, names2, path2);
|
2010-03-14 21:55:57 +01:00
|
|
|
} else {
|
2010-03-15 20:44:33 +01:00
|
|
|
if (columns != null) {
|
|
|
|
columns.add(new ColumnInfo(names2, path2));
|
2010-03-14 21:55:57 +01:00
|
|
|
}
|
|
|
|
return 1;
|
2010-03-13 08:13:18 +01:00
|
|
|
}
|
|
|
|
}
|
2010-03-14 21:55:57 +01:00
|
|
|
|
2010-03-15 20:44:33 +01:00
|
|
|
static protected int countColumns(JSONArray a, List<ColumnInfo> columns, List<String> names, List<String> path) throws JSONException {
|
2010-03-14 21:55:57 +01:00
|
|
|
int c = 0;
|
|
|
|
int l = a.length();
|
|
|
|
for (int i = 0; i < l; i++) {
|
2010-03-15 20:44:33 +01:00
|
|
|
c += countColumns(a.getJSONObject(i), columns, names, path);
|
2010-03-14 21:55:57 +01:00
|
|
|
}
|
|
|
|
return c;
|
2010-03-13 08:13:18 +01:00
|
|
|
}
|
|
|
|
}
|