Switch Add from Freebase to Google APIs - part of #696

- switch to use the Google Java client instead of handrolled requests
This commit is contained in:
Tom Morris 2013-03-12 17:38:18 -04:00
parent 757c3e32ff
commit 05968e37fa
11 changed files with 190 additions and 40 deletions

View File

@ -71,5 +71,10 @@
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/signpost-commonshttp4-1.2.1.2.jar" sourcepath="main/webapp/WEB-INF/lib-src/signpost-commonshttp4-1.2.1.2-sources.jar"/> <classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/signpost-commonshttp4-1.2.1.2.jar" sourcepath="main/webapp/WEB-INF/lib-src/signpost-commonshttp4-1.2.1.2-sources.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/signpost-core-1.2.1.2.jar" sourcepath="main/webapp/WEB-INF/lib-src/signpost-core-1.2.1.2-sources.jar"/> <classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/signpost-core-1.2.1.2.jar" sourcepath="main/webapp/WEB-INF/lib-src/signpost-core-1.2.1.2-sources.jar"/>
<classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/guava-13.0.jar"/> <classpathentry exported="true" kind="lib" path="main/webapp/WEB-INF/lib/guava-13.0.jar"/>
<classpathentry exported="true" kind="lib" path="extensions/freebase/module/MOD-INF/lib/google-api-client-1.13.2-beta.jar" sourcepath="extensions/freebase/module/MOD-INF/libsrc/google-api-client-1.13.2-beta-sources.jar"/>
<classpathentry exported="true" kind="lib" path="extensions/freebase/module/MOD-INF/lib/google-http-client-1.13.1-beta.jar"/>
<classpathentry exported="true" kind="lib" path="extensions/freebase/module/MOD-INF/lib/google-api-services-freebase-v1-rev25-1.13.2-beta.jar" sourcepath="extensions/freebase/module/MOD-INF/libsrc/google-api-services-freebase-v1-rev25-1.13.2-beta-sources.jar"/>
<classpathentry exported="true" kind="lib" path="extensions/freebase/module/MOD-INF/lib/google-http-client-jackson-1.13.1-beta.jar"/>
<classpathentry exported="true" kind="lib" path="extensions/freebase/module/MOD-INF/lib/mail.jar"/>
<classpathentry kind="output" path="build"/> <classpathentry kind="output" path="build"/>
</classpath> </classpath>

Binary file not shown.

View File

@ -1,6 +1,6 @@
/* /*
Copyright 2010, Google Inc. Copyright 2010,2013 Google Inc. and contributors
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -36,25 +36,44 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
package com.google.refine.freebase.util; package com.google.refine.freebase.util;
import java.io.DataOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringWriter; import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer; import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URL; import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import javax.mail.MessagingException;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMultipart;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.message.BasicNameValuePair;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.json.JSONWriter; import org.json.JSONWriter;
import com.google.api.client.googleapis.batch.json.JsonBatchCallback;
import com.google.api.client.googleapis.json.GoogleJsonError;
import com.google.api.client.http.HttpHeaders;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson.JacksonFactory;
import com.google.api.services.freebase.Freebase;
import com.google.api.services.freebase.FreebaseRequestInitializer;
import com.google.refine.freebase.FreebaseType; import com.google.refine.freebase.FreebaseType;
import com.google.refine.model.ReconCandidate; import com.google.refine.model.ReconCandidate;
import com.google.refine.util.JSONUtilities; import com.google.refine.util.JSONUtilities;
@ -99,32 +118,26 @@ public class FreebaseDataExtensionJob {
formulateQuery(ids, extension, writer); formulateQuery(ids, extension, writer);
String query = writer.toString(); String query = writer.toString();
InputStream is = doMqlRead(query); JSONObject o = doMqlRead(query);
try { Map<String, FreebaseDataExtensionJob.DataExtension> map = new HashMap<String, FreebaseDataExtensionJob.DataExtension>();
String s = ParsingUtilities.inputStreamToString(is); if (o.has("result")) {
JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); JSONArray a = o.getJSONArray("result");
int l = a.length();
Map<String, FreebaseDataExtensionJob.DataExtension> map = new HashMap<String, FreebaseDataExtensionJob.DataExtension>();
if (o.has("result")) { for (int i = 0; i < l; i++) {
JSONArray a = o.getJSONArray("result"); JSONObject o2 = a.getJSONObject(i);
int l = a.length(); String id = o2.getString("id");
FreebaseDataExtensionJob.DataExtension ext = collectResult(o2, reconCandidateMap);
for (int i = 0; i < l; i++) {
JSONObject o2 = a.getJSONObject(i); if (ext != null) {
String id = o2.getString("id"); map.put(id, ext);
FreebaseDataExtensionJob.DataExtension ext = collectResult(o2, reconCandidateMap);
if (ext != null) {
map.put(id, ext);
}
} }
} }
return map;
} finally {
is.close();
} }
}
return map;
}
protected FreebaseDataExtensionJob.DataExtension collectResult( protected FreebaseDataExtensionJob.DataExtension collectResult(
JSONObject obj, JSONObject obj,
@ -312,34 +325,167 @@ public class FreebaseDataExtensionJob {
} }
static protected InputStream doMqlRead(String query) throws IOException { /**
URL url = new URL("http://api.freebase.com/api/service/mqlread"); * This RPC call works for the Reconcile API, but MQLread is not supported by JSONRPC
*/
static private JSONObject rpcCall(String query) throws JSONException, UnsupportedEncodingException, IOException {
URL url = new URL("https://www.googleapis.com/rpc");
URLConnection connection = url.openConnection(); JSONObject params = new JSONObject();
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); params.put("query",query);
params.put("key", FreebaseUtils.API_KEY);
JSONObject req1 = new JSONObject();
req1.put("jsonrpc","2.0");
req1.put("id","q0");
req1.put("method","freebase.mqlread");
req1.put("apiVersion", "v1");
req1.put("params",params);
JSONArray body = new JSONArray();
body.put(req1);
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setRequestProperty("Content-Type", "application/json"); //
connection.setConnectTimeout(5000); connection.setConnectTimeout(5000);
connection.setDoOutput(true); connection.setDoOutput(true);
DataOutputStream dos = new DataOutputStream(connection.getOutputStream()); OutputStreamWriter writer = new OutputStreamWriter(connection.getOutputStream(),"utf-8");
try { try {
String body = "extended=1&query=" + ParsingUtilities.encode(query); writer.write(body.toString());
dos.writeBytes(body);
} finally { } finally {
dos.flush(); writer.flush();
dos.close(); writer.close();
} }
connection.connect(); connection.connect();
JSONArray result = null;
if (connection.getResponseCode() >= 400) {
String responseMessage = connection.getResponseMessage();
String errorStream = ParsingUtilities.inputStreamToString(connection.getErrorStream());
} else {
InputStream is = connection.getInputStream();
try {
String s = ParsingUtilities.inputStreamToString(is);
result = ParsingUtilities.evaluateJsonStringToArray(s);
} finally {
is.close();
}
}
return result.getJSONObject(0);
}
private static MimeBodyPart queryToMimeBodyPart(String query_name,
String query, String service_url, String api_key)
throws MessagingException, IOException {
MimeBodyPart mbp = new MimeBodyPart();
mbp.setHeader("Content-Transfer-Encoding", "binary");
mbp.setHeader("Content-ID", "<" + query_name + ">");
mbp.setHeader("Content-type", "application/http");
return connection.getInputStream(); List<NameValuePair> params = new ArrayList<NameValuePair>();
params.add(new BasicNameValuePair("query",query));
params.add(new BasicNameValuePair("key", api_key));
UrlEncodedFormEntity param_string = new UrlEncodedFormEntity(params, "UTF-8");
String body = "GET " + service_url + "?" + ParsingUtilities.inputStreamToString(param_string.getContent()) + "\n";
mbp.setText(body, "UTF-8");
mbp.getLineCount();
mbp.getAllHeaderLines();
return mbp;
}
/**
* The beginnings of a homebrew attempt to generate multi-part MIME messages
* so that we can parse response bodies ourselves and avoid the limitations
* of the Google Client library.
*/
static private JSONObject batchCallOld(String query) throws JSONException, MessagingException, IOException {
URL url = new URL("https://www.googleapis.com/batch");
String service_url = "https://www.googleapis.com/freebase/v1/mqlread";
MimeMultipart mp = new MimeMultipart("mixed");
MimeBodyPart mbp = queryToMimeBodyPart("q0", query, service_url, FreebaseUtils.API_KEY);
mp.addBodyPart(mbp);
mp.getPreamble();
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setRequestProperty("Content-Type", mp.getContentType());
connection.setConnectTimeout(5000);
connection.setDoOutput(true);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
mp.writeTo(baos);
String foo = baos.toString("UTF-8");
mp.writeTo(connection.getOutputStream());
connection.connect();
JSONArray result = null;
if (connection.getResponseCode() >= 400) {
String responseMessage = connection.getResponseMessage();
String errorStream = ParsingUtilities.inputStreamToString(connection.getErrorStream());
} else {
InputStream is = connection.getInputStream();
try {
String s = ParsingUtilities.inputStreamToString(is);
result = ParsingUtilities.evaluateJsonStringToArray(s);
} finally {
is.close();
}
}
return result.getJSONObject(0);
}
private static final HttpTransport HTTP_TRANSPORT = new NetHttpTransport();
/** Global instance of the JSON factory. */
private static final JsonFactory JSON_FACTORY = new JacksonFactory();
private static final FreebaseRequestInitializer REQUEST_INITIALIZER =
new FreebaseRequestInitializer(FreebaseUtils.API_KEY);
static private JSONObject batchCall1(String query) throws IOException, JSONException {
JsonBatchCallback<Freebase> callback = new JsonBatchCallback<Freebase>() {
public void onSuccess(Freebase res, HttpHeaders responseHeaders) {
System.out.println(res);
}
public void onFailure(GoogleJsonError e, HttpHeaders responseHeaders) {
System.out.println("Error Message: " + e.getMessage());
}
};
Freebase client = new Freebase.Builder(HTTP_TRANSPORT, JSON_FACTORY, null)
.setApplicationName("OpenRefine")
.setFreebaseRequestInitializer(REQUEST_INITIALIZER)
.build();
// TODO: Batch doesn't work with MqlRead since it extends FreebaseRequest<Void>
// BatchRequest batch = client.batch();
// client.mqlread(query).queue(batch, callback);
// batch.execute();
String s = ParsingUtilities.inputStreamToString(client.mqlread(query).executeAsInputStream());
JSONObject response = ParsingUtilities.evaluateJsonStringToObject(s);
return response;
}
static protected JSONObject doMqlRead(String query)
throws IOException, JSONException, MessagingException {
// JSONObject result = rpcCall(query);
// JSONObject resutl = batchCallOld(query);
JSONObject result = batchCall1(query);
return result;
} }
static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException { static protected void formulateQuery(Set<String> ids, JSONObject node, Writer writer) throws JSONException {
JSONWriter jsonWriter = new JSONWriter(writer); JSONWriter jsonWriter = new JSONWriter(writer);
jsonWriter.object();
jsonWriter.key("query");
jsonWriter.array(); jsonWriter.array();
jsonWriter.object(); jsonWriter.object();
@ -357,7 +503,6 @@ public class FreebaseDataExtensionJob {
jsonWriter.endObject(); jsonWriter.endObject();
jsonWriter.endArray(); jsonWriter.endArray();
jsonWriter.endObject();
} }
static protected void formulateQueryNode(JSONObject node, JSONWriter writer) throws JSONException { static protected void formulateQueryNode(JSONObject node, JSONWriter writer) throws JSONException {