Refactor GData support to separate FusionTables

This commit is contained in:
Tom Morris 2013-02-09 13:41:02 -05:00
parent c43e106abb
commit 0c6e065ac6
8 changed files with 667 additions and 474 deletions

View File

@ -43,6 +43,7 @@ function init() {
var RS = Packages.com.google.refine.RefineServlet; var RS = Packages.com.google.refine.RefineServlet;
RS.registerCommand(module, "deauthorize", Packages.com.google.refine.extension.gdata.DeAuthorizeCommand()); RS.registerCommand(module, "deauthorize", Packages.com.google.refine.extension.gdata.DeAuthorizeCommand());
RS.registerCommand(module, "upload", Packages.com.google.refine.extension.gdata.UploadCommand()); RS.registerCommand(module, "upload", Packages.com.google.refine.extension.gdata.UploadCommand());
// TODO: Need a new OAUTH2 authorize command for FusionTables
// Register importer and exporter // Register importer and exporter
var IM = Packages.com.google.refine.importing.ImportingManager; var IM = Packages.com.google.refine.importing.ImportingManager;

View File

@ -0,0 +1,209 @@
/*
* Copyright (c) 2013, Thomas F. Morris and other contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of Google nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.extension.gdata;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import javax.servlet.http.HttpServletRequest;
import com.google.gdata.client.GoogleService;
import com.google.gdata.client.Service.GDataRequest;
import com.google.gdata.client.Service.GDataRequest.RequestType;
import com.google.gdata.client.http.AuthSubUtil;
import com.google.gdata.util.ContentType;
import com.google.gdata.util.ServiceException;
import com.google.refine.util.ParsingUtilities;
import edu.mit.simile.butterfly.ButterflyModule;
/**
 * Static helpers for talking to the Google Fusion Tables SQL API through the
 * GData client library: building the AuthSub authorization URL, creating
 * requests, running SELECT-style queries and parsing the CSV responses.
 *
 * @author Tom Morris <tfmorris@gmail.com>
 * @copyright 2010,2013 Thomas F. Morris
 */
public class FusionTableHandler {

    /** Endpoint of the Fusion Tables SQL API; also used as the AuthSub scope. */
    final static private String FUSION_TABLES_SERVICE_URL =
            "https://www.google.com/fusiontables/api/query";

    /** Browser URL of a table; the table id is appended to this prefix. */
    final static private String FUSION_TABLES_DATASOURCE_URL =
            "https://www.google.com/fusiontables/DataSource?dsrcid=";

    /**
     * Matches one CSV value together with its terminator.
     * Group 1 is the raw value (possibly quoted), group 2 is the content of a
     * quoted value (quotes still doubled), group 4 is the terminator: either a
     * comma (more values follow on the row) or a line break (end of row).
     */
    final static private Pattern CSV_VALUE_PATTERN =
            Pattern.compile("([^,\\r\\n\"]*|\"(([^\"]*\"\")*[^\"]*)\")(,|\\r?\\n)");

    /**
     * Builds the AuthSub URL the user must visit to grant access to Fusion Tables.
     * After authorization Google redirects to this module's "authorized" page,
     * passing along the "winname" and "callback" request parameters.
     *
     * @param module  the Butterfly module whose mount point hosts the "authorized" page
     * @param request the current request; supplies the base URL and the
     *                "winname" / "callback" parameters
     * @return the AuthSub request URL
     * @throws MalformedURLException if the request URL or the redirect URL cannot be parsed
     */
    static public String getAuthorizationUrl(ButterflyModule module, HttpServletRequest request)
            throws MalformedURLException {
        StringBuilder callbackPath = new StringBuilder(module.getMountPoint().getMountPoint());
        callbackPath.append("authorized?winname=");
        callbackPath.append(ParsingUtilities.encode(request.getParameter("winname")));
        callbackPath.append("&callback=");
        callbackPath.append(ParsingUtilities.encode(request.getParameter("callback")));

        URL thisUrl = new URL(request.getRequestURL().toString());
        URL authorizedUrl = new URL(thisUrl, callbackPath.toString());

        return AuthSubUtil.getRequestUrl(
                authorizedUrl.toExternalForm(), // execution continues at authorized on redirect
                FUSION_TABLES_SERVICE_URL,      // scope: same literal as the service endpoint
                false,
                true);
    }

    /**
     * Creates a request that carries the SQL statement in a form-encoded body
     * ("sql=..."). The body is written and the request stream closed before
     * returning; the caller still has to call {@code execute()}.
     *
     * @param service     the service used to create the request
     * @param requestType the GData request type to issue
     * @param query       the SQL statement to send
     * @return the prepared (not yet executed) request
     * @throws IOException      if the request body cannot be written
     * @throws ServiceException if the request cannot be created
     */
    static public GDataRequest createFusionTablesPostRequest(
            GoogleService service, RequestType requestType, String query)
            throws IOException, ServiceException {
        URL url = new URL(FUSION_TABLES_SERVICE_URL);
        GDataRequest request = service.getRequestFactory().getRequest(
                requestType, url, new ContentType("application/x-www-form-urlencoded"));

        // Explicit charset so the body does not depend on the platform default,
        // and try/finally so the stream is released even if the write fails.
        OutputStreamWriter writer =
                new OutputStreamWriter(request.getRequestStream(), "UTF-8");
        try {
            writer.write("sql=" + URLEncoder.encode(query, "UTF-8"));
            writer.flush();
        } finally {
            writer.close();
        }
        return request;
    }

    /**
     * Creates a request that carries the SQL statement in the "sql" query
     * parameter of the service URL. The caller has to call {@code execute()}.
     *
     * @param service     the service used to create the request
     * @param requestType the GData request type to issue
     * @param query       the SQL statement to send
     * @return the prepared (not yet executed) request
     * @throws IOException      if the URL cannot be built
     * @throws ServiceException if the request cannot be created
     */
    static public GDataRequest createFusionTablesRequest(
            GoogleService service, RequestType requestType, String query)
            throws IOException, ServiceException {
        URL url = new URL(FUSION_TABLES_SERVICE_URL + "?sql=" +
                URLEncoder.encode(query, "UTF-8"));
        return service.getRequestFactory().getRequest(
                requestType, url, ContentType.TEXT_PLAIN);
    }

    /**
     * Extracts the table id (the "dsrcid" query parameter) from a Fusion Tables URL.
     *
     * @param url the table URL
     * @return the raw parameter value, or null if the URL has no "dsrcid" parameter
     */
    static String getFusionTableKey(URL url) {
        // TODO: Any special id format considerations to worry about?
        return getParamValue(url, "dsrcid");
    }

    /**
     * Creates a GData service for Fusion Tables.
     *
     * @param token AuthSub token to attach, or null for an unauthenticated service
     * @return the new service
     */
    static public GoogleService getFusionTablesGoogleService(String token) {
        GoogleService service = new GoogleService("fusiontables", GDataExtension.SERVICE_APP_NAME);
        if (token != null) {
            service.setAuthSubToken(token);
        }
        return service;
    }

    /**
     * Tells whether the URL looks like a Fusion Tables data source link, e.g.
     * http://www.google.com/fusiontables/DataSource?dsrcid=1219
     *
     * @param url the URL to test
     * @return true if the host ends with ".google.com", the path starts with
     *         "/fusiontables/DataSource" and a "dsrcid" parameter is present
     */
    static boolean isFusionTableURL(URL url) {
        // Uses the same parameter lookup as getFusionTableKey so that any URL
        // accepted here is guaranteed to yield a non-null key there.
        return url.getHost().endsWith(".google.com")
                && url.getPath().startsWith("/fusiontables/DataSource")
                && getParamValue(url, "dsrcid") != null;
    }

    /**
     * Reads the CSV response of an executed request into rows of string values.
     * Quoted values are unwrapped and doubled quotes collapsed. The response
     * stream is always closed, even when parsing fails.
     *
     * @param request an already executed request
     * @return the rows in response order (the first row is whatever the service
     *         sent first, typically the column headers); never null
     * @throws IOException if the response stream cannot be obtained
     */
    static public List<List<String>> parseFusionTablesResults(GDataRequest request) throws IOException {
        List<List<String>> rows = new ArrayList<List<String>>();
        List<String> row = null;
        Scanner scanner = new Scanner(request.getResponseStream(), "UTF-8");
        try {
            while (scanner.hasNextLine()) {
                String value;
                boolean endOfRow;
                if (scanner.findWithinHorizon(CSV_VALUE_PATTERN, 0) == null) {
                    // No terminated value is left: the input ends without a final
                    // line break. findWithinHorizon leaves the position unchanged
                    // on failure, so take the remainder as the last value instead
                    // of calling match(), which would throw IllegalStateException.
                    value = unquote(scanner.nextLine());
                    endOfRow = true;
                } else {
                    MatchResult match = scanner.match();
                    String quoted = match.group(2);
                    value = quoted == null ? match.group(1) : quoted.replace("\"\"", "\"");
                    endOfRow = !match.group(4).equals(",");
                }
                if (row == null) {
                    row = new ArrayList<String>();
                }
                row.add(value);
                if (endOfRow) {
                    rows.add(row);
                    row = null;
                }
            }
        } finally {
            scanner.close();
        }
        if (row != null) {
            rows.add(row);
        }
        return rows;
    }

    /**
     * Lists the tables visible to the service's user by running "SHOW TABLES".
     * Every data row with at least an id and a name gets the table's browser
     * URL appended as an extra trailing value.
     *
     * @param service the (authorized) Fusion Tables service
     * @return the result rows, header row first
     * @throws IOException      if the request or response fails
     * @throws ServiceException if the service rejects the query
     */
    static public List<List<String>> listTables(GoogleService service) throws IOException, ServiceException {
        List<List<String>> rows = runFusionTablesSelect(service, "SHOW TABLES");
        // Format is id, name to which we append a link URL based on ID.
        // Start at 1 to skip the header row.
        for (int i = 1; i < rows.size(); i++) {
            List<String> row = rows.get(i);
            if (row.size() >= 2) {
                row.add(FUSION_TABLES_DATASOURCE_URL + row.get(0));
            }
        }
        return rows;
    }

    /**
     * Executes a query through the URL-parameter request form and parses the
     * CSV response.
     *
     * @param service     the Fusion Tables service
     * @param selectQuery the SQL statement to run
     * @return the parsed rows, header row first
     * @throws IOException      if the request or response fails
     * @throws ServiceException if the service rejects the query
     */
    static public List<List<String>> runFusionTablesSelect(GoogleService service, String selectQuery)
            throws IOException, ServiceException {
        GDataRequest request = createFusionTablesRequest(service, RequestType.QUERY, selectQuery);
        request.execute();
        return parseFusionTablesResults(request);
    }

    /**
     * Returns the raw (not URL-decoded) value of the first query parameter
     * named {@code key}, or null if the URL has no query or no such parameter.
     */
    static private String getParamValue(URL url, String key) {
        String query = url.getQuery();
        if (query == null) {
            return null;
        }
        String prefix = key + "=";
        for (String part : query.split("&")) {
            if (part.startsWith(prefix)) {
                return part.substring(prefix.length());
            }
        }
        return null;
    }

    /**
     * Strips one pair of enclosing double quotes and collapses doubled quotes;
     * values that are not wrapped in quotes are returned unchanged.
     */
    static private String unquote(String raw) {
        int length = raw.length();
        if (length >= 2 && raw.charAt(0) == '"' && raw.charAt(length - 1) == '"') {
            return raw.substring(1, length - 1).replace("\"\"", "\"");
        }
        return raw;
    }
}

View File

@ -0,0 +1,298 @@
/*
* Copyright (c) 2010, Thomas F. Morris
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of Google nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.extension.gdata;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.json.JSONObject;
import com.google.gdata.client.GoogleService;
import com.google.gdata.util.ServiceException;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importers.TabularImportingParserBase.TableDataReader;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
/**
 * OpenRefine importer for Google Fusion Tables.
 * <p>
 * The table's columns are discovered with a DESCRIBE query and the rows are
 * then fetched in batches and fed to the generic tabular importer.
 *
 * @author Tom Morris <tfmorris@gmail.com>
 * @copyright 2010 Thomas F. Morris
 * @license New BSD http://www.opensource.org/licenses/bsd-license.php
 */
public class FusionTableImporter {

    /** Name of the URL parameter that carries the table id. */
    static private final String TABLE_ID_PARAM = "dsrcid=";

    /**
     * Imports a Fusion Table using a service created from the given AuthSub token.
     *
     * @param token      AuthSub token, or null for unauthenticated access
     * @param project    project to fill
     * @param metadata   project metadata passed on to the tabular importer
     * @param job        importing job whose progress is updated
     * @param limit      row limit passed on to the tabular importer
     * @param options    import options; "docUrl" identifies the table
     * @param exceptions receives any failure instead of it being thrown
     */
    static public void parse(
            String token,
            Project project,
            ProjectMetadata metadata,
            final ImportingJob job,
            int limit,
            JSONObject options,
            List<Exception> exceptions) {
        GoogleService service = FusionTableHandler.getFusionTablesGoogleService(token);
        parse(service, project, metadata, job, limit, options, exceptions);
    }

    /**
     * Stores a "Reading &lt;source&gt;" message and a percentage in the job's
     * "progress" config object, creating that object if needed.
     *
     * @param job        the job to update
     * @param fileSource name shown in the progress message
     * @param percent    completion percentage; callers in this class pass -1
     *                   while the total row count is unknown
     */
    static void setProgress(ImportingJob job, String fileSource, int percent) {
        JSONObject progress = JSONUtilities.getObject(job.config, "progress");
        if (progress == null) {
            progress = new JSONObject();
            JSONUtilities.safePut(job.config, "progress", progress);
        }
        JSONUtilities.safePut(progress, "message", "Reading " + fileSource);
        JSONUtilities.safePut(progress, "percent", percent);
    }

    /**
     * Wraps a column name in single quotes for use in a query, escaping any
     * embedded single quote with a backslash (the same escaping the exporter
     * applies to cell values) so that such a name cannot break the statement.
     */
    static private String quoteName(String name) {
        return "'" + name.replace("'", "\\'") + "'";
    }

    /**
     * Row source for the tabular importer: returns the header row first and
     * then the table's rows, fetched from the service one batch at a time.
     */
    static private class FusionTableBatchRowReader implements TableDataReader {
        final ImportingJob job;
        final String fileSource;
        final GoogleService service;
        final List<FTColumnData> columns;
        final int batchSize;
        final String baseQuery;

        int nextRow = 0; // 0-based
        int batchRowStart = 0; // 0-based
        boolean end = false;
        List<List<Object>> rowsOfCells = null;
        boolean usedHeaders = false;

        public FusionTableBatchRowReader(ImportingJob job, String fileSource,
                GoogleService service, String tableId, List<FTColumnData> columns,
                int batchSize) {
            this.job = job;
            this.fileSource = fileSource;
            this.service = service;
            this.columns = columns;
            this.batchSize = batchSize;

            // SELECT 'col1','col2',... FROM tableId
            StringBuilder sb = new StringBuilder("SELECT ");
            boolean first = true;
            for (FTColumnData cd : columns) {
                if (first) {
                    first = false;
                } else {
                    sb.append(",");
                }
                sb.append(quoteName(cd.name));
            }
            sb.append(" FROM ");
            sb.append(tableId);
            baseQuery = sb.toString();
        }

        /**
         * Returns the column names on the first call, then one data row per
         * call, and null once the table is exhausted.
         *
         * @throws IOException if fetching a batch fails (service errors are wrapped)
         */
        @Override
        public List<Object> getNextRowOfCells() throws IOException {
            if (!usedHeaders) {
                List<Object> row = new ArrayList<Object>(columns.size());
                for (FTColumnData cd : columns) {
                    row.add(cd.name);
                }
                usedHeaders = true;
                return row;
            }

            // Fetch the first batch, or the next one when the current batch is
            // used up and the previous fetch did not signal the end.
            if (rowsOfCells == null || (nextRow >= batchRowStart + rowsOfCells.size() && !end)) {
                int newBatchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size());
                try {
                    rowsOfCells = getRowsOfCells(newBatchRowStart);
                    batchRowStart = newBatchRowStart;
                    // Total row count is unknown, so no percentage can be computed.
                    setProgress(job, fileSource, -1 /* batchRowStart * 100 / totalRows */);
                } catch (ServiceException e) {
                    throw new IOException(e);
                }
            }

            if (rowsOfCells != null && nextRow - batchRowStart < rowsOfCells.size()) {
                return rowsOfCells.get(nextRow++ - batchRowStart);
            } else {
                return null;
            }
        }

        /**
         * Fetches one batch starting at {@code startRow} and converts the text
         * values: empty text becomes null; values in NUMBER columns become a
         * Long or, failing that, a finite Double; everything else stays text.
         * Also sets {@link #end} when the batch came back short.
         */
        private List<List<Object>> getRowsOfCells(int startRow) throws IOException, ServiceException {
            List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(batchSize);

            String query = baseQuery + " OFFSET " + startRow + " LIMIT " + batchSize;

            List<List<String>> rows = FusionTableHandler.runFusionTablesSelect(service, query);
            // Row 0 of the response is the header row; data starts at index 1.
            for (int i = 1; i < rows.size(); i++) {
                List<String> row = rows.get(i);
                List<Object> rowOfCells = new ArrayList<Object>(row.size());
                for (int j = 0; j < row.size() && j < columns.size(); j++) {
                    String text = row.get(j);
                    if (text.isEmpty()) {
                        rowOfCells.add(null);
                    } else {
                        FTColumnData cd = columns.get(j);
                        if (cd.type == FTColumnType.NUMBER) {
                            try {
                                rowOfCells.add(Long.parseLong(text));
                                continue;
                            } catch (NumberFormatException e) {
                                // not an integer; try floating point next
                            }
                            try {
                                double d = Double.parseDouble(text);
                                if (!Double.isInfinite(d) && !Double.isNaN(d)) {
                                    rowOfCells.add(d);
                                    continue;
                                }
                            } catch (NumberFormatException e) {
                                // not numeric at all; fall through and keep the text
                            }
                        }
                        rowOfCells.add(text);
                    }
                }
                rowsOfCells.add(rowOfCells);
            }
            // A full batch has batchSize data rows plus the header row.
            end = rows.size() < batchSize + 1;
            return rowsOfCells;
        }
    }

    /**
     * Imports the Fusion Table named by the "docUrl" option into the project.
     * Failures are appended to {@code exceptions} rather than thrown.
     *
     * @param service    the (authorized) Fusion Tables service
     * @param project    project to fill
     * @param metadata   project metadata passed on to the tabular importer
     * @param job        importing job whose progress is updated
     * @param limit      row limit passed on to the tabular importer
     * @param options    import options; "docUrl" is read, and "ignoreLines" /
     *                   "headerLines" are overwritten
     * @param exceptions receives any failure
     */
    static public void parse(
            GoogleService service,
            Project project,
            ProjectMetadata metadata,
            final ImportingJob job,
            int limit,
            JSONObject options,
            List<Exception> exceptions) {
        String docUrlString = JSONUtilities.getString(options, "docUrl", null);
        String id = getFTid(docUrlString); // Use GDataExtension.getFusionTableKey(url) ?
        // TODO: Allow arbitrary Fusion Tables URL instead of (in addition to?) constructing our own?

        if (id == null || id.isEmpty()) {
            // Without an id the query below would be "DESCRIBE null"; report the
            // real problem instead of an opaque service error.
            exceptions.add(new IOException(
                    "Cannot determine Fusion Table id from URL: " + docUrlString));
            return;
        }

        try {
            List<FTColumnData> columns = new ArrayList<FTColumnData>();

            // DESCRIBE rows are read as: [1] column name, [2] optional type.
            List<List<String>> rows = FusionTableHandler.runFusionTablesSelect(service, "DESCRIBE " + id);
            if (rows.size() > 1) {
                for (int i = 1; i < rows.size(); i++) {
                    List<String> row = rows.get(i);
                    if (row.size() >= 2) {
                        FTColumnData cd = new FTColumnData();
                        cd.name = row.get(1);
                        cd.type = FTColumnType.STRING;

                        if (row.size() > 2) {
                            String type = row.get(2).toLowerCase();
                            if (type.equals("number")) {
                                cd.type = FTColumnType.NUMBER;
                            } else if (type.equals("datetime")) {
                                cd.type = FTColumnType.DATETIME;
                            } else if (type.equals("location")) {
                                cd.type = FTColumnType.LOCATION;
                            }
                        }
                        columns.add(cd);
                    }
                }

                setProgress(job, docUrlString, -1);

                // Force these options for the next call because each fusion table
                // is strictly structured with a single line of headers.
                JSONUtilities.safePut(options, "ignoreLines", 0); // number of blank lines at the beginning to ignore
                JSONUtilities.safePut(options, "headerLines", 1); // number of header lines

                TabularImportingParserBase.readTable(
                        project,
                        metadata,
                        job,
                        new FusionTableBatchRowReader(job, docUrlString, service, id, columns, 100),
                        docUrlString,
                        limit,
                        options,
                        exceptions
                );
                setProgress(job, docUrlString, 100);
            }
        } catch (IOException e) {
            e.printStackTrace();
            exceptions.add(e);
        } catch (ServiceException e) {
            e.printStackTrace();
            exceptions.add(e);
        }
    }

    /**
     * Extracts the table id from a Fusion Tables URL.
     * <p>
     * The value of the "dsrcid" parameter is used when present, cut at the next
     * '&amp;' or '#', so that additional parameters after it are not mistaken
     * for the id. URLs without "dsrcid" fall back to the text after the last '='.
     *
     * @param url the table URL, may be null
     * @return the id, or null if the URL is null or contains no '='
     */
    static private String getFTid(String url) {
        if (url == null) {
            return null;
        }
        int keyStart = url.indexOf(TABLE_ID_PARAM);
        if (keyStart < 0) {
            int equal = url.lastIndexOf('=');
            return equal < 0 ? null : url.substring(equal + 1);
        }
        int valueStart = keyStart + TABLE_ID_PARAM.length();
        int valueEnd = url.length();
        int ampersand = url.indexOf('&', valueStart);
        if (ampersand >= 0) {
            valueEnd = ampersand;
        }
        int hash = url.indexOf('#', valueStart);
        if (hash >= 0 && hash < valueEnd) {
            valueEnd = hash;
        }
        return url.substring(valueStart, valueEnd);
    }

    /** Column types recognised in the DESCRIBE output; anything else is STRING. */
    static enum FTColumnType {
        STRING,
        NUMBER,
        DATETIME,
        LOCATION
    }

    /** Name and type of one table column. */
    final static class FTColumnData {
        String name;
        FTColumnType type;
    }
}

View File

@ -0,0 +1,149 @@
package com.google.refine.extension.gdata;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.json.JSONObject;
import com.google.gdata.client.GoogleService;
import com.google.gdata.client.Service.GDataRequest;
import com.google.gdata.client.Service.GDataRequest.RequestType;
import com.google.gdata.util.ServiceException;
import com.google.refine.exporters.TabularSerializer;
/**
 * Tabular serializer that exports rows to a new Google Fusion Table.
 * <p>
 * The header row triggers a CREATE TABLE statement (all columns typed STRING);
 * each following row becomes an INSERT statement. INSERTs are accumulated and
 * sent to the service in batches. Failures are collected in the exception list
 * supplied by the caller instead of being thrown.
 */
final class FusionTableSerializer implements TabularSerializer {

    /** Number of INSERT statements sent to the service per request. */
    static private final int BATCH_SIZE = 20;

    GoogleService service;
    String tableName;
    List<Exception> exceptions;

    /** Id of the created table; stays null until table creation has succeeded. */
    String tableId;
    List<String> columnNames;
    /** Pending INSERT statements separated by ';', or null when nothing is pending. */
    StringBuffer sbBatch;
    /** Count of data rows accepted so far. */
    int rows;

    /**
     * @param service    the (authorized) Fusion Tables service
     * @param tableName  name for the table to create
     * @param exceptions receives every failure that occurs while exporting
     */
    FusionTableSerializer(GoogleService service, String tableName, List<Exception> exceptions) {
        this.service = service;
        this.tableName = tableName;
        this.exceptions = exceptions;
    }

    @Override
    public void startFile(JSONObject options) {
        // nothing to prepare; the table is created when the header row arrives
    }

    /** Flushes any INSERT statements that have not been sent yet. */
    @Override
    public void endFile() {
        if (sbBatch != null) {
            sendBatch();
        }
    }

    /**
     * Handles one exported row. A header row creates the table; a data row is
     * queued as an INSERT. Data rows are ignored when no table id is available
     * (no header seen yet, or table creation failed).
     */
    @Override
    public void addRow(List<CellData> cells, boolean isHeader) {
        if (isHeader) {
            createTable(cells);
        } else if (tableId != null) {
            if (sbBatch == null) {
                sbBatch = new StringBuffer();
            }
            formulateInsert(cells, sbBatch);

            rows++;
            if (rows % BATCH_SIZE == 0) {
                sendBatch();
            }
        }
    }

    /**
     * Creates the table from the header cells and remembers its id and column
     * names. Any failure, including a response that carries no table id, is
     * recorded in {@link #exceptions}.
     */
    private void createTable(List<CellData> cells) {
        columnNames = new ArrayList<String>(cells.size());

        StringBuilder sb = new StringBuilder();
        sb.append("CREATE TABLE '");
        sb.append(escapeQuotes(tableName));
        sb.append("' (");
        boolean first = true;
        for (CellData cellData : cells) {
            columnNames.add(cellData.text);

            if (first) {
                first = false;
            } else {
                sb.append(',');
            }
            sb.append("'");
            sb.append(escapeQuotes(cellData.text));
            sb.append("': STRING");
        }
        sb.append(")");

        try {
            String createQuery = sb.toString();

            GDataRequest createTableRequest = FusionTableHandler.createFusionTablesPostRequest(
                    service, RequestType.INSERT, createQuery);
            createTableRequest.execute();

            // Expected response: a header row followed by one row holding the id.
            List<List<String>> createTableResults =
                    FusionTableHandler.parseFusionTablesResults(createTableRequest);
            if (createTableResults != null && createTableResults.size() == 2) {
                tableId = createTableResults.get(1).get(0);
            } else {
                // Without an id every data row would be dropped silently;
                // make the failure visible to the caller.
                exceptions.add(new IOException(
                        "Fusion Tables did not return an id for the new table '" + tableName + "'"));
            }
        } catch (Exception e) {
            // Deliberately broad: also covers a malformed response (e.g. an empty id row).
            exceptions.add(e);
        }
    }

    /**
     * Sends the pending INSERT statements in one request and clears the batch,
     * whether or not the request succeeded.
     */
    private void sendBatch() {
        try {
            GDataRequest insertRequest = FusionTableHandler.createFusionTablesPostRequest(
                    service, RequestType.INSERT, sbBatch.toString());
            insertRequest.execute();
        } catch (IOException e) {
            exceptions.add(e);
        } catch (ServiceException e) {
            exceptions.add(e);
        } finally {
            sbBatch = null;
        }
    }

    /**
     * Appends an INSERT statement for the given cells to {@code sb}, separated
     * from any previous statement by ';'. Cells beyond the known columns are
     * ignored; a null cell or null text is written as an empty string.
     */
    private void formulateInsert(List<CellData> cells, StringBuffer sb) {
        StringBuilder sbColumnNames = new StringBuilder();
        StringBuilder sbValues = new StringBuilder();
        boolean first = true;
        for (int i = 0; i < cells.size() && i < columnNames.size(); i++) {
            CellData cellData = cells.get(i);

            if (first) {
                first = false;
            } else {
                sbColumnNames.append(',');
                sbValues.append(',');
            }
            sbColumnNames.append("'");
            sbColumnNames.append(escapeQuotes(columnNames.get(i)));
            sbColumnNames.append("'");

            sbValues.append("'");
            if (cellData != null && cellData.text != null) {
                sbValues.append(escapeQuotes(cellData.text));
            }
            sbValues.append("'");
        }

        if (sb.length() > 0) {
            sb.append(';');
        }
        sb.append("INSERT INTO ");
        sb.append(tableId);
        sb.append("(");
        sb.append(sbColumnNames.toString());
        sb.append(") values (");
        sb.append(sbValues.toString());
        sb.append(")");
    }

    /**
     * Escapes single quotes with a backslash so the text can sit inside a
     * single-quoted SQL literal or name. Null is treated as empty text.
     */
    static private String escapeQuotes(String text) {
        // NOTE(review): backslashes in the text are passed through unchanged,
        // as before - confirm how the service treats a trailing backslash.
        return text == null ? "" : text.replace("'", "\\'");
    }

    /**
     * @return the browser URL of the created table, or null if no table was
     *         created or any exception has been recorded
     */
    public String getUrl() {
        // FIXME: This base URL is no longer correct
        return tableId == null || exceptions.size() > 0 ? null :
            "https://www.google.com/fusiontables/DataSource?dsrcid=" + tableId;
    }
}

View File

@ -28,28 +28,14 @@
*/ */
package com.google.refine.extension.gdata; package com.google.refine.extension.gdata;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import com.google.gdata.client.GoogleService;
import com.google.gdata.client.Service.GDataRequest;
import com.google.gdata.client.Service.GDataRequest.RequestType;
import com.google.gdata.client.docs.DocsService; import com.google.gdata.client.docs.DocsService;
import com.google.gdata.client.http.AuthSubUtil; import com.google.gdata.client.http.AuthSubUtil;
import com.google.gdata.client.spreadsheet.FeedURLFactory;
import com.google.gdata.client.spreadsheet.SpreadsheetService; import com.google.gdata.client.spreadsheet.SpreadsheetService;
import com.google.gdata.util.ContentType;
import com.google.gdata.util.ServiceException;
import com.google.refine.util.ParsingUtilities; import com.google.refine.util.ParsingUtilities;
@ -79,20 +65,12 @@ abstract public class GDataExtension {
return AuthSubUtil.getRequestUrl( return AuthSubUtil.getRequestUrl(
authorizedUrl.toExternalForm(), // execution continues at authorized on redirect authorizedUrl.toExternalForm(), // execution continues at authorized on redirect
"https://docs.google.com/feeds https://spreadsheets.google.com/feeds https://www.google.com/fusiontables/api/query", "https://docs.google.com/feeds https://spreadsheets.google.com/feeds",
false, false,
true); true);
} }
static private FeedURLFactory factory;
static public FeedURLFactory getFeedUrlFactory() {
if (factory == null) {
// Careful - this is shared by everyone.
factory = FeedURLFactory.getDefault();
}
return factory;
}
static public DocsService getDocsService(String token) { static public DocsService getDocsService(String token) {
DocsService service = new DocsService(SERVICE_APP_NAME); DocsService service = new DocsService(SERVICE_APP_NAME);
if (token != null) { if (token != null) {
@ -109,83 +87,6 @@ abstract public class GDataExtension {
return service; return service;
} }
static public GoogleService getFusionTablesGoogleService(String token) {
GoogleService service = new GoogleService("fusiontables", SERVICE_APP_NAME);
if (token != null) {
service.setAuthSubToken(token);
}
return service;
}
final static private String FUSION_TABLES_SERVICE_URL =
"https://www.google.com/fusiontables/api/query";
final static private Pattern CSV_VALUE_PATTERN =
Pattern.compile("([^,\\r\\n\"]*|\"(([^\"]*\"\")*[^\"]*)\")(,|\\r?\\n)");
static public List<List<String>> runFusionTablesSelect(GoogleService service, String selectQuery)
throws IOException, ServiceException {
GDataRequest request = createFusionTablesRequest(service, RequestType.QUERY, selectQuery);
request.execute();
return parseFusionTablesResults(request);
}
static public GDataRequest createFusionTablesRequest(
GoogleService service, RequestType requestType, String query)
throws IOException, ServiceException {
URL url = new URL(FUSION_TABLES_SERVICE_URL + "?sql=" +
URLEncoder.encode(query, "UTF-8"));
return service.getRequestFactory().getRequest(
requestType, url, ContentType.TEXT_PLAIN);
}
static public GDataRequest createFusionTablesPostRequest(
GoogleService service, RequestType requestType, String query)
throws IOException, ServiceException {
URL url = new URL(FUSION_TABLES_SERVICE_URL);
GDataRequest request = service.getRequestFactory().getRequest(
requestType, url, new ContentType("application/x-www-form-urlencoded"));
OutputStreamWriter writer =
new OutputStreamWriter(request.getRequestStream());
writer.append("sql=" + URLEncoder.encode(query, "UTF-8"));
writer.flush();
writer.close();
return request;
}
static public List<List<String>> parseFusionTablesResults(GDataRequest request) throws IOException {
List<List<String>> rows = new ArrayList<List<String>>();
List<String> row = null;
Scanner scanner = new Scanner(request.getResponseStream(), "UTF-8");
while (scanner.hasNextLine()) {
scanner.findWithinHorizon(CSV_VALUE_PATTERN, 0);
MatchResult match = scanner.match();
String quotedString = match.group(2);
String decoded = quotedString == null ? match.group(1) : quotedString.replaceAll("\"\"", "\"");
if (row == null) {
row = new ArrayList<String>();
}
row.add(decoded);
if (!match.group(4).equals(",")) {
if (row != null) {
rows.add(row);
row = null;
}
}
}
scanner.close();
if (row != null) {
rows.add(row);
}
return rows;
}
static boolean isSpreadsheetURL(String url) { static boolean isSpreadsheetURL(String url) {
// e.g. http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en // e.g. http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
// TODO: The following should work, but the GData implementation is too limited // TODO: The following should work, but the GData implementation is too limited
@ -220,25 +121,5 @@ abstract public class GDataExtension {
} }
return null; return null;
} }
static boolean isFusionTableURL(URL url) {
// http://www.google.com/fusiontables/DataSource?dsrcid=1219
String query = url.getQuery();
if (query == null) {
query = "";
}
return url.getHost().endsWith(".google.com")
&& url.getPath().startsWith("/fusiontables/DataSource")
&& query.contains("dsrcid=");
}
static String getFusionTableKey(URL url) {
String tableId = getParamValue(url,"dsrcid");
// TODO: Any special id format considerations to worry about?
// if (tableId.startsWith("p") || !tableId.contains(".")) {
// return tableId;
// }
return tableId;
}
} }

View File

@ -36,7 +36,6 @@ import java.util.List;
import org.json.JSONObject; import org.json.JSONObject;
import com.google.gdata.client.GoogleService;
import com.google.gdata.client.spreadsheet.CellQuery; import com.google.gdata.client.spreadsheet.CellQuery;
import com.google.gdata.client.spreadsheet.SpreadsheetService; import com.google.gdata.client.spreadsheet.SpreadsheetService;
import com.google.gdata.data.spreadsheet.Cell; import com.google.gdata.data.spreadsheet.Cell;
@ -83,9 +82,7 @@ public class GDataImporter {
exceptions exceptions
); );
} else if ("table".equals(docType)) { } else if ("table".equals(docType)) {
GoogleService service = GDataExtension.getFusionTablesGoogleService(token); FusionTableImporter.parse(token,
parse(
service,
project, project,
metadata, metadata,
job, job,
@ -171,7 +168,7 @@ public class GDataImporter {
} }
} }
static private void setProgress(ImportingJob job, String fileSource, int percent) { static void setProgress(ImportingJob job, String fileSource, int percent) {
JSONObject progress = JSONUtilities.getObject(job.config, "progress"); JSONObject progress = JSONUtilities.getObject(job.config, "progress");
if (progress == null) { if (progress == null) {
progress = new JSONObject(); progress = new JSONObject();
@ -278,214 +275,5 @@ public class GDataImporter {
return rowsOfCells; return rowsOfCells;
} }
} }
}
static public void parse(
GoogleService service,
Project project,
ProjectMetadata metadata,
final ImportingJob job,
int limit,
JSONObject options,
List<Exception> exceptions) {
String docUrlString = JSONUtilities.getString(options, "docUrl", null);
String id = getFTid(docUrlString); // Use GDataExtension.getFusionTableKey(url) ?
// TODO: Allow arbitrary Fusion Tables URL instead of (in addition to?) constructing our own?
try {
List<FTColumnData> columns = new ArrayList<GDataImporter.FTColumnData>();
List<List<String>> rows = GDataExtension.runFusionTablesSelect(service, "DESCRIBE " + id);
if (rows.size() > 1) {
for (int i = 1; i < rows.size(); i++) {
List<String> row = rows.get(i);
if (row.size() >= 2) {
FTColumnData cd = new FTColumnData();
cd.name = row.get(1);
cd.type = FTColumnType.STRING;
if (row.size() > 2) {
String type = row.get(2).toLowerCase();
if (type.equals("number")) {
cd.type = FTColumnType.NUMBER;
} else if (type.equals("datetime")) {
cd.type = FTColumnType.DATETIME;
} else if (type.equals("location")) {
cd.type = FTColumnType.LOCATION;
}
}
columns.add(cd);
}
}
setProgress(job, docUrlString, -1);
// Force these options for the next call because each fusion table
// is strictly structured with a single line of headers.
JSONUtilities.safePut(options, "ignoreLines", 0); // number of blank lines at the beginning to ignore
JSONUtilities.safePut(options, "headerLines", 1); // number of header lines
TabularImportingParserBase.readTable(
project,
metadata,
job,
new FusionTableBatchRowReader(job, docUrlString, service, id, columns, 100),
docUrlString,
limit,
options,
exceptions
);
setProgress(job, docUrlString, 100);
}
} catch (IOException e) {
e.printStackTrace();
exceptions.add(e);
} catch (ServiceException e) {
e.printStackTrace();
exceptions.add(e);
}
}
static private String getFTid(String url) {
if (url == null) {
return null;
}
int equal = url.lastIndexOf('=');
if (equal < 0) {
return null;
}
return url.substring(equal + 1);
}
static private enum FTColumnType {
STRING,
NUMBER,
DATETIME,
LOCATION
}
final static private class FTColumnData {
String name;
FTColumnType type;
}
static private class FusionTableBatchRowReader implements TableDataReader {
final ImportingJob job;
final String fileSource;
final GoogleService service;
final List<FTColumnData> columns;
final int batchSize;
final String baseQuery;
int nextRow = 0; // 0-based
int batchRowStart = 0; // 0-based
boolean end = false;
List<List<Object>> rowsOfCells = null;
boolean usedHeaders = false;
public FusionTableBatchRowReader(ImportingJob job, String fileSource,
GoogleService service, String tableId, List<FTColumnData> columns,
int batchSize) {
this.job = job;
this.fileSource = fileSource;
this.service = service;
this.columns = columns;
this.batchSize = batchSize;
StringBuffer sb = new StringBuffer();
sb.append("SELECT ");
boolean first = true;
for (FTColumnData cd : columns) {
if (first) {
first = false;
} else {
sb.append(",");
}
sb.append("'");
sb.append(cd.name);
sb.append("'");
}
sb.append(" FROM ");
sb.append(tableId);
baseQuery = sb.toString();
}
@Override
public List<Object> getNextRowOfCells() throws IOException {
if (!usedHeaders) {
List<Object> row = new ArrayList<Object>(columns.size());
for (FTColumnData cd : columns) {
row.add(cd.name);
}
usedHeaders = true;
return row;
}
if (rowsOfCells == null || (nextRow >= batchRowStart + rowsOfCells.size() && !end)) {
int newBatchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size());
try {
rowsOfCells = getRowsOfCells(newBatchRowStart);
batchRowStart = newBatchRowStart;
setProgress(job, fileSource, -1 /* batchRowStart * 100 / totalRows */);
} catch (ServiceException e) {
throw new IOException(e);
}
}
if (rowsOfCells != null && nextRow - batchRowStart < rowsOfCells.size()) {
return rowsOfCells.get(nextRow++ - batchRowStart);
} else {
return null;
}
}
private List<List<Object>> getRowsOfCells(int startRow) throws IOException, ServiceException {
List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(batchSize);
String query = baseQuery + " OFFSET " + startRow + " LIMIT " + batchSize;
List<List<String>> rows = GDataExtension.runFusionTablesSelect(service, query);
if (rows.size() > 1) {
for (int i = 1; i < rows.size(); i++) {
List<String> row = rows.get(i);
List<Object> rowOfCells = new ArrayList<Object>(row.size());
for (int j = 0; j < row.size() && j < columns.size(); j++) {
String text = row.get(j);
if (text.isEmpty()) {
rowOfCells.add(null);
} else {
FTColumnData cd = columns.get(j);
if (cd.type == FTColumnType.NUMBER) {
try {
rowOfCells.add(Long.parseLong(text));
continue;
} catch (NumberFormatException e) {
// ignore
}
try {
double d = Double.parseDouble(text);
if (!Double.isInfinite(d) && !Double.isNaN(d)) {
rowOfCells.add(d);
continue;
}
} catch (NumberFormatException e) {
// ignore
}
}
rowOfCells.add(text);
}
}
rowsOfCells.add(rowOfCells);
}
}
end = rows.size() < batchSize + 1;
return rowsOfCells;
}
}
}

View File

@ -125,7 +125,7 @@ public class GDataImportingController implements ImportingController {
try { try {
listSpreadsheets(GDataExtension.getDocsService(token), writer); listSpreadsheets(GDataExtension.getDocsService(token), writer);
listFusionTables(GDataExtension.getFusionTablesGoogleService(token), writer); listFusionTables(FusionTableHandler.getFusionTablesGoogleService(token), writer);
} catch (ServiceException e) { } catch (ServiceException e) {
e.printStackTrace(); e.printStackTrace();
} }
@ -170,14 +170,14 @@ public class GDataImportingController implements ImportingController {
private void listFusionTables(GoogleService service, JSONWriter writer) private void listFusionTables(GoogleService service, JSONWriter writer)
throws IOException, ServiceException, JSONException { throws IOException, ServiceException, JSONException {
List<List<String>> rows = GDataExtension.runFusionTablesSelect(service, "SHOW TABLES"); List<List<String>> rows = FusionTableHandler.listTables(service);
if (rows.size() > 1) { // excluding headers if (rows.size() > 1) { // excluding headers
for (int i = 1; i < rows.size(); i++) { for (int i = 1; i < rows.size(); i++) {
List<String> row = rows.get(i); List<String> row = rows.get(i);
if (row.size() >= 2) { if (row.size() >= 2) {
String id = row.get(0); String id = row.get(0);
String name = row.get(1); String name = row.get(1);
String link = "https://www.google.com/fusiontables/DataSource?dsrcid=" + id; String link = row.get(2);
writer.object(); writer.object();
writer.key("docId"); writer.value(id); writer.key("docId"); writer.value(id);

View File

@ -33,7 +33,6 @@ import java.io.IOException;
import java.io.Writer; import java.io.Writer;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Properties; import java.util.Properties;
@ -48,8 +47,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.gdata.client.GoogleService; import com.google.gdata.client.GoogleService;
import com.google.gdata.client.Service.GDataRequest;
import com.google.gdata.client.Service.GDataRequest.RequestType;
import com.google.gdata.client.docs.DocsService; import com.google.gdata.client.docs.DocsService;
import com.google.gdata.client.spreadsheet.CellQuery; import com.google.gdata.client.spreadsheet.CellQuery;
import com.google.gdata.client.spreadsheet.SpreadsheetService; import com.google.gdata.client.spreadsheet.SpreadsheetService;
@ -295,142 +292,12 @@ public class UploadCommand extends Command {
static private String uploadFusionTable( static private String uploadFusionTable(
Project project, final Engine engine, final Properties params, Project project, final Engine engine, final Properties params,
String token, String name, List<Exception> exceptions) { String token, String name, List<Exception> exceptions) {
GoogleService service = GDataExtension.getFusionTablesGoogleService(token); GoogleService service = FusionTableHandler.getFusionTablesGoogleService(token);
FusionTableSerializer serializer = new FusionTableSerializer(service, name, exceptions); FusionTableSerializer serializer = new FusionTableSerializer(service, name, exceptions);
CustomizableTabularExporterUtilities.exportRows( CustomizableTabularExporterUtilities.exportRows(
project, engine, params, serializer); project, engine, params, serializer);
return serializer.tableId == null || exceptions.size() > 0 ? null : return serializer.getUrl();
"https://www.google.com/fusiontables/DataSource?dsrcid=" + serializer.tableId;
}
final static private class FusionTableSerializer implements TabularSerializer {
GoogleService service;
String tableName;
List<Exception> exceptions;
String tableId;
List<String> columnNames;
StringBuffer sbBatch;
int rows;
/**
 * Creates a serializer bound to a service connection, a target table name,
 * and a caller-owned list for collecting exceptions.
 *
 * @param service    service object stored for later requests
 * @param tableName  name stored for the table to be created
 * @param exceptions list stored for recording failures
 */
FusionTableSerializer(GoogleService service, String tableName, List<Exception> exceptions) {
    this.exceptions = exceptions;
    this.tableName = tableName;
    this.service = service;
}
/**
 * Start-of-export hook; intentionally does nothing.
 *
 * @param options export options (unused)
 */
@Override
public void startFile(JSONObject options) {
    // No setup is performed here.
}
/**
 * End-of-export hook: sends any batch that is still pending.
 */
@Override
public void endFile() {
    if (sbBatch == null) {
        // Nothing is buffered, so there is nothing to send.
        return;
    }
    sendBatch();
}
/**
 * Consumes one exported row.
 * <p>
 * A header row records the column names and issues a {@code CREATE TABLE}
 * statement with every column typed {@code STRING}; on success the new
 * table's id is remembered in {@code tableId}. Any exception from that
 * request is added to {@code exceptions} rather than thrown.
 * <p>
 * A data row is only processed once a table id is known. It is appended to
 * the pending batch, and the batch is sent after every 20th row.
 * <p>
 * Single quotes embedded in the table name or in a column name are escaped
 * with a backslash, matching how cell values are escaped when the INSERT
 * statements are built, so such names no longer break the quoted literal.
 *
 * @param cells    the cells of the row, in column order
 * @param isHeader {@code true} if this row carries the column names
 */
@Override
public void addRow(List<CellData> cells, boolean isHeader) {
    if (isHeader) {
        columnNames = new ArrayList<String>(cells.size());

        StringBuilder sb = new StringBuilder();
        sb.append("CREATE TABLE '");
        // String.valueOf keeps the original "null" rendering for a null name.
        sb.append(String.valueOf(tableName).replace("'", "\\'"));
        sb.append("' (");

        boolean first = true;
        for (CellData cellData : cells) {
            // The raw (unescaped) name is stored; escaping happens only where SQL is built.
            columnNames.add(cellData.text);
            if (first) {
                first = false;
            } else {
                sb.append(',');
            }
            sb.append("'");
            sb.append(String.valueOf(cellData.text).replace("'", "\\'"));
            sb.append("': STRING");
        }
        sb.append(")");

        try {
            String createQuery = sb.toString();
            GDataRequest createTableRequest = GDataExtension.createFusionTablesPostRequest(
                    service, RequestType.INSERT, createQuery);
            createTableRequest.execute();

            List<List<String>> createTableResults =
                    GDataExtension.parseFusionTablesResults(createTableRequest);
            // Only a two-row result is accepted; the id is read from the first cell of the second row.
            if (createTableResults != null && createTableResults.size() == 2) {
                tableId = createTableResults.get(1).get(0);
            }
        } catch (Exception e) {
            exceptions.add(e);
        }
    } else if (tableId != null) {
        if (sbBatch == null) {
            sbBatch = new StringBuffer();
        }
        formulateInsert(cells, sbBatch);
        rows++;
        // Send the accumulated statements after every 20 rows.
        if (rows % 20 == 0) {
            sendBatch();
        }
    }
}
/**
 * Posts the statements accumulated in {@code sbBatch} as one request.
 * <p>
 * An {@code IOException} or {@code ServiceException} raised while creating
 * or executing the request is added to {@code exceptions} instead of being
 * thrown. The pending batch is always cleared afterwards, whether or not
 * the request succeeded.
 */
void sendBatch() {
    try {
        String statements = sbBatch.toString();
        GDataRequest insertRequest = GDataExtension.createFusionTablesPostRequest(
                service, RequestType.INSERT, statements);
        insertRequest.execute();
    } catch (IOException ioFailure) {
        exceptions.add(ioFailure);
    } catch (ServiceException serviceFailure) {
        exceptions.add(serviceFailure);
    } finally {
        // Reset so the next row starts a fresh batch.
        sbBatch = null;
    }
}
/**
 * Appends one {@code INSERT INTO <tableId>(...) values (...)} statement for
 * the given row to {@code sb}, preceded by a {@code ';'} separator when
 * {@code sb} already holds earlier statements.
 * <p>
 * Only the first {@code min(cells.size(), columnNames.size())} cells are
 * written. Every column name and value is wrapped in single quotes; a null
 * cell or null cell text is written as an empty quoted string. Embedded
 * single quotes are escaped with a backslash in values and, for consistency
 * with them, in column names as well, so that a name containing a quote
 * does not terminate its literal early.
 *
 * @param cells the cells of the row, in column order
 * @param sb    buffer that receives the statement
 */
void formulateInsert(List<CellData> cells, StringBuffer sb) {
    StringBuilder names = new StringBuilder();
    StringBuilder values = new StringBuilder();

    for (int i = 0; i < cells.size() && i < columnNames.size(); i++) {
        if (i > 0) {
            names.append(',');
            values.append(',');
        }

        // String.valueOf keeps the original "null" rendering for a null column name.
        names.append("'");
        names.append(String.valueOf(columnNames.get(i)).replace("'", "\\'"));
        names.append("'");

        CellData cellData = cells.get(i);
        values.append("'");
        if (cellData != null && cellData.text != null) {
            // Literal replace yields the same \' output as the former regex replaceAll.
            values.append(cellData.text.replace("'", "\\'"));
        }
        values.append("'");
    }

    if (sb.length() > 0) {
        sb.append(';');
    }
    sb.append("INSERT INTO ");
    sb.append(tableId);
    sb.append("(");
    sb.append(names.toString());
    sb.append(") values (");
    sb.append(values.toString());
    sb.append(")");
}
} }
} }