Refactor GData support to separate FusionTables
This commit is contained in:
parent
c43e106abb
commit
0c6e065ac6
@ -43,6 +43,7 @@ function init() {
|
|||||||
var RS = Packages.com.google.refine.RefineServlet;
|
var RS = Packages.com.google.refine.RefineServlet;
|
||||||
RS.registerCommand(module, "deauthorize", Packages.com.google.refine.extension.gdata.DeAuthorizeCommand());
|
RS.registerCommand(module, "deauthorize", Packages.com.google.refine.extension.gdata.DeAuthorizeCommand());
|
||||||
RS.registerCommand(module, "upload", Packages.com.google.refine.extension.gdata.UploadCommand());
|
RS.registerCommand(module, "upload", Packages.com.google.refine.extension.gdata.UploadCommand());
|
||||||
|
// TODO: Need a new OAUTH2 authorize command for FusionTables
|
||||||
|
|
||||||
// Register importer and exporter
|
// Register importer and exporter
|
||||||
var IM = Packages.com.google.refine.importing.ImportingManager;
|
var IM = Packages.com.google.refine.importing.ImportingManager;
|
||||||
|
@ -0,0 +1,209 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2013, Thomas F. Morris and other contributors
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* - Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* Neither the name of Google nor the names of its contributors may be used to
|
||||||
|
* endorse or promote products derived from this software without specific
|
||||||
|
* prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||||
|
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package com.google.refine.extension.gdata;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Scanner;
|
||||||
|
import java.util.regex.MatchResult;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
|
||||||
|
import com.google.gdata.client.GoogleService;
|
||||||
|
import com.google.gdata.client.Service.GDataRequest;
|
||||||
|
import com.google.gdata.client.Service.GDataRequest.RequestType;
|
||||||
|
import com.google.gdata.client.http.AuthSubUtil;
|
||||||
|
import com.google.gdata.util.ContentType;
|
||||||
|
import com.google.gdata.util.ServiceException;
|
||||||
|
|
||||||
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
|
import edu.mit.simile.butterfly.ButterflyModule;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Tom Morris <tfmorris@gmail.com>
|
||||||
|
* @copyright 2010,2013 Thomas F. Morris
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class FusionTableHandler {
|
||||||
|
|
||||||
|
final static private String FUSION_TABLES_SERVICE_URL =
|
||||||
|
"https://www.google.com/fusiontables/api/query";
|
||||||
|
|
||||||
|
final static private Pattern CSV_VALUE_PATTERN =
|
||||||
|
Pattern.compile("([^,\\r\\n\"]*|\"(([^\"]*\"\")*[^\"]*)\")(,|\\r?\\n)");
|
||||||
|
|
||||||
|
static public String getAuthorizationUrl(ButterflyModule module, HttpServletRequest request)
|
||||||
|
throws MalformedURLException {
|
||||||
|
char[] mountPointChars = module.getMountPoint().getMountPoint().toCharArray();
|
||||||
|
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
sb.append(mountPointChars, 0, mountPointChars.length);
|
||||||
|
sb.append("authorized?winname=");
|
||||||
|
sb.append(ParsingUtilities.encode(request.getParameter("winname")));
|
||||||
|
sb.append("&callback=");
|
||||||
|
sb.append(ParsingUtilities.encode(request.getParameter("callback")));
|
||||||
|
|
||||||
|
URL thisUrl = new URL(request.getRequestURL().toString());
|
||||||
|
URL authorizedUrl = new URL(thisUrl, sb.toString());
|
||||||
|
|
||||||
|
return AuthSubUtil.getRequestUrl(
|
||||||
|
authorizedUrl.toExternalForm(), // execution continues at authorized on redirect
|
||||||
|
"https://www.google.com/fusiontables/api/query",
|
||||||
|
false,
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public GDataRequest createFusionTablesPostRequest(
|
||||||
|
GoogleService service, RequestType requestType, String query)
|
||||||
|
throws IOException, ServiceException {
|
||||||
|
URL url = new URL(FUSION_TABLES_SERVICE_URL);
|
||||||
|
GDataRequest request = service.getRequestFactory().getRequest(
|
||||||
|
requestType, url, new ContentType("application/x-www-form-urlencoded"));
|
||||||
|
|
||||||
|
OutputStreamWriter writer =
|
||||||
|
new OutputStreamWriter(request.getRequestStream());
|
||||||
|
writer.append("sql=" + URLEncoder.encode(query, "UTF-8"));
|
||||||
|
writer.flush();
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
return request;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public GDataRequest createFusionTablesRequest(
|
||||||
|
GoogleService service, RequestType requestType, String query)
|
||||||
|
throws IOException, ServiceException {
|
||||||
|
URL url = new URL(FUSION_TABLES_SERVICE_URL + "?sql=" +
|
||||||
|
URLEncoder.encode(query, "UTF-8"));
|
||||||
|
return service.getRequestFactory().getRequest(
|
||||||
|
requestType, url, ContentType.TEXT_PLAIN);
|
||||||
|
}
|
||||||
|
|
||||||
|
static String getFusionTableKey(URL url) {
|
||||||
|
String tableId = getParamValue(url,"dsrcid");
|
||||||
|
// TODO: Any special id format considerations to worry about?
|
||||||
|
// if (tableId.startsWith("p") || !tableId.contains(".")) {
|
||||||
|
// return tableId;
|
||||||
|
// }
|
||||||
|
return tableId;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public GoogleService getFusionTablesGoogleService(String token) {
|
||||||
|
GoogleService service = new GoogleService("fusiontables", GDataExtension.SERVICE_APP_NAME);
|
||||||
|
if (token != null) {
|
||||||
|
service.setAuthSubToken(token);
|
||||||
|
}
|
||||||
|
return service;
|
||||||
|
}
|
||||||
|
|
||||||
|
static boolean isFusionTableURL(URL url) {
|
||||||
|
// http://www.google.com/fusiontables/DataSource?dsrcid=1219
|
||||||
|
String query = url.getQuery();
|
||||||
|
if (query == null) {
|
||||||
|
query = "";
|
||||||
|
}
|
||||||
|
return url.getHost().endsWith(".google.com")
|
||||||
|
&& url.getPath().startsWith("/fusiontables/DataSource")
|
||||||
|
&& query.contains("dsrcid=");
|
||||||
|
}
|
||||||
|
|
||||||
|
static public List<List<String>> parseFusionTablesResults(GDataRequest request) throws IOException {
|
||||||
|
List<List<String>> rows = new ArrayList<List<String>>();
|
||||||
|
List<String> row = null;
|
||||||
|
|
||||||
|
Scanner scanner = new Scanner(request.getResponseStream(), "UTF-8");
|
||||||
|
while (scanner.hasNextLine()) {
|
||||||
|
scanner.findWithinHorizon(CSV_VALUE_PATTERN, 0);
|
||||||
|
MatchResult match = scanner.match();
|
||||||
|
String quotedString = match.group(2);
|
||||||
|
String decoded = quotedString == null ? match.group(1) : quotedString.replaceAll("\"\"", "\"");
|
||||||
|
|
||||||
|
if (row == null) {
|
||||||
|
row = new ArrayList<String>();
|
||||||
|
}
|
||||||
|
row.add(decoded);
|
||||||
|
|
||||||
|
if (!match.group(4).equals(",")) {
|
||||||
|
if (row != null) {
|
||||||
|
rows.add(row);
|
||||||
|
row = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
scanner.close();
|
||||||
|
if (row != null) {
|
||||||
|
rows.add(row);
|
||||||
|
}
|
||||||
|
return rows;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public List<List<String>> listTables(GoogleService service) throws IOException, ServiceException {
|
||||||
|
List<List<String>> rows = runFusionTablesSelect(service, "SHOW TABLES");
|
||||||
|
// Format is id, name to which we append a link URL based on ID
|
||||||
|
if (rows.size() > 1) { // excluding headers
|
||||||
|
for (int i = 1; i < rows.size(); i++) {
|
||||||
|
List<String> row = rows.get(i);
|
||||||
|
if (row.size() >= 2) {
|
||||||
|
String id = row.get(0);
|
||||||
|
row.add("https://www.google.com/fusiontables/DataSource?dsrcid=" + id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rows;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public List<List<String>> runFusionTablesSelect(GoogleService service, String selectQuery)
|
||||||
|
throws IOException, ServiceException {
|
||||||
|
|
||||||
|
GDataRequest request = createFusionTablesRequest(service, RequestType.QUERY, selectQuery);
|
||||||
|
request.execute();
|
||||||
|
return parseFusionTablesResults(request);
|
||||||
|
}
|
||||||
|
|
||||||
|
static private String getParamValue(URL url, String key) {
|
||||||
|
String query = url.getQuery();
|
||||||
|
if (query != null) {
|
||||||
|
String[] parts = query.split("&");
|
||||||
|
for (String part : parts) {
|
||||||
|
if (part.startsWith(key+"=")) {
|
||||||
|
int offset = key.length()+1;
|
||||||
|
String tableId = part.substring(offset);
|
||||||
|
return tableId;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,298 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2010, Thomas F. Morris
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* - Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* Neither the name of Google nor the names of its contributors may be used to
|
||||||
|
* endorse or promote products derived from this software without specific
|
||||||
|
* prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||||
|
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package com.google.refine.extension.gdata;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.gdata.client.GoogleService;
|
||||||
|
import com.google.gdata.util.ServiceException;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importers.TabularImportingParserBase;
|
||||||
|
import com.google.refine.importers.TabularImportingParserBase.TableDataReader;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OpenRefine parser for Google Fusion Tables.
|
||||||
|
*
|
||||||
|
* @author Tom Morris <tfmorris@gmail.com>
|
||||||
|
* @copyright 2010 Thomas F. Morris
|
||||||
|
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
|
||||||
|
*/
|
||||||
|
public class FusionTableImporter {
|
||||||
|
static public void parse(
|
||||||
|
String token,
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
final ImportingJob job,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions) {
|
||||||
|
|
||||||
|
GoogleService service = FusionTableHandler.getFusionTablesGoogleService(token);
|
||||||
|
parse(
|
||||||
|
service,
|
||||||
|
project,
|
||||||
|
metadata,
|
||||||
|
job,
|
||||||
|
limit,
|
||||||
|
options,
|
||||||
|
exceptions
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void setProgress(ImportingJob job, String fileSource, int percent) {
|
||||||
|
JSONObject progress = JSONUtilities.getObject(job.config, "progress");
|
||||||
|
if (progress == null) {
|
||||||
|
progress = new JSONObject();
|
||||||
|
JSONUtilities.safePut(job.config, "progress", progress);
|
||||||
|
}
|
||||||
|
JSONUtilities.safePut(progress, "message", "Reading " + fileSource);
|
||||||
|
JSONUtilities.safePut(progress, "percent", percent);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static private class FusionTableBatchRowReader implements TableDataReader {
|
||||||
|
final ImportingJob job;
|
||||||
|
final String fileSource;
|
||||||
|
|
||||||
|
final GoogleService service;
|
||||||
|
final List<FTColumnData> columns;
|
||||||
|
final int batchSize;
|
||||||
|
|
||||||
|
final String baseQuery;
|
||||||
|
|
||||||
|
int nextRow = 0; // 0-based
|
||||||
|
int batchRowStart = 0; // 0-based
|
||||||
|
boolean end = false;
|
||||||
|
List<List<Object>> rowsOfCells = null;
|
||||||
|
boolean usedHeaders = false;
|
||||||
|
|
||||||
|
public FusionTableBatchRowReader(ImportingJob job, String fileSource,
|
||||||
|
GoogleService service, String tableId, List<FTColumnData> columns,
|
||||||
|
int batchSize) {
|
||||||
|
this.job = job;
|
||||||
|
this.fileSource = fileSource;
|
||||||
|
this.service = service;
|
||||||
|
this.columns = columns;
|
||||||
|
this.batchSize = batchSize;
|
||||||
|
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
sb.append("SELECT ");
|
||||||
|
|
||||||
|
boolean first = true;
|
||||||
|
for (FTColumnData cd : columns) {
|
||||||
|
if (first) {
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
sb.append(",");
|
||||||
|
}
|
||||||
|
sb.append("'");
|
||||||
|
sb.append(cd.name);
|
||||||
|
sb.append("'");
|
||||||
|
}
|
||||||
|
sb.append(" FROM ");
|
||||||
|
sb.append(tableId);
|
||||||
|
|
||||||
|
baseQuery = sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Object> getNextRowOfCells() throws IOException {
|
||||||
|
if (!usedHeaders) {
|
||||||
|
List<Object> row = new ArrayList<Object>(columns.size());
|
||||||
|
for (FTColumnData cd : columns) {
|
||||||
|
row.add(cd.name);
|
||||||
|
}
|
||||||
|
usedHeaders = true;
|
||||||
|
return row;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rowsOfCells == null || (nextRow >= batchRowStart + rowsOfCells.size() && !end)) {
|
||||||
|
int newBatchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size());
|
||||||
|
try {
|
||||||
|
rowsOfCells = getRowsOfCells(newBatchRowStart);
|
||||||
|
batchRowStart = newBatchRowStart;
|
||||||
|
|
||||||
|
GDataImporter.setProgress(job, fileSource, -1 /* batchRowStart * 100 / totalRows */);
|
||||||
|
} catch (ServiceException e) {
|
||||||
|
throw new IOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rowsOfCells != null && nextRow - batchRowStart < rowsOfCells.size()) {
|
||||||
|
return rowsOfCells.get(nextRow++ - batchRowStart);
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private List<List<Object>> getRowsOfCells(int startRow) throws IOException, ServiceException {
|
||||||
|
List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(batchSize);
|
||||||
|
|
||||||
|
String query = baseQuery + " OFFSET " + startRow + " LIMIT " + batchSize;
|
||||||
|
|
||||||
|
List<List<String>> rows = FusionTableHandler.runFusionTablesSelect(service, query);
|
||||||
|
if (rows.size() > 1) {
|
||||||
|
for (int i = 1; i < rows.size(); i++) {
|
||||||
|
List<String> row = rows.get(i);
|
||||||
|
List<Object> rowOfCells = new ArrayList<Object>(row.size());
|
||||||
|
for (int j = 0; j < row.size() && j < columns.size(); j++) {
|
||||||
|
String text = row.get(j);
|
||||||
|
if (text.isEmpty()) {
|
||||||
|
rowOfCells.add(null);
|
||||||
|
} else {
|
||||||
|
FTColumnData cd = columns.get(j);
|
||||||
|
if (cd.type == FTColumnType.NUMBER) {
|
||||||
|
try {
|
||||||
|
rowOfCells.add(Long.parseLong(text));
|
||||||
|
continue;
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
double d = Double.parseDouble(text);
|
||||||
|
if (!Double.isInfinite(d) && !Double.isNaN(d)) {
|
||||||
|
rowOfCells.add(d);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rowOfCells.add(text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rowsOfCells.add(rowOfCells);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
end = rows.size() < batchSize + 1;
|
||||||
|
return rowsOfCells;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void parse(
|
||||||
|
GoogleService service,
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
final ImportingJob job,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions) {
|
||||||
|
|
||||||
|
String docUrlString = JSONUtilities.getString(options, "docUrl", null);
|
||||||
|
String id = getFTid(docUrlString); // Use GDataExtension.getFusionTableKey(url) ?
|
||||||
|
// TODO: Allow arbitrary Fusion Tables URL instead of (in addition to?) constructing our own?
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<FTColumnData> columns = new ArrayList<FusionTableImporter.FTColumnData>();
|
||||||
|
List<List<String>> rows = FusionTableHandler.runFusionTablesSelect(service, "DESCRIBE " + id);
|
||||||
|
if (rows.size() > 1) {
|
||||||
|
for (int i = 1; i < rows.size(); i++) {
|
||||||
|
List<String> row = rows.get(i);
|
||||||
|
if (row.size() >= 2) {
|
||||||
|
FTColumnData cd = new FTColumnData();
|
||||||
|
cd.name = row.get(1);
|
||||||
|
cd.type = FTColumnType.STRING;
|
||||||
|
|
||||||
|
if (row.size() > 2) {
|
||||||
|
String type = row.get(2).toLowerCase();
|
||||||
|
if (type.equals("number")) {
|
||||||
|
cd.type = FTColumnType.NUMBER;
|
||||||
|
} else if (type.equals("datetime")) {
|
||||||
|
cd.type = FTColumnType.DATETIME;
|
||||||
|
} else if (type.equals("location")) {
|
||||||
|
cd.type = FTColumnType.LOCATION;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
columns.add(cd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
setProgress(job, docUrlString, -1);
|
||||||
|
|
||||||
|
// Force these options for the next call because each fusion table
|
||||||
|
// is strictly structured with a single line of headers.
|
||||||
|
JSONUtilities.safePut(options, "ignoreLines", 0); // number of blank lines at the beginning to ignore
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 1); // number of header lines
|
||||||
|
|
||||||
|
TabularImportingParserBase.readTable(
|
||||||
|
project,
|
||||||
|
metadata,
|
||||||
|
job,
|
||||||
|
new FusionTableBatchRowReader(job, docUrlString, service, id, columns, 100),
|
||||||
|
docUrlString,
|
||||||
|
limit,
|
||||||
|
options,
|
||||||
|
exceptions
|
||||||
|
);
|
||||||
|
setProgress(job, docUrlString, 100);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
exceptions.add(e);
|
||||||
|
} catch (ServiceException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
exceptions.add(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static private String getFTid(String url) {
|
||||||
|
if (url == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
int equal = url.lastIndexOf('=');
|
||||||
|
if (equal < 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return url.substring(equal + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum FTColumnType {
|
||||||
|
STRING,
|
||||||
|
NUMBER,
|
||||||
|
DATETIME,
|
||||||
|
LOCATION
|
||||||
|
}
|
||||||
|
|
||||||
|
final static class FTColumnData {
|
||||||
|
String name;
|
||||||
|
FTColumnType type;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,149 @@
|
|||||||
|
package com.google.refine.extension.gdata;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.gdata.client.GoogleService;
|
||||||
|
import com.google.gdata.client.Service.GDataRequest;
|
||||||
|
import com.google.gdata.client.Service.GDataRequest.RequestType;
|
||||||
|
import com.google.gdata.util.ServiceException;
|
||||||
|
|
||||||
|
import com.google.refine.exporters.TabularSerializer;
|
||||||
|
|
||||||
|
final class FusionTableSerializer implements TabularSerializer {
|
||||||
|
GoogleService service;
|
||||||
|
String tableName;
|
||||||
|
List<Exception> exceptions;
|
||||||
|
|
||||||
|
String tableId;
|
||||||
|
List<String> columnNames;
|
||||||
|
StringBuffer sbBatch;
|
||||||
|
int rows;
|
||||||
|
|
||||||
|
FusionTableSerializer(GoogleService service, String tableName, List<Exception> exceptions) {
|
||||||
|
this.service = service;
|
||||||
|
this.tableName = tableName;
|
||||||
|
this.exceptions = exceptions;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startFile(JSONObject options) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endFile() {
|
||||||
|
if (sbBatch != null) {
|
||||||
|
sendBatch();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void addRow(List<CellData> cells, boolean isHeader) {
|
||||||
|
if (isHeader) {
|
||||||
|
columnNames = new ArrayList<String>(cells.size());
|
||||||
|
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
sb.append("CREATE TABLE '");
|
||||||
|
sb.append(tableName);
|
||||||
|
sb.append("' (");
|
||||||
|
boolean first = true;
|
||||||
|
for (CellData cellData : cells) {
|
||||||
|
columnNames.add(cellData.text);
|
||||||
|
|
||||||
|
if (first) {
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
sb.append(',');
|
||||||
|
}
|
||||||
|
sb.append("'");
|
||||||
|
sb.append(cellData.text);
|
||||||
|
sb.append("': STRING");
|
||||||
|
}
|
||||||
|
sb.append(")");
|
||||||
|
|
||||||
|
try {
|
||||||
|
String createQuery = sb.toString();
|
||||||
|
|
||||||
|
GDataRequest createTableRequest = FusionTableHandler.createFusionTablesPostRequest(
|
||||||
|
service, RequestType.INSERT, createQuery);
|
||||||
|
createTableRequest.execute();
|
||||||
|
|
||||||
|
List<List<String>> createTableResults =
|
||||||
|
FusionTableHandler.parseFusionTablesResults(createTableRequest);
|
||||||
|
if (createTableResults != null && createTableResults.size() == 2) {
|
||||||
|
tableId = createTableResults.get(1).get(0);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
exceptions.add(e);
|
||||||
|
}
|
||||||
|
} else if (tableId != null) {
|
||||||
|
if (sbBatch == null) {
|
||||||
|
sbBatch = new StringBuffer();
|
||||||
|
}
|
||||||
|
formulateInsert(cells, sbBatch);
|
||||||
|
|
||||||
|
rows++;
|
||||||
|
if (rows % 20 == 0) {
|
||||||
|
sendBatch();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void sendBatch() {
|
||||||
|
try {
|
||||||
|
GDataRequest createTableRequest = FusionTableHandler.createFusionTablesPostRequest(
|
||||||
|
service, RequestType.INSERT, sbBatch.toString());
|
||||||
|
createTableRequest.execute();
|
||||||
|
} catch (IOException e) {
|
||||||
|
exceptions.add(e);
|
||||||
|
} catch (ServiceException e) {
|
||||||
|
exceptions.add(e);
|
||||||
|
} finally {
|
||||||
|
sbBatch = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void formulateInsert(List<CellData> cells, StringBuffer sb) {
|
||||||
|
StringBuffer sbColumnNames = new StringBuffer();
|
||||||
|
StringBuffer sbValues = new StringBuffer();
|
||||||
|
boolean first = true;
|
||||||
|
for (int i = 0; i < cells.size() && i < columnNames.size(); i++) {
|
||||||
|
CellData cellData = cells.get(i);
|
||||||
|
if (first) {
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
sbColumnNames.append(',');
|
||||||
|
sbValues.append(',');
|
||||||
|
}
|
||||||
|
sbColumnNames.append("'");
|
||||||
|
sbColumnNames.append(columnNames.get(i));
|
||||||
|
sbColumnNames.append("'");
|
||||||
|
|
||||||
|
sbValues.append("'");
|
||||||
|
if (cellData != null && cellData.text != null) {
|
||||||
|
sbValues.append(cellData.text.replaceAll("'", "\\\\'"));
|
||||||
|
}
|
||||||
|
sbValues.append("'");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sb.length() > 0) {
|
||||||
|
sb.append(';');
|
||||||
|
}
|
||||||
|
sb.append("INSERT INTO ");
|
||||||
|
sb.append(tableId);
|
||||||
|
sb.append("(");
|
||||||
|
sb.append(sbColumnNames.toString());
|
||||||
|
sb.append(") values (");
|
||||||
|
sb.append(sbValues.toString());
|
||||||
|
sb.append(")");
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getUrl() {
|
||||||
|
// FIXME: This base URL is no longer correct
|
||||||
|
return tableId == null || exceptions.size() > 0 ? null :
|
||||||
|
"https://www.google.com/fusiontables/DataSource?dsrcid=" + tableId;
|
||||||
|
}
|
||||||
|
}
|
@ -28,28 +28,14 @@
|
|||||||
*/
|
*/
|
||||||
package com.google.refine.extension.gdata;
|
package com.google.refine.extension.gdata;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.OutputStreamWriter;
|
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLEncoder;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Scanner;
|
|
||||||
import java.util.regex.MatchResult;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
|
||||||
import com.google.gdata.client.GoogleService;
|
|
||||||
import com.google.gdata.client.Service.GDataRequest;
|
|
||||||
import com.google.gdata.client.Service.GDataRequest.RequestType;
|
|
||||||
import com.google.gdata.client.docs.DocsService;
|
import com.google.gdata.client.docs.DocsService;
|
||||||
import com.google.gdata.client.http.AuthSubUtil;
|
import com.google.gdata.client.http.AuthSubUtil;
|
||||||
import com.google.gdata.client.spreadsheet.FeedURLFactory;
|
|
||||||
import com.google.gdata.client.spreadsheet.SpreadsheetService;
|
import com.google.gdata.client.spreadsheet.SpreadsheetService;
|
||||||
import com.google.gdata.util.ContentType;
|
|
||||||
import com.google.gdata.util.ServiceException;
|
|
||||||
|
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
@ -79,20 +65,12 @@ abstract public class GDataExtension {
|
|||||||
|
|
||||||
return AuthSubUtil.getRequestUrl(
|
return AuthSubUtil.getRequestUrl(
|
||||||
authorizedUrl.toExternalForm(), // execution continues at authorized on redirect
|
authorizedUrl.toExternalForm(), // execution continues at authorized on redirect
|
||||||
"https://docs.google.com/feeds https://spreadsheets.google.com/feeds https://www.google.com/fusiontables/api/query",
|
"https://docs.google.com/feeds https://spreadsheets.google.com/feeds",
|
||||||
false,
|
false,
|
||||||
true);
|
true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static private FeedURLFactory factory;
|
|
||||||
static public FeedURLFactory getFeedUrlFactory() {
|
|
||||||
if (factory == null) {
|
|
||||||
// Careful - this is shared by everyone.
|
|
||||||
factory = FeedURLFactory.getDefault();
|
|
||||||
}
|
|
||||||
return factory;
|
|
||||||
}
|
|
||||||
|
|
||||||
static public DocsService getDocsService(String token) {
|
static public DocsService getDocsService(String token) {
|
||||||
DocsService service = new DocsService(SERVICE_APP_NAME);
|
DocsService service = new DocsService(SERVICE_APP_NAME);
|
||||||
if (token != null) {
|
if (token != null) {
|
||||||
@ -109,83 +87,6 @@ abstract public class GDataExtension {
|
|||||||
return service;
|
return service;
|
||||||
}
|
}
|
||||||
|
|
||||||
static public GoogleService getFusionTablesGoogleService(String token) {
|
|
||||||
GoogleService service = new GoogleService("fusiontables", SERVICE_APP_NAME);
|
|
||||||
if (token != null) {
|
|
||||||
service.setAuthSubToken(token);
|
|
||||||
}
|
|
||||||
return service;
|
|
||||||
}
|
|
||||||
|
|
||||||
final static private String FUSION_TABLES_SERVICE_URL =
|
|
||||||
"https://www.google.com/fusiontables/api/query";
|
|
||||||
|
|
||||||
final static private Pattern CSV_VALUE_PATTERN =
|
|
||||||
Pattern.compile("([^,\\r\\n\"]*|\"(([^\"]*\"\")*[^\"]*)\")(,|\\r?\\n)");
|
|
||||||
|
|
||||||
static public List<List<String>> runFusionTablesSelect(GoogleService service, String selectQuery)
|
|
||||||
throws IOException, ServiceException {
|
|
||||||
|
|
||||||
GDataRequest request = createFusionTablesRequest(service, RequestType.QUERY, selectQuery);
|
|
||||||
request.execute();
|
|
||||||
return parseFusionTablesResults(request);
|
|
||||||
}
|
|
||||||
|
|
||||||
static public GDataRequest createFusionTablesRequest(
|
|
||||||
GoogleService service, RequestType requestType, String query)
|
|
||||||
throws IOException, ServiceException {
|
|
||||||
URL url = new URL(FUSION_TABLES_SERVICE_URL + "?sql=" +
|
|
||||||
URLEncoder.encode(query, "UTF-8"));
|
|
||||||
return service.getRequestFactory().getRequest(
|
|
||||||
requestType, url, ContentType.TEXT_PLAIN);
|
|
||||||
}
|
|
||||||
|
|
||||||
static public GDataRequest createFusionTablesPostRequest(
|
|
||||||
GoogleService service, RequestType requestType, String query)
|
|
||||||
throws IOException, ServiceException {
|
|
||||||
URL url = new URL(FUSION_TABLES_SERVICE_URL);
|
|
||||||
GDataRequest request = service.getRequestFactory().getRequest(
|
|
||||||
requestType, url, new ContentType("application/x-www-form-urlencoded"));
|
|
||||||
|
|
||||||
OutputStreamWriter writer =
|
|
||||||
new OutputStreamWriter(request.getRequestStream());
|
|
||||||
writer.append("sql=" + URLEncoder.encode(query, "UTF-8"));
|
|
||||||
writer.flush();
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
return request;
|
|
||||||
}
|
|
||||||
|
|
||||||
static public List<List<String>> parseFusionTablesResults(GDataRequest request) throws IOException {
|
|
||||||
List<List<String>> rows = new ArrayList<List<String>>();
|
|
||||||
List<String> row = null;
|
|
||||||
|
|
||||||
Scanner scanner = new Scanner(request.getResponseStream(), "UTF-8");
|
|
||||||
while (scanner.hasNextLine()) {
|
|
||||||
scanner.findWithinHorizon(CSV_VALUE_PATTERN, 0);
|
|
||||||
MatchResult match = scanner.match();
|
|
||||||
String quotedString = match.group(2);
|
|
||||||
String decoded = quotedString == null ? match.group(1) : quotedString.replaceAll("\"\"", "\"");
|
|
||||||
|
|
||||||
if (row == null) {
|
|
||||||
row = new ArrayList<String>();
|
|
||||||
}
|
|
||||||
row.add(decoded);
|
|
||||||
|
|
||||||
if (!match.group(4).equals(",")) {
|
|
||||||
if (row != null) {
|
|
||||||
rows.add(row);
|
|
||||||
row = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
scanner.close();
|
|
||||||
if (row != null) {
|
|
||||||
rows.add(row);
|
|
||||||
}
|
|
||||||
return rows;
|
|
||||||
}
|
|
||||||
|
|
||||||
static boolean isSpreadsheetURL(String url) {
|
static boolean isSpreadsheetURL(String url) {
|
||||||
// e.g. http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
|
// e.g. http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
|
||||||
// TODO: The following should work, but the GData implementation is too limited
|
// TODO: The following should work, but the GData implementation is too limited
|
||||||
@ -220,25 +121,5 @@ abstract public class GDataExtension {
|
|||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean isFusionTableURL(URL url) {
|
|
||||||
// http://www.google.com/fusiontables/DataSource?dsrcid=1219
|
|
||||||
String query = url.getQuery();
|
|
||||||
if (query == null) {
|
|
||||||
query = "";
|
|
||||||
}
|
|
||||||
return url.getHost().endsWith(".google.com")
|
|
||||||
&& url.getPath().startsWith("/fusiontables/DataSource")
|
|
||||||
&& query.contains("dsrcid=");
|
|
||||||
}
|
|
||||||
|
|
||||||
static String getFusionTableKey(URL url) {
|
|
||||||
String tableId = getParamValue(url,"dsrcid");
|
|
||||||
// TODO: Any special id format considerations to worry about?
|
|
||||||
// if (tableId.startsWith("p") || !tableId.contains(".")) {
|
|
||||||
// return tableId;
|
|
||||||
// }
|
|
||||||
return tableId;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -36,7 +36,6 @@ import java.util.List;
|
|||||||
|
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
|
|
||||||
import com.google.gdata.client.GoogleService;
|
|
||||||
import com.google.gdata.client.spreadsheet.CellQuery;
|
import com.google.gdata.client.spreadsheet.CellQuery;
|
||||||
import com.google.gdata.client.spreadsheet.SpreadsheetService;
|
import com.google.gdata.client.spreadsheet.SpreadsheetService;
|
||||||
import com.google.gdata.data.spreadsheet.Cell;
|
import com.google.gdata.data.spreadsheet.Cell;
|
||||||
@ -83,9 +82,7 @@ public class GDataImporter {
|
|||||||
exceptions
|
exceptions
|
||||||
);
|
);
|
||||||
} else if ("table".equals(docType)) {
|
} else if ("table".equals(docType)) {
|
||||||
GoogleService service = GDataExtension.getFusionTablesGoogleService(token);
|
FusionTableImporter.parse(token,
|
||||||
parse(
|
|
||||||
service,
|
|
||||||
project,
|
project,
|
||||||
metadata,
|
metadata,
|
||||||
job,
|
job,
|
||||||
@ -171,7 +168,7 @@ public class GDataImporter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static private void setProgress(ImportingJob job, String fileSource, int percent) {
|
static void setProgress(ImportingJob job, String fileSource, int percent) {
|
||||||
JSONObject progress = JSONUtilities.getObject(job.config, "progress");
|
JSONObject progress = JSONUtilities.getObject(job.config, "progress");
|
||||||
if (progress == null) {
|
if (progress == null) {
|
||||||
progress = new JSONObject();
|
progress = new JSONObject();
|
||||||
@ -278,214 +275,5 @@ public class GDataImporter {
|
|||||||
return rowsOfCells;
|
return rowsOfCells;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static public void parse(
|
|
||||||
GoogleService service,
|
|
||||||
Project project,
|
|
||||||
ProjectMetadata metadata,
|
|
||||||
final ImportingJob job,
|
|
||||||
int limit,
|
|
||||||
JSONObject options,
|
|
||||||
List<Exception> exceptions) {
|
|
||||||
|
|
||||||
String docUrlString = JSONUtilities.getString(options, "docUrl", null);
|
|
||||||
String id = getFTid(docUrlString); // Use GDataExtension.getFusionTableKey(url) ?
|
|
||||||
// TODO: Allow arbitrary Fusion Tables URL instead of (in addition to?) constructing our own?
|
|
||||||
|
|
||||||
try {
|
|
||||||
List<FTColumnData> columns = new ArrayList<GDataImporter.FTColumnData>();
|
|
||||||
List<List<String>> rows = GDataExtension.runFusionTablesSelect(service, "DESCRIBE " + id);
|
|
||||||
if (rows.size() > 1) {
|
|
||||||
for (int i = 1; i < rows.size(); i++) {
|
|
||||||
List<String> row = rows.get(i);
|
|
||||||
if (row.size() >= 2) {
|
|
||||||
FTColumnData cd = new FTColumnData();
|
|
||||||
cd.name = row.get(1);
|
|
||||||
cd.type = FTColumnType.STRING;
|
|
||||||
|
|
||||||
if (row.size() > 2) {
|
|
||||||
String type = row.get(2).toLowerCase();
|
|
||||||
if (type.equals("number")) {
|
|
||||||
cd.type = FTColumnType.NUMBER;
|
|
||||||
} else if (type.equals("datetime")) {
|
|
||||||
cd.type = FTColumnType.DATETIME;
|
|
||||||
} else if (type.equals("location")) {
|
|
||||||
cd.type = FTColumnType.LOCATION;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
columns.add(cd);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
setProgress(job, docUrlString, -1);
|
|
||||||
|
|
||||||
// Force these options for the next call because each fusion table
|
|
||||||
// is strictly structured with a single line of headers.
|
|
||||||
JSONUtilities.safePut(options, "ignoreLines", 0); // number of blank lines at the beginning to ignore
|
|
||||||
JSONUtilities.safePut(options, "headerLines", 1); // number of header lines
|
|
||||||
|
|
||||||
TabularImportingParserBase.readTable(
|
|
||||||
project,
|
|
||||||
metadata,
|
|
||||||
job,
|
|
||||||
new FusionTableBatchRowReader(job, docUrlString, service, id, columns, 100),
|
|
||||||
docUrlString,
|
|
||||||
limit,
|
|
||||||
options,
|
|
||||||
exceptions
|
|
||||||
);
|
|
||||||
setProgress(job, docUrlString, 100);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
exceptions.add(e);
|
|
||||||
} catch (ServiceException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
exceptions.add(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static private String getFTid(String url) {
|
|
||||||
if (url == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
int equal = url.lastIndexOf('=');
|
|
||||||
if (equal < 0) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return url.substring(equal + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Column data types recognized for a Fusion Table column.
static private enum FTColumnType {
    // Text column.
    STRING,
    // Numeric column.
    NUMBER,
    // Date/time column.
    DATETIME,
    // Location column.
    LOCATION
}
|
|
||||||
|
|
||||||
// Plain holder describing one Fusion Table column.
final static private class FTColumnData {
    // Column name.
    String name;
    // Column data type.
    FTColumnType type;
}
|
|
||||||
|
|
||||||
static private class FusionTableBatchRowReader implements TableDataReader {
|
|
||||||
final ImportingJob job;
|
|
||||||
final String fileSource;
|
|
||||||
|
|
||||||
final GoogleService service;
|
|
||||||
final List<FTColumnData> columns;
|
|
||||||
final int batchSize;
|
|
||||||
|
|
||||||
final String baseQuery;
|
|
||||||
|
|
||||||
int nextRow = 0; // 0-based
|
|
||||||
int batchRowStart = 0; // 0-based
|
|
||||||
boolean end = false;
|
|
||||||
List<List<Object>> rowsOfCells = null;
|
|
||||||
boolean usedHeaders = false;
|
|
||||||
|
|
||||||
public FusionTableBatchRowReader(ImportingJob job, String fileSource,
|
|
||||||
GoogleService service, String tableId, List<FTColumnData> columns,
|
|
||||||
int batchSize) {
|
|
||||||
this.job = job;
|
|
||||||
this.fileSource = fileSource;
|
|
||||||
this.service = service;
|
|
||||||
this.columns = columns;
|
|
||||||
this.batchSize = batchSize;
|
|
||||||
|
|
||||||
StringBuffer sb = new StringBuffer();
|
|
||||||
sb.append("SELECT ");
|
|
||||||
|
|
||||||
boolean first = true;
|
|
||||||
for (FTColumnData cd : columns) {
|
|
||||||
if (first) {
|
|
||||||
first = false;
|
|
||||||
} else {
|
|
||||||
sb.append(",");
|
|
||||||
}
|
|
||||||
sb.append("'");
|
|
||||||
sb.append(cd.name);
|
|
||||||
sb.append("'");
|
|
||||||
}
|
|
||||||
sb.append(" FROM ");
|
|
||||||
sb.append(tableId);
|
|
||||||
|
|
||||||
baseQuery = sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<Object> getNextRowOfCells() throws IOException {
|
|
||||||
if (!usedHeaders) {
|
|
||||||
List<Object> row = new ArrayList<Object>(columns.size());
|
|
||||||
for (FTColumnData cd : columns) {
|
|
||||||
row.add(cd.name);
|
|
||||||
}
|
|
||||||
usedHeaders = true;
|
|
||||||
return row;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rowsOfCells == null || (nextRow >= batchRowStart + rowsOfCells.size() && !end)) {
|
|
||||||
int newBatchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size());
|
|
||||||
try {
|
|
||||||
rowsOfCells = getRowsOfCells(newBatchRowStart);
|
|
||||||
batchRowStart = newBatchRowStart;
|
|
||||||
|
|
||||||
setProgress(job, fileSource, -1 /* batchRowStart * 100 / totalRows */);
|
|
||||||
} catch (ServiceException e) {
|
|
||||||
throw new IOException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rowsOfCells != null && nextRow - batchRowStart < rowsOfCells.size()) {
|
|
||||||
return rowsOfCells.get(nextRow++ - batchRowStart);
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private List<List<Object>> getRowsOfCells(int startRow) throws IOException, ServiceException {
|
|
||||||
List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(batchSize);
|
|
||||||
|
|
||||||
String query = baseQuery + " OFFSET " + startRow + " LIMIT " + batchSize;
|
|
||||||
|
|
||||||
List<List<String>> rows = GDataExtension.runFusionTablesSelect(service, query);
|
|
||||||
if (rows.size() > 1) {
|
|
||||||
for (int i = 1; i < rows.size(); i++) {
|
|
||||||
List<String> row = rows.get(i);
|
|
||||||
List<Object> rowOfCells = new ArrayList<Object>(row.size());
|
|
||||||
for (int j = 0; j < row.size() && j < columns.size(); j++) {
|
|
||||||
String text = row.get(j);
|
|
||||||
if (text.isEmpty()) {
|
|
||||||
rowOfCells.add(null);
|
|
||||||
} else {
|
|
||||||
FTColumnData cd = columns.get(j);
|
|
||||||
if (cd.type == FTColumnType.NUMBER) {
|
|
||||||
try {
|
|
||||||
rowOfCells.add(Long.parseLong(text));
|
|
||||||
continue;
|
|
||||||
} catch (NumberFormatException e) {
|
|
||||||
// ignore
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
double d = Double.parseDouble(text);
|
|
||||||
if (!Double.isInfinite(d) && !Double.isNaN(d)) {
|
|
||||||
rowOfCells.add(d);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} catch (NumberFormatException e) {
|
|
||||||
// ignore
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rowOfCells.add(text);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
rowsOfCells.add(rowOfCells);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
end = rows.size() < batchSize + 1;
|
|
||||||
return rowsOfCells;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -125,7 +125,7 @@ public class GDataImportingController implements ImportingController {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
listSpreadsheets(GDataExtension.getDocsService(token), writer);
|
listSpreadsheets(GDataExtension.getDocsService(token), writer);
|
||||||
listFusionTables(GDataExtension.getFusionTablesGoogleService(token), writer);
|
listFusionTables(FusionTableHandler.getFusionTablesGoogleService(token), writer);
|
||||||
} catch (ServiceException e) {
|
} catch (ServiceException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
@ -170,14 +170,14 @@ public class GDataImportingController implements ImportingController {
|
|||||||
private void listFusionTables(GoogleService service, JSONWriter writer)
|
private void listFusionTables(GoogleService service, JSONWriter writer)
|
||||||
throws IOException, ServiceException, JSONException {
|
throws IOException, ServiceException, JSONException {
|
||||||
|
|
||||||
List<List<String>> rows = GDataExtension.runFusionTablesSelect(service, "SHOW TABLES");
|
List<List<String>> rows = FusionTableHandler.listTables(service);
|
||||||
if (rows.size() > 1) { // excluding headers
|
if (rows.size() > 1) { // excluding headers
|
||||||
for (int i = 1; i < rows.size(); i++) {
|
for (int i = 1; i < rows.size(); i++) {
|
||||||
List<String> row = rows.get(i);
|
List<String> row = rows.get(i);
|
||||||
if (row.size() >= 2) {
|
if (row.size() >= 2) {
|
||||||
String id = row.get(0);
|
String id = row.get(0);
|
||||||
String name = row.get(1);
|
String name = row.get(1);
|
||||||
String link = "https://www.google.com/fusiontables/DataSource?dsrcid=" + id;
|
String link = row.get(2);
|
||||||
|
|
||||||
writer.object();
|
writer.object();
|
||||||
writer.key("docId"); writer.value(id);
|
writer.key("docId"); writer.value(id);
|
||||||
|
@ -33,7 +33,6 @@ import java.io.IOException;
|
|||||||
import java.io.Writer;
|
import java.io.Writer;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
@ -48,8 +47,6 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.gdata.client.GoogleService;
|
import com.google.gdata.client.GoogleService;
|
||||||
import com.google.gdata.client.Service.GDataRequest;
|
|
||||||
import com.google.gdata.client.Service.GDataRequest.RequestType;
|
|
||||||
import com.google.gdata.client.docs.DocsService;
|
import com.google.gdata.client.docs.DocsService;
|
||||||
import com.google.gdata.client.spreadsheet.CellQuery;
|
import com.google.gdata.client.spreadsheet.CellQuery;
|
||||||
import com.google.gdata.client.spreadsheet.SpreadsheetService;
|
import com.google.gdata.client.spreadsheet.SpreadsheetService;
|
||||||
@ -295,142 +292,12 @@ public class UploadCommand extends Command {
|
|||||||
static private String uploadFusionTable(
|
static private String uploadFusionTable(
|
||||||
Project project, final Engine engine, final Properties params,
|
Project project, final Engine engine, final Properties params,
|
||||||
String token, String name, List<Exception> exceptions) {
|
String token, String name, List<Exception> exceptions) {
|
||||||
GoogleService service = GDataExtension.getFusionTablesGoogleService(token);
|
GoogleService service = FusionTableHandler.getFusionTablesGoogleService(token);
|
||||||
FusionTableSerializer serializer = new FusionTableSerializer(service, name, exceptions);
|
FusionTableSerializer serializer = new FusionTableSerializer(service, name, exceptions);
|
||||||
|
|
||||||
CustomizableTabularExporterUtilities.exportRows(
|
CustomizableTabularExporterUtilities.exportRows(
|
||||||
project, engine, params, serializer);
|
project, engine, params, serializer);
|
||||||
|
|
||||||
return serializer.tableId == null || exceptions.size() > 0 ? null :
|
return serializer.getUrl();
|
||||||
"https://www.google.com/fusiontables/DataSource?dsrcid=" + serializer.tableId;
|
|
||||||
}
|
|
||||||
|
|
||||||
final static private class FusionTableSerializer implements TabularSerializer {
|
|
||||||
GoogleService service;
|
|
||||||
String tableName;
|
|
||||||
List<Exception> exceptions;
|
|
||||||
|
|
||||||
String tableId;
|
|
||||||
List<String> columnNames;
|
|
||||||
StringBuffer sbBatch;
|
|
||||||
int rows;
|
|
||||||
|
|
||||||
FusionTableSerializer(GoogleService service, String tableName, List<Exception> exceptions) {
|
|
||||||
this.service = service;
|
|
||||||
this.tableName = tableName;
|
|
||||||
this.exceptions = exceptions;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void startFile(JSONObject options) {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void endFile() {
|
|
||||||
if (sbBatch != null) {
|
|
||||||
sendBatch();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void addRow(List<CellData> cells, boolean isHeader) {
|
|
||||||
if (isHeader) {
|
|
||||||
columnNames = new ArrayList<String>(cells.size());
|
|
||||||
|
|
||||||
StringBuffer sb = new StringBuffer();
|
|
||||||
sb.append("CREATE TABLE '");
|
|
||||||
sb.append(tableName);
|
|
||||||
sb.append("' (");
|
|
||||||
boolean first = true;
|
|
||||||
for (CellData cellData : cells) {
|
|
||||||
columnNames.add(cellData.text);
|
|
||||||
|
|
||||||
if (first) {
|
|
||||||
first = false;
|
|
||||||
} else {
|
|
||||||
sb.append(',');
|
|
||||||
}
|
|
||||||
sb.append("'");
|
|
||||||
sb.append(cellData.text);
|
|
||||||
sb.append("': STRING");
|
|
||||||
}
|
|
||||||
sb.append(")");
|
|
||||||
|
|
||||||
try {
|
|
||||||
String createQuery = sb.toString();
|
|
||||||
|
|
||||||
GDataRequest createTableRequest = GDataExtension.createFusionTablesPostRequest(
|
|
||||||
service, RequestType.INSERT, createQuery);
|
|
||||||
createTableRequest.execute();
|
|
||||||
|
|
||||||
List<List<String>> createTableResults =
|
|
||||||
GDataExtension.parseFusionTablesResults(createTableRequest);
|
|
||||||
if (createTableResults != null && createTableResults.size() == 2) {
|
|
||||||
tableId = createTableResults.get(1).get(0);
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
exceptions.add(e);
|
|
||||||
}
|
|
||||||
} else if (tableId != null) {
|
|
||||||
if (sbBatch == null) {
|
|
||||||
sbBatch = new StringBuffer();
|
|
||||||
}
|
|
||||||
formulateInsert(cells, sbBatch);
|
|
||||||
|
|
||||||
rows++;
|
|
||||||
if (rows % 20 == 0) {
|
|
||||||
sendBatch();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void sendBatch() {
|
|
||||||
try {
|
|
||||||
GDataRequest createTableRequest = GDataExtension.createFusionTablesPostRequest(
|
|
||||||
service, RequestType.INSERT, sbBatch.toString());
|
|
||||||
createTableRequest.execute();
|
|
||||||
} catch (IOException e) {
|
|
||||||
exceptions.add(e);
|
|
||||||
} catch (ServiceException e) {
|
|
||||||
exceptions.add(e);
|
|
||||||
} finally {
|
|
||||||
sbBatch = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void formulateInsert(List<CellData> cells, StringBuffer sb) {
|
|
||||||
StringBuffer sbColumnNames = new StringBuffer();
|
|
||||||
StringBuffer sbValues = new StringBuffer();
|
|
||||||
boolean first = true;
|
|
||||||
for (int i = 0; i < cells.size() && i < columnNames.size(); i++) {
|
|
||||||
CellData cellData = cells.get(i);
|
|
||||||
if (first) {
|
|
||||||
first = false;
|
|
||||||
} else {
|
|
||||||
sbColumnNames.append(',');
|
|
||||||
sbValues.append(',');
|
|
||||||
}
|
|
||||||
sbColumnNames.append("'");
|
|
||||||
sbColumnNames.append(columnNames.get(i));
|
|
||||||
sbColumnNames.append("'");
|
|
||||||
|
|
||||||
sbValues.append("'");
|
|
||||||
if (cellData != null && cellData.text != null) {
|
|
||||||
sbValues.append(cellData.text.replaceAll("'", "\\\\'"));
|
|
||||||
}
|
|
||||||
sbValues.append("'");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sb.length() > 0) {
|
|
||||||
sb.append(';');
|
|
||||||
}
|
|
||||||
sb.append("INSERT INTO ");
|
|
||||||
sb.append(tableId);
|
|
||||||
sb.append("(");
|
|
||||||
sb.append(sbColumnNames.toString());
|
|
||||||
sb.append(") values (");
|
|
||||||
sb.append(sbValues.toString());
|
|
||||||
sb.append(")");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user