Initial Fusion Tables implementation
git-svn-id: http://google-refine.googlecode.com/svn/trunk@1889 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
6a68fc9da7
commit
acbed0c1ba
@ -32,10 +32,17 @@ import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.Scanner;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.google.gdata.client.GoogleService;
|
||||
import com.google.gdata.client.Service.GDataRequest;
|
||||
import com.google.gdata.client.Service.GDataRequest.RequestType;
|
||||
import com.google.gdata.client.spreadsheet.CellQuery;
|
||||
import com.google.gdata.client.spreadsheet.FeedURLFactory;
|
||||
import com.google.gdata.client.spreadsheet.SpreadsheetService;
|
||||
@ -47,6 +54,7 @@ import com.google.gdata.data.spreadsheet.SpreadsheetEntry;
|
||||
import com.google.gdata.data.spreadsheet.SpreadsheetFeed;
|
||||
import com.google.gdata.data.spreadsheet.WorksheetEntry;
|
||||
import com.google.gdata.data.spreadsheet.WorksheetFeed;
|
||||
import com.google.gdata.util.ContentType;
|
||||
import com.google.gdata.util.InvalidEntryException;
|
||||
import com.google.gdata.util.ServiceException;
|
||||
import com.google.refine.ProjectMetadata;
|
||||
@ -72,22 +80,14 @@ public class GDataImporter implements UrlImporter {
|
||||
private FeedURLFactory factory;
|
||||
|
||||
/**
 * Creates the importer and caches the default spreadsheet feed URL factory.
 */
public GDataImporter() {
    // Careful - this constructor is called at server init time and the
    // resulting instance is shared by everyone, so nothing request- or
    // user-specific may be stored here.
    this.factory = FeedURLFactory.getDefault();
}
|
||||
|
||||
@Override
|
||||
public void read(URL url, Project project, ProjectMetadata metadata,
|
||||
Properties options) throws Exception {
|
||||
// Start fresh for each read so that we're not caching authorization or
|
||||
// anything
|
||||
SpreadsheetService service = new SpreadsheetService(
|
||||
SERVICE_APP_NAME);
|
||||
|
||||
// String token = TokenCookie.getToken(request);
|
||||
// if (token != null) {
|
||||
// service.setAuthSubToken(token);
|
||||
// }
|
||||
|
||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
||||
@ -103,7 +103,31 @@ public class GDataImporter implements UrlImporter {
|
||||
// TODO: Put this in a namespace?
|
||||
metadata.setCustomMetadata("source-url", url.toExternalForm());
|
||||
|
||||
String spreadsheetKey = getKey(url);
|
||||
// Start fresh for each read so that we're not caching authorization or
|
||||
// anything
|
||||
if (isSpreadsheetURL(url)) {
|
||||
importSpreadsheet(url, project, ignoreLines, headerLines, limit,
|
||||
dataStart, guessValueType);
|
||||
} else if (isFusionTableURL(url)) {
|
||||
importFusionTable(url, project, ignoreLines, headerLines, limit,
|
||||
dataStart, guessValueType);
|
||||
} else {
|
||||
// should never happen (famous last words)
|
||||
throw new IllegalArgumentException(
|
||||
"Got invalid format URL in GDataImporter.read()");
|
||||
}
|
||||
}
|
||||
|
||||
private void importSpreadsheet(URL url, Project project, int ignoreLines,
|
||||
int headerLines, int limit, int dataStart, boolean guessValueType)
|
||||
throws MalformedURLException, IOException, ServiceException,
|
||||
Exception {
|
||||
SpreadsheetService service = new SpreadsheetService(SERVICE_APP_NAME);
|
||||
// String token = TokenCookie.getToken(request);
|
||||
// if (token != null) {
|
||||
// service.setAuthSubToken(token);
|
||||
// }
|
||||
String spreadsheetKey = getSpreadsheetKey(url);
|
||||
WorksheetEntry worksheet;
|
||||
try {
|
||||
worksheet = getWorksheetEntries(service, spreadsheetKey).get(0);
|
||||
@ -191,6 +215,96 @@ public class GDataImporter implements UrlImporter {
|
||||
}
|
||||
}
|
||||
|
||||
private void importFusionTable(URL url, Project project, int ignoreLines,
|
||||
int headerLines, int limit, int dataStart, boolean guessValueType)
|
||||
throws MalformedURLException, IOException, ServiceException,
|
||||
Exception {
|
||||
GoogleService service = new GoogleService("fusiontables", SERVICE_APP_NAME);
|
||||
// String token = TokenCookie.getToken(request);
|
||||
// if (token != null) {
|
||||
// service.setAuthSubToken(token);
|
||||
// }
|
||||
String tableId = getFusionTableKey(url);
|
||||
|
||||
final String SERVICE_URL =
|
||||
"http://www.google.com/fusiontables/api/query";
|
||||
final String selectQuery = "select * from " + tableId
|
||||
+ " offset " + (dataStart) + (limit>0 ? (" limit " + limit):"");
|
||||
|
||||
URL queryUrl = new URL(
|
||||
SERVICE_URL + "?sql=" + URLEncoder.encode(selectQuery, "UTF-8"));
|
||||
GDataRequest queryRequest = service.getRequestFactory().getRequest(
|
||||
RequestType.QUERY, queryUrl, ContentType.TEXT_PLAIN);
|
||||
queryRequest.execute();
|
||||
|
||||
Scanner scanner = new Scanner(queryRequest.getResponseStream(),"UTF-8");
|
||||
|
||||
// TODO: Just use the first row of data as column headers for now
|
||||
List<String> columnHeaders = getTableRow(scanner);
|
||||
|
||||
// Create columns
|
||||
int columnCount = columnHeaders.size();
|
||||
project.columnModel.setMaxCellIndex(columnCount);
|
||||
boolean validColumn[] = new boolean[columnCount];
|
||||
int index = 0;
|
||||
for (String name : columnHeaders) {
|
||||
Column column = new Column(index, name + " " + index);
|
||||
project.columnModel.columns.add(column);
|
||||
validColumn[index++] = true;
|
||||
}
|
||||
for (int i = index; index < columnCount; index++) {
|
||||
Column column = new Column(index, "Column " + index);
|
||||
project.columnModel.columns.add(column);
|
||||
validColumn[i] = true;
|
||||
}
|
||||
|
||||
// Create data rows & cells
|
||||
List<String> values = columnHeaders;
|
||||
while (values != null) {
|
||||
Row row = new Row(columnCount);
|
||||
for (String valString : values) {
|
||||
valString = valString.trim();
|
||||
if (ExpressionUtils.isNonBlankData(valString)) {
|
||||
Serializable value = guessValueType ? ImporterUtilities
|
||||
.parseCellValue(valString) : valString;
|
||||
row.cells.add(new Cell(value, null));
|
||||
} else {
|
||||
row.cells.add(null);
|
||||
}
|
||||
project.rows.add(row);
|
||||
values = getTableRow(scanner);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private List<String> getTableRow(Scanner scanner) {
|
||||
/**
|
||||
* CSV values are terminated by comma or end-of-line and consist either of
|
||||
* plain text without commas or quotes, or a quoted expression, where inner
|
||||
* quotes are escaped by doubling.
|
||||
*/
|
||||
final Pattern CSV_VALUE_PATTERN =
|
||||
Pattern.compile("([^,\\r\\n\"]*|\"(([^\"]*\"\")*[^\"]*)\")(,|\\r?\\n)");
|
||||
|
||||
if (!scanner.hasNextLine()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<String> result = new ArrayList<String>();
|
||||
while (scanner.hasNextLine()) {
|
||||
scanner.findWithinHorizon(CSV_VALUE_PATTERN, 0);
|
||||
MatchResult match = scanner.match();
|
||||
String quotedString = match.group(2);
|
||||
String decoded = quotedString == null ? match.group(1)
|
||||
: quotedString.replaceAll("\"\"", "\"");
|
||||
result.add(decoded);
|
||||
if (!match.group(4).equals(",")) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the spreadsheets that an authenticated user has access to. Not
|
||||
* valid for unauthenticated access.
|
||||
@ -296,17 +410,23 @@ public class GDataImporter implements UrlImporter {
|
||||
|
||||
@Override
|
||||
public boolean canImportData(URL url) {
|
||||
// http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
|
||||
if (url.getHost().endsWith(".google.com")) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return isSpreadsheetURL(url) | isFusionTableURL(url);
|
||||
}
|
||||
|
||||
private boolean isSpreadsheetURL(URL url) {
|
||||
String host = url.getHost();
|
||||
// http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
|
||||
return host.endsWith(".google.com") && host.contains("spreadsheet");
|
||||
}
|
||||
|
||||
private boolean isFusionTableURL(URL url) {
|
||||
// http://www.google.com/fusiontables/DataSource?dsrcid=1219
|
||||
return url.getHost().endsWith(".google.com")
|
||||
&& url.getPath().startsWith("/fusiontables/");
|
||||
}
|
||||
|
||||
// Modified version of FeedURLFactory.getSpreadsheetKeyFromUrl()
|
||||
private String getKey(URL url) {
|
||||
private String getSpreadsheetKey(URL url) {
|
||||
String query = url.getQuery();
|
||||
if (query != null) {
|
||||
String[] parts = query.split("&");
|
||||
@ -342,4 +462,22 @@ public class GDataImporter implements UrlImporter {
|
||||
return null;
|
||||
}
|
||||
|
||||
private String getFusionTableKey(URL url) {
|
||||
String query = url.getQuery();
|
||||
if (query != null) {
|
||||
String[] parts = query.split("&");
|
||||
for (String part : parts) {
|
||||
if (part.startsWith("dsrcid=")) {
|
||||
int offset = ("dsrcid=").length();
|
||||
String tableId = part.substring(offset);
|
||||
// TODO: Any special id format considerations to worry about?
|
||||
// if (tableId.startsWith("p") || !tableId.contains(".")) {
|
||||
// return tableId;
|
||||
// }
|
||||
return tableId;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user