Issue 334 - tighten up URL pattern matching for Google Spreadsheets & Fusion Tables

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2006 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
Tom Morris 2011-02-14 22:23:48 +00:00
parent 9384d22d85
commit e72d590a31

View File

@@ -415,14 +415,23 @@ public class GDataImporter implements UrlImporter {
/**
 * Tests whether a URL looks like a link to a Google Spreadsheets document.
 *
 * <p>A URL qualifies when its host ends with {@code .google.com}, the host
 * contains the text {@code spreadsheet}, and the query string contains
 * {@code key=}. Example:
 * {@code http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en}
 *
 * <p>The query check is a plain substring match, so any parameter whose name
 * merely ends in {@code key} also satisfies it.
 *
 * @param url the URL to inspect; must not be null
 * @return {@code true} if the host and query string match the pattern above
 */
private boolean isSpreadsheetURL(URL url) {
    String host = url.getHost();
    String query = url.getQuery();
    if (query == null) {
        // getQuery() yields null for a URL without a query part; normalise
        // to the empty string so the contains() check below cannot throw.
        query = "";
    }
    return host.endsWith(".google.com")
            && host.contains("spreadsheet")
            && query.contains("key=");
}
private boolean isFusionTableURL(URL url) {
// http://www.google.com/fusiontables/DataSource?dsrcid=1219
String query = url.getQuery();
if (query == null) {
query = "";
}
return url.getHost().endsWith(".google.com")
&& url.getPath().startsWith("/fusiontables/");
&& url.getPath().startsWith("/fusiontables/DataSource")
&& query.contains("dsrcid=");
}
// Modified version of FeedURLFactory.getSpreadsheetKeyFromUrl()