Google spreadsheets can now be imported directly from within Refine.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2192 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2011-08-11 00:35:01 +00:00
parent c42382f3ae
commit 823729776d
29 changed files with 1117 additions and 510 deletions

View File

@ -50,9 +50,6 @@ function init() {
// Register importer and exporter
var IM = Packages.com.google.refine.importing.ImportingManager;
IM.registerFormat("service/gdata", "GData services"); // generic format, no parser to handle it
IM.registerFormat("service/gdata/spreadsheet", "Google spreadsheets", false, "GoogleSpreadsheetParserUI",
new Packages.com.google.refine.extension.gdata.GDataImporter());
IM.registerUrlRewriter(new Packages.com.google.refine.extension.gdata.GDataUrlRewriter())
IM.registerUrlRewriter(new Packages.com.google.refine.extension.gdata.FusionTablesUrlRewriter())
@ -71,7 +68,8 @@ function init() {
"index/scripts",
module,
[
"scripts/index/importing-controller.js"
"scripts/index/importing-controller.js",
"scripts/index/gdata-source-ui.js"
]
);
// Style files to inject into /index page

View File

@ -0,0 +1,40 @@
<!--
  Parsing-options panel for importing a Google spreadsheet.
  Elements carrying a bind="..." attribute are looked up by that name from
  script (via DOM.bind), so the bind names below must stay in sync with the
  controller code that reads them.
-->
<!-- Header bar: start over, project name, create project -->
<div bind="wizardHeader" class="gdata-importing-wizard-header"><div class="grid-layout layout-tightest layout-full"><table><tr>
<td width="1%"><button bind="startOverButton" class="button">&laquo; Start Over</button></td>
<td width="98%">Configure Parsing Options</td>
<td style="text-align: right;">Project&nbsp;name</td>
<td width="1%"><input class="inline" type="text" size="30" bind="projectNameInput" /></td>
<td width="1%"><button bind="createProjectButton" class="button button-primary">Create Project &raquo;</button></td>
</tr></table></div></div>
<!-- Preview area; filled in from script -->
<div bind="dataPanel" class="gdata-importing-parsing-data-panel"></div>
<!-- Shown in place of the preview while it is being refreshed -->
<div bind="progressPanel" class="gdata-importing-progress-data-panel">
<img src="images/large-spinner.gif" /> Updating preview ...
</div>
<!-- Bottom panel: worksheet list (rows added from script) and parsing options -->
<div bind="controlPanel" class="gdata-importing-parsing-control-panel"><div class="grid-layout layout-normal"><table>
<tr>
<td>Worksheets</td>
<td colspan="2">Options</td>
<td rowspan="2"><button class="button" bind="previewButton">Update&nbsp;Preview</button></td>
</tr>
<tr>
<td rowspan="2" width="40%"><div class="grid-layout layout-tightest"><table bind="sheetRecordContainer"></table></div></td>
<td colspan="2"><div class="grid-layout layout-tightest"><table>
<tr><td width="1%"><input type="checkbox" bind="ignoreCheckbox" /></td><td>Ignore first</td>
<td><input bind="ignoreInput" type="text" class="lightweight" size="2" value="0" /> line(s) at beginning of file</td></tr>
<tr><td width="1%"><input type="checkbox" bind="headerLinesCheckbox" /></td><td>Parse next</td>
<td><input bind="headerLinesInput" type="text" class="lightweight" size="2" value="1" /> line(s) as column headers</td></tr>
<tr><td width="1%"><input type="checkbox" bind="skipCheckbox" /></td><td>Discard initial</td>
<td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" /></td>
<td colspan="2">Store blank rows</td></tr>
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" /></td>
<td colspan="2">Store blank cells as nulls</td></tr>
</table></div></td>
</tr>
</table></div></div>

View File

@ -0,0 +1,153 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * Source-selection UI for picking a Google document to import.
 *
 * Installs a window "message" listener at construction time. Each time a
 * message arrives, the presence of the 'authsub_token' cookie decides whether
 * the document listing is loaded or the sign-in page is shown.
 *
 * @param controller the importing controller; kept so that a clicked document
 *        can be handed to controller.startImportingDocument
 */
Refine.GDataSourceUI = function(controller) {
  var ui = this;
  ui._controller = controller;

  // NOTE(review): _body and _elmts are only assigned in attachUI; a message
  // delivered before attachUI runs would fail here. Confirm the call order.
  var onMessage = function(evt) {
    if (!$.cookie('authsub_token')) {
      ui._body.find('.gdata-page').hide();
      ui._elmts.signinPage.show();
      return;
    }
    ui._listDocuments();
  };

  window.addEventListener("message", onMessage, false);
};
/**
 * Renders the Google Data import form into the given container and wires up
 * the sign-in button.
 *
 * The sign-in page is shown first; if an 'authsub_token' cookie is already
 * present, the document listing is requested straight away.
 *
 * @param body jQuery-wrapped container element to render into
 */
Refine.GDataSourceUI.prototype.attachUI = function(body) {
  var container = body;
  this._body = container;
  container.html(DOM.loadHTML("gdata", "scripts/index/import-from-gdata-form.html"));
  this._elmts = DOM.bind(container);

  // Authorization happens in a separate popup window.
  var openSigninWindow = function() {
    window.open(
      "/command/gdata/authorize",
      "google-refine-gdata-signin",
      "resizable=1,width=600,height=450"
    );
  };
  container.find('.gdata-signin.button').click(openSigninWindow);

  container.find('.gdata-page').hide();
  this._elmts.signinPage.show();

  var alreadySignedIn = $.cookie('authsub_token');
  if (alreadySignedIn) {
    this._listDocuments();
  }
};
// Focus hook for this source UI; currently does nothing.
// Presumably part of the interface the create-project UI expects from every
// source-selection UI -- TODO confirm against the caller.
Refine.GDataSourceUI.prototype.focus = function() {
};
/**
 * Switches to the progress page and asks the gdata importing controller on
 * the server for the list of documents ("list-documents" sub-command).
 * The JSON response is passed to _renderDocuments.
 */
Refine.GDataSourceUI.prototype._listDocuments = function() {
  var ui = this;

  ui._body.find('.gdata-page').hide();
  ui._elmts.progressPage.show();

  var query = $.param({
    "controller": "gdata/gdata-importing-controller",
    "subCommand": "list-documents"
  });
  var onListed = function(listing) {
    ui._renderDocuments(listing);
  };

  $.post("/command/core/importing-controller?" + query, null, onListed, "json");
};
/**
 * Rebuilds the document listing table and shows the listing page.
 *
 * One row is added per entry in o.documents with four cells: a star icon
 * (only when doc.isStarred), the title link plus a "preview" link, the
 * comma-joined authors, and the relative "updated" date (only when present).
 * Clicking the title hands the document to the controller for importing.
 *
 * @param o server response; o.documents is the array of document records
 */
Refine.GDataSourceUI.prototype._renderDocuments = function(o) {
  var ui = this;
  var listing = ui._elmts.listingContainer;
  listing.empty();

  var headerCells = '<th></th>' + // starred
    '<th>Title</th>' +
    '<th>Authors</th>' +
    '<th>Updated</th>';
  var table = $('<table><tr>' + headerCells + '</tr></table>').appendTo(listing)[0];

  // Appends a new cell at the end of the given row.
  var appendCell = function(row) {
    return row.insertCell(row.cells.length);
  };

  var addRow = function(doc) {
    var row = table.insertRow(table.rows.length);

    var starCell = appendCell(row);
    if (doc.isStarred) {
      $('<img>').attr('src', 'images/star.png').appendTo(starCell);
    }

    var titleCell = appendCell(row);
    $('<a>')
      .addClass('gdata-doc-title')
      .attr('href', 'javascript:{}')
      .text(doc.title)
      .appendTo(titleCell)
      .click(function(evt) {
        ui._controller.startImportingDocument(doc);
      });
    $('<a>')
      .addClass('gdata-doc-preview')
      .attr('href', doc.docLink)
      .attr('target', '_blank')
      .text('preview')
      .appendTo(titleCell);

    var authorsCell = appendCell(row);
    $('<span>').text(doc.authors.join(', ')).appendTo(authorsCell);

    var updatedCell = appendCell(row);
    if (doc.updated) {
      $('<span>')
        .addClass('gdata-doc-date')
        .text(formatRelativeDate(doc.updated))
        .attr('title', doc.updated)
        .appendTo(updatedCell);
    }
  };

  for (var index = 0; index < o.documents.length; index++) {
    addRow(o.documents[index]);
  }

  ui._body.find('.gdata-page').hide();
  ui._elmts.listingPage.show();
};

View File

@ -33,6 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Refine.GDataImportingController = function(createProjectUI) {
this._createProjectUI = createProjectUI;
this._parsingPanel = createProjectUI.addCustomPanel();
createProjectUI.addSourceSelectionUI({
label: "Google Data",
@ -42,138 +44,320 @@ Refine.GDataImportingController = function(createProjectUI) {
};
Refine.CreateProjectUI.controllers.push(Refine.GDataImportingController);
/**
 * Source-selection UI for picking a Google document to import.
 *
 * Installs a window "message" listener. Messages are only honored when their
 * origin equals this page's own origin. A message of the form
 * "gdata:authsub_token=<non-empty>" triggers loading the document listing;
 * any other same-origin message switches back to the sign-in page.
 *
 * @param controller the importing controller that owns this UI
 */
Refine.GDataSourceUI = function(controller) {
  this._controller = controller;

  var self = this;
  window.addEventListener(
    "message",
    function(evt) {
      // Derive "scheme://host[:port]" from the current location.
      var url = document.location.href;
      var slash = url.indexOf('/', url.indexOf('//') + 2);
      var origin = url.substring(0, slash);
      if (origin == evt.origin) {
        var prefix = 'gdata:authsub_token=';
        if (evt.data.startsWith(prefix) && evt.data.length > prefix.length) {
          self._listDocuments();
        } else {
          // Must go through `self`: inside this listener `this` is not the
          // GDataSourceUI instance, so `this._body` would be undefined.
          self._body.find('.gdata-page').hide();
          self._elmts.signinPage.show();
        }
      }
    },
    false);
};
/**
 * Renders the Google Data import form into the given container, shows the
 * sign-in page, and wires the sign-in button to open the authorization popup.
 *
 * @param body jQuery-wrapped container element to render into
 */
Refine.GDataSourceUI.prototype.attachUI = function(body) {
  var container = body;
  this._body = container;
  container.html(DOM.loadHTML("gdata", "scripts/index/import-from-gdata-form.html"));
  this._elmts = DOM.bind(container);

  container.find('.gdata-page').hide();
  this._elmts.signinPage.show();

  // Authorization happens in a separate popup window.
  var openSigninWindow = function() {
    window.open(
      "/command/gdata/authorize",
      "google-refine-gdata-signin",
      "resizable=1,width=600,height=450"
    );
  };
  container.find('.gdata-signin.button').click(openSigninWindow);
};
// Focus hook for this source UI; currently does nothing.
// Presumably part of the interface the create-project UI expects from every
// source-selection UI -- TODO confirm against the caller.
Refine.GDataSourceUI.prototype.focus = function() {
};
Refine.GDataSourceUI.prototype._listDocuments = function() {
this._body.find('.gdata-page').hide();
this._elmts.progressPage.show();
Refine.GDataImportingController.prototype.startImportingDocument = function(doc) {
var dismiss = DialogSystem.showBusy("Preparing importing job ...");
var self = this;
$.post(
"/command/core/importing-controller?" + $.param({
"controller": "gdata/gdata-importing-controller",
"subCommand": "list-documents"
}),
"/command/core/create-importing-job",
null,
function(o) {
self._renderDocuments(o);
function(data) {
$.post(
"/command/core/importing-controller?" + $.param({
"controller": "gdata/gdata-importing-controller",
"subCommand": "initialize-parser-ui",
"docUrl": doc.docSelfLink
}),
null,
function(data2) {
dismiss();
if (data2.status == 'ok') {
self._doc = doc;
self._jobID = data.jobID;
self._options = data2.options;
self._showParsingPanel();
} else {
alert(data2.message);
}
},
"json"
);
},
"json"
);
};
Refine.GDataSourceUI.prototype._renderDocuments = function(o) {
this._elmts.listingContainer.empty();
var table = $(
'<table><tr>' +
'<th></th>' + // starred
'<th>Title</th>' +
'<th>Authors</th>' +
'<th>Last Edited</th>' +
'<th>Last Viewed</th>' +
'</tr></table>'
).appendTo(this._elmts.listingContainer)[0];
var renderDocument = function(doc) {
var tr = table.insertRow(table.rows.length);
var td = tr.insertCell(tr.cells.length);
if (doc.isStarred) {
$('<img>').attr('src', 'images/star.png').appendTo(td);
}
td = tr.insertCell(tr.cells.length);
var title = $('<a>')
.addClass('gdata-doc-title')
.attr('href', 'javascript:{}')
.text(doc.title)
.appendTo(td);
$('<a>')
.addClass('gdata-doc-preview')
.attr('href', doc.docLink)
.attr('target', '_blank')
.text('preview')
.appendTo(td);
td = tr.insertCell(tr.cells.length);
$('<span>')
.text(doc.authors.join(', '))
.appendTo(td);
td = tr.insertCell(tr.cells.length);
$('<span>')
.addClass('gdata-doc-date')
.text(formatRelativeDate(doc.edited))
.attr('title', doc.edited)
.appendTo(td);
var alreadyViewed = false;
td = tr.insertCell(tr.cells.length);
if (doc.lastViewed) {
if (new Date(doc.lastViewed).getTime() - new Date(doc.edited).getTime() > -60000) {
alreadyViewed = true;
}
$('<span>')
.addClass('gdata-doc-date')
.text(formatRelativeDate(doc.lastViewed))
.attr('title', doc.lastViewed)
.appendTo(td);
}
if (!alreadyViewed) {
title.addClass('gdata-doc-unread');
}
Refine.GDataImportingController.prototype.getOptions = function() {
var options = {
docUrl: this._doc.docSelfLink,
sheetUrl: this._sheetUrl
};
for (var i = 0; i < o.documents.length; i++) {
renderDocument(o.documents[i]);
var parseIntDefault = function(s, def) {
try {
var n = parseInt(s);
if (!isNaN(n)) {
return n;
}
} catch (e) {
// Ignore
}
return def;
};
this._parsingPanelElmts.sheetRecordContainer.find('input').each(function() {
if (this.checked) {
options.sheetUrl = this.getAttribute('sheetUrl');
}
});
if (this._parsingPanelElmts.ignoreCheckbox[0].checked) {
options.ignoreLines = parseIntDefault(this._parsingPanelElmts.ignoreInput[0].value, -1);
} else {
options.ignoreLines = -1;
}
if (this._parsingPanelElmts.headerLinesCheckbox[0].checked) {
options.headerLines = parseIntDefault(this._parsingPanelElmts.headerLinesInput[0].value, 0);
} else {
options.headerLines = 0;
}
if (this._parsingPanelElmts.skipCheckbox[0].checked) {
options.skipDataLines = parseIntDefault(this._parsingPanelElmts.skipInput[0].value, 0);
} else {
options.skipDataLines = 0;
}
if (this._parsingPanelElmts.limitCheckbox[0].checked) {
options.limit = parseIntDefault(this._parsingPanelElmts.limitInput[0].value, -1);
} else {
options.limit = -1;
}
options.storeBlankRows = this._parsingPanelElmts.storeBlankRowsCheckbox[0].checked;
options.storeBlankCellsAsNulls = this._parsingPanelElmts.storeBlankCellsAsNullsCheckbox[0].checked;
return options;
};
/**
 * Builds and shows the parsing-options panel for the document being imported.
 *
 * Reads this._doc, this._jobID and this._options, loads the panel markup,
 * installs a window-resize layout handler, wires the header buttons, lists
 * the document's worksheets as a radio group, pre-fills the option controls
 * from this._options, and finally requests a first preview.
 */
Refine.GDataImportingController.prototype._showParsingPanel = function() {
  var self = this;

  this._parsingPanel.unbind().empty().html(
      DOM.loadHTML("gdata", "scripts/index/gdata-parsing-panel.html"));
  this._parsingPanelElmts = DOM.bind(this._parsingPanel);

  // Drop the resize handler left over from a previous invocation so that
  // handlers do not accumulate on the window.
  if (this._parsingPanelResizer) {
    $(window).unbind('resize', this._parsingPanelResizer);
  }

  // Lays out the three absolutely positioned panels: the data/progress panels
  // share the area under the header, the control panel is pinned to the bottom.
  this._parsingPanelResizer = function() {
    var elmts = self._parsingPanelElmts;
    var width = self._parsingPanel.width();
    var height = self._parsingPanel.height();
    var headerHeight = elmts.wizardHeader.outerHeight(true);
    var controlPanelHeight = 250;

    elmts.dataPanel
      .css("left", "0px")
      .css("top", headerHeight + "px")
      .css("width", (width - DOM.getHPaddings(elmts.dataPanel)) + "px")
      .css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.dataPanel)) + "px");
    elmts.progressPanel
      .css("left", "0px")
      .css("top", headerHeight + "px")
      .css("width", (width - DOM.getHPaddings(elmts.progressPanel)) + "px")
      .css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.progressPanel)) + "px");
    elmts.controlPanel
      .css("left", "0px")
      .css("top", (height - controlPanelHeight) + "px")
      .css("width", (width - DOM.getHPaddings(elmts.controlPanel)) + "px")
      .css("height", (controlPanelHeight - DOM.getVPaddings(elmts.controlPanel)) + "px");
  };
  $(window).resize(this._parsingPanelResizer);
  this._parsingPanelResizer();

  this._parsingPanelElmts.startOverButton.click(function() {
    // explicitly cancel the import job
    $.post("/command/core/cancel-importing-job?" + $.param({ "jobID": self._jobID }));

    delete self._doc;
    delete self._jobID;
    delete self._options;

    self._createProjectUI.showSourceSelectionPanel();
  });
  this._parsingPanelElmts.createProjectButton.click(function() { self._createProject(); });
  this._parsingPanelElmts.previewButton.click(function() { self._updatePreview(); });

  this._parsingPanelElmts.projectNameInput[0].value = this._doc.title;

  var sheetTable = this._parsingPanelElmts.sheetRecordContainer[0];
  $.each(this._options.worksheets, function(i, sheet) {
    var tr = sheetTable.insertRow(sheetTable.rows.length);
    var td0 = $(tr.insertCell(0)).attr('width', '1%');
    // All radios share one name so the browser treats them as a single
    // mutually exclusive group; without it several could be checked at once.
    var radio = $('<input>')
      .attr('type', 'radio')
      .attr('name', 'gdata-importing-parsing-worksheet')
      .attr('sheetUrl', sheet.link)
      .appendTo(td0);
    if (i === 0) {
      // The first worksheet is selected by default.
      radio.attr('checked', 'true');
    }

    $(tr.insertCell(1)).text(sheet.name);
    $(tr.insertCell(2)).text(sheet.rows + ' rows');
  });

  // Pre-fill the option controls; each checkbox is only ticked when the
  // corresponding option carries a positive / truthy value.
  if (this._options.ignoreLines > 0) {
    this._parsingPanelElmts.ignoreCheckbox.attr("checked", "checked");
    this._parsingPanelElmts.ignoreInput[0].value = this._options.ignoreLines.toString();
  }
  if (this._options.headerLines > 0) {
    this._parsingPanelElmts.headerLinesCheckbox.attr("checked", "checked");
    this._parsingPanelElmts.headerLinesInput[0].value = this._options.headerLines.toString();
  }
  if (this._options.limit > 0) {
    this._parsingPanelElmts.limitCheckbox.attr("checked", "checked");
    this._parsingPanelElmts.limitInput[0].value = this._options.limit.toString();
  }
  if (this._options.skipDataLines > 0) {
    this._parsingPanelElmts.skipCheckbox.attr("checked", "checked");
    this._parsingPanelElmts.skipInput[0].value = this._options.skipDataLines.toString();
  }
  if (this._options.storeBlankRows) {
    this._parsingPanelElmts.storeBlankRowsCheckbox.attr("checked", "checked");
  }
  if (this._options.storeBlankCellsAsNulls) {
    this._parsingPanelElmts.storeBlankCellsAsNullsCheckbox.attr("checked", "checked");
  }

  // Any change to an input or select schedules a (debounced) preview refresh.
  var onChange = function() {
    self._scheduleUpdatePreview();
  };
  this._parsingPanel.find("input").bind("change", onChange);
  this._parsingPanel.find("select").bind("change", onChange);

  this._createProjectUI.showCustomPanel(this._parsingPanel);
  this._updatePreview();
};
/**
 * Debounces preview refreshes: cancels any pending refresh timer and starts a
 * new one that calls _updatePreview after half a second.
 */
Refine.GDataImportingController.prototype._scheduleUpdatePreview = function() {
  var controller = this;

  var pending = controller._timerID;
  if (pending != null) {
    window.clearTimeout(pending);
    controller._timerID = null;
  }

  var fire = function() {
    controller._timerID = null;
    controller._updatePreview();
  };
  controller._timerID = window.setTimeout(fire, 500); // 0.5 second
};
/**
 * Re-parses the current import job with the options from getOptions() and
 * refreshes the preview table.
 *
 * The data panel is swapped for the progress panel while the "parse-preview"
 * sub-command runs. On a result with code "ok" the preview data is fetched
 * and rendered; otherwise the progress panel is hidden and the reported
 * errors are shown in an alert.
 */
Refine.GDataImportingController.prototype._updatePreview = function() {
  var controller = this;

  controller._parsingPanelElmts.dataPanel.hide();
  controller._parsingPanelElmts.progressPanel.show();

  var renderPreview = function(projectData) {
    controller._parsingPanelElmts.progressPanel.hide();
    controller._parsingPanelElmts.dataPanel.show();

    new Refine.PreviewTable(projectData, controller._parsingPanelElmts.dataPanel.unbind().empty());
  };

  var onParsed = function(result) {
    if (result.code != "ok") {
      controller._parsingPanelElmts.progressPanel.hide();
      alert('Errors:\n' + result.errors.join('\n'));
      return;
    }
    controller._getPreviewData(renderPreview);
  };

  var query = $.param({
    "controller": "gdata/gdata-importing-controller",
    "jobID": controller._jobID,
    "subCommand": "parse-preview"
  });
  var payload = {
    "options" : JSON.stringify(controller.getOptions())
  };

  $.post("/command/core/importing-controller?" + query, payload, onParsed, "json");
};
/**
 * Fetches the models and then the first rows of the current import job and
 * passes them to the callback as one object.
 *
 * The object holds every own property of the get-models response plus a
 * rowModel property containing the get-rows response.
 *
 * @param callback function invoked with the combined preview data
 * @param numRows  optional row limit; any falsy value falls back to 100
 */
Refine.GDataImportingController.prototype._getPreviewData = function(callback, numRows) {
  var controller = this;
  var preview = {};

  // Second step: load the rows, then hand everything to the caller.
  var fetchRows = function() {
    var rowsQuery = $.param({
      "importingJobID" : controller._jobID,
      "start" : 0,
      "limit" : numRows || 100 // More than we parse for preview anyway
    });
    $.post(
      "/command/core/get-rows?" + rowsQuery,
      null,
      function(rows) {
        preview.rowModel = rows;
        callback(preview);
      },
      "jsonp"
    );
  };

  // First step: load the models and copy them into the result.
  var onModels = function(models) {
    for (var key in models) {
      if (models.hasOwnProperty(key)) {
        preview[key] = models[key];
      }
    }
    fetchRows();
  };

  $.post(
    "/command/core/get-models?" + $.param({ "importingJobID" : controller._jobID }),
    null,
    onModels,
    "json"
  );
};
/**
 * Creates the project from the current import job.
 *
 * Requires a non-blank project name (otherwise alerts and refocuses the name
 * input). Posts the "create-project" sub-command with the current options,
 * then polls the job once per second through the create-project UI. When the
 * job config contains a projectID the browser navigates to that project; on
 * failure the job's error and details are alerted. The progress panel's
 * cancel callback stops the polling, cancels the job on the server and
 * returns to the source selection panel.
 */
Refine.GDataImportingController.prototype._createProject = function() {
  var projectName = $.trim(this._parsingPanelElmts.projectNameInput[0].value);
  if (projectName.length === 0) {
    window.alert("Please name the project.");
    this._parsingPanelElmts.projectNameInput.focus();
    return;
  }

  var self = this;
  // Captured once so the cancel callback below has a job id to refer to
  // (it previously referenced an undeclared `jobID` variable).
  var jobID = this._jobID;
  var options = this.getOptions();
  options.projectName = projectName;

  $.post(
    "/command/core/importing-controller?" + $.param({
      "controller": "gdata/gdata-importing-controller",
      "jobID": jobID,
      "subCommand": "create-project"
    }),
    {
      "options" : JSON.stringify(options)
    },
    function() {},
    "json"
  );

  var start = new Date();
  var timerID = window.setInterval(
    function() {
      self._createProjectUI.pollImportJob(
        start,
        jobID,
        timerID,
        // Done once the server has recorded the new project's id.
        function(job) {
          return "projectID" in job.config;
        },
        function(jobID, job) {
          document.location = "project?project=" + job.config.projectID;
        },
        function(job) {
          alert(job.config.error + '\n' + job.config.errorDetails);
        }
      );
    },
    1000
  );

  this._createProjectUI.showImportProgressPanel("Creating project ...", function() {
    // stop the timed polling
    window.clearInterval(timerID);

    // explicitly cancel the import job
    $.post("/command/core/cancel-importing-job?" + $.param({ "jobID": jobID }));

    self._createProjectUI.showSourceSelectionPanel();
  });
};

View File

@ -64,3 +64,33 @@ a.gdata-doc-preview:hover {
.gdata-doc-date {
color: @metadata_grey;
}
/* Header bar of the parsing panel (start over / project name / create). */
.gdata-importing-wizard-header {
font-size: 1.3em;
background: @chrome_primary;
padding: @padding_tight;
}
/* Preview area; absolutely positioned, scrolls when the content overflows. */
.gdata-importing-parsing-data-panel {
font-size: 1.1em;
position: absolute;
overflow: auto;
}
/* "Updating preview" panel; semi-transparent white, centered text. */
.gdata-importing-progress-data-panel {
position: absolute;
overflow: auto;
font-size: 200%;
padding: 3em;
background: rgba(255, 255, 255, 0.7);
text-align: center;
}
/* Worksheet list and parsing options; absolutely positioned like the others. */
.gdata-importing-parsing-control-panel {
font-size: 1.3em;
position: absolute;
overflow: auto;
border-top: 5px solid @chrome_primary;
background: white;
padding: @padding_looser;
}

View File

@ -33,7 +33,7 @@ public class AuthorizeCommand extends Command {
String requestUrl = AuthSubUtil.getRequestUrl(
authorizedUrl.toExternalForm(), // execution continues at authorized on redirect
"http://docs.google.com/feeds", // Scope must be http, not https
"https://docs.google.com/feeds https://spreadsheets.google.com/feeds",
false,
true);
response.sendRedirect(requestUrl);

View File

@ -42,15 +42,13 @@ import com.google.gdata.data.spreadsheet.Cell;
import com.google.gdata.data.spreadsheet.CellEntry;
import com.google.gdata.data.spreadsheet.CellFeed;
import com.google.gdata.data.spreadsheet.SpreadsheetEntry;
import com.google.gdata.data.spreadsheet.SpreadsheetFeed;
import com.google.gdata.data.spreadsheet.WorksheetEntry;
import com.google.gdata.data.spreadsheet.WorksheetFeed;
import com.google.gdata.util.ServiceException;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importers.TabularImportingParserBase.TableDataReader;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
@ -61,84 +59,128 @@ import com.google.refine.util.JSONUtilities;
* @copyright 2010 Thomas F. Morris
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
*/
public class GDataImporter extends TabularImportingParserBase {
public GDataImporter() {
super(false);
}
public void parseOneFile(
public class GDataImporter {
static public void parse(
SpreadsheetService service,
Project project,
ProjectMetadata metadata,
ImportingJob job,
JSONObject fileRecord,
final ImportingJob job,
int limit,
JSONObject options,
List<Exception> exceptions
) throws IOException {
String fileSource = ImportingUtilities.getFileSource(fileRecord);
String urlString = JSONUtilities.getString(fileRecord, "url", null);
URL url = new URL(urlString);
SpreadsheetService service = new SpreadsheetService(GDataExtension.SERVICE_APP_NAME);
// String token = TokenCookie.getToken(request);
// if (token != null) {
// service.setAuthSubToken(token);
// }
String spreadsheetKey = getSpreadsheetKey(url);
List<Exception> exceptions) {
int[] sheets = JSONUtilities.getIntArray(options, "sheets");
for (int sheetIndex : sheets) {
WorksheetEntry worksheet;
String docUrlString = JSONUtilities.getString(options, "docUrl", null);
String worksheetUrlString = JSONUtilities.getString(options, "sheetUrl", null);
if (docUrlString != null && worksheetUrlString != null) {
try {
worksheet = getWorksheetEntries(service, spreadsheetKey).get(sheetIndex);
} catch (ServiceException e) {
parseOneWorkSheet(
service,
project,
metadata,
job,
new URL(docUrlString),
new URL(worksheetUrlString),
limit,
options,
exceptions);
} catch (MalformedURLException e) {
e.printStackTrace();
exceptions.add(e);
continue;
}
}
}
static public void parseOneWorkSheet(
SpreadsheetService service,
Project project,
ProjectMetadata metadata,
final ImportingJob job,
URL docURL,
URL worksheetURL,
int limit,
JSONObject options,
List<Exception> exceptions) {
try {
SpreadsheetEntry spreadsheetEntry = service.getEntry(docURL, SpreadsheetEntry.class);
WorksheetEntry worksheetEntry = service.getEntry(worksheetURL, WorksheetEntry.class);
readTable(
String fileSource = spreadsheetEntry.getTitle().getPlainText() + " # " +
worksheetEntry.getTitle().getPlainText();
setProgress(job, fileSource, 0);
TabularImportingParserBase.readTable(
project,
metadata,
job,
new BatchRowReader(service, worksheet, 20),
fileSource + "#" + worksheet.getTitle().getPlainText(),
new BatchRowReader(job, fileSource, service, worksheetEntry, 20),
fileSource,
limit,
options,
exceptions
);
setProgress(job, fileSource, 100);
} catch (IOException e) {
e.printStackTrace();
exceptions.add(e);
} catch (ServiceException e) {
e.printStackTrace();
exceptions.add(e);
}
}
/**
 * Records import progress in the job's configuration.
 *
 * Ensures {@code job.config} has a "progress" object (creating and attaching
 * one when absent) and stores a "Reading &lt;fileSource&gt;" message and the
 * given percentage in it.
 *
 * @param job        the importing job whose config is updated
 * @param fileSource label of the source being read, used in the message
 * @param percent    progress value stored under "percent"
 */
static private void setProgress(ImportingJob job, String fileSource, int percent) {
    JSONObject progressRecord = JSONUtilities.getObject(job.config, "progress");
    if (progressRecord == null) {
        progressRecord = new JSONObject();
        JSONUtilities.safePut(job.config, "progress", progressRecord);
    }

    final String message = "Reading " + fileSource;
    JSONUtilities.safePut(progressRecord, "message", message);
    JSONUtilities.safePut(progressRecord, "percent", percent);
}
static private class BatchRowReader implements TableDataReader {
final int batchSize;
final ImportingJob job;
final String fileSource;
final SpreadsheetService service;
final WorksheetEntry worksheet;
final int totalRowCount;
final int batchSize;
final int totalRows;
int nextRow = 0; // 0-based
int batchRowStart = -1; // 0-based
int batchRowStart = 0; // 0-based
List<List<Object>> rowsOfCells = null;
public BatchRowReader(SpreadsheetService service, WorksheetEntry worksheet, int batchSize) {
public BatchRowReader(ImportingJob job, String fileSource,
SpreadsheetService service, WorksheetEntry worksheet,
int batchSize) {
this.job = job;
this.fileSource = fileSource;
this.service = service;
this.worksheet = worksheet;
this.batchSize = batchSize;
this.totalRowCount = worksheet.getRowCount();
this.totalRows = worksheet.getRowCount();
}
@Override
public List<Object> getNextRowOfCells() throws IOException {
if (rowsOfCells == null || nextRow > batchRowStart + rowsOfCells.size()) {
batchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size());
if (batchRowStart < totalRowCount) {
try {
rowsOfCells = getRowsOfCells(service, worksheet, batchRowStart + 1, batchSize);
} catch (ServiceException e) {
rowsOfCells = null;
throw new IOException(e);
}
} else {
rowsOfCells = null;
if (rowsOfCells == null || (nextRow >= batchRowStart + rowsOfCells.size() && nextRow < totalRows)) {
int newBatchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size());
try {
rowsOfCells = getRowsOfCells(
service,
worksheet,
newBatchRowStart + 1, // convert to 1-based
batchSize);
batchRowStart = newBatchRowStart;
setProgress(job, fileSource, batchRowStart * 100 / totalRows);
} catch (ServiceException e) {
throw new IOException(e);
}
}
@ -150,32 +192,6 @@ public class GDataImporter extends TabularImportingParserBase {
}
}
/**
 * Lists the spreadsheets visible to an authenticated user by fetching the
 * spreadsheets feed. Not valid for unauthenticated access.
 *
 * @param service the spreadsheet service used to fetch the feed
 * @return the entries of the spreadsheets feed
 * @throws Exception
 *             if the spreadsheet information cannot be retrieved
 */
static public List<SpreadsheetEntry> getSpreadsheetEntries(
    SpreadsheetService service
) throws Exception {
    return service
        .getFeed(
            GDataExtension.getFeedUrlFactory().getSpreadsheetsFeedUrl(),
            SpreadsheetFeed.class)
        .getEntries();
}
/**
 * Lists the worksheets of one spreadsheet by fetching its worksheet feed.
 * The feed URL is built from the spreadsheet key together with the fixed
 * arguments "public" and "values".
 *
 * @param service        the spreadsheet service used to fetch the feed
 * @param spreadsheetKey key identifying the spreadsheet
 * @return the entries of the worksheet feed
 */
static public List<WorksheetEntry> getWorksheetEntries(
    SpreadsheetService service, String spreadsheetKey
) throws MalformedURLException, IOException, ServiceException {
    return service
        .getFeed(
            GDataExtension.getFeedUrlFactory().getWorksheetFeedUrl(spreadsheetKey, "public", "values"),
            WorksheetFeed.class)
        .getEntries();
}
static public List<List<Object>> getRowsOfCells(
SpreadsheetService service,
WorksheetEntry worksheet,
@ -183,11 +199,11 @@ public class GDataImporter extends TabularImportingParserBase {
int rowCount
) throws IOException, ServiceException {
URL cellFeedUrl = worksheet.getCellFeedUrl();
int minRow = Math.max(1, startRow);
int minRow = startRow;
int maxRow = Math.min(worksheet.getRowCount(), startRow + rowCount - 1);
int rows = maxRow - minRow + 1;
int cols = worksheet.getColCount();
int rows = worksheet.getRowCount();
CellQuery cellQuery = new CellQuery(cellFeedUrl);
cellQuery.setMinimumRow(minRow);
@ -199,59 +215,24 @@ public class GDataImporter extends TabularImportingParserBase {
CellFeed cellFeed = service.query(cellQuery, CellFeed.class);
List<CellEntry> cellEntries = cellFeed.getEntries();
List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(rows);
List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(rowCount);
for (CellEntry cellEntry : cellEntries) {
Cell cell = cellEntry.getCell();
int row = cell.getRow();
int col = cell.getCol();
while (row > rowsOfCells.size()) {
rowsOfCells.add(new ArrayList<Object>(cols));
if (cell != null) {
int row = cell.getRow() - startRow;
int col = cell.getCol() - 1;
while (row >= rowsOfCells.size()) {
rowsOfCells.add(new ArrayList<Object>());
}
List<Object> rowOfCells = rowsOfCells.get(row);
while (col >= rowOfCells.size()) {
rowOfCells.add(null);
}
rowOfCells.set(col, cell.getValue());
}
List<Object> rowOfCells = rowsOfCells.get(row - 1); // 1-based
while (col > rowOfCells.size()) {
rowOfCells.add(null);
}
rowOfCells.set(col - 1, cell.getValue());
}
return rowsOfCells;
}
// Modified version of FeedURLFactory.getSpreadsheetKeyFromUrl()
//
// Extracts a spreadsheet key from the query string of the given URL.
// Only the first "id=" or "key=" parameter found is considered:
//  - "key=" whose value starts with "p" or contains no "." is returned as is;
//  - otherwise the value must split on "." into exactly 2 parts ("key=") or
//    4 parts ("id="), and the first two parts joined by "." are returned.
// Returns null when there is no query string, no such parameter, or the
// number of dotted parts does not match.
private String getSpreadsheetKey(URL url) {
String query = url.getQuery();
if (query != null) {
String[] parts = query.split("&");
// offset stays -1 until an "id=" or "key=" parameter has been seen
int offset = -1;
// number of dot-separated parts the value is expected to have
int numParts = 0;
String keyOrId = "";
for (String part : parts) {
if (part.startsWith("id=")) {
offset = ("id=").length();
keyOrId = part.substring(offset);
numParts = 4;
break;
} else if (part.startsWith("key=")) {
offset = ("key=").length();
keyOrId = part.substring(offset);
// this form of key is used verbatim
if (keyOrId.startsWith("p") || !keyOrId.contains(".")) {
return keyOrId;
}
numParts = 2;
break;
}
}
if (offset > -1) {
String[] dottedParts = keyOrId.split("\\.");
if (dottedParts.length == numParts) {
return dottedParts[0] + "." + dottedParts[1];
}
}
}
return null;
}
}

View File

@ -36,28 +36,37 @@ package com.google.refine.extension.gdata;
import java.io.IOException;
import java.io.Writer;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.gdata.client.DocumentQuery;
import com.google.gdata.client.Query;
import com.google.gdata.client.docs.DocsService;
import com.google.gdata.data.Category;
import com.google.gdata.client.spreadsheet.SpreadsheetService;
import com.google.gdata.data.DateTime;
import com.google.gdata.data.Person;
import com.google.gdata.data.docs.DocumentListEntry;
import com.google.gdata.data.docs.DocumentListFeed;
import com.google.gdata.data.spreadsheet.SpreadsheetEntry;
import com.google.gdata.data.spreadsheet.SpreadsheetFeed;
import com.google.gdata.data.spreadsheet.WorksheetEntry;
import com.google.gdata.util.ServiceException;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.RefineServlet;
import com.google.refine.commands.HttpUtilities;
import com.google.refine.importing.ImportingController;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingManager;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
public class GDataImportingController implements ImportingController {
@ -83,6 +92,12 @@ public class GDataImportingController implements ImportingController {
String subCommand = parameters.getProperty("subCommand");
if ("list-documents".equals(subCommand)) {
doListDocuments(request, response, parameters);
} else if ("initialize-parser-ui".equals(subCommand)) {
doInitializeParserUI(request, response, parameters);
} else if ("parse-preview".equals(subCommand)) {
doParsePreview(request, response, parameters);
} else if ("create-project".equals(subCommand)) {
doCreateProject(request, response, parameters);
} else {
HttpUtilities.respond(response, "error", "No such sub command");
}
@ -106,30 +121,19 @@ public class GDataImportingController implements ImportingController {
try {
DocsService service = getDocsService(token);
DocumentQuery query = new DocumentQuery(
new URL("https://docs.google.com/feeds/default/private/full"));
query.addCategoryFilter(new Query.CategoryFilter(new Category(
"http://schemas.google.com/g/2005#kind",
"http://schemas.google.com/docs/2007#spreadsheet")));
query.setMaxResults(100);
DocumentListFeed feed = service.getFeed(query, DocumentListFeed.class);
for (DocumentListEntry entry : feed.getEntries()) {
URL metafeedUrl = new URL("https://spreadsheets.google.com/feeds/spreadsheets/private/full");
SpreadsheetFeed feed = service.getFeed(metafeedUrl, SpreadsheetFeed.class);
for (SpreadsheetEntry entry : feed.getEntries()) {
writer.object();
writer.key("docId"); writer.value(entry.getDocId());
writer.key("docLink"); writer.value(entry.getDocumentLink().getHref());
writer.key("docId"); writer.value(entry.getId());
writer.key("docLink"); writer.value(entry.getHtmlLink().getHref());
writer.key("docSelfLink"); writer.value(entry.getSelfLink().getHref());
writer.key("title"); writer.value(entry.getTitle().getPlainText());
writer.key("isViewed"); writer.value(entry.isViewed());
writer.key("isStarred"); writer.value(entry.isStarred());
DateTime edited = entry.getEdited();
if (edited != null) {
writer.key("edited"); writer.value(edited.toStringRfc822());
}
DateTime lastViewed = entry.getLastViewed();
if (lastViewed != null) {
writer.key("lastViewed"); writer.value(lastViewed.toStringRfc822());
DateTime updated = entry.getUpdated();
if (updated != null) {
writer.key("updated"); writer.value(updated.toStringRfc822());
}
writer.key("authors"); writer.array();
@ -155,9 +159,191 @@ public class GDataImportingController implements ImportingController {
}
}
/**
 * Handles the "initialize-parser-ui" sub-command.
 *
 * Responds with a JSON object holding the default parsing options together with
 * the list of worksheets (name, row count, self link) found in the spreadsheet
 * whose URL is passed in the "docUrl" parameter. Responds with an "error" status
 * if the request carries no token or the spreadsheet service call fails.
 */
private void doInitializeParserUI(
        HttpServletRequest request, HttpServletResponse response, Properties parameters)
        throws ServletException, IOException {

    final String authToken = TokenCookie.getToken(request);
    if (authToken == null) {
        HttpUtilities.respond(response, "error", "Not authorized");
        return;
    }

    SpreadsheetService spreadsheets = getSpreadsheetService(authToken);
    try {
        // Default parsing options offered to the client.
        JSONObject parserOptions = new JSONObject();
        JSONUtilities.safePut(parserOptions, "ignoreLines", -1); // number of blank lines at the beginning to ignore
        JSONUtilities.safePut(parserOptions, "headerLines", 1); // number of header lines
        JSONUtilities.safePut(parserOptions, "skipDataLines", 0); // number of initial data lines to skip
        JSONUtilities.safePut(parserOptions, "storeBlankRows", true);
        JSONUtilities.safePut(parserOptions, "storeBlankCellsAsNulls", true);

        JSONArray worksheetList = new JSONArray();
        JSONUtilities.safePut(parserOptions, "worksheets", worksheetList);

        // Describe every worksheet of the requested spreadsheet.
        URL docUrl = new URL(parameters.getProperty("docUrl"));
        SpreadsheetEntry spreadsheet = spreadsheets.getEntry(docUrl, SpreadsheetEntry.class);
        for (WorksheetEntry sheet : spreadsheet.getWorksheets()) {
            JSONObject sheetJson = new JSONObject();
            JSONUtilities.safePut(sheetJson, "name", sheet.getTitle().getPlainText());
            JSONUtilities.safePut(sheetJson, "rows", sheet.getRowCount());
            JSONUtilities.safePut(sheetJson, "link", sheet.getSelfLink().getHref());
            JSONUtilities.append(worksheetList, sheetJson);
        }

        JSONObject reply = new JSONObject();
        JSONUtilities.safePut(reply, "status", "ok");
        JSONUtilities.safePut(reply, "options", parserOptions);
        HttpUtilities.respond(response, reply.toString());
    } catch (ServiceException e) {
        e.printStackTrace();
        HttpUtilities.respond(response, "error", "Internal error: " + e.getLocalizedMessage());
    }
}
/**
 * Handles the "parse-preview" sub-command.
 *
 * Parses the spreadsheet described by the "options" request parameter into the
 * job's preview project (the call passes 100 as the limit argument) and writes a
 * JSON object with "code" set to "ok", or to "error" plus an "errors" array of
 * the messages collected during parsing.
 */
private void doParsePreview(
        HttpServletRequest request, HttpServletResponse response, Properties parameters)
        throws ServletException, IOException {

    final String authToken = TokenCookie.getToken(request);
    if (authToken == null) {
        HttpUtilities.respond(response, "error", "Not authorized");
        return;
    }

    SpreadsheetService spreadsheets = getSpreadsheetService(authToken);

    ImportingJob job = ImportingManager.getJob(Long.parseLong(parameters.getProperty("jobID")));
    if (job == null) {
        HttpUtilities.respond(response, "error", "No such import job");
        return;
    }

    Writer out = null;
    try {
        // This is for setting progress during the parsing process.
        job.config = new JSONObject();

        JSONObject parserOptions = ParsingUtilities.evaluateJsonStringToObject(
            request.getParameter("options"));
        List<Exception> problems = new LinkedList<Exception>();

        job.prepareNewProject();
        GDataImporter.parse(
            spreadsheets, job.project, job.metadata, job, 100, parserOptions, problems);

        out = response.getWriter();
        JSONWriter json = new JSONWriter(out);

        json.object();
        if (problems.isEmpty()) {
            job.project.update(); // update all internal models, indexes, caches, etc.
            json.key("code").value("ok");
        } else {
            json.key("code").value("error");
            json.key("errors").array();
            for (Exception problem : problems) {
                json.value(problem.getLocalizedMessage());
            }
            json.endArray();
        }
        json.endObject();
    } catch (JSONException e) {
        throw new ServletException(e);
    } finally {
        // Only flush/close once the response writer has actually been obtained.
        if (out != null) {
            out.flush();
            out.close();
        }
    }
}
/**
 * Handles the "create-project" sub-command.
 *
 * Starts a background thread that imports the spreadsheet described by the
 * "options" request parameter into a new project, then immediately responds
 * "ok". The outcome is published through job.config for the client to poll:
 * "state" becomes "created-project" (with "projectID") on success, or "error"
 * (with "error" and "errorDetails") when parsing reported problems or the
 * worker thread failed unexpectedly. Nothing is published if the job was canceled.
 */
private void doCreateProject(HttpServletRequest request, HttpServletResponse response, Properties parameters)
        throws ServletException, IOException {

    String token = TokenCookie.getToken(request);
    if (token == null) {
        HttpUtilities.respond(response, "error", "Not authorized");
        return;
    }

    final SpreadsheetService service = getSpreadsheetService(token);

    long jobID = Long.parseLong(parameters.getProperty("jobID"));
    final ImportingJob job = ImportingManager.getJob(jobID);
    if (job == null) {
        HttpUtilities.respond(response, "error", "No such import job");
        return;
    }

    try {
        final JSONObject optionObj = ParsingUtilities.evaluateJsonStringToObject(
            request.getParameter("options"));
        final List<Exception> exceptions = new LinkedList<Exception>();

        JSONUtilities.safePut(job.config, "state", "creating-project");

        final Project project = new Project();
        new Thread() {
            @Override
            public void run() {
                try {
                    ProjectMetadata pm = new ProjectMetadata();
                    pm.setName(JSONUtilities.getString(optionObj, "projectName", "Untitled"));
                    pm.setEncoding(JSONUtilities.getString(optionObj, "encoding", "UTF-8"));

                    GDataImporter.parse(
                        service,
                        project,
                        pm,
                        job,
                        -1,
                        optionObj,
                        exceptions
                    );

                    if (job.canceled) {
                        return;
                    }
                    if (!exceptions.isEmpty()) {
                        // Do not register a project whose import reported problems.
                        reportError(exceptions.get(0));
                        return;
                    }

                    project.update(); // update all internal models, indexes, caches, etc.

                    ProjectManager.singleton.registerProject(project, pm);

                    JSONUtilities.safePut(job.config, "projectID", project.id);
                    JSONUtilities.safePut(job.config, "state", "created-project");
                } catch (RuntimeException e) {
                    // Without this the job would stay in "creating-project" forever
                    // and the polling client would never stop.
                    e.printStackTrace();
                    reportError(e);
                }
            }

            // Publishes the failure; "state" is written last so a poller that sees
            // "error" also sees the accompanying messages.
            private void reportError(Exception e) {
                JSONUtilities.safePut(job.config, "error", "Error creating project");
                JSONUtilities.safePut(job.config, "errorDetails", e.getLocalizedMessage());
                JSONUtilities.safePut(job.config, "state", "error");
            }
        }.start();

        HttpUtilities.respond(response, "ok", "done");
    } catch (JSONException e) {
        throw new ServletException(e);
    }
}
/**
 * Creates a DocsService client for this extension and sets the given AuthSub token on it.
 */
private DocsService getDocsService(String token) {
    final DocsService docs = new DocsService(GDataExtension.SERVICE_APP_NAME);
    docs.setAuthSubToken(token);
    return docs;
}
/**
 * Creates a SpreadsheetService client for this extension and sets the given AuthSub token on it.
 */
private SpreadsheetService getSpreadsheetService(String token) {
    final SpreadsheetService spreadsheets = new SpreadsheetService(GDataExtension.SERVICE_APP_NAME);
    spreadsheets.setAuthSubToken(token);
    return spreadsheets;
}
}

View File

@ -45,10 +45,14 @@ public class GDataUrlRewriter implements UrlRewriter {
try {
URL url = new URL(urlString);
if (isSpreadsheetURL(url)) {
int keyFrom = Math.max(urlString.indexOf("?key="), urlString.indexOf("&key=")) + 5;
int keyTo = urlString.indexOf("&", keyFrom);
String key = urlString.substring(keyFrom, keyTo > 0 ? keyTo : urlString.length());
Result result = new Result();
result.rewrittenUrl = urlString;
result.format = "service/gdata/spreadsheet";
result.download = false;
result.rewrittenUrl = "https://spreadsheets.google.com/pub?key=" + key + "&output=csv";
result.format = "text/line-based/*sv";
result.download = true;
return result;
}
} catch (MalformedURLException e) {
@ -64,6 +68,6 @@ public class GDataUrlRewriter implements UrlRewriter {
query = "";
}
// http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
return host.endsWith(".google.com") && host.contains("spreadsheet") && query.contains("key=");
return host.endsWith(".google.com") && host.contains("spreadsheets") && query.contains("key=");
}
}

View File

@ -35,6 +35,8 @@ package com.google.refine;
import java.io.File;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@ -347,4 +349,15 @@ public class RefineServlet extends Butterfly {
}
return klass;
}
/**
 * Sets the Refine User-Agent header on the connection if it is an HTTP
 * connection; any other kind of connection is left untouched.
 */
static public void setUserAgent(URLConnection urlConnection) {
    if (!(urlConnection instanceof HttpURLConnection)) {
        return;
    }
    setUserAgent((HttpURLConnection) urlConnection);
}
/**
 * Adds a "User-Agent" request header of the form "Google Refine/" followed by
 * FULL_VERSION to the given HTTP connection.
 */
static public void setUserAgent(HttpURLConnection httpConnection) {
    final String userAgent = "Google Refine/" + FULL_VERSION;
    httpConnection.addRequestProperty("User-Agent", userAgent);
}
}

View File

@ -182,7 +182,7 @@ public class ExcelImporter extends TabularImportingParserBase {
}
};
readTable(
TabularImportingParserBase.readTable(
project,
metadata,
job,

View File

@ -99,7 +99,7 @@ public class FixedWidthImporter extends TabularImportingParserBase {
}
};
readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
}
/**

View File

@ -209,7 +209,7 @@ public class ImporterUtilities {
ImportingUtilities.setCreatingProjectProgress(
job,
"Reading " + fileSource,
(int) (100 * (totalBytesRead + bytesRead) / totalSize2));
totalSize2 == 0 ? -1 : (int) (100 * (totalBytesRead + bytesRead) / totalSize2));
}
@Override

View File

@ -100,6 +100,6 @@ public class LineBasedImporter extends TabularImportingParserBase {
}
};
readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
}
}

View File

@ -119,7 +119,7 @@ public class SeparatorBasedImporter extends TabularImportingParserBase {
}
};
readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
}
static protected ArrayList<Object> getCells(String line, CSVParser parser, LineNumberReader lnReader)

View File

@ -76,7 +76,7 @@ abstract public class TabularImportingParserBase extends ImportingParserBase {
super(useInputStream);
}
protected void readTable(
static public void readTable(
Project project,
ProjectMetadata metadata,
ImportingJob job,

View File

@ -42,6 +42,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
@ -60,7 +61,6 @@ import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileUploadException;
import org.apache.commons.fileupload.ProgressListener;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
@ -78,6 +78,7 @@ import com.ibm.icu.text.NumberFormat;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.RefineServlet;
import com.google.refine.importing.ImportingManager.Format;
import com.google.refine.importing.UrlRewriter.Result;
import com.google.refine.model.Project;
@ -124,11 +125,11 @@ public class ImportingUtilities {
}
}
);
} catch (FileUploadException e) {
} catch (Exception e) {
JSONUtilities.safePut(config, "state", "error");
JSONUtilities.safePut(config, "error", "Error uploading data");
throw new ServletException(e);
JSONUtilities.safePut(config, "errorDetails", e.getLocalizedMessage());
return;
}
JSONArray fileSelectionIndexes = new JSONArray();
@ -163,7 +164,7 @@ public class ImportingUtilities {
File rawDataDir,
JSONObject retrievalRecord,
final Progress progress
) throws FileUploadException, IOException {
) throws Exception {
JSONArray fileRecords = new JSONArray();
JSONUtilities.safePut(retrievalRecord, "files", fileRecords);
@ -212,7 +213,7 @@ public class ImportingUtilities {
});
progress.setProgress("Uploading data ...", -1);
for (Object obj : upload.parseRequest(request)) {
parts: for (Object obj : upload.parseRequest(request)) {
if (progress.isCanceled()) {
break;
}
@ -260,33 +261,41 @@ public class ImportingUtilities {
if (!result.download) {
downloadCount++;
JSONUtilities.append(fileRecords, fileRecord);
continue;
continue parts;
}
}
}
URLConnection urlConnection = url.openConnection();
urlConnection.setConnectTimeout(5000);
if (urlConnection instanceof HttpURLConnection) {
HttpURLConnection httpConnection = (HttpURLConnection) urlConnection;
RefineServlet.setUserAgent(httpConnection);
}
urlConnection.connect();
InputStream stream2 = urlConnection.getInputStream();
try {
String fileName = url.getFile();
File file = allocateFile(rawDataDir, fileName);
File file = allocateFile(rawDataDir, url.getFile());
int contentLength = urlConnection.getContentLength();
if (contentLength >= 0) {
if (contentLength > 0) {
update.totalExpectedSize += contentLength;
}
JSONUtilities.safePut(fileRecord, "declaredEncoding", urlConnection.getContentEncoding());
JSONUtilities.safePut(fileRecord, "declaredMimeType", urlConnection.getContentType());
JSONUtilities.safePut(fileRecord, "fileName", fileName);
JSONUtilities.safePut(fileRecord, "fileName", file.getName());
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
progress.setProgress("Downloading " + urlString,
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
long actualLength = saveStreamToFile(stream, file, update);
long actualLength = saveStreamToFile(stream2, file, update);
JSONUtilities.safePut(fileRecord, "size", actualLength);
if (contentLength >= 0) {
if (actualLength == 0) {
throw new Exception("No content found in " + urlString);
} else if (contentLength >= 0) {
update.totalExpectedSize += (actualLength - contentLength);
} else {
update.totalExpectedSize += actualLength;
@ -344,8 +353,13 @@ public class ImportingUtilities {
}
static public File allocateFile(File dir, String name) {
int q = name.indexOf('?');
if (q > 0) {
name = name.substring(0, q);
}
File file = new File(dir, name);
int dot = name.indexOf('.');
String prefix = dot < 0 ? name : name.substring(0, dot);
String suffix = dot < 0 ? "" : name.substring(dot);

View File

@ -54,6 +54,7 @@ function registerCommands() {
RS.registerCommand(module, "create-importing-job", new Packages.com.google.refine.commands.importing.CreateImportingJobCommand());
RS.registerCommand(module, "get-importing-job-status", new Packages.com.google.refine.commands.importing.GetImportingJobStatusCommand());
RS.registerCommand(module, "importing-controller", new Packages.com.google.refine.commands.importing.ImportingControllerCommand());
RS.registerCommand(module, "cancel-importing-job", new Packages.com.google.refine.commands.importing.CancelImportingJobCommand());
RS.registerCommand(module, "create-project-from-upload", new Packages.com.google.refine.commands.project.CreateProjectCommand());
RS.registerCommand(module, "import-project", new Packages.com.google.refine.commands.project.ImportProjectCommand());
@ -456,7 +457,8 @@ function init() {
"externals/jquery-ui/css/ui-lightness/jquery-ui-1.8.custom.css",
"styles/jquery-ui-overrides.less",
"styles/common.less",
"styles/pure.css"
"styles/pure.css",
"styles/util/dialog.less"
]
);
}

View File

@ -0,0 +1,5 @@
<div id="create-project-error-panel"><div class="grid-layout layout-normal layout-full"><table>
<tr><td id="create-project-error-message"></td></tr>
<tr><td id="create-project-error-stack"></td></tr>
<tr><td><button class="button button-primary" id="create-project-error-ok-button">OK</button></td></tr>
</table></div></div>

View File

@ -0,0 +1,13 @@
<div id="create-project-progress-panel">
<div class="grid-layout layout-normal layout-full"><table>
<tr><td colspan="3" id="create-project-progress-message"></td></tr>
<tr><td colspan="3">
<div id="create-project-progress-bar-frame"><div id="create-project-progress-bar-body"></div></div>
</td></tr>
<tr><td colspan="3">
<button class="button" id="create-project-progress-cancel-button">Cancel</button>
<span id="create-project-progress-timing"></span>
</td></tr>
</table></div>
<iframe id="create-project-iframe" name="create-project-iframe"></iframe>
</div>

View File

@ -42,7 +42,13 @@ Refine.CreateProjectUI = function(elmt) {
this._sourceSelectionElmt =
$(DOM.loadHTML("core", "scripts/index/create-project-ui-source-selection.html")).appendTo(this._elmt);
this._sourceSelectionElmts = DOM.bind(this._sourceSelectionElmt);
this._progressPanel = this.addCustomPanel();
this._progressPanel.html(DOM.loadHTML("core", "scripts/index/create-project-progress-panel.html"));
this._errorPanel = this.addCustomPanel();
this._errorPanel.html(DOM.loadHTML("core", "scripts/index/create-project-error-panel.html"));
$.post(
"/command/core/get-importing-configuration",
null,
@ -152,3 +158,91 @@ Refine.actionAreas.push({
label: "Create Project",
uiClass: Refine.CreateProjectUI
});
/**
 * Shows the shared import progress panel in its initial state.
 *
 * @param {string} progressMessage - text shown above the progress bar
 * @param {function} onCancel - click handler installed on the Cancel button,
 *   replacing any handler bound earlier
 */
Refine.CreateProjectUI.prototype.showImportProgressPanel = function(progressMessage, onCancel) {
  this.showCustomPanel(this._progressPanel);

  // Only elements that exist in create-project-progress-panel.html are touched.
  $('#create-project-progress-message').text(progressMessage);
  $('#create-project-progress-bar-body').css("width", "0%");
  $('#create-project-progress-timing').empty();
  $('#create-project-progress-cancel-button').unbind().click(onCancel);
};
/**
 * Asks the server once for the status of an importing job and updates the
 * shared progress panel. Callers in this file invoke it from a
 * window.setInterval timer, whose ID is passed in so polling can be stopped.
 *
 * @param {Date} start - when the job was started; used to estimate time remaining
 * @param {*} jobID - ID of the importing job to query
 * @param {number} timerID - interval timer to clear once polling must stop
 * @param {function(Object): boolean} checkDone - returns true once the job has
 *   reached the state the caller is waiting for
 * @param {function(*, Object)} [callback] - called with (jobID, job) when checkDone
 *   returns true
 * @param {function(Object)} [onError] - called with the job when its config state
 *   is "error"; when omitted the shared error panel is shown instead
 */
Refine.CreateProjectUI.prototype.pollImportJob = function(start, jobID, timerID, checkDone, callback, onError) {
  var self = this;
  $.post(
    "/command/core/get-importing-job-status?" + $.param({ "jobID": jobID }),
    null,
    function(data) {
      if (!(data)) {
        self.showImportJobError("Unknown error");
        window.clearInterval(timerID);
        return;
      } else if (data.code === "error" || !("job" in data)) {
        self.showImportJobError(data.message || "Unknown error");
        window.clearInterval(timerID);
        return;
      }

      var job = data.job;
      if (job.config.state === "error") {
        window.clearInterval(timerID);
        if (onError) {
          onError(job);
        } else {
          // The timer is already stopped, so never leave the user on the progress panel.
          self.showImportJobError(job.config.error || "Unknown error", job.config.errorDetails);
        }
      } else if (checkDone(job)) {
        $('#create-project-progress-message').text('Done.');

        window.clearInterval(timerID);
        if (callback) {
          callback(jobID, job);
        }
      } else {
        // A job's config may not carry progress information yet; treat that as
        // indefinite progress instead of failing on an undefined object.
        var progress = job.config.progress || {};
        if (progress.percent > 0) {
          var secondsSpent = (new Date().getTime() - start.getTime()) / 1000;
          var secondsRemaining = (100 / progress.percent) * secondsSpent - secondsSpent;

          $('#create-project-progress-bar-body')
            .removeClass('indefinite')
            .css("width", progress.percent + "%");

          if (secondsRemaining > 1) {
            if (secondsRemaining > 60) {
              $('#create-project-progress-timing').text(
                Math.ceil(secondsRemaining / 60) + " minutes remaining");
            } else {
              $('#create-project-progress-timing').text(
                Math.ceil(secondsRemaining) + " seconds remaining");
            }
          } else {
            $('#create-project-progress-timing').text('almost done ...');
          }
        } else {
          $('#create-project-progress-bar-body').addClass('indefinite');
          $('#create-project-progress-timing').empty();
        }
        if (progress.message !== undefined) {
          $('#create-project-progress-message').text(progress.message);
        }
      }
    },
    "json"
  );
};
/**
 * Shows the shared error panel with the given message and optional technical
 * details; the OK button returns the user to the source selection panel.
 *
 * @param {string} message - error message to display
 * @param {string} [stack] - technical details; a placeholder text is shown when falsy
 */
Refine.CreateProjectUI.prototype.showImportJobError = function(message, stack) {
  var ui = this;
  var returnToSourceSelection = function() {
    ui.showSourceSelectionPanel();
  };

  $('#create-project-error-message').text(message);
  $('#create-project-error-stack').text(stack || 'No technical details.');

  this.showCustomPanel(this._errorPanel);
  $('#create-project-error-ok-button').unbind().click(returnToSourceSelection);
};

View File

@ -34,12 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Refine.DefaultImportingController = function(createProjectUI) {
this._createProjectUI = createProjectUI;
this._progressPanel = createProjectUI.addCustomPanel();
this._progressPanel.html(DOM.loadHTML("core", "scripts/index/default-importing-controller/progress-panel.html"));
this._errorPanel = createProjectUI.addCustomPanel();
this._errorPanel.html(DOM.loadHTML("core", "scripts/index/default-importing-controller/error-panel.html"));
this._fileSelectionPanel = createProjectUI.addCustomPanel();
this._parsingPanel = createProjectUI.addCustomPanel();
@ -84,7 +78,7 @@ Refine.DefaultImportingController.prototype.startImportJob = function(form, prog
form.attr("method", "post")
.attr("enctype", "multipart/form-data")
.attr("accept-charset", "UTF-8")
.attr("target", "default-importing-iframe")
.attr("target", "create-project-iframe")
.attr("action", "/command/core/importing-controller?" + $.param({
"controller": "core/default-importing-controller",
"jobID": jobID,
@ -94,26 +88,30 @@ Refine.DefaultImportingController.prototype.startImportJob = function(form, prog
var start = new Date();
var timerID = window.setInterval(
function() {
self._pollImportJob(
start, jobID, timerID,
function(job) {
return job.config.hasData;
},
function(jobID, job) {
self._job = job;
self._onImportJobReady();
if (callback) {
callback(jobID, job);
}
}
);
},
1000
function() {
self._createProjectUI.pollImportJob(
start, jobID, timerID,
function(job) {
return job.config.hasData;
},
function(jobID, job) {
self._job = job;
self._onImportJobReady();
if (callback) {
callback(jobID, job);
}
},
function(job) {
alert(job.config.error + '\n' + job.config.errorDetails);
self._startOver();
}
);
},
1000
);
self._initializeImportProgressPanel(progressMessage, function() {
self._createProjectUI.showImportProgressPanel(progressMessage, function() {
// stop the iframe
$('#default-importing-iframe')[0].contentWindow.stop();
$('#create-project-iframe')[0].contentWindow.stop();
// stop the timed polling
window.clearInterval(timerID);
@ -128,89 +126,6 @@ Refine.DefaultImportingController.prototype.startImportJob = function(form, prog
);
};
/**
 * Shows this controller's progress panel in its initial state and installs
 * onCancel as the only click handler of the Cancel button.
 *
 * @param {string} progressMessage - text shown above the progress bar
 * @param {function} onCancel - click handler for the Cancel button
 */
Refine.DefaultImportingController.prototype._initializeImportProgressPanel = function(progressMessage, onCancel) {
  var controller = this;
  controller._createProjectUI.showCustomPanel(controller._progressPanel);

  $('#default-importing-progress-message').text(progressMessage);
  $('#default-importing-progress-bar-body').css("width", "0%");
  $('#default-importing-progress-message-left').text('Starting');

  // Clear the remaining status fields.
  $.each(
    [
      '#default-importing-progress-message-center',
      '#default-importing-progress-message-right',
      '#default-importing-progress-timing'
    ],
    function(index, selector) {
      $(selector).empty();
    }
  );

  $('#default-importing-progress-cancel-button').unbind().click(onCancel);
};
/**
 * Asks the server once for the status of an importing job and updates this
 * controller's progress panel accordingly.
 *
 * @param {Date} start - when the job was started; used to estimate time remaining
 * @param {*} jobID - ID of the importing job to query
 * @param {number} timerID - interval timer cleared when polling must stop
 * @param {function(Object): boolean} checkDone - returns true once the job is done
 * @param {function(*, Object)} [callback] - called with (jobID, job) when done
 */
Refine.DefaultImportingController.prototype._pollImportJob = function(start, jobID, timerID, checkDone, callback) {
  var controller = this;

  // Shows the error panel, then stops the polling timer.
  var abort = function(message) {
    controller._showImportJobError(message);
    window.clearInterval(timerID);
  };

  var onStatus = function(data) {
    if (!data) {
      abort("Unknown error");
      return;
    }
    if (data.code == "error" || !("job" in data)) {
      abort(data.message || "Unknown error");
      return;
    }

    var job = data.job;
    if (checkDone(job)) {
      $('#default-importing-progress-message').text('Done.');

      window.clearInterval(timerID);
      if (callback) {
        callback(jobID, job);
      }
      return;
    }

    var progress = job.config.progress;
    var bar = $('#default-importing-progress-bar-body');
    var timing = $('#default-importing-progress-timing');

    if (progress.percent > 0) {
      var secondsSpent = (new Date().getTime() - start.getTime()) / 1000;
      var secondsRemaining = (100 / progress.percent) * secondsSpent - secondsSpent;

      bar.removeClass('indefinite').css("width", progress.percent + "%");

      var timingText;
      if (secondsRemaining > 60) {
        timingText = Math.ceil(secondsRemaining / 60) + " minutes remaining";
      } else if (secondsRemaining > 1) {
        timingText = Math.ceil(secondsRemaining) + " seconds remaining";
      } else {
        timingText = 'almost done ...';
      }
      timing.text(timingText);
    } else {
      bar.addClass('indefinite');
      timing.empty();
    }

    $('#default-importing-progress-message').text(progress.message);
  };

  $.post(
    "/command/core/get-importing-job-status?" + $.param({ "jobID": jobID }),
    null,
    onStatus,
    "json"
  );
};
/**
 * Shows this controller's error panel with the given message and optional
 * technical details; the OK button returns to the source selection panel.
 *
 * @param {string} message - error message to display
 * @param {string} [stack] - technical details; a placeholder text is shown when falsy
 */
Refine.DefaultImportingController.prototype._showImportJobError = function(message, stack) {
  var controller = this;
  var returnToSourceSelection = function() {
    controller._createProjectUI.showSourceSelectionPanel();
  };

  $('#default-importing-error-message').text(message);
  $('#default-importing-error-stack').text(stack || 'No technical details.');

  this._createProjectUI.showCustomPanel(this._errorPanel);
  $('#default-importing-error-ok-button').unbind().click(returnToSourceSelection);
};
Refine.DefaultImportingController.prototype._onImportJobReady = function() {
this._prepareData();
if (this._job.config.retrievalRecord.files.length > 1) {
@ -318,17 +233,6 @@ Refine.DefaultImportingController.prototype.getPreviewData = function(callback,
}),
null,
function(data) {
// Un-pool objects
for (var r = 0; r < data.rows.length; r++) {
var row = data.rows[r];
for (var c = 0; c < row.cells.length; c++) {
var cell = row.cells[c];
if ((cell) && ("r" in cell)) {
cell.r = data.pool.recons[cell.r];
}
}
}
result.rowModel = data;
callback(result);
},
@ -344,7 +248,7 @@ Refine.DefaultImportingController.prototype._createProject = function() {
var projectName = $.trim(this._parsingPanelElmts.projectNameInput[0].value);
if (projectName.length == 0) {
window.alert("Please name the project.");
this._parsingPanelElmts.focus();
this._parsingPanelElmts.projectNameInput.focus();
return;
}
@ -365,7 +269,7 @@ Refine.DefaultImportingController.prototype._createProject = function() {
var start = new Date();
var timerID = window.setInterval(
function() {
self._pollImportJob(
self._createProjectUI.pollImportJob(
start,
self._jobID,
timerID,
@ -374,12 +278,16 @@ Refine.DefaultImportingController.prototype._createProject = function() {
},
function(jobID, job) {
document.location = "project?project=" + job.config.projectID;
},
function(job) {
alert(job.config.error + '\n' + job.config.errorDetails);
self._onImportJobReady();
}
);
},
1000
);
self._initializeImportProgressPanel("Creating project ...", function() {
self._createProjectUI.showImportProgressPanel("Creating project ...", function() {
// stop the timed polling
window.clearInterval(timerID);

View File

@ -1,5 +0,0 @@
<div id="default-importing-error-panel"><div class="grid-layout layout-normal layout-full"><table>
<tr><td id="default-importing-error-message"></td></tr>
<tr><td id="default-importing-error-stack"></td></tr>
<tr><td><button class="button button-primary" id="default-importing-error-ok-button">OK</button></td></tr>
</table></div></div>

View File

@ -89,28 +89,28 @@ Refine.DefaultImportingController.prototype._prepareParsingPanel = function() {
this._parsingPanelElmts.progressPanel.hide();
this._parsingPanelResizer = function() {
var elmts = self._parsingPanelElmts;
var width = self._parsingPanel.width();
var height = self._parsingPanel.height();
var headerHeight = elmts.wizardHeader.outerHeight(true);
var controlPanelHeight = 300;
var elmts = self._parsingPanelElmts;
var width = self._parsingPanel.width();
var height = self._parsingPanel.height();
var headerHeight = elmts.wizardHeader.outerHeight(true);
var controlPanelHeight = 300;
elmts.dataPanel
.css("left", "0px")
.css("top", headerHeight + "px")
.css("width", (width - DOM.getHPaddings(elmts.dataPanel)) + "px")
.css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.dataPanel)) + "px");
elmts.progressPanel
.css("left", "0px")
.css("top", headerHeight + "px")
.css("width", (width - DOM.getHPaddings(elmts.progressPanel)) + "px")
.css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.progressPanel)) + "px");
elmts.dataPanel
.css("left", "0px")
.css("top", headerHeight + "px")
.css("width", (width - DOM.getHPaddings(elmts.dataPanel)) + "px")
.css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.dataPanel)) + "px");
elmts.progressPanel
.css("left", "0px")
.css("top", headerHeight + "px")
.css("width", (width - DOM.getHPaddings(elmts.progressPanel)) + "px")
.css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.progressPanel)) + "px");
elmts.controlPanel
.css("left", "0px")
.css("top", (height - controlPanelHeight) + "px")
.css("width", (width - DOM.getHPaddings(elmts.controlPanel)) + "px")
.css("height", (controlPanelHeight - DOM.getVPaddings(elmts.controlPanel)) + "px");
elmts.controlPanel
.css("left", "0px")
.css("top", (height - controlPanelHeight) + "px")
.css("width", (width - DOM.getHPaddings(elmts.controlPanel)) + "px")
.css("height", (controlPanelHeight - DOM.getVPaddings(elmts.controlPanel)) + "px");
};
$(window).resize(this._parsingPanelResizer);

View File

@ -1,13 +0,0 @@
<div id="default-importing-progress-panel">
<div class="grid-layout layout-normal layout-full"><table>
<tr><td colspan="3" id="default-importing-progress-message"></td></tr>
<tr><td colspan="3">
<div id="default-importing-progress-bar-frame"><div id="default-importing-progress-bar-body"></div></div>
</td></tr>
<tr><td colspan="3">
<button class="button" id="default-importing-progress-cancel-button">Cancel</button>
<span id="default-importing-progress-timing"></span>
</td></tr>
</table></div>
<iframe id="default-importing-iframe" name="default-importing-iframe"></iframe>
</div>

View File

@ -74,7 +74,7 @@ UrlImportingSourceUI.prototype.attachUI = function(bodyDiv) {
this._elmts = DOM.bind(bodyDiv);
this._elmts.nextButton.click(function(evt) {
if ($.trim(self._elmts.urlInput[0].value.length) === 0) {
if ($.trim(self._elmts.urlInput[0].value).length === 0) {
window.alert("You must specify a web address (URL) to import.");
} else {
self._controller.startImportJob(self._elmts.form, "Downloading data ...");

View File

@ -64,10 +64,10 @@ function formatRelativeDate(d) {
var tomorrow = Date.today().add({ days: 1 });
if (d.between(today, tomorrow)) {
return "today " + d.toString("h:mm tt");
return "today " + d.toString("H:mm tt");
} else if (d.between(last_week, today)) {
var diff = Math.floor(today.getDayOfYear() - d.getDayOfYear());
return (diff <= 1) ? ("yesterday " + d.toString("h:mm tt")) : (diff + " days ago");
return (diff <= 1) ? ("yesterday " + d.toString("H:mm tt")) : (diff + " days ago");
} else if (d.between(last_month, today)) {
var diff = Math.floor((today.getDayOfYear() - d.getDayOfYear()) / 7);
return (diff == 1) ? "a week ago" : diff.toFixed(0) + " weeks ago" ;

View File

@ -100,3 +100,46 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
height: 100%;
visibility: hidden;
}
#create-project-progress-panel {
font-size: 1.3em;
padding: @padding_loose;
}
#create-project-progress-bar-frame {
border: 1px solid @chrome_primary;
padding: @padding_tighter;
width: 300px;
}
#create-project-progress-bar-body {
background: @chrome_primary;
height: 1em;
position: relative;
width: 30%;
}
#create-project-progress-bar-body.indefinite {
background: #eee;
width: 100%;
}
#create-project-iframe {
position: fixed;
width: 200px;
height: 200px;
left: -300px;
top: -300px;
}
#create-project-error-panel {
font-size: 1.3em;
padding: @padding_loose;
}
#create-project-error-message {
}
#create-project-error-stack {
  font-family: monospace;
  /* Keep the line breaks and indentation of the technical details. */
  white-space: pre;
  padding: @padding_normal;
  border: 1px solid @chrome_primary;
}

View File

@ -33,49 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@import-less url("../theme.less");
#default-importing-progress-panel {
font-size: 1.3em;
padding: @padding_loose;
}
#default-importing-progress-bar-frame {
border: 1px solid @chrome_primary;
padding: @padding_tighter;
width: 300px;
}
#default-importing-progress-bar-body {
background: @chrome_primary;
height: 1em;
position: relative;
width: 30%;
}
#default-importing-progress-bar-body.indefinite {
background: #eee;
width: 100%;
}
#default-importing-iframe {
position: fixed;
width: 200px;
height: 200px;
left: -300px;
top: -300px;
}
#default-importing-error-panel {
font-size: 1.3em;
padding: @padding_loose;
}
#default-importing-error-message {
}
#default-importing-error-stack {
  font-family: monospace;
  /* Keep the line breaks and indentation of the technical details. */
  white-space: pre;
  padding: @padding_normal;
  border: 1px solid @chrome_primary;
}
.default-importing-wizard-header {
font-size: 1.3em;
background: @chrome_primary;