Implemented project import and export commands (from/to .tar files).

git-svn-id: http://google-refine.googlecode.com/svn/trunk@234 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-03-08 02:34:25 +00:00
parent 12d5c6aba5
commit 5d3a57eeeb
13 changed files with 392 additions and 67 deletions

View File

@ -16,5 +16,6 @@
<classpathentry kind="lib" path="lib/secondstring-20100303.jar" sourcepath="lib-src/secondstring-20100303-sources.jar"/> <classpathentry kind="lib" path="lib/secondstring-20100303.jar" sourcepath="lib-src/secondstring-20100303-sources.jar"/>
<classpathentry kind="lib" path="lib/poi-3.6.jar"/> <classpathentry kind="lib" path="lib/poi-3.6.jar"/>
<classpathentry kind="lib" path="lib/poi-ooxml-3.6.jar"/> <classpathentry kind="lib" path="lib/poi-ooxml-3.6.jar"/>
<classpathentry kind="lib" path="lib/apache-tools-tar.jar"/>
<classpathentry kind="output" path="build/classes"/> <classpathentry kind="output" path="build/classes"/>
</classpath> </classpath>

View File

@ -144,7 +144,7 @@ set CLASSPATH="%GRIDWORKS_BUILD_DIR%\classes;%GRIDWORKS_LIB_DIR%\*"
goto end goto end
:doAnt :doAnt
ant -f build.xml -Dbuild.dir="%GRIDWORKS_BUILD_DI%" -Ddist.dir="%GRIDWORKS_DIST_DIR%" -Dversion="%VERSION%" %ACTION% ant -f build.xml -Dbuild.dir="%GRIDWORKS_BUILD_DIR%" -Ddist.dir="%GRIDWORKS_DIST_DIR%" -Dversion="%VERSION%" %ACTION%
goto end goto end
:end :end

BIN
lib/apache-tools-tar.jar Normal file

Binary file not shown.

View File

@ -18,6 +18,8 @@ import com.metaweb.gridworks.commands.edit.AnnotateRowsCommand;
import com.metaweb.gridworks.commands.edit.ApplyOperationsCommand; import com.metaweb.gridworks.commands.edit.ApplyOperationsCommand;
import com.metaweb.gridworks.commands.edit.CreateProjectCommand; import com.metaweb.gridworks.commands.edit.CreateProjectCommand;
import com.metaweb.gridworks.commands.edit.DeleteProjectCommand; import com.metaweb.gridworks.commands.edit.DeleteProjectCommand;
import com.metaweb.gridworks.commands.edit.ExportProjectCommand;
import com.metaweb.gridworks.commands.edit.ImportProjectCommand;
import com.metaweb.gridworks.commands.edit.TextTransformCommand; import com.metaweb.gridworks.commands.edit.TextTransformCommand;
import com.metaweb.gridworks.commands.edit.EditOneCellCommand; import com.metaweb.gridworks.commands.edit.EditOneCellCommand;
import com.metaweb.gridworks.commands.edit.MassEditCommand; import com.metaweb.gridworks.commands.edit.MassEditCommand;
@ -60,6 +62,8 @@ public class GridworksServlet extends HttpServlet {
static { static {
_commands.put("create-project-from-upload", new CreateProjectCommand()); _commands.put("create-project-from-upload", new CreateProjectCommand());
_commands.put("import-project", new ImportProjectCommand());
_commands.put("export-project", new ExportProjectCommand());
_commands.put("export-rows", new ExportRowsCommand()); _commands.put("export-rows", new ExportRowsCommand());
_commands.put("get-project-metadata", new GetProjectMetadataCommand()); _commands.put("get-project-metadata", new GetProjectMetadataCommand());

View File

@ -117,6 +117,38 @@ public class ProjectManager {
} }
} }
/**
 * Loads the metadata stored in the directory of the given project and registers it
 * in this manager's metadata map under that project ID.
 * <p>
 * The lookup of the directory, the load and the registration all happen while holding
 * this manager's monitor.
 *
 * @param projectID ID of the project whose directory holds the metadata to load
 */
public void importProject(long projectID) {
    synchronized (this) {
        File importedDir = getProjectDir(projectID);
        ProjectMetadata loaded = ProjectMetadata.load(importedDir);

        _projectsMetadata.put(projectID, loaded);
    }
}
/**
 * Saves a project's metadata and, when needed, the project itself.
 * <p>
 * The metadata is saved whenever an entry for the ID exists in the metadata map. The
 * project is saved only if it is present in the projects map and either its metadata
 * reports a modification time after {@code project.lastSave}, or no metadata is
 * registered for it (staleness cannot be determined in that case, so the project is
 * saved to be safe). Failures of either save are printed with
 * {@code printStackTrace()} and are not rethrown.
 *
 * @param id ID of the project to save
 */
public void ensureProjectSaved(long id) {
    synchronized (this) {
        File projectDir = getProjectDir(id);

        ProjectMetadata metadata = _projectsMetadata.get(id);
        if (metadata != null) {
            try {
                metadata.save(projectDir);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        Project project = _projects.get(id);
        // metadata can be null at this point; dereferencing it unconditionally would
        // throw a NullPointerException for a project that has no metadata entry.
        boolean needsSave = project != null
            && (metadata == null || metadata.getModified().after(project.lastSave));
        if (needsSave) {
            try {
                project.save();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
public ProjectMetadata getProjectMetadata(long id) { public ProjectMetadata getProjectMetadata(long id) {
return _projectsMetadata.get(id); return _projectsMetadata.get(id);
} }

View File

@ -38,7 +38,7 @@ public class CreateProjectCommand extends Command {
throws ServletException, IOException { throws ServletException, IOException {
try { try {
Properties options = parseUrlParameters(request); Properties options = ParsingUtilities.parseUrlParameters(request);
Project project = new Project(); Project project = new Project();
internalImport(request, project, options); internalImport(request, project, options);
@ -60,27 +60,6 @@ public class CreateProjectCommand extends Command {
} }
} }
/**
 * Parses the query string of a request into name/value options.
 * <p>
 * The query string is split on {@code '&'}; each piece is split at its first
 * {@code '='}. The value part is passed through {@code ParsingUtilities.decode};
 * the name is used as-is. A piece without {@code '='} is stored under its own text
 * with an empty value. Empty pieces (an empty query string, or a stray
 * {@code '&'}) are skipped.
 *
 * @param request the request whose query string is parsed
 * @return the parsed options; empty if the request has no query string
 */
protected Properties parseUrlParameters(HttpServletRequest request) {
    Properties options = new Properties();

    String query = request.getQueryString();
    if (query != null) {
        if (query.startsWith("?")) {
            query = query.substring(1);
        }

        for (String pairString : query.split("&")) {
            if (pairString.length() == 0) {
                // Nothing to record for an empty query or a doubled/trailing '&'.
                continue;
            }

            int equal = pairString.indexOf('=');
            // A parameter given without '=' keeps its name instead of being filed under "".
            String name = equal >= 0 ? pairString.substring(0, equal) : pairString;
            String value = equal >= 0 ? ParsingUtilities.decode(pairString.substring(equal + 1)) : "";

            options.put(name, value);
        }
    }
    return options;
}
protected void internalImport( protected void internalImport(
HttpServletRequest request, HttpServletRequest request,
Project project, Project project,

View File

@ -0,0 +1,94 @@
package com.metaweb.gridworks.commands.edit;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarOutputStream;
import com.metaweb.gridworks.ProjectManager;
import com.metaweb.gridworks.commands.Command;
import com.metaweb.gridworks.model.Project;
/**
 * Command that sends the contents of a project's directory to the client as a TAR
 * archive.
 */
public class ExportProjectCommand extends Command {

    /**
     * Makes sure the requested project is saved, then writes its directory to the
     * response body as a TAR archive with content type {@code application/x-tar}.
     * Any exception is handed to {@code respondException}.
     *
     * @param request  the request identifying the project to export
     * @param response the response that receives the archive
     */
    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {

        try {
            Project project = getProject(request);
            ProjectManager.singleton.ensureProjectSaved(project.id);

            response.setHeader("Content-Type", "application/x-tar");

            OutputStream os = response.getOutputStream();
            try {
                tarToOutputStream(
                    ProjectManager.singleton.getProjectDir(project.id),
                    os
                );
            } finally {
                os.close();
            }
        } catch (Exception e) {
            respondException(response, e);
        }
    }

    /**
     * Writes every non-hidden file under {@code dir} to {@code os} as a TAR archive.
     * The TAR stream wrapping {@code os} is closed before returning.
     *
     * @param dir the directory to archive
     * @param os  the stream that receives the archive
     * @throws IOException if a file cannot be read or the archive cannot be written
     */
    protected void tarToOutputStream(File dir, OutputStream os) throws IOException {
        TarOutputStream tos = new TarOutputStream(os);
        try {
            tarDir("", dir, tos);
        } finally {
            tos.close();
        }
    }

    /**
     * Recursively adds the non-hidden files of {@code dir} to the archive.
     * Directories themselves get no entry; only regular files are written.
     *
     * @param relative archive path prefix for the entries of {@code dir}; empty for
     *                 the root, otherwise ending in {@code '/'}
     * @param dir      the directory whose contents are added
     * @param tos      the archive being written
     * @throws IOException if {@code dir} cannot be listed, or on a read/write failure
     */
    protected void tarDir(String relative, File dir, TarOutputStream tos) throws IOException {
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when dir is not a directory or cannot be read.
            throw new IOException("Cannot list contents of " + dir.getAbsolutePath());
        }

        for (File file : files) {
            if (file.isHidden()) {
                continue;
            }

            String path = relative + file.getName();
            if (file.isDirectory()) {
                // Entry names always use '/', whatever the platform's File.separator is,
                // so that an archive written on Windows unpacks correctly elsewhere.
                tarDir(path + "/", file, tos);
            } else {
                TarEntry entry = new TarEntry(path);

                entry.setMode(TarEntry.DEFAULT_FILE_MODE);
                entry.setSize(file.length());
                entry.setModTime(file.lastModified());

                tos.putNextEntry(entry);
                copyFile(file, tos);
                tos.closeEntry();
            }
        }
    }

    /**
     * Copies the full contents of {@code file} to {@code os}. The output stream is
     * left open.
     *
     * @param file the file to read
     * @param os   the stream to write to
     * @throws IOException on a read or write failure
     */
    protected void copyFile(File file, OutputStream os) throws IOException {
        final int buffersize = 4096;

        FileInputStream fis = new FileInputStream(file);
        try {
            byte[] buf = new byte[buffersize];
            int count;

            while ((count = fis.read(buf, 0, buffersize)) != -1) {
                os.write(buf, 0, count);
            }
        } finally {
            fis.close();
        }
    }
}

View File

@ -0,0 +1,157 @@
package com.metaweb.gridworks.commands.edit;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Properties;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarInputStream;
import com.metaweb.gridworks.ProjectManager;
import com.metaweb.gridworks.commands.Command;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.util.ParsingUtilities;
import com.oreilly.servlet.multipart.FilePart;
import com.oreilly.servlet.multipart.MultipartParser;
import com.oreilly.servlet.multipart.ParamPart;
import com.oreilly.servlet.multipart.Part;
/**
 * Command that creates a project from an uploaded (or remotely fetched) TAR archive
 * of a project directory.
 */
public class ImportProjectCommand extends Command {

    /**
     * Unpacks the submitted archive into the directory of a freshly generated project
     * ID, registers the project with the {@code ProjectManager}, optionally renames
     * it from the {@code project-name} option, and redirects to the project page.
     * Any exception is handed to {@code respondException}.
     *
     * @param request  the multipart request carrying the archive and options
     * @param response the response used for the redirect or the error report
     */
    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {

        try {
            Properties options = ParsingUtilities.parseUrlParameters(request);

            long projectID = Project.generateID();

            internalImport(request, options, projectID);

            ProjectManager.singleton.importProject(projectID);

            if (options.containsKey("project-name")) {
                String projectName = options.getProperty("project-name");
                if (projectName != null && projectName.length() > 0) {
                    ProjectManager.singleton.getProjectMetadata(projectID).setName(projectName);
                }
            }

            redirect(response, "/project.html?project=" + projectID);
        } catch (Exception e) {
            // Report the failure to the client instead of leaving the request unanswered.
            respondException(response, e);
        }
    }

    /**
     * Reads the parts of a multipart request. Every file part is unpacked into the
     * project directory; a {@code url} parameter, if non-empty, is fetched and
     * unpacked after all parts have been read; every other parameter is copied into
     * {@code options}.
     *
     * @param request   the multipart request (bodies up to 20 MB are accepted)
     * @param options   receives the non-{@code url} parameters of the request
     * @param projectID ID of the project being created
     * @throws Exception if the request cannot be parsed as multipart, or if reading
     *                   or unpacking an archive fails
     */
    protected void internalImport(
        HttpServletRequest request,
        Properties options,
        long projectID
    ) throws Exception {
        MultipartParser parser;
        try {
            parser = new MultipartParser(request, 20 * 1024 * 1024);
        } catch (Exception e) {
            // Without a parser nothing can be imported; continuing would register a
            // project whose directory was never populated.
            throw new Exception("Cannot parse the request as a multipart upload", e);
        }

        Part part = null;
        String url = null;

        while ((part = parser.readNextPart()) != null) {
            if (part.isFile()) {
                FilePart filePart = (FilePart) part;
                InputStream inputStream = filePart.getInputStream();
                try {
                    internalImportInputStream(projectID, inputStream);
                } finally {
                    inputStream.close();
                }
            } else if (part.isParam()) {
                ParamPart paramPart = (ParamPart) part;
                String paramName = paramPart.getName();
                if (paramName.equals("url")) {
                    url = paramPart.getStringValue();
                } else {
                    options.put(paramName, paramPart.getStringValue());
                }
            }
        }

        if (url != null && url.length() > 0) {
            internalImportURL(request, options, projectID, url);
        }
    }

    /**
     * Downloads a project archive from a URL (5 second connect timeout) and unpacks
     * it into the project directory.
     *
     * @param request   the originating request (not used by this implementation)
     * @param options   the import options (not used by this implementation)
     * @param projectID ID of the project being created
     * @param urlString location of the archive
     * @throws Exception if the URL is malformed, the connection or download fails,
     *                   or the archive cannot be unpacked
     */
    protected void internalImportURL(
        HttpServletRequest request,
        Properties options,
        long projectID,
        String urlString
    ) throws Exception {
        URL url = new URL(urlString);
        URLConnection connection = null;

        try {
            connection = url.openConnection();
            connection.setConnectTimeout(5000);
            connection.connect();
        } catch (Exception e) {
            throw new Exception("Cannot connect to " + urlString, e);
        }

        InputStream inputStream = null;
        try {
            inputStream = connection.getInputStream();
        } catch (Exception e) {
            throw new Exception("Cannot retrieve content from " + url, e);
        }

        try {
            internalImportInputStream(projectID, inputStream);
        } finally {
            inputStream.close();
        }
    }

    /**
     * Creates the directory of the given project and unpacks a TAR stream into it.
     *
     * @param projectID   ID of the project being created
     * @param inputStream TAR data; not closed by this method
     * @throws IOException if the archive cannot be read or a file cannot be written
     */
    protected void internalImportInputStream(long projectID, InputStream inputStream) throws IOException {
        File destDir = ProjectManager.singleton.getProjectDir(projectID);
        destDir.mkdirs();

        untar(destDir, inputStream);
    }

    /**
     * Unpacks a TAR stream into {@code destDir}, creating parent directories as
     * needed. The archive comes from an upload or a remote URL, so every entry is
     * checked to resolve inside {@code destDir}; an entry that would escape it (for
     * example through {@code ../} segments or an absolute name) aborts the import.
     *
     * @param destDir     directory that receives the archive contents
     * @param inputStream TAR data; not closed by this method
     * @throws IOException if an entry resolves outside {@code destDir}, or on a
     *                     read/write failure
     */
    protected void untar(File destDir, InputStream inputStream) throws IOException {
        TarInputStream tin = new TarInputStream(inputStream);

        String destRoot = destDir.getCanonicalPath();
        String destPrefix = destRoot.endsWith(File.separator) ? destRoot : destRoot + File.separator;

        TarEntry tarEntry = null;
        while ((tarEntry = tin.getNextEntry()) != null) {
            File destEntry = new File(destDir, tarEntry.getName());

            // Compare canonical paths so that "..", ".", and symbolic links are resolved
            // before deciding whether the entry stays within the project directory.
            String destPath = destEntry.getCanonicalPath();
            if (!destPath.equals(destRoot) && !destPath.startsWith(destPrefix)) {
                throw new IOException(
                    "Tar entry is outside of the project directory: " + tarEntry.getName());
            }

            File parent = destEntry.getParentFile();
            if (!parent.exists()) {
                parent.mkdirs();
            }

            if (tarEntry.isDirectory()) {
                destEntry.mkdirs();
            } else {
                FileOutputStream fout = new FileOutputStream(destEntry);
                try {
                    tin.copyEntryContents(fout);
                } finally {
                    fout.close();
                }
            }
        }
    }
}

View File

@ -38,8 +38,12 @@ public class Project {
transient public ProcessManager processManager = new ProcessManager(); transient public ProcessManager processManager = new ProcessManager();
transient public Date lastSave = new Date(); transient public Date lastSave = new Date();
/**
 * Produces a new project ID: the current time in milliseconds plus a random,
 * non-negative offset of at most 1,000,000,000,000.
 *
 * @return the generated ID
 */
static public long generateID() {
    long now = System.currentTimeMillis();
    long randomOffset = Math.round(Math.random() * 1000000000000L);

    return now + randomOffset;
}
public Project() { public Project() {
id = System.currentTimeMillis() + Math.round(Math.random() * 1000000000000L); id = generateID();
history = new History(this); history = new History(this);
} }

View File

@ -8,6 +8,9 @@ import java.io.UnsupportedEncodingException;
import java.text.ParseException; import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Date; import java.util.Date;
import java.util.Properties;
import javax.servlet.http.HttpServletRequest;
import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.net.URLCodec; import org.apache.commons.codec.net.URLCodec;
@ -19,6 +22,27 @@ import org.json.JSONTokener;
public class ParsingUtilities { public class ParsingUtilities {
static public SimpleDateFormat s_sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); static public SimpleDateFormat s_sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
/**
 * Parses the query string of a request into name/value options.
 * <p>
 * The query string is split on {@code '&'}; each piece is split at its first
 * {@code '='}. The value part is passed through {@code ParsingUtilities.decode};
 * the name is used as-is. A piece without {@code '='} is stored under its own text
 * with an empty value. Empty pieces (an empty query string, or a stray
 * {@code '&'}) are skipped.
 *
 * @param request the request whose query string is parsed
 * @return the parsed options; empty if the request has no query string
 */
static public Properties parseUrlParameters(HttpServletRequest request) {
    Properties options = new Properties();

    String query = request.getQueryString();
    if (query != null) {
        if (query.startsWith("?")) {
            query = query.substring(1);
        }

        for (String pairString : query.split("&")) {
            if (pairString.length() == 0) {
                // Nothing to record for an empty query or a doubled/trailing '&'.
                continue;
            }

            int equal = pairString.indexOf('=');
            // A parameter given without '=' keeps its name instead of being filed under "".
            String name = equal >= 0 ? pairString.substring(0, equal) : pairString;
            String value = equal >= 0 ? ParsingUtilities.decode(pairString.substring(equal + 1)) : "";

            options.put(name, value);
        }
    }
    return options;
}
static public String inputStreamToString(InputStream is) throws IOException { static public String inputStreamToString(InputStream is) throws IOException {
Reader reader = new InputStreamReader(is, "UTF-8"); Reader reader = new InputStreamReader(is, "UTF-8");
try { try {

View File

@ -1 +1 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> <html> <head> <title>Gridworks</title> <link rel="stylesheet" href="/styles/common.css" /> <link rel="stylesheet" href="/styles/index.css" /> <script type="text/javascript" src="externals/jquery-1.4.1.min.js"></script> <script type="text/javascript" src="externals/date.js"></script> <script type="text/javascript" src="scripts/util/string.js"></script> <script type="text/javascript" src="scripts/index.js"></script> </head> <body> <div id="header"> <a id="logo" href="http://www.metaweb.com/"><img alt="Metaweb" src="images/metaweb-headerlogo.png" /></a> </div> <div id="body"> <div id="body-empty"> <table><tr> <td id="body-empty-logo-container"><img src="images/gridworks.png" /> Gridworks</td> <td id="body-empty-create-project-panel-container"></td> </tr></table> </div> <div id="body-nonempty"> <table><tr> <td id="body-nonempty-logo-container"><img src="images/gridworks.png" /> Gridworks</td> <td id="body-nonempty-projects-container"> <div id="projects"></div> </td> <td id="body-nonempty-create-project-panel-container"></td> </tr></table> </div> </div> <div id="footer"> <a href="about.html">About Gridworks</a> &bull; &copy; 2010 <a href="http://www.metaweb.com/">Metaweb Technologies, Inc.</a> </div> <div id="body-template"> <div id="create-project-panel"> <form id="file-upload-form" method="post" enctype="multipart/form-data" action="/command/create-project-from-upload" accept-charset="UTF-8"> <table id="create-project-panel-layout"> <tr><td>Data File:</td><td> <input type="file" id="project-file-input" name="project-file" /> </td></tr> <tr><td>Project Name:</td><td> <input type="text" size="30" id="project-name-input" name="project-name" /> </td></tr> <tr><td>Load up to:</td><td> <input id="limit-input" name="limit" size="5" /> data rows (optional) </td></tr> <tr><td>Skip:</td><td> <input id="skip-input" name="skip" size="5" /> initial data rows (optional) </td></tr> 
<tr><td></td><td> <input type="submit" value="Create Project" id="upload-file-button" /> </td></tr> </table> </form> </div> </div> </body> </html> <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> <html> <head> <title>Gridworks</title> <link rel="stylesheet" href="/styles/common.css" /> <link rel="stylesheet" href="/styles/index.css" /> <script type="text/javascript" src="externals/jquery-1.4.1.min.js"></script> <script type="text/javascript" src="externals/date.js"></script> <script type="text/javascript" src="scripts/util/string.js"></script> <script type="text/javascript" src="scripts/index.js"></script> </head> <body> <div id="header"> <a id="logo" href="http://www.metaweb.com/"><img alt="Metaweb" src="images/metaweb-headerlogo.png" /></a> </div> <div id="body"> <div id="body-empty"> <table><tr> <td id="body-empty-logo-container"><img src="images/gridworks.png" /> Gridworks</td> <td id="body-empty-create-project-panel-container"></td> </tr></table> </div> <div id="body-nonempty"> <table><tr> <td id="body-nonempty-logo-container"><img src="images/gridworks.png" /> Gridworks</td> <td id="body-nonempty-projects-container"> <div id="projects"></div> </td> <td id="body-nonempty-create-project-panel-container"></td> </tr></table> </div> </div> <div id="footer"> <a href="about.html">About Gridworks</a> &bull; &copy; 2010 <a href="http://www.metaweb.com/">Metaweb Technologies, Inc.</a> </div> <div id="body-template"> <div id="create-project-panel"> <h1>Upload Data File</h1> <form id="file-upload-form" method="post" enctype="multipart/form-data" action="/command/create-project-from-upload" accept-charset="UTF-8"> <table id="create-project-panel-layout"> <tr><td>Data File:</td><td> <input type="file" id="project-file-input" name="project-file" /> </td></tr> <tr><td>Project Name:</td><td> <input type="text" size="30" id="project-name-input" name="project-name" /> </td></tr> <tr><td>Load up to:</td><td> <input id="limit-input" 
name="limit" size="5" /> data rows (optional) </td></tr> <tr><td>Skip:</td><td> <input id="skip-input" name="skip" size="5" /> initial data rows (optional) </td></tr> <tr><td></td><td> <input type="submit" value="Create Project" id="upload-file-button" /> </td></tr> </table> </form> <h1>Import Existing Project</h1> <form id="project-upload-form" method="post" enctype="multipart/form-data" action="/command/import-project" accept-charset="UTF-8"> <table id="import-project-panel-layout"> <tr><td>Project TAR File:</td><td> <input type="file" id="project-tar-file-input" name="project-file" /> </td></tr> <tr><td>Re-name Project:</td><td> <input type="text" size="30" id="project-name-input" name="project-name" /> (optional) </td></tr> <tr><td></td><td> <input type="submit" value="Import Project" id="import-project-button" /> </td></tr> </table> </form> </div> </div> </body> </html>

View File

@ -12,7 +12,7 @@ MenuBar.prototype._initializeUI = function() {
var self = this; var self = this;
this._createTopLevelMenuItem("Data Set", [ this._createTopLevelMenuItem("Project", [
{ {
"label": "Export Filtered Rows", "label": "Export Filtered Rows",
"submenu": [ "submenu": [
@ -25,6 +25,10 @@ MenuBar.prototype._initializeUI = function() {
"click": function() { self._doExportRows("tripleloader", "txt"); } "click": function() { self._doExportRows("tripleloader", "txt"); }
} }
] ]
},
{
"label": "Export Project",
"click": function() { self._exportProject(); }
} }
]); ]);
this._createTopLevelMenuItem("Schemas", [ this._createTopLevelMenuItem("Schemas", [
@ -161,6 +165,27 @@ MenuBar.prototype._doExportRows = function(format, ext) {
document.body.removeChild(form); document.body.removeChild(form);
}; };
/*
 *  Posts the current project's ID to the export-project command through a
 *  temporary hidden form, targeting a window named "gridworks-export". The
 *  project name, with non-word characters turned into dashes, is appended to
 *  the command URL as a ".gridworks.tar" file name.
 */
MenuBar.prototype._exportProject = function() {
    var windowName = "gridworks-export";
    var fileName = theProject.metadata.name.replace(/\W/g, ' ').replace(/\s+/g, '-') + ".gridworks.tar";

    var exportForm = document.createElement("form");
    $(exportForm)
        .css("display", "none")
        .attr({
            "method": "post",
            "action": "/command/export-project/" + fileName,
            "target": windowName
        });

    var projectInput = $('<input />').attr({
        "name": "project",
        "value": theProject.id
    });
    projectInput.appendTo(exportForm);

    // The form has to be attached to the document while it is submitted.
    document.body.appendChild(exportForm);

    window.open("about:blank", windowName);
    exportForm.submit();

    document.body.removeChild(exportForm);
};
MenuBar.prototype._doAutoSchemaAlignment = function() { MenuBar.prototype._doAutoSchemaAlignment = function() {
//SchemaAlignment.autoAlign(); //SchemaAlignment.autoAlign();
}; };

View File

@ -53,6 +53,11 @@
padding-bottom: 0px; padding-bottom: 0px;
} }
/* Headings (h1) inside the create-project panel: 120% font size, 1em vertical margin, no horizontal margin. */
#create-project-panel h1 {
font-size: 120%;
margin: 1em 0;
}
#projects { #projects {
white-space: pre; white-space: pre;
} }