Make workspace saving and loading more robust - fixes #528

- don't overwrite old files if we get an error writing new ones
- don't write unchanged data
- keep backup files around until next write rather than deleting
immediately
- attempt to recreate missing metadata as best as possible
This commit is contained in:
Tom Morris 2013-08-09 19:53:53 -04:00
parent c4bd5d7392
commit 1d8784e059
5 changed files with 128 additions and 37 deletions

View File

@ -62,6 +62,13 @@ public abstract class ProjectManager {
// last n expressions used across all projects
static protected final int s_expressionHistoryMax = 100;
// If a project has been idle this long, flush it from memory
static protected final int PROJECT_FLUSH_DELAY = 1000 * 60 * 15; // 15 minutes
// Don't spend more than this much time saving projects if doing a quick save
static protected final int QUICK_SAVE_MAX_TIME = 1000 * 30; // 30 secs
protected Map<Long, ProjectMetadata> _projectsMetadata;
protected PreferenceStore _preferenceStore;
@ -202,7 +209,6 @@ public abstract class ProjectManager {
/**
 * Saves projects and then the workspace.
 * <p>
 * Nothing is saved when {@code allModified} is false and the manager is
 * currently busy ({@code _busy} is non-zero).
 *
 * @param allModified passed through to {@code saveProjects}; when true the
 *                    save proceeds even if the manager is busy
 */
public void save(boolean allModified) {
    boolean busy = _busy != 0;
    if (!allModified && busy) {
        return;
    }

    saveProjects(allModified);
    // TODO: Only save workspace if it's dirty
    saveWorkspace();
}
@ -226,9 +232,6 @@ public abstract class ProjectManager {
}
}
static protected final int s_projectFlushDelay = 1000 * 60 * 15; // 15 minutes
static protected final int s_quickSaveTimeout = 1000 * 30; // 30 secs
/**
* Saves all projects to the data store
* @param allModified
@ -255,7 +258,7 @@ public abstract class ProjectManager {
records.add(new SaveRecord(project, msecsOverdue));
} else if (!project.getProcessManager().hasPending()
&& startTimeOfSave.getTime() - project.getLastSave().getTime() > s_projectFlushDelay) {
&& startTimeOfSave.getTime() - project.getLastSave().getTime() > PROJECT_FLUSH_DELAY) {
/*
* It's been a while since the project was last saved and it hasn't been
@ -288,7 +291,7 @@ public abstract class ProjectManager {
for (int i = 0;
i < records.size() &&
(allModified || (new Date().getTime() - startTimeOfSave.getTime() < s_quickSaveTimeout));
(allModified || (new Date().getTime() - startTimeOfSave.getTime() < QUICK_SAVE_MAX_TIME));
i++) {
try {

View File

@ -54,6 +54,7 @@ import com.google.refine.util.ParsingUtilities;
public class ProjectMetadata implements Jsonizable {
private final Date _created;
private Date _modified;
private Date written = null;
private String _name;
private String _password;
@ -71,9 +72,14 @@ public class ProjectMetadata implements Jsonizable {
}
public ProjectMetadata() {
_created = new Date();
this(new Date());
_modified = _created;
preparePreferenceStore(_preferenceStore);
}
/**
 * Creates metadata with explicit timestamps and a name.
 *
 * @param created  creation date, handed to the single-date constructor
 * @param modified value stored as the last-modified date
 * @param name     value stored as the project name
 */
public ProjectMetadata(Date created, Date modified, String name) {
    this(created);
    this._name = name;
    this._modified = modified;
}
@Override
@ -103,16 +109,36 @@ public class ProjectMetadata implements Jsonizable {
}
writer.endObject();
if ("save".equals(options.getProperty("mode"))) {
written = new Date();
}
}
/**
 * Reports whether this metadata needs to be written.
 *
 * @return true when {@code written} is null, or when {@code _modified} is
 *         later than {@code written}; false otherwise
 */
public boolean isDirty() {
    if (written == null) {
        return true;
    }
    return _modified.after(written);
}
public void write(JSONWriter jsonWriter) throws JSONException {
Properties options = new Properties();
options.setProperty("mode", "save");
write(jsonWriter, false);
}
write(jsonWriter, options);
/**
 * Writes this metadata in "save" mode, optionally skipping the write when
 * nothing has changed.
 *
 * @param jsonWriter writer to save metadata to
 * @param onlyIfDirty true to skip writing when {@link #isDirty()} is false
 * @throws JSONException declared by the underlying options-based write
 */
public void write(JSONWriter jsonWriter, boolean onlyIfDirty) throws JSONException {
    if (onlyIfDirty && !isDirty()) {
        return;
    }

    Properties saveOptions = new Properties();
    saveOptions.setProperty("mode", "save");
    write(jsonWriter, saveOptions);
}
static public ProjectMetadata loadFromJSON(JSONObject obj) {
// TODO: Is this correct? It's using modified date for creation date
ProjectMetadata pm = new ProjectMetadata(JSONUtilities.getDate(obj, "modified", new Date()));
pm._modified = JSONUtilities.getDate(obj, "modified", new Date());
@ -157,6 +183,8 @@ public class ProjectMetadata implements Jsonizable {
}
}
pm.written = new Date(); // Mark it as not needing writing until modified
return pm;
}

View File

@ -62,7 +62,7 @@ import com.google.refine.model.Project;
import com.google.refine.preference.TopList;
public class FileProjectManager extends ProjectManager {
final static protected String s_projectDirNameSuffix = ".project";
final static protected String PROJECT_DIR_SUFFIX = ".project";
protected File _workspaceDir;
@ -72,6 +72,8 @@ public class FileProjectManager extends ProjectManager {
if (singleton == null) {
logger.info("Using workspace directory: {}", dir.getAbsolutePath());
singleton = new FileProjectManager(dir);
// This needs our singleton set, thus the unconventional control flow
((FileProjectManager) singleton).recover();
}
}
@ -85,7 +87,6 @@ public class FileProjectManager extends ProjectManager {
}
load();
recover();
}
public File getWorkspaceDir() {
@ -93,7 +94,7 @@ public class FileProjectManager extends ProjectManager {
}
static public File getProjectDir(File workspaceDir, long projectID) {
File dir = new File(workspaceDir, projectID + s_projectDirNameSuffix);
File dir = new File(workspaceDir, projectID + PROJECT_DIR_SUFFIX);
if (!dir.exists()) {
dir.mkdir();
}
@ -114,6 +115,9 @@ public class FileProjectManager extends ProjectManager {
public boolean loadProjectMetadata(long projectID) {
synchronized (this) {
ProjectMetadata metadata = ProjectMetadataUtilities.load(getProjectDir(projectID));
if (metadata == null) {
metadata = ProjectMetadataUtilities.recover(getProjectDir(projectID), projectID);
}
if (metadata != null) {
_projectsMetadata.put(projectID, metadata);
return true;
@ -227,20 +231,21 @@ public class FileProjectManager extends ProjectManager {
}
/**
* Save the workspace's data out to file in a safe way: save to a temporary file first
* and rename it to the real file.
* <p>
* FIXME: Even though this attempts to be safe by writing new file and renaming,
* it's still possible for it to corrupt things.
*/
@Override
protected void saveWorkspace() {
synchronized (this) {
File tempFile = new File(_workspaceDir, "workspace.temp.json");
try {
saveToFile(tempFile);
if (!saveToFile(tempFile)) {
// If the save wasn't really needed, just keep what we had
tempFile.delete();
logger.info("Skipping unnecessary workspace save");
return;
}
} catch (Exception e) {
e.printStackTrace();
@ -251,21 +256,23 @@ public class FileProjectManager extends ProjectManager {
File file = new File(_workspaceDir, "workspace.json");
File oldFile = new File(_workspaceDir, "workspace.old.json");
if (oldFile.exists()) {
oldFile.delete();
}
if (file.exists()) {
file.renameTo(oldFile);
}
tempFile.renameTo(file);
if (oldFile.exists()) {
oldFile.delete();
}
logger.info("Saved workspace");
}
}
protected void saveToFile(File file) throws IOException, JSONException {
protected boolean saveToFile(File file) throws IOException, JSONException {
FileWriter writer = new FileWriter(file);
boolean saveWasNeeded = false;
try {
JSONWriter jsonWriter = new JSONWriter(writer);
jsonWriter.object();
@ -275,20 +282,24 @@ public class FileProjectManager extends ProjectManager {
ProjectMetadata metadata = _projectsMetadata.get(id);
if (metadata != null) {
jsonWriter.value(id);
ProjectMetadataUtilities.save(metadata, getProjectDir(id));
if (metadata.isDirty()) {
ProjectMetadataUtilities.save(metadata, getProjectDir(id));
saveWasNeeded = true;
}
}
}
jsonWriter.endArray();
writer.write('\n');
jsonWriter.key("preferences");
saveWasNeeded |= _preferenceStore.isDirty();
_preferenceStore.write(jsonWriter, new Properties());
jsonWriter.endObject();
} finally {
writer.close();
}
return saveWasNeeded;
}
@ -385,11 +396,12 @@ public class FileProjectManager extends ProjectManager {
}
protected void recover() {
boolean recovered = false;
for (File file : _workspaceDir.listFiles()) {
if (file.isDirectory() && !file.isHidden()) {
String name = file.getName();
if (file.getName().endsWith(s_projectDirNameSuffix)) {
String idString = name.substring(0, name.length() - s_projectDirNameSuffix.length());
String dirName = file.getName();
if (file.getName().endsWith(PROJECT_DIR_SUFFIX)) {
String idString = dirName.substring(0, dirName.length() - PROJECT_DIR_SUFFIX.length());
long id = -1;
try {
id = Long.parseLong(idString);
@ -399,19 +411,22 @@ public class FileProjectManager extends ProjectManager {
if (id > 0 && !_projectsMetadata.containsKey(id)) {
if (loadProjectMetadata(id)) {
logger.info(
"Recovered project named " +
getProjectMetadata(id).getName() +
" in directory " + name);
logger.info("Recovered project named "
+ getProjectMetadata(id).getName()
+ " in directory " + dirName);
recovered = true;
} else {
logger.warn("Failed to recover project in directory " + name);
logger.warn("Failed to recover project in directory " + dirName);
file.renameTo(new File(file.getParentFile(), name + ".corrupted"));
file.renameTo(new File(file.getParentFile(), dirName + ".corrupted"));
}
}
}
}
}
if (recovered) {
saveWorkspace();
}
}
@Override

View File

@ -39,7 +39,10 @@ import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Date;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONTokener;
@ -48,6 +51,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectMetadata;
import com.google.refine.model.Project;
public class ProjectMetadataUtilities {
@ -60,14 +64,15 @@ public class ProjectMetadataUtilities {
File file = new File(projectDir, "metadata.json");
File oldFile = new File(projectDir, "metadata.old.json");
if (oldFile.exists()) {
oldFile.delete();
}
if (file.exists()) {
file.renameTo(oldFile);
}
tempFile.renameTo(file);
if (oldFile.exists()) {
oldFile.delete();
}
}
protected static void saveToFile(ProjectMetadata projectMeta, File metadataFile) throws JSONException, IOException {
@ -99,6 +104,45 @@ public class ProjectMetadataUtilities {
return null;
}
/**
 * Reconstruct the project metadata on a best efforts basis. The name is
 * gone, so build something descriptive from the column names. Recover the
 * creation and modification times based on whatever files are available:
 * the earliest and latest modification times found among {@code data.zip}
 * and the files in the {@code history} directory.
 * <p>
 * Files whose timestamp cannot be read are ignored. If no usable timestamp
 * is found at all, both dates fall back to the current time.
 *
 * @param projectDir the project directory
 * @param id the project id
 * @return the reconstructed metadata, or null if the project itself could
 *         not be loaded
 */
static public ProjectMetadata recover(File projectDir, long id) {
    Project p = ProjectUtilities.load(projectDir, id);
    if (p == null) {
        return null;
    }

    List<String> columnNames = p.columnModel.getColumnNames();
    String tempName = "<recovered project> - " + columnNames.size()
            + " cols X " + p.rows.size() + " rows - "
            + StringUtils.join(columnNames,'|');
    p.dispose();

    // Sentinels meaning "no timestamp seen yet".
    long ctime = Long.MAX_VALUE;
    long mtime = 0;

    // File.lastModified() returns 0 for a missing or unreadable file; such a
    // value must not be mistaken for a real (epoch) timestamp.
    File dataFile = new File(projectDir, "data.zip");
    long dataTime = dataFile.lastModified();
    if (dataTime > 0) {
        ctime = dataTime;
        mtime = dataTime;
    }

    File historyDir = new File(projectDir,"history");
    File[] files = historyDir.listFiles(); // null when the directory is absent
    if (files != null) {
        for (File f : files) {
            long time = f.lastModified();
            if (time > 0) {
                ctime = Math.min(ctime, time);
                mtime = Math.max(mtime, time);
            }
        }
    }

    if (mtime == 0) {
        // Nothing on disk gave us a usable timestamp; use "now" for both.
        long now = System.currentTimeMillis();
        ctime = now;
        mtime = now;
    }

    ProjectMetadata pm = new ProjectMetadata(new Date(ctime),new Date(mtime), tempName);
    logger.error("Partially recovered missing metadata project in directory " + projectDir + " - " + tempName);
    return pm;
}
static protected ProjectMetadata loadFromFile(File metadataFile) throws Exception {
FileReader reader = new FileReader(metadataFile);
try {

View File

@ -194,6 +194,7 @@ public class Project {
) throws Exception {
long start = System.currentTimeMillis();
// version of Refine which wrote the file
/* String version = */ reader.readLine();
Project project = new Project(id);