Added streaming json parser for faster re-loading of existing projects.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@470 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
8142b27ee4
commit
4a06c49a9a
@ -24,5 +24,6 @@
|
||||
<classpathentry kind="lib" path="lib/jython-2.5.1.jar"/>
|
||||
<classpathentry kind="lib" path="lib/clojure-1.1.0.jar"/>
|
||||
<classpathentry kind="lib" path="tests/java/lib/junit-4.8.1.jar" sourcepath="tests/java/lib-src/junit-4.8.1-sources.jar"/>
|
||||
<classpathentry kind="lib" path="lib/jackson-core-asl-1.5.1.jar"/>
|
||||
<classpathentry kind="output" path="src/main/webapp/WEB-INF/classes"/>
|
||||
</classpath>
|
||||
|
@ -42,6 +42,7 @@ licenses/apache2.0.LICENSE.txt
|
||||
commons-lang
|
||||
commons-codec
|
||||
commons-math
|
||||
jackson
|
||||
jdatapath
|
||||
jetty
|
||||
jetty-util
|
||||
|
BIN
lib-src/jackson-1.5.1-sources.zip
Normal file
BIN
lib-src/jackson-1.5.1-sources.zip
Normal file
Binary file not shown.
BIN
lib/jackson-core-asl-1.5.1.jar
Normal file
BIN
lib/jackson-core-asl-1.5.1.jar
Normal file
Binary file not shown.
@ -417,15 +417,19 @@ public class ProjectManager {
|
||||
}
|
||||
|
||||
public void deleteProject(Project project) {
|
||||
synchronized (this) {
|
||||
if (_projectsMetadata.containsKey(project.id)) {
|
||||
_projectsMetadata.remove(project.id);
|
||||
}
|
||||
if (_projects.containsKey(project.id)) {
|
||||
_projects.remove(project.id);
|
||||
deleteProject(project.id);
|
||||
}
|
||||
|
||||
File dir = getProjectDir(project.id);
|
||||
public void deleteProject(long projectID) {
|
||||
synchronized (this) {
|
||||
if (_projectsMetadata.containsKey(projectID)) {
|
||||
_projectsMetadata.remove(projectID);
|
||||
}
|
||||
if (_projects.containsKey(projectID)) {
|
||||
_projects.remove(projectID);
|
||||
}
|
||||
|
||||
File dir = getProjectDir(projectID);
|
||||
if (dir.exists()) {
|
||||
dir.delete();
|
||||
}
|
||||
@ -433,7 +437,6 @@ public class ProjectManager {
|
||||
|
||||
saveWorkspace();
|
||||
}
|
||||
|
||||
protected void load() {
|
||||
if (loadFromFile(new File(_workspaceDir, "workspace.json"))) return;
|
||||
if (loadFromFile(new File(_workspaceDir, "workspace.temp.json"))) return;
|
||||
|
@ -15,7 +15,9 @@ public class DeleteProjectCommand extends Command {
|
||||
throws ServletException, IOException {
|
||||
|
||||
try {
|
||||
ProjectManager.singleton.deleteProject(getProject(request));
|
||||
long projectID = Long.parseLong(request.getParameter("project"));
|
||||
|
||||
ProjectManager.singleton.deleteProject(projectID);
|
||||
|
||||
respond(response, "{ \"code\" : \"ok\" }");
|
||||
|
||||
|
@ -7,6 +7,8 @@ import java.util.Date;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.codehaus.jackson.JsonParser;
|
||||
import org.codehaus.jackson.JsonToken;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
@ -103,4 +105,53 @@ public class Cell implements HasFields, Jsonizable {
|
||||
|
||||
return new Cell(value, recon);
|
||||
}
|
||||
|
||||
static public Cell loadStreaming(JsonParser jp, Map<Long, Recon> reconCache) throws Exception {
|
||||
JsonToken t = jp.getCurrentToken();
|
||||
if (t == JsonToken.VALUE_NULL || t != JsonToken.START_OBJECT) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Serializable value = null;
|
||||
String type = null;
|
||||
Recon recon = null;
|
||||
|
||||
while (jp.nextToken() != JsonToken.END_OBJECT) {
|
||||
String fieldName = jp.getCurrentName();
|
||||
jp.nextToken();
|
||||
|
||||
if ("r".equals(fieldName)) {
|
||||
recon = Recon.loadStreaming(jp, reconCache);
|
||||
} else if ("e".equals(fieldName)) {
|
||||
value = new EvalError(jp.getText());
|
||||
} else if ("v".equals(fieldName)) {
|
||||
JsonToken token = jp.getCurrentToken();
|
||||
|
||||
if (token == JsonToken.VALUE_STRING) {
|
||||
value = jp.getText();
|
||||
} else if (token == JsonToken.VALUE_NUMBER_INT) {
|
||||
value = jp.getIntValue();
|
||||
} else if (token == JsonToken.VALUE_NUMBER_FLOAT) {
|
||||
value = jp.getFloatValue();
|
||||
} else if (token == JsonToken.VALUE_TRUE) {
|
||||
value = true;
|
||||
} else if (token == JsonToken.VALUE_FALSE) {
|
||||
value = false;
|
||||
}
|
||||
} else if ("t".equals(fieldName)) {
|
||||
type = jp.getText();
|
||||
}
|
||||
}
|
||||
|
||||
if (value != null) {
|
||||
if (type != null) {
|
||||
if ("date".equals(type)) {
|
||||
value = ParsingUtilities.stringToDate((String) value);
|
||||
}
|
||||
}
|
||||
return new Cell(value, recon);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -177,6 +177,8 @@ public class Project {
|
||||
}
|
||||
|
||||
static protected Project loadFromReader(LineNumberReader reader, long id) throws Exception {
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
/* String version = */ reader.readLine();
|
||||
|
||||
Project project = new Project(id);
|
||||
@ -210,6 +212,13 @@ public class Project {
|
||||
}
|
||||
|
||||
project.columnModel.setMaxCellIndex(maxCellCount - 1);
|
||||
|
||||
Gridworks.log(
|
||||
"Loaded project " + id + " from disk in " +
|
||||
(System.currentTimeMillis() - start) / 1000 +
|
||||
" sec(s)"
|
||||
);
|
||||
|
||||
project.recomputeRowContextDependencies();
|
||||
|
||||
return project;
|
||||
|
@ -6,6 +6,8 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.codehaus.jackson.JsonParser;
|
||||
import org.codehaus.jackson.JsonToken;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
@ -235,4 +237,72 @@ public class Recon implements HasFields, Jsonizable {
|
||||
return recon;
|
||||
}
|
||||
|
||||
static public Recon loadStreaming(JsonParser jp, Map<Long, Recon> reconCache) throws Exception {
|
||||
JsonToken t = jp.getCurrentToken();
|
||||
if (t == JsonToken.VALUE_NULL || t != JsonToken.START_OBJECT) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Recon recon = null;
|
||||
boolean old = true;
|
||||
|
||||
while (jp.nextToken() != JsonToken.END_OBJECT) {
|
||||
String fieldName = jp.getCurrentName();
|
||||
jp.nextToken();
|
||||
|
||||
if ("id".equals(fieldName)) {
|
||||
long id = jp.getLongValue();
|
||||
if (reconCache.containsKey(id)) {
|
||||
recon = reconCache.get(id);
|
||||
} else {
|
||||
recon = new Recon(id);
|
||||
old = false;
|
||||
}
|
||||
} else if ("j".equals(fieldName)) {
|
||||
recon.judgment = stringToJudgment(jp.getText());
|
||||
} else if ("m".equals(fieldName)) {
|
||||
if (jp.getCurrentToken() == JsonToken.START_OBJECT) {
|
||||
ReconCandidate match = ReconCandidate.loadStreaming(jp, reconCache);
|
||||
if (!old) {
|
||||
recon.match = match;
|
||||
}
|
||||
}
|
||||
} else if ("f".equals(fieldName)) {
|
||||
if (jp.getCurrentToken() != JsonToken.START_ARRAY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int feature = 0;
|
||||
while (jp.nextToken() != JsonToken.END_ARRAY) {
|
||||
if (feature < recon.features.length && !old) {
|
||||
JsonToken token = jp.getCurrentToken();
|
||||
if (token == JsonToken.VALUE_STRING) {
|
||||
recon.features[feature++] = jp.getText();
|
||||
} else if (token == JsonToken.VALUE_NUMBER_INT) {
|
||||
recon.features[feature++] = jp.getIntValue();
|
||||
} else if (token == JsonToken.VALUE_NUMBER_FLOAT) {
|
||||
recon.features[feature++] = jp.getFloatValue();
|
||||
} else if (token == JsonToken.VALUE_FALSE) {
|
||||
recon.features[feature++] = false;
|
||||
} else if (token == JsonToken.VALUE_TRUE) {
|
||||
recon.features[feature++] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if ("c".equals(fieldName)) {
|
||||
if (jp.getCurrentToken() != JsonToken.START_ARRAY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
while (jp.nextToken() != JsonToken.END_ARRAY) {
|
||||
ReconCandidate rc = ReconCandidate.loadStreaming(jp, reconCache);
|
||||
if (rc != null && !old) {
|
||||
recon.addCandidate(rc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return recon;
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,12 @@
|
||||
package com.metaweb.gridworks.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.codehaus.jackson.JsonParser;
|
||||
import org.codehaus.jackson.JsonToken;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
@ -78,4 +83,58 @@ public class ReconCandidate implements HasFields, Jsonizable {
|
||||
);
|
||||
return candidate;
|
||||
}
|
||||
|
||||
static public ReconCandidate loadStreaming(JsonParser jp, Map<Long, Recon> reconCache) throws Exception {
|
||||
JsonToken t = jp.getCurrentToken();
|
||||
if (t == JsonToken.VALUE_NULL || t != JsonToken.START_OBJECT) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String id = null;
|
||||
String guid = null;
|
||||
String name = null;
|
||||
List<String> types = null;
|
||||
double score = 0;
|
||||
|
||||
while (jp.nextToken() != JsonToken.END_OBJECT) {
|
||||
String fieldName = jp.getCurrentName();
|
||||
jp.nextToken();
|
||||
|
||||
if ("id".equals(fieldName)) {
|
||||
id = jp.getText();
|
||||
} else if ("guid".equals(fieldName)) {
|
||||
guid = jp.getText();
|
||||
} else if ("name".equals(fieldName)) {
|
||||
name = jp.getText();
|
||||
} else if ("score".equals(fieldName)) {
|
||||
score = jp.getDoubleValue();
|
||||
} else if ("types".equals(fieldName)) {
|
||||
if (jp.getCurrentToken() != JsonToken.START_ARRAY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
types = new ArrayList<String>();
|
||||
|
||||
while (jp.nextToken() != JsonToken.END_ARRAY) {
|
||||
types.add(jp.getText());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String[] typesA;
|
||||
if (types != null) {
|
||||
typesA = new String[types.size()];
|
||||
types.toArray(typesA);
|
||||
} else {
|
||||
typesA = new String[0];
|
||||
}
|
||||
|
||||
return new ReconCandidate(
|
||||
id,
|
||||
guid,
|
||||
name,
|
||||
typesA,
|
||||
score
|
||||
);
|
||||
}
|
||||
}
|
@ -7,6 +7,9 @@ import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.codehaus.jackson.JsonFactory;
|
||||
import org.codehaus.jackson.JsonParser;
|
||||
import org.codehaus.jackson.JsonToken;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
@ -34,6 +37,12 @@ public class Row implements HasFields, Jsonizable {
|
||||
cells = new ArrayList<Cell>(cellCount);
|
||||
}
|
||||
|
||||
/**
 * Builds a row around an existing cell list and its two marker flags.
 * The list is stored as given, not copied.
 *
 * @param cells    the row's cells
 * @param flagged  initial flagged state
 * @param starred  initial starred state
 */
protected Row(List<Cell> cells, boolean flagged, boolean starred) {
    this.starred = starred;
    this.flagged = flagged;
    this.cells = cells;
}
|
||||
|
||||
public Row dup() {
|
||||
Row row = new Row(cells.size());
|
||||
row.flagged = flagged;
|
||||
@ -154,7 +163,9 @@ public class Row implements HasFields, Jsonizable {
|
||||
}
|
||||
|
||||
static public Row load(String s, Map<Long, Recon> reconCache) throws Exception {
|
||||
return s.length() == 0 ? null : load(ParsingUtilities.evaluateJsonStringToObject(s), reconCache);
|
||||
return s.length() == 0 ? null :
|
||||
//load(ParsingUtilities.evaluateJsonStringToObject(s), reconCache);
|
||||
loadStreaming(s, reconCache);
|
||||
}
|
||||
|
||||
static public Row load(JSONObject obj, Map<Long, Recon> reconCache) throws Exception {
|
||||
@ -180,4 +191,39 @@ public class Row implements HasFields, Jsonizable {
|
||||
return row;
|
||||
}
|
||||
|
||||
static public Row loadStreaming(String s, Map<Long, Recon> reconCache) throws Exception {
|
||||
JsonFactory jsonFactory = new JsonFactory();
|
||||
JsonParser jp = jsonFactory.createJsonParser(s);
|
||||
|
||||
if (jp.nextToken() != JsonToken.START_OBJECT) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<Cell> cells = new ArrayList<Cell>();
|
||||
boolean starred = false;
|
||||
boolean flagged = false;
|
||||
|
||||
while (jp.nextToken() != JsonToken.END_OBJECT) {
|
||||
String fieldName = jp.getCurrentName();
|
||||
jp.nextToken();
|
||||
|
||||
if (STARRED.equals(fieldName)) {
|
||||
starred = jp.getBooleanValue();
|
||||
} else if (FLAGGED.equals(fieldName)) {
|
||||
flagged = jp.getBooleanValue();
|
||||
} else if ("cells".equals(fieldName)) {
|
||||
if (jp.getCurrentToken() != JsonToken.START_ARRAY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
while (jp.nextToken() != JsonToken.END_ARRAY) {
|
||||
Cell cell = Cell.loadStreaming(jp, reconCache);
|
||||
|
||||
cells.add(cell);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (cells.size() > 0) ? new Row(cells, flagged, starred) : null;
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because one or more lines are too long
@ -1 +1 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Freebase Gridworks</title>
<link rel="stylesheet" href="/styles/common.css" />
<link rel="stylesheet" href="/styles/index.css" />
<script type="text/javascript" src="externals/jquery-1.4.1.min.js"></script>
<script type="text/javascript" src="externals/date.js"></script>
<script type="text/javascript" src="scripts/util/string.js"></script>
<script type="text/javascript" src="scripts/version.js"></script>
<script type="text/javascript" src="scripts/index.js"></script>
<script type="text/javascript" src="http://www.freebase.com/labs/gridworks.js"></script>
</head>
<body>
<div id="header">
<a id="logo" href="http://www.freebase.com/" title="Freebase"><img alt="Freebase" src="images/freebase-headerlogo.png" /></a>
<div id="path"><span class="app-path-section"><a href="./index.html">Gridworks</a></span></div>
</div>
<div id="body">
<div id="body-empty">
<table><tr>
<td id="body-empty-logo-container"><img src="images/gridworks.png" /> Gridworks</td>
<td id="body-empty-create-project-panel-container"></td>
</tr></table>
</div>
<div id="body-nonempty">
<table><tr>
<td id="body-nonempty-logo-container"><img src="images/gridworks.png" /> Gridworks</td>
<td id="body-nonempty-projects-container">
<div id="projects"></div>
</td>
<td id="body-nonempty-create-project-panel-container"></td>
</tr></table>
</div>
</div>
<div id="footer">
<a href="about.html">About Freebase Gridworks</a>
•
© 2010 <a href="http://www.metaweb.com/">Metaweb Technologies, Inc.</a>
</div>
<div id="body-template">
<div id="create-project-panel">
<h1>Upload Data File</h1>
<form id="file-upload-form" method="post" enctype="multipart/form-data" action="/command/create-project-from-upload" accept-charset="UTF-8">
<div class="grid-layout layout-tight"><table class="import-project-panel-layout">
<tr><td>Data File:</td>
<td><input type="file" id="project-file-input" name="project-file" /></td></tr>
<tr><td>Project Name:</td>
<td><input type="text" size="20" id="project-name-input" name="project-name" /></td></tr>
<tr><td>Column separator:</td>
<td><input id="separator-input" name="separator" size="2" /> leave blank to guess comma or tab</td></tr>
<tr><td>Guess Value Type:</td>
<td><input id="guess-value-type-input" name="guess-value-type" type="checkbox" checked="true" /> (try to parse cells' content into numbers, dates, etc.)</td></tr>
<tr><td>Ignore:</td>
<td><input id="ignore-input" name="ignore" size="5" value="0" /> initial non-blank lines</td></tr>
<tr><td>Header lines:</td>
<td><input id="header-lines-input" name="header-lines" size="5" value="1" /> (can be zero)</td></tr>
<tr><td>Skip:</td>
<td><input id="skip-input" name="skip" size="5" value="0" /> initial data rows</td></tr>
<tr><td>Load up to:</td>
<td><input id="limit-input" name="limit" size="5" /> data rows (leave blank to load all rows)</td></tr>
<tr><td></td><td><input type="submit" value="Create Project" id="upload-file-button" /></td></tr>
</table></div>
</form>
<h1>Import Existing Project</h1>
<form id="project-upload-form" method="post" enctype="multipart/form-data" action="/command/import-project" accept-charset="UTF-8">
<table class="import-project-panel-layout">
<tr><td>Project .tar or .tar.gz File:</td><td><input type="file" id="project-tar-file-input" name="project-file" /></td></tr>
<tr><td>Re-name Project:</td><td><input type="text" size="20" id="project-name-input" name="project-name" /> (optional)</td></tr>
<tr><td></td><td><input type="submit" value="Import Project" id="import-project-button" /></td></tr>
</table>
</form>
</div>
</div>
</body>
</html>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Freebase Gridworks</title>
<link rel="stylesheet" href="/styles/common.css" />
<link rel="stylesheet" href="/styles/index.css" />
<script type="text/javascript" src="externals/jquery-1.4.1.min.js"></script>
<script type="text/javascript" src="externals/date.js"></script>
<script type="text/javascript" src="scripts/util/string.js"></script>
<script type="text/javascript" src="scripts/version.js"></script>
<script type="text/javascript" src="scripts/index.js"></script>
<script type="text/javascript" src="http://www.freebase.com/labs/gridworks.js"></script>
</head>
<body>
<div id="header">
<a id="logo" href="http://www.freebase.com/" title="Freebase"><img alt="Freebase" src="images/freebase-headerlogo.png" /></a>
<div id="path"><span class="app-path-section"><a href="./index.html">Gridworks</a></span></div>
</div>
<div id="body">
<div id="body-empty">
<table><tr>
<td id="body-empty-logo-container"><img src="images/gridworks.png" /> Gridworks</td>
<td id="body-empty-create-project-panel-container"></td>
</tr></table>
</div>
<div id="body-nonempty">
<table><tr>
<td id="body-nonempty-logo-container"><img src="images/gridworks.png" /> Gridworks</td>
<td id="body-nonempty-projects-container">
<div id="projects"></div>
</td>
<td id="body-nonempty-create-project-panel-container"></td>
</tr></table>
</div>
</div>
<div id="footer">
<a href="about.html">About Freebase Gridworks</a>
•
© 2010 <a href="http://www.metaweb.com/">Metaweb Technologies, Inc.</a>
</div>
<div id="body-template">
<div id="create-project-panel">
<h1>Upload Data File</h1>
<form id="file-upload-form" method="post" enctype="multipart/form-data" action="/command/create-project-from-upload" accept-charset="UTF-8">
<div class="grid-layout layout-tight"><table class="import-project-panel-layout">
<tr><td>Data File:</td>
<td><input type="file" id="project-file-input" name="project-file" /></td></tr>
<tr><td>Project Name:</td>
<td><input type="text" size="20" id="project-name-input" name="project-name" /></td></tr>
<tr><td></td><td><h3>Text File Options</h3></td></tr>
<tr><td>Column separator:</td>
<td><input id="separator-input" name="separator" size="2" /> leave blank to guess comma or tab</td></tr>
<tr><td>Guess Value Type:</td>
<td><input id="guess-value-type-input" name="guess-value-type" type="checkbox" checked="true" /> (try to parse cells' content into numbers, dates, etc.)</td></tr>
<tr><td>Header lines:</td>
<td><input id="header-lines-input" name="header-lines" size="5" value="1" /> (can be zero)</td></tr>
<tr><td></td><td><h3>Text File and Excel File Options</h3></td></tr>
<tr><td>Ignore:</td>
<td><input id="ignore-input" name="ignore" size="5" value="0" /> initial non-blank lines</td></tr>
<tr><td>Skip:</td>
<td><input id="skip-input" name="skip" size="5" value="0" /> initial data rows</td></tr>
<tr><td>Load up to:</td>
<td><input id="limit-input" name="limit" size="5" /> data rows (leave blank to load all rows)</td></tr>
<tr><td></td><td><input type="submit" value="Create Project" id="upload-file-button" /></td></tr>
</table></div>
</form>
<h1>Import Existing Project</h1>
<form id="project-upload-form" method="post" enctype="multipart/form-data" action="/command/import-project" accept-charset="UTF-8">
<table class="import-project-panel-layout">
<tr><td>Project .tar or .tar.gz File:</td><td><input type="file" id="project-tar-file-input" name="project-file" /></td></tr>
<tr><td>Re-name Project:</td><td><input type="text" size="20" id="project-name-input" name="project-name" /> (optional)</td></tr>
<tr><td></td><td><input type="submit" value="Import Project" id="import-project-button" /></td></tr>
</table>
</form>
</div>
</div>
</body>
</html>
|
Loading…
Reference in New Issue
Block a user