Raised the Jetty form upload size limit (max form content size) to 1 MB.

Added a few more array bounds checks.
Reduced the number of ReconCandidate and Recon objects created by extend-data operations by reusing a single instance per GUID.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@577 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-04-30 19:41:53 +00:00
parent 7bc0810aa7
commit bab1e8905b
6 changed files with 92 additions and 43 deletions

View File

@ -109,8 +109,9 @@ public class ExpressionNumericRowBinner implements RowVisitor {
rowHasNumeric = true;
int bin = (int) Math.floor((d - _index.getMin()) / _index.getStep());
if (bin >= 0 && bin < bins.length) { // as a precaution
bins[bin]++;
}
} else {
rowHasError = true;
}

View File

@ -2,6 +2,7 @@ package com.metaweb.gridworks.commands.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@ -64,8 +65,9 @@ public class PreviewExtendDataCommand extends Command {
}
}
Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
FreebaseDataExtensionJob job = new FreebaseDataExtensionJob(json);
Map<String, DataExtension> map = job.extend(guids);
Map<String, DataExtension> map = job.extend(guids, reconCandidateMap);
response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", "application/json");

View File

@ -5,7 +5,9 @@ import java.io.LineNumberReader;
import java.io.Serializable;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.json.JSONException;
@ -106,10 +108,13 @@ public class DataExtensionChange implements Change {
int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex();
int index = 0;
int rowIndex = _rowIndices.get(index);
DataExtension dataExtension = _dataExtensions.get(index);
int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
DataExtension dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
index++;
Map<String, Recon> reconMap = new HashMap<String, Recon>();
for (int r = 0; r < _oldRows.size(); r++) {
Row oldRow = _oldRows.get(r);
if (r < rowIndex) {
@ -121,7 +126,7 @@ public class DataExtensionChange implements Change {
_newRows.add(oldRow);
} else {
Row firstNewRow = oldRow.dup();
extendRow(firstNewRow, dataExtension, 0);
extendRow(firstNewRow, dataExtension, 0, reconMap);
_newRows.add(firstNewRow);
int r2 = r + 1;
@ -132,7 +137,7 @@ public class DataExtensionChange implements Change {
oldRow2.isCellBlank(keyCellIndex)) {
Row newRow = oldRow2.dup();
extendRow(newRow, dataExtension, subR);
extendRow(newRow, dataExtension, subR, reconMap);
_newRows.add(newRow);
r2++;
@ -142,7 +147,7 @@ public class DataExtensionChange implements Change {
}
Row newRow = new Row(cellIndex + _columnNames.size());
extendRow(newRow, dataExtension, subR);
extendRow(newRow, dataExtension, subR, reconMap);
_newRows.add(newRow);
}
@ -175,7 +180,12 @@ public class DataExtensionChange implements Change {
}
}
protected void extendRow(Row row, DataExtension dataExtension, int extensionRowIndex) {
protected void extendRow(
Row row,
DataExtension dataExtension,
int extensionRowIndex,
Map<String, Recon> reconMap
) {
Object[] values = dataExtension.data[extensionRowIndex];
for (int c = 0; c < values.length; c++) {
Object value = values[c];
@ -183,7 +193,11 @@ public class DataExtensionChange implements Change {
if (value instanceof ReconCandidate) {
ReconCandidate rc = (ReconCandidate) value;
Recon recon = new Recon(_historyEntryID);
Recon recon;
if (reconMap.containsKey(rc.topicGUID)) {
recon = reconMap.get(rc.topicGUID);
} else {
recon = new Recon(_historyEntryID);
recon.addCandidate(rc);
recon.service = "mql";
recon.match = rc;
@ -192,6 +206,8 @@ public class DataExtensionChange implements Change {
recon.judgmentAction = "auto";
recon.judgmentBatchSize = 1;
reconMap.put(rc.topicGUID, recon);
}
cell = new Cell(rc.topicName, recon);
} else {
cell = new Cell((Serializable) value, null);

View File

@ -21,6 +21,7 @@ import com.metaweb.gridworks.model.AbstractOperation;
import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Column;
import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.ReconCandidate;
import com.metaweb.gridworks.model.Row;
import com.metaweb.gridworks.model.changes.CellAtRow;
import com.metaweb.gridworks.model.changes.DataExtensionChange;
@ -159,7 +160,13 @@ public class ExtendDataOperation extends EngineDependentOperation {
}.init(rowIndices));
}
protected int extendRows(List<Integer> rowIndices, List<DataExtension> dataExtensions, int from, int limit) {
protected int extendRows(
List<Integer> rowIndices,
List<DataExtension> dataExtensions,
int from,
int limit,
Map<String, ReconCandidate> reconCandidateMap
) {
Set<String> guids = new HashSet<String>();
int end;
@ -173,7 +180,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
Map<String, DataExtension> map = null;
try {
map = _job.extend(guids);
map = _job.extend(guids, reconCandidateMap);
} catch (Exception e) {
map = new HashMap<String, DataExtension>();
}
@ -206,8 +213,10 @@ public class ExtendDataOperation extends EngineDependentOperation {
}
int start = 0;
Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
while (start < rowIndices.size()) {
int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size());
int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size(), reconCandidateMap);
start = end;
_progress = end * 100 / rowIndices.size();

View File

@ -56,7 +56,10 @@ public class FreebaseDataExtensionJob {
countColumns(obj.getJSONArray("properties"), columns, new ArrayList<String>(), new ArrayList<String>()) : 0;
}
public Map<String, FreebaseDataExtensionJob.DataExtension> extend(Set<String> guids) throws Exception {
public Map<String, FreebaseDataExtensionJob.DataExtension> extend(
Set<String> guids,
Map<String, ReconCandidate> reconCandidateMap
) throws Exception {
StringWriter writer = new StringWriter();
formulateQuery(guids, extension, writer);
@ -73,7 +76,7 @@ public class FreebaseDataExtensionJob {
for (int i = 0; i < l; i++) {
JSONObject o2 = a.getJSONObject(i);
String guid = o2.getString("guid");
FreebaseDataExtensionJob.DataExtension ext = collectResult(o2);
FreebaseDataExtensionJob.DataExtension ext = collectResult(o2, reconCandidateMap);
if (ext != null) {
map.put(guid, ext);
@ -86,10 +89,13 @@ public class FreebaseDataExtensionJob {
}
}
protected FreebaseDataExtensionJob.DataExtension collectResult(JSONObject obj) throws JSONException {
protected FreebaseDataExtensionJob.DataExtension collectResult(
JSONObject obj,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
List<Object[]> rows = new ArrayList<Object[]>();
collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0);
collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap);
Object[][] data = new Object[rows.size()][columnCount];
rows.toArray(data);
@ -101,7 +107,8 @@ public class FreebaseDataExtensionJob {
List<Object[]> rows,
int row,
int col,
Object value
Object value,
Map<String, ReconCandidate> reconCandidateMap
) {
while (row >= rows.size()) {
rows.add(new Object[columnCount]);
@ -113,17 +120,26 @@ public class FreebaseDataExtensionJob {
List<Object[]> rows,
int row,
int col,
JSONObject obj
JSONObject obj,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
storeCell(rows, row, col,
new ReconCandidate(
String guid = obj.getString("guid");
ReconCandidate rc;
if (reconCandidateMap.containsKey(guid)) {
rc = reconCandidateMap.get(guid);
} else {
rc = new ReconCandidate(
obj.getString("id"),
obj.getString("guid"),
obj.getString("name"),
JSONUtilities.getStringArray(obj, "type"),
100
)
);
reconCandidateMap.put(guid, rc);
}
storeCell(rows, row, col, rc, reconCandidateMap);
}
protected int[] collectResult(
@ -131,7 +147,8 @@ public class FreebaseDataExtensionJob {
JSONObject extNode,
JSONObject resultNode,
int startRowIndex,
int startColumnIndex
int startColumnIndex,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
String propertyID = extNode.getString("id");
String expectedTypeID = extNode.getJSONObject("expected").getString("id");
@ -145,7 +162,7 @@ public class FreebaseDataExtensionJob {
for (int r = 0; r < l; r++) {
Object o = a.isNull(r) ? null : a.get(r);
if (o instanceof Serializable) {
storeCell(rows, startRowIndex++, startColumnIndex, o);
storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap);
}
}
}
@ -169,9 +186,9 @@ public class FreebaseDataExtensionJob {
if (isOwnColumn) {
if (o != null) {
storeCell(rows, startRowIndex2++, startColumnIndex2++, o);
storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap);
} else {
storeCell(rows, startRowIndex2++, startColumnIndex2++, v);
storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap);
}
}
@ -181,7 +198,8 @@ public class FreebaseDataExtensionJob {
extNode.getJSONArray("properties"),
o,
startRowIndex,
startColumnIndex2
startColumnIndex2,
reconCandidateMap
);
startRowIndex2 = rowcol[0];
@ -207,7 +225,8 @@ public class FreebaseDataExtensionJob {
JSONArray subProperties,
JSONObject resultNode,
int startRowIndex,
int startColumnIndex
int startColumnIndex,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
int maxStartRowIndex = startRowIndex;
@ -218,7 +237,8 @@ public class FreebaseDataExtensionJob {
subProperties.getJSONObject(c),
resultNode,
startRowIndex,
startColumnIndex
startColumnIndex,
reconCandidateMap
);
maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]);

View File

@ -156,6 +156,7 @@ class GridworksServer extends Server {
logger.info("Initializing context: '" + contextPath + "' from '" + contextRoot.getAbsolutePath() + "'");
WebAppContext context = new WebAppContext(contextRoot.getAbsolutePath(), contextPath);
context.setMaxFormContentSize(1048576);
//context.setCopyWebDir(false);
//context.setDefaultsDescriptor(null);