Raised the Jetty form upload size limit to 1 MiB (1048576 bytes).

Added a few more array bounds checks.
Reduced the number of ReconCandidate and Recon objects created by extend-data operations (instances are now cached in a map and reused per topic GUID).

git-svn-id: http://google-refine.googlecode.com/svn/trunk@577 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-04-30 19:41:53 +00:00
parent 7bc0810aa7
commit bab1e8905b
6 changed files with 92 additions and 43 deletions

View File

@ -109,8 +109,9 @@ public class ExpressionNumericRowBinner implements RowVisitor {
rowHasNumeric = true; rowHasNumeric = true;
int bin = (int) Math.floor((d - _index.getMin()) / _index.getStep()); int bin = (int) Math.floor((d - _index.getMin()) / _index.getStep());
if (bin >= 0 && bin < bins.length) { // as a precaution
bins[bin]++; bins[bin]++;
}
} else { } else {
rowHasError = true; rowHasError = true;
} }

View File

@ -2,6 +2,7 @@ package com.metaweb.gridworks.commands.util;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -64,8 +65,9 @@ public class PreviewExtendDataCommand extends Command {
} }
} }
Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
FreebaseDataExtensionJob job = new FreebaseDataExtensionJob(json); FreebaseDataExtensionJob job = new FreebaseDataExtensionJob(json);
Map<String, DataExtension> map = job.extend(guids); Map<String, DataExtension> map = job.extend(guids, reconCandidateMap);
response.setCharacterEncoding("UTF-8"); response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", "application/json"); response.setHeader("Content-Type", "application/json");

View File

@ -5,7 +5,9 @@ import java.io.LineNumberReader;
import java.io.Serializable; import java.io.Serializable;
import java.io.Writer; import java.io.Writer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Properties; import java.util.Properties;
import org.json.JSONException; import org.json.JSONException;
@ -106,10 +108,13 @@ public class DataExtensionChange implements Change {
int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex(); int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex();
int index = 0; int index = 0;
int rowIndex = _rowIndices.get(index); int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size();
DataExtension dataExtension = _dataExtensions.get(index); DataExtension dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null;
index++; index++;
Map<String, Recon> reconMap = new HashMap<String, Recon>();
for (int r = 0; r < _oldRows.size(); r++) { for (int r = 0; r < _oldRows.size(); r++) {
Row oldRow = _oldRows.get(r); Row oldRow = _oldRows.get(r);
if (r < rowIndex) { if (r < rowIndex) {
@ -121,7 +126,7 @@ public class DataExtensionChange implements Change {
_newRows.add(oldRow); _newRows.add(oldRow);
} else { } else {
Row firstNewRow = oldRow.dup(); Row firstNewRow = oldRow.dup();
extendRow(firstNewRow, dataExtension, 0); extendRow(firstNewRow, dataExtension, 0, reconMap);
_newRows.add(firstNewRow); _newRows.add(firstNewRow);
int r2 = r + 1; int r2 = r + 1;
@ -132,7 +137,7 @@ public class DataExtensionChange implements Change {
oldRow2.isCellBlank(keyCellIndex)) { oldRow2.isCellBlank(keyCellIndex)) {
Row newRow = oldRow2.dup(); Row newRow = oldRow2.dup();
extendRow(newRow, dataExtension, subR); extendRow(newRow, dataExtension, subR, reconMap);
_newRows.add(newRow); _newRows.add(newRow);
r2++; r2++;
@ -142,7 +147,7 @@ public class DataExtensionChange implements Change {
} }
Row newRow = new Row(cellIndex + _columnNames.size()); Row newRow = new Row(cellIndex + _columnNames.size());
extendRow(newRow, dataExtension, subR); extendRow(newRow, dataExtension, subR, reconMap);
_newRows.add(newRow); _newRows.add(newRow);
} }
@ -175,7 +180,12 @@ public class DataExtensionChange implements Change {
} }
} }
protected void extendRow(Row row, DataExtension dataExtension, int extensionRowIndex) { protected void extendRow(
Row row,
DataExtension dataExtension,
int extensionRowIndex,
Map<String, Recon> reconMap
) {
Object[] values = dataExtension.data[extensionRowIndex]; Object[] values = dataExtension.data[extensionRowIndex];
for (int c = 0; c < values.length; c++) { for (int c = 0; c < values.length; c++) {
Object value = values[c]; Object value = values[c];
@ -183,15 +193,21 @@ public class DataExtensionChange implements Change {
if (value instanceof ReconCandidate) { if (value instanceof ReconCandidate) {
ReconCandidate rc = (ReconCandidate) value; ReconCandidate rc = (ReconCandidate) value;
Recon recon = new Recon(_historyEntryID); Recon recon;
recon.addCandidate(rc); if (reconMap.containsKey(rc.topicGUID)) {
recon.service = "mql"; recon = reconMap.get(rc.topicGUID);
recon.match = rc; } else {
recon.matchRank = 0; recon = new Recon(_historyEntryID);
recon.judgment = Judgment.Matched; recon.addCandidate(rc);
recon.judgmentAction = "auto"; recon.service = "mql";
recon.judgmentBatchSize = 1; recon.match = rc;
recon.matchRank = 0;
recon.judgment = Judgment.Matched;
recon.judgmentAction = "auto";
recon.judgmentBatchSize = 1;
reconMap.put(rc.topicGUID, recon);
}
cell = new Cell(rc.topicName, recon); cell = new Cell(rc.topicName, recon);
} else { } else {
cell = new Cell((Serializable) value, null); cell = new Cell((Serializable) value, null);

View File

@ -21,6 +21,7 @@ import com.metaweb.gridworks.model.AbstractOperation;
import com.metaweb.gridworks.model.Cell; import com.metaweb.gridworks.model.Cell;
import com.metaweb.gridworks.model.Column; import com.metaweb.gridworks.model.Column;
import com.metaweb.gridworks.model.Project; import com.metaweb.gridworks.model.Project;
import com.metaweb.gridworks.model.ReconCandidate;
import com.metaweb.gridworks.model.Row; import com.metaweb.gridworks.model.Row;
import com.metaweb.gridworks.model.changes.CellAtRow; import com.metaweb.gridworks.model.changes.CellAtRow;
import com.metaweb.gridworks.model.changes.DataExtensionChange; import com.metaweb.gridworks.model.changes.DataExtensionChange;
@ -159,7 +160,13 @@ public class ExtendDataOperation extends EngineDependentOperation {
}.init(rowIndices)); }.init(rowIndices));
} }
protected int extendRows(List<Integer> rowIndices, List<DataExtension> dataExtensions, int from, int limit) { protected int extendRows(
List<Integer> rowIndices,
List<DataExtension> dataExtensions,
int from,
int limit,
Map<String, ReconCandidate> reconCandidateMap
) {
Set<String> guids = new HashSet<String>(); Set<String> guids = new HashSet<String>();
int end; int end;
@ -173,7 +180,7 @@ public class ExtendDataOperation extends EngineDependentOperation {
Map<String, DataExtension> map = null; Map<String, DataExtension> map = null;
try { try {
map = _job.extend(guids); map = _job.extend(guids, reconCandidateMap);
} catch (Exception e) { } catch (Exception e) {
map = new HashMap<String, DataExtension>(); map = new HashMap<String, DataExtension>();
} }
@ -206,8 +213,10 @@ public class ExtendDataOperation extends EngineDependentOperation {
} }
int start = 0; int start = 0;
Map<String, ReconCandidate> reconCandidateMap = new HashMap<String, ReconCandidate>();
while (start < rowIndices.size()) { while (start < rowIndices.size()) {
int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size()); int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size(), reconCandidateMap);
start = end; start = end;
_progress = end * 100 / rowIndices.size(); _progress = end * 100 / rowIndices.size();

View File

@ -56,7 +56,10 @@ public class FreebaseDataExtensionJob {
countColumns(obj.getJSONArray("properties"), columns, new ArrayList<String>(), new ArrayList<String>()) : 0; countColumns(obj.getJSONArray("properties"), columns, new ArrayList<String>(), new ArrayList<String>()) : 0;
} }
public Map<String, FreebaseDataExtensionJob.DataExtension> extend(Set<String> guids) throws Exception { public Map<String, FreebaseDataExtensionJob.DataExtension> extend(
Set<String> guids,
Map<String, ReconCandidate> reconCandidateMap
) throws Exception {
StringWriter writer = new StringWriter(); StringWriter writer = new StringWriter();
formulateQuery(guids, extension, writer); formulateQuery(guids, extension, writer);
@ -73,7 +76,7 @@ public class FreebaseDataExtensionJob {
for (int i = 0; i < l; i++) { for (int i = 0; i < l; i++) {
JSONObject o2 = a.getJSONObject(i); JSONObject o2 = a.getJSONObject(i);
String guid = o2.getString("guid"); String guid = o2.getString("guid");
FreebaseDataExtensionJob.DataExtension ext = collectResult(o2); FreebaseDataExtensionJob.DataExtension ext = collectResult(o2, reconCandidateMap);
if (ext != null) { if (ext != null) {
map.put(guid, ext); map.put(guid, ext);
@ -86,10 +89,13 @@ public class FreebaseDataExtensionJob {
} }
} }
protected FreebaseDataExtensionJob.DataExtension collectResult(JSONObject obj) throws JSONException { protected FreebaseDataExtensionJob.DataExtension collectResult(
JSONObject obj,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException {
List<Object[]> rows = new ArrayList<Object[]>(); List<Object[]> rows = new ArrayList<Object[]>();
collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0); collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap);
Object[][] data = new Object[rows.size()][columnCount]; Object[][] data = new Object[rows.size()][columnCount];
rows.toArray(data); rows.toArray(data);
@ -101,7 +107,8 @@ public class FreebaseDataExtensionJob {
List<Object[]> rows, List<Object[]> rows,
int row, int row,
int col, int col,
Object value Object value,
Map<String, ReconCandidate> reconCandidateMap
) { ) {
while (row >= rows.size()) { while (row >= rows.size()) {
rows.add(new Object[columnCount]); rows.add(new Object[columnCount]);
@ -113,17 +120,26 @@ public class FreebaseDataExtensionJob {
List<Object[]> rows, List<Object[]> rows,
int row, int row,
int col, int col,
JSONObject obj JSONObject obj,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException { ) throws JSONException {
storeCell(rows, row, col, String guid = obj.getString("guid");
new ReconCandidate( ReconCandidate rc;
obj.getString("id"), if (reconCandidateMap.containsKey(guid)) {
obj.getString("guid"), rc = reconCandidateMap.get(guid);
obj.getString("name"), } else {
JSONUtilities.getStringArray(obj, "type"), rc = new ReconCandidate(
100 obj.getString("id"),
) obj.getString("guid"),
); obj.getString("name"),
JSONUtilities.getStringArray(obj, "type"),
100
);
reconCandidateMap.put(guid, rc);
}
storeCell(rows, row, col, rc, reconCandidateMap);
} }
protected int[] collectResult( protected int[] collectResult(
@ -131,7 +147,8 @@ public class FreebaseDataExtensionJob {
JSONObject extNode, JSONObject extNode,
JSONObject resultNode, JSONObject resultNode,
int startRowIndex, int startRowIndex,
int startColumnIndex int startColumnIndex,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException { ) throws JSONException {
String propertyID = extNode.getString("id"); String propertyID = extNode.getString("id");
String expectedTypeID = extNode.getJSONObject("expected").getString("id"); String expectedTypeID = extNode.getJSONObject("expected").getString("id");
@ -145,7 +162,7 @@ public class FreebaseDataExtensionJob {
for (int r = 0; r < l; r++) { for (int r = 0; r < l; r++) {
Object o = a.isNull(r) ? null : a.get(r); Object o = a.isNull(r) ? null : a.get(r);
if (o instanceof Serializable) { if (o instanceof Serializable) {
storeCell(rows, startRowIndex++, startColumnIndex, o); storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap);
} }
} }
} }
@ -169,9 +186,9 @@ public class FreebaseDataExtensionJob {
if (isOwnColumn) { if (isOwnColumn) {
if (o != null) { if (o != null) {
storeCell(rows, startRowIndex2++, startColumnIndex2++, o); storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap);
} else { } else {
storeCell(rows, startRowIndex2++, startColumnIndex2++, v); storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap);
} }
} }
@ -181,7 +198,8 @@ public class FreebaseDataExtensionJob {
extNode.getJSONArray("properties"), extNode.getJSONArray("properties"),
o, o,
startRowIndex, startRowIndex,
startColumnIndex2 startColumnIndex2,
reconCandidateMap
); );
startRowIndex2 = rowcol[0]; startRowIndex2 = rowcol[0];
@ -207,7 +225,8 @@ public class FreebaseDataExtensionJob {
JSONArray subProperties, JSONArray subProperties,
JSONObject resultNode, JSONObject resultNode,
int startRowIndex, int startRowIndex,
int startColumnIndex int startColumnIndex,
Map<String, ReconCandidate> reconCandidateMap
) throws JSONException { ) throws JSONException {
int maxStartRowIndex = startRowIndex; int maxStartRowIndex = startRowIndex;
@ -218,7 +237,8 @@ public class FreebaseDataExtensionJob {
subProperties.getJSONObject(c), subProperties.getJSONObject(c),
resultNode, resultNode,
startRowIndex, startRowIndex,
startColumnIndex startColumnIndex,
reconCandidateMap
); );
maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]); maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]);

View File

@ -156,6 +156,7 @@ class GridworksServer extends Server {
logger.info("Initializing context: '" + contextPath + "' from '" + contextRoot.getAbsolutePath() + "'"); logger.info("Initializing context: '" + contextPath + "' from '" + contextRoot.getAbsolutePath() + "'");
WebAppContext context = new WebAppContext(contextRoot.getAbsolutePath(), contextPath); WebAppContext context = new WebAppContext(contextRoot.getAbsolutePath(), contextPath);
context.setMaxFormContentSize(1048576);
//context.setCopyWebDir(false); //context.setCopyWebDir(false);
//context.setDefaultsDescriptor(null); //context.setDefaultsDescriptor(null);