From bab1e8905b46ddee0cf49e1343836bde5c809419 Mon Sep 17 00:00:00 2001 From: David Huynh Date: Fri, 30 Apr 2010 19:41:53 +0000 Subject: [PATCH] Jacked up jetty form upload size limit. Added a few more array bound checks. Reduced number of recon candidate and recon objects created by extend data operations. git-svn-id: http://google-refine.googlecode.com/svn/trunk@577 7d457c2a-affb-35e4-300a-418c747d4874 --- .../facets/ExpressionNumericRowBinner.java | 5 +- .../util/PreviewExtendDataCommand.java | 4 +- .../model/changes/DataExtensionChange.java | 46 ++++++++----- .../operations/ExtendDataOperation.java | 15 ++++- .../util/FreebaseDataExtensionJob.java | 64 ++++++++++++------- .../java/com/metaweb/gridworks/Gridworks.java | 1 + 6 files changed, 92 insertions(+), 43 deletions(-) diff --git a/src/main/java/com/metaweb/gridworks/browsing/facets/ExpressionNumericRowBinner.java b/src/main/java/com/metaweb/gridworks/browsing/facets/ExpressionNumericRowBinner.java index 14a0a0e49..bf51bba0d 100644 --- a/src/main/java/com/metaweb/gridworks/browsing/facets/ExpressionNumericRowBinner.java +++ b/src/main/java/com/metaweb/gridworks/browsing/facets/ExpressionNumericRowBinner.java @@ -109,8 +109,9 @@ public class ExpressionNumericRowBinner implements RowVisitor { rowHasNumeric = true; int bin = (int) Math.floor((d - _index.getMin()) / _index.getStep()); - - bins[bin]++; + if (bin >= 0 && bin < bins.length) { // as a precaution + bins[bin]++; + } } else { rowHasError = true; } diff --git a/src/main/java/com/metaweb/gridworks/commands/util/PreviewExtendDataCommand.java b/src/main/java/com/metaweb/gridworks/commands/util/PreviewExtendDataCommand.java index 4ac5084c4..300971ccf 100644 --- a/src/main/java/com/metaweb/gridworks/commands/util/PreviewExtendDataCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/util/PreviewExtendDataCommand.java @@ -2,6 +2,7 @@ package com.metaweb.gridworks.commands.util; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -64,8 +65,9 @@ public class PreviewExtendDataCommand extends Command { } } + Map reconCandidateMap = new HashMap(); FreebaseDataExtensionJob job = new FreebaseDataExtensionJob(json); - Map map = job.extend(guids); + Map map = job.extend(guids, reconCandidateMap); response.setCharacterEncoding("UTF-8"); response.setHeader("Content-Type", "application/json"); diff --git a/src/main/java/com/metaweb/gridworks/model/changes/DataExtensionChange.java b/src/main/java/com/metaweb/gridworks/model/changes/DataExtensionChange.java index 184651710..5e2ca2014 100644 --- a/src/main/java/com/metaweb/gridworks/model/changes/DataExtensionChange.java +++ b/src/main/java/com/metaweb/gridworks/model/changes/DataExtensionChange.java @@ -5,7 +5,9 @@ import java.io.LineNumberReader; import java.io.Serializable; import java.io.Writer; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Properties; import org.json.JSONException; @@ -106,10 +108,13 @@ public class DataExtensionChange implements Change { int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex(); int index = 0; - int rowIndex = _rowIndices.get(index); - DataExtension dataExtension = _dataExtensions.get(index); + int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size(); + DataExtension dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null; + index++; + Map reconMap = new HashMap(); + for (int r = 0; r < _oldRows.size(); r++) { Row oldRow = _oldRows.get(r); if (r < rowIndex) { @@ -121,7 +126,7 @@ public class DataExtensionChange implements Change { _newRows.add(oldRow); } else { Row firstNewRow = oldRow.dup(); - extendRow(firstNewRow, dataExtension, 0); + extendRow(firstNewRow, dataExtension, 0, reconMap); _newRows.add(firstNewRow); int r2 = r + 1; @@ -132,7 +137,7 @@ public class DataExtensionChange implements Change { oldRow2.isCellBlank(keyCellIndex)) { Row newRow = oldRow2.dup(); - extendRow(newRow, dataExtension, subR); + extendRow(newRow, dataExtension, subR, reconMap); _newRows.add(newRow); r2++; @@ -142,7 +147,7 @@ public class DataExtensionChange implements Change { } Row newRow = new Row(cellIndex + _columnNames.size()); - extendRow(newRow, dataExtension, subR); + extendRow(newRow, dataExtension, subR, reconMap); _newRows.add(newRow); } @@ -175,7 +180,12 @@ public class DataExtensionChange implements Change { } } - protected void extendRow(Row row, DataExtension dataExtension, int extensionRowIndex) { + protected void extendRow( + Row row, + DataExtension dataExtension, + int extensionRowIndex, + Map reconMap + ) { Object[] values = dataExtension.data[extensionRowIndex]; for (int c = 0; c < values.length; c++) { Object value = values[c]; @@ -183,15 +193,21 @@ public class DataExtensionChange implements Change { if (value instanceof ReconCandidate) { ReconCandidate rc = (ReconCandidate) value; - Recon recon = new Recon(_historyEntryID); - recon.addCandidate(rc); - recon.service = "mql"; - recon.match = rc; - recon.matchRank = 0; - recon.judgment = Judgment.Matched; - recon.judgmentAction = "auto"; - recon.judgmentBatchSize = 1; - + Recon recon; + if (reconMap.containsKey(rc.topicGUID)) { + recon = reconMap.get(rc.topicGUID); + } else { + recon = new Recon(_historyEntryID); + recon.addCandidate(rc); + recon.service = "mql"; + recon.match = rc; + recon.matchRank = 0; + recon.judgment = Judgment.Matched; + recon.judgmentAction = "auto"; + recon.judgmentBatchSize = 1; + + reconMap.put(rc.topicGUID, recon); + } cell = new Cell(rc.topicName, recon); } else { cell = new Cell((Serializable) value, null); diff --git a/src/main/java/com/metaweb/gridworks/operations/ExtendDataOperation.java b/src/main/java/com/metaweb/gridworks/operations/ExtendDataOperation.java index 5e615f353..0b1832271 100644 --- a/src/main/java/com/metaweb/gridworks/operations/ExtendDataOperation.java +++ b/src/main/java/com/metaweb/gridworks/operations/ExtendDataOperation.java @@ -21,6 +21,7 @@ import com.metaweb.gridworks.model.AbstractOperation; import com.metaweb.gridworks.model.Cell; import com.metaweb.gridworks.model.Column; import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.model.ReconCandidate; import com.metaweb.gridworks.model.Row; import com.metaweb.gridworks.model.changes.CellAtRow; import com.metaweb.gridworks.model.changes.DataExtensionChange; @@ -159,7 +160,13 @@ public class ExtendDataOperation extends EngineDependentOperation { }.init(rowIndices)); } - protected int extendRows(List rowIndices, List dataExtensions, int from, int limit) { + protected int extendRows( + List rowIndices, + List dataExtensions, + int from, + int limit, + Map reconCandidateMap + ) { Set guids = new HashSet(); int end; @@ -173,7 +180,7 @@ public class ExtendDataOperation extends EngineDependentOperation { Map map = null; try { - map = _job.extend(guids); + map = _job.extend(guids, reconCandidateMap); } catch (Exception e) { map = new HashMap(); } @@ -206,8 +213,10 @@ public class ExtendDataOperation extends EngineDependentOperation { } int start = 0; + Map reconCandidateMap = new HashMap(); + while (start < rowIndices.size()) { - int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size()); + int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size(), reconCandidateMap); start = end; _progress = end * 100 / rowIndices.size(); diff --git a/src/main/java/com/metaweb/gridworks/util/FreebaseDataExtensionJob.java b/src/main/java/com/metaweb/gridworks/util/FreebaseDataExtensionJob.java index 776b3a6fa..7db623583 100644 --- a/src/main/java/com/metaweb/gridworks/util/FreebaseDataExtensionJob.java +++ b/src/main/java/com/metaweb/gridworks/util/FreebaseDataExtensionJob.java @@ -56,7 +56,10 @@ public class FreebaseDataExtensionJob { countColumns(obj.getJSONArray("properties"), columns, new ArrayList(), new ArrayList()) : 0; } - public Map extend(Set guids) throws Exception { + public Map extend( + Set guids, + Map reconCandidateMap + ) throws Exception { StringWriter writer = new StringWriter(); formulateQuery(guids, extension, writer); @@ -73,7 +76,7 @@ public class FreebaseDataExtensionJob { for (int i = 0; i < l; i++) { JSONObject o2 = a.getJSONObject(i); String guid = o2.getString("guid"); - FreebaseDataExtensionJob.DataExtension ext = collectResult(o2); + FreebaseDataExtensionJob.DataExtension ext = collectResult(o2, reconCandidateMap); if (ext != null) { map.put(guid, ext); @@ -86,10 +89,13 @@ public class FreebaseDataExtensionJob { } } - protected FreebaseDataExtensionJob.DataExtension collectResult(JSONObject obj) throws JSONException { + protected FreebaseDataExtensionJob.DataExtension collectResult( + JSONObject obj, + Map reconCandidateMap + ) throws JSONException { List rows = new ArrayList(); - collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0); + collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap); Object[][] data = new Object[rows.size()][columnCount]; rows.toArray(data); @@ -101,7 +107,8 @@ public class FreebaseDataExtensionJob { List rows, int row, int col, - Object value + Object value, + Map reconCandidateMap ) { while (row >= rows.size()) { rows.add(new Object[columnCount]); @@ -113,17 +120,26 @@ public class FreebaseDataExtensionJob { List rows, int row, int col, - JSONObject obj + JSONObject obj, + Map reconCandidateMap ) throws JSONException { - storeCell(rows, row, col, - new ReconCandidate( - obj.getString("id"), - obj.getString("guid"), - obj.getString("name"), - JSONUtilities.getStringArray(obj, "type"), - 100 - ) - ); + String guid = obj.getString("guid"); + ReconCandidate rc; + if (reconCandidateMap.containsKey(guid)) { + rc = reconCandidateMap.get(guid); + } else { + rc = new ReconCandidate( + obj.getString("id"), + obj.getString("guid"), + obj.getString("name"), + JSONUtilities.getStringArray(obj, "type"), + 100 + ); + + reconCandidateMap.put(guid, rc); + } + + storeCell(rows, row, col, rc, reconCandidateMap); } protected int[] collectResult( @@ -131,7 +147,8 @@ public class FreebaseDataExtensionJob { JSONObject extNode, JSONObject resultNode, int startRowIndex, - int startColumnIndex + int startColumnIndex, + Map reconCandidateMap ) throws JSONException { String propertyID = extNode.getString("id"); String expectedTypeID = extNode.getJSONObject("expected").getString("id"); @@ -145,7 +162,7 @@ public class FreebaseDataExtensionJob { for (int r = 0; r < l; r++) { Object o = a.isNull(r) ? null : a.get(r); if (o instanceof Serializable) { - storeCell(rows, startRowIndex++, startColumnIndex, o); + storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap); } } } @@ -169,9 +186,9 @@ public class FreebaseDataExtensionJob { if (isOwnColumn) { if (o != null) { - storeCell(rows, startRowIndex2++, startColumnIndex2++, o); + storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap); } else { - storeCell(rows, startRowIndex2++, startColumnIndex2++, v); + storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap); } } @@ -181,7 +198,8 @@ public class FreebaseDataExtensionJob { extNode.getJSONArray("properties"), o, startRowIndex, - startColumnIndex2 + startColumnIndex2, + reconCandidateMap ); startRowIndex2 = rowcol[0]; @@ -207,7 +225,8 @@ public class FreebaseDataExtensionJob { JSONArray subProperties, JSONObject resultNode, int startRowIndex, - int startColumnIndex + int startColumnIndex, + Map reconCandidateMap ) throws JSONException { int maxStartRowIndex = startRowIndex; @@ -218,7 +237,8 @@ public class FreebaseDataExtensionJob { subProperties.getJSONObject(c), resultNode, startRowIndex, - startColumnIndex + startColumnIndex, + reconCandidateMap ); maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]); diff --git a/src/server/java/com/metaweb/gridworks/Gridworks.java b/src/server/java/com/metaweb/gridworks/Gridworks.java index 5c416c2e3..0b15b24d6 100644 --- a/src/server/java/com/metaweb/gridworks/Gridworks.java +++ b/src/server/java/com/metaweb/gridworks/Gridworks.java @@ -156,6 +156,7 @@ class GridworksServer extends Server { logger.info("Initializing context: '" + contextPath + "' from '" + contextRoot.getAbsolutePath() + "'"); WebAppContext context = new WebAppContext(contextRoot.getAbsolutePath(), contextPath); + context.setMaxFormContentSize(1048576); //context.setCopyWebDir(false); //context.setDefaultsDescriptor(null);