Remove O(n^2) issue in tree importers - fixes #699

- Add sparse/based list implementation for ImportRecord
This commit is contained in:
Tom Morris 2013-03-23 12:02:51 -04:00
parent f78dfadcf3
commit 6b3592982e
2 changed files with 297 additions and 234 deletions

View File

@ -10,5 +10,66 @@ import com.google.refine.model.Cell;
*
*/
public class ImportRecord {
public List<List<Cell>> rows = new ArrayList<List<Cell>>();
public List<List<Cell>> rows = new BasedList<List<Cell>>();
/**
* A List implementation to match the characteristics needed by the
* import process. It's optimized for a relatively small number of
* contiguous records at a potentially large offset from zero.
* <p>
* I suspect it's usually only a single row, but we support more, just
* not as efficiently. Depending on the behavior of the ColumnGroups
* this may not be necessary at all, but I don't fully understand what it
* does, so we'll just put this hack in place for now.
*
* @param <T>
*/
class BasedList<T> extends ArrayList<T> {
private static final long serialVersionUID = 1L;
int offset = Integer.MAX_VALUE;
public T set(int index, T element) {
rebase(index);
extend(index);
return super.set(index - offset, element);
}
public T get(int index) {
if (offset == Integer.MAX_VALUE || index - offset > size() - 1) {
return null;
}
return super.get(index - offset);
}
private void rebase(final int index) {
if (index < offset) {
if (offset < Integer.MAX_VALUE) {
int new_offset = Math.max(0, index - 10); // Leave some extra room
int delta = offset - new_offset;
// Ensure room at top
for (int i = 0; i < delta; i++) {
add(null);
}
// Shuffle up
for (int i = size(); i > delta; i --) {
set(i,get(i-delta));
} // Null unused entries
for (int i = 0; i < delta; i++) {
set(i,null);
}
offset = new_offset;
} else {
offset = index;
}
}
}
private void extend(final int index) {
int i = index - offset;
while (i >= size()) {
add(null);
}
}
}
}

View File

@ -150,11 +150,13 @@ public abstract class TreeImportUtilities {
int cellIndex = column.cellIndex;
int rowIndex = Math.max(columnGroup.nextRowIndex, column.nextRowIndex);
while (rowIndex >= record.rows.size()) {
record.rows.add(new ArrayList<Cell>());
}
List<Cell> row = record.rows.get(rowIndex);
if (row == null) {
row = new ArrayList<Cell>();
record.rows.set(rowIndex, row);
}
while (cellIndex >= row.size()) {
row.add(null);
}