Projekt_Grafika/dependencies/physx-4.1/source/scenequery/src/SqAABBTree.cpp

921 lines
29 KiB
C++

//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2019 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "SqAABBTree.h"
#include "SqAABBTreeUpdateMap.h"
#include "SqBounds.h"
#include "PsMathUtils.h"
#include "PsFoundation.h"
#include "GuInternal.h"
using namespace physx;
using namespace Sq;
using namespace Gu;
#define INVALID_ID 0xffffffff
// Progressive building
class Sq::FIFOStack : public Ps::UserAllocated
{
public:
FIFOStack() : mStack(PX_DEBUG_EXP("SQFIFOStack")), mCurIndex(0) {}
~FIFOStack() {}
PX_FORCE_INLINE PxU32 getNbEntries() const { return mStack.size(); }
PX_FORCE_INLINE void push(AABBTreeBuildNode* entry) { mStack.pushBack(entry); }
bool pop(AABBTreeBuildNode*& entry);
private:
Ps::Array<AABBTreeBuildNode*> mStack;
PxU32 mCurIndex; //!< Current index within the container
};
bool Sq::FIFOStack::pop(AABBTreeBuildNode*& entry)
{
const PxU32 NbEntries = mStack.size(); // Get current number of entries
if (!NbEntries)
return false; // Can be NULL when no value has been pushed. This is an invalid pop call.
entry = mStack[mCurIndex++]; // Get oldest entry, move to next one
if (mCurIndex == NbEntries)
{
// All values have been poped
mStack.clear();
mCurIndex = 0;
}
return true;
}
//~Progressive building
void flatten(const NodeAllocator& nodeAllocator, AABBTreeRuntimeNode* dest)
{
// PT: gathers all build nodes allocated so far and flatten them to a linear destination array of smaller runtime nodes
PxU32 offset = 0;
const PxU32 nbSlabs = nodeAllocator.mSlabs.size();
for(PxU32 s=0;s<nbSlabs;s++)
{
const NodeAllocator::Slab& currentSlab = nodeAllocator.mSlabs[s];
AABBTreeBuildNode* pool = currentSlab.mPool;
for(PxU32 i=0;i<currentSlab.mNbUsedNodes;i++)
{
dest[offset].mBV = pool[i].mBV;
if(pool[i].isLeaf())
{
const PxU32 index = pool[i].mNodeIndex;
const PxU32 nbPrims = pool[i].getNbPrimitives();
PX_ASSERT(nbPrims<=16);
dest[offset].mData = (index<<5)|((nbPrims&15)<<1)|1;
}
else
{
PX_ASSERT(pool[i].mPos);
PxU32 localNodeIndex = 0xffffffff;
PxU32 nodeBase = 0;
for(PxU32 j=0;j<nbSlabs;j++)
{
if(pool[i].mPos>= nodeAllocator.mSlabs[j].mPool && pool[i].mPos < nodeAllocator.mSlabs[j].mPool + nodeAllocator.mSlabs[j].mNbUsedNodes)
{
localNodeIndex = PxU32(pool[i].mPos - nodeAllocator.mSlabs[j].mPool);
break;
}
nodeBase += nodeAllocator.mSlabs[j].mNbUsedNodes;
}
const PxU32 nodeIndex = nodeBase + localNodeIndex;
dest[offset].mData = nodeIndex<<1;
}
offset++;
}
}
}
AABBTree::AABBTree() :
mIndices (NULL),
mNbIndices (0),
mRuntimePool (NULL),
mParentIndices (NULL),
mTotalNbNodes (0),
mTotalPrims (0)
{
// Progressive building
mStack = NULL;
//~Progressive building
// REFIT
mRefitHighestSetWord = 0;
//~REFIT
}
AABBTree::~AABBTree()
{
release(false);
}
void AABBTree::release(bool clearRefitMap)
{
// Progressive building
PX_DELETE_AND_RESET(mStack);
//~Progressive building
PX_FREE_AND_RESET(mParentIndices);
PX_DELETE_ARRAY(mRuntimePool);
mNodeAllocator.release();
PX_FREE_AND_RESET(mIndices);
mTotalNbNodes = 0;
mNbIndices = 0;
// REFIT
if(clearRefitMap)
mRefitBitmask.clearAll();
mRefitHighestSetWord = 0;
//~REFIT
}
// Initialize nodes/indices from the input tree merge data
void AABBTree::initTree(const AABBTreeMergeData& tree)
{
PX_ASSERT(mIndices == NULL);
PX_ASSERT(mRuntimePool == NULL);
PX_ASSERT(mParentIndices == NULL);
// allocate,copy indices
mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*tree.mNbIndices, "AABB tree indices"));
mNbIndices = tree.mNbIndices;
PxMemCopy(mIndices, tree.mIndices, sizeof(PxU32)*tree.mNbIndices);
// allocate,copy nodes
mRuntimePool = PX_NEW(AABBTreeRuntimeNode)[tree.mNbNodes];
mTotalNbNodes = tree.mNbNodes;
PxMemCopy(mRuntimePool, tree.mNodes, sizeof(AABBTreeRuntimeNode)*tree.mNbNodes);
}
// Shift indices of the tree by offset. Used for merged trees, when initial indices needs to be shifted to match indices in current pruning pool
void AABBTree::shiftIndices(PxU32 offset)
{
for (PxU32 i = 0; i < mNbIndices; i++)
{
mIndices[i] += offset;
}
}
bool AABBTree::buildInit(AABBTreeBuildParams& params, BuildStats& stats)
{
// Checkings
const PxU32 nbPrimitives = params.mNbPrimitives;
if(!nbPrimitives)
return false;
// Release previous tree
release();
// Initialize indices. This list will be modified during build.
mNbIndices = nbPrimitives;
return initAABBTreeBuild(params, mNodeAllocator, stats, mIndices);
}
void AABBTree::buildEnd(AABBTreeBuildParams& params, BuildStats& stats)
{
PX_FREE_AND_RESET(params.mCache);
// Get back total number of nodes
mTotalNbNodes = stats.getCount();
mTotalPrims = stats.mTotalPrims;
mRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes];
PX_ASSERT(mTotalNbNodes==mNodeAllocator.mTotalNbNodes);
flatten(mNodeAllocator, mRuntimePool);
mNodeAllocator.release();
}
bool AABBTree::build(AABBTreeBuildParams& params)
{
const PxU32 nbPrimitives = params.mNbPrimitives;
if(!nbPrimitives)
return false;
// Release previous tree
release();
BuildStats stats;
mNbIndices = nbPrimitives;
const bool buildStatus = buildAABBTree(params, mNodeAllocator, stats, mIndices);
PX_UNUSED(buildStatus);
PX_ASSERT(buildStatus);
buildEnd(params, stats);
return true;
}
void AABBTree::shiftOrigin(const PxVec3& shift)
{
AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
const PxU32 totalNbNodes = mTotalNbNodes;
for(PxU32 i=0; i<totalNbNodes; i++)
{
AABBTreeRuntimeNode& current = nodeBase[i];
if((i+1) < totalNbNodes)
Ps::prefetch(nodeBase + i + 1);
current.mBV.minimum -= shift;
current.mBV.maximum -= shift;
}
}
#if PX_DEBUG
void AABBTree::validate() const
{
}
#endif
// Progressive building
static PxU32 incrementalBuildHierarchy(FIFOStack& stack, AABBTreeBuildNode* node, AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& nodeBase, PxU32* const indices)
{
node->subdivide(params, stats, nodeBase, indices);
if(!node->isLeaf())
{
AABBTreeBuildNode* pos = const_cast<AABBTreeBuildNode*>(node->getPos());
PX_ASSERT(pos);
AABBTreeBuildNode* neg = pos + 1;
stack.push(neg);
stack.push(pos);
}
stats.mTotalPrims += node->mNbPrimitives;
return node->mNbPrimitives;
}
PxU32 AABBTree::progressiveBuild(AABBTreeBuildParams& params, BuildStats& stats, PxU32 progress, PxU32 limit)
{
if(progress==0)
{
if(!buildInit(params, stats))
return PX_INVALID_U32;
mStack = PX_NEW(FIFOStack);
mStack->push(mNodeAllocator.mPool);
return progress++;
}
else if(progress==1)
{
PxU32 stackCount = mStack->getNbEntries();
if(stackCount)
{
PxU32 Total = 0;
const PxU32 Limit = limit;
while(Total<Limit)
{
AABBTreeBuildNode* Entry;
if(mStack->pop(Entry))
Total += incrementalBuildHierarchy(*mStack, Entry, params, stats, mNodeAllocator, mIndices);
else
break;
}
return progress;
}
buildEnd(params, stats);
PX_DELETE_AND_RESET(mStack);
return 0; // Done!
}
return PX_INVALID_U32;
}
//~Progressive building
static PX_FORCE_INLINE PxU32 BitsToDwords(PxU32 nb_bits)
{
return (nb_bits>>5) + ((nb_bits&31) ? 1 : 0);
}
bool Sq::BitArray::init(PxU32 nb_bits)
{
mSize = BitsToDwords(nb_bits);
// Get ram for n bits
PX_FREE(mBits);
mBits = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mSize, "BitArray::mBits"));
// Set all bits to 0
clearAll();
return true;
}
void Sq::BitArray::resize(PxU32 maxBitNumber)
{
const PxU32 newSize = BitsToDwords(maxBitNumber);
if (newSize <= mSize)
return;
PxU32* newBits = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*newSize, "BitArray::mBits"));
PxMemZero(newBits + mSize, (newSize - mSize) * sizeof(PxU32));
PxMemCopy(newBits, mBits, mSize*sizeof(PxU32));
PX_FREE(mBits);
mBits = newBits;
mSize = newSize;
}
static PX_FORCE_INLINE PxU32 getNbPrimitives(PxU32 data) { return (data>>1)&15; }
static PX_FORCE_INLINE const PxU32* getPrimitives(const PxU32* base, PxU32 data) { return base + (data>>5); }
static PX_FORCE_INLINE const AABBTreeRuntimeNode* getPos(const AABBTreeRuntimeNode* base, PxU32 data) { return base + (data>>1); }
static PX_FORCE_INLINE PxU32 isLeaf(PxU32 data) { return data&1; }
static PX_FORCE_INLINE void refitNode(AABBTreeRuntimeNode* PX_RESTRICT current, const PxBounds3* PX_RESTRICT boxes, const PxU32* PX_RESTRICT indices, AABBTreeRuntimeNode* PX_RESTRICT const nodeBase)
{
// PT: we can safely use V4 loads on both boxes and nodes here:
// - it's safe on boxes because we allocated one extra box in the pruning pool
// - it's safe on nodes because there's always some data within the node, after the BV
const PxU32 data = current->mData;
Vec4V resultMinV, resultMaxV;
if(isLeaf(data))
{
const PxU32 nbPrims = getNbPrimitives(data);
if(nbPrims)
{
const PxU32* primitives = getPrimitives(indices, data);
resultMinV = V4LoadU(&boxes[*primitives].minimum.x);
resultMaxV = V4LoadU(&boxes[*primitives].maximum.x);
if(nbPrims>1)
{
const PxU32* last = primitives + nbPrims;
primitives++;
while(primitives!=last)
{
resultMinV = V4Min(resultMinV, V4LoadU(&boxes[*primitives].minimum.x));
resultMaxV = V4Max(resultMaxV, V4LoadU(&boxes[*primitives].maximum.x));
primitives++;
}
}
}
else
{
// Might happen after a node has been invalidated
const float max = SQ_EMPTY_BOUNDS_EXTENTS;
resultMinV = V4Load(max);
resultMaxV = V4Load(-max);
}
}
else
{
const AABBTreeRuntimeNode* pos = getPos(nodeBase, data);
const AABBTreeRuntimeNode* neg = pos+1;
const PxBounds3& posBox = pos->mBV;
const PxBounds3& negBox = neg->mBV;
resultMinV = V4Min(V4LoadU(&posBox.minimum.x), V4LoadU(&negBox.minimum.x));
// resultMaxV = V4Max(V4LoadU(&posBox.maximum.x), V4LoadU(&negBox.maximum.x));
#if PX_INTEL_FAMILY && !defined(PX_SIMD_DISABLED)
Vec4V posMinV = V4LoadU(&posBox.minimum.z);
Vec4V negMinV = V4LoadU(&negBox.minimum.z);
posMinV = _mm_shuffle_ps(posMinV, posMinV, _MM_SHUFFLE(0, 3, 2, 1));
negMinV = _mm_shuffle_ps(negMinV, negMinV, _MM_SHUFFLE(0, 3, 2, 1));
resultMaxV = V4Max(posMinV, negMinV);
#else
// PT: fixes the perf issue but not really convincing
resultMaxV = Vec4V_From_Vec3V(V3Max(V3LoadU(&posBox.maximum.x), V3LoadU(&negBox.maximum.x)));
#endif
}
// PT: the V4 stores overwrite the data after the BV, but we just put it back afterwards
V4StoreU(resultMinV, &current->mBV.minimum.x);
V4StoreU(resultMaxV, &current->mBV.maximum.x);
current->mData = data;
}
void AABBTree::fullRefit(const PxBounds3* boxes)
{
PX_ASSERT(boxes);
const PxU32* indices = mIndices;
AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
PX_ASSERT(nodeBase);
// Bottom-up update
PxU32 index = mTotalNbNodes;
while(index--)
{
AABBTreeRuntimeNode* current = nodeBase + index;
if(index)
Ps::prefetch(current - 1);
refitNode(current, boxes, indices, nodeBase);
}
}
static void _createParentArray(PxU32 totalNbNodes, PxU32* parentIndices, const AABBTreeRuntimeNode* parentNode, const AABBTreeRuntimeNode* currentNode, const AABBTreeRuntimeNode* root)
{
const PxU32 parentIndex = PxU32(parentNode - root);
const PxU32 currentIndex = PxU32(currentNode - root);
PX_ASSERT(parentIndex<totalNbNodes);
PX_ASSERT(currentIndex<totalNbNodes);
PX_UNUSED(totalNbNodes);
parentIndices[currentIndex] = parentIndex;
if(!currentNode->isLeaf())
{
_createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getPos(root), root);
_createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getNeg(root), root);
}
}
void AABBTree::markNodeForRefit(TreeNodeIndex nodeIndex)
{
if(!mRefitBitmask.getBits())
mRefitBitmask.init(mTotalNbNodes);
PX_ASSERT(nodeIndex<mTotalNbNodes);
// PT: lazy-create parent array. Memory is not wasted for purely static trees, or dynamic trees that only do "full refit".
if(!mParentIndices)
{
mParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mTotalNbNodes, "AABB parent indices"));
_createParentArray(mTotalNbNodes, mParentIndices, mRuntimePool, mRuntimePool, mRuntimePool);
}
PxU32 currentIndex = nodeIndex;
while(1)
{
PX_ASSERT(currentIndex<mTotalNbNodes);
if(mRefitBitmask.isSet(currentIndex))
{
// We can early exit if we already visited the node!
return;
}
else
{
mRefitBitmask.setBit(currentIndex);
const PxU32 currentMarkedWord = currentIndex>>5;
mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord);
const PxU32 parentIndex = mParentIndices[currentIndex];
PX_ASSERT(parentIndex == 0 || parentIndex < currentIndex);
if(currentIndex == parentIndex)
break;
currentIndex = parentIndex;
}
}
}
#define FIRST_VERSION
#ifdef FIRST_VERSION
void AABBTree::refitMarkedNodes(const PxBounds3* boxes)
{
if(!mRefitBitmask.getBits())
return; // No refit needed
{
/*const*/ PxU32* bits = const_cast<PxU32*>(mRefitBitmask.getBits());
PxU32 size = mRefitHighestSetWord+1;
#ifdef _DEBUG
if(1)
{
const PxU32 totalSize = mRefitBitmask.getSize();
for(PxU32 i=size;i<totalSize;i++)
{
PX_ASSERT(!bits[i]);
}
}
PxU32 nbRefit=0;
#endif
const PxU32* indices = mIndices;
AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
while(size--)
{
// Test 32 bits at a time
const PxU32 currentBits = bits[size];
if(!currentBits)
continue;
PxU32 index = (size+1)<<5;
PxU32 mask = PxU32(1<<((index-1)&31));
PxU32 _Count=32;
while(_Count--)
{
index--;
Ps::prefetch(nodeBase + index);
PX_ASSERT(size==index>>5);
PX_ASSERT(mask==PxU32(1<<(index&31)));
if(currentBits & mask)
{
refitNode(nodeBase + index, boxes, indices, nodeBase);
#ifdef _DEBUG
nbRefit++;
#endif
}
mask>>=1;
}
bits[size] = 0;
}
mRefitHighestSetWord = 0;
// mRefitBitmask.clearAll();
}
}
#endif
//#define SECOND_VERSION
#ifdef SECOND_VERSION
void AABBTree::refitMarkedNodes(const PxBounds3* boxes)
{
/*const*/ PxU32* bits = const_cast<PxU32*>(mRefitBitmask.getBits());
if(!bits)
return; // No refit needed
const PxU32 lastSetBit = mRefitBitmask.findLast();
const PxU32* indices = mIndices;
AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
for(PxU32 w = 0; w <= lastSetBit >> 5; ++w)
{
for(PxU32 b = bits[w]; b; b &= b-1)
{
const PxU32 index = (PxU32)(w<<5|Ps::lowestSetBit(b));
while(size--)
{
// Test 32 bits at a time
const PxU32 currentBits = bits[size];
if(!currentBits)
continue;
PxU32 index = (size+1)<<5;
PxU32 mask = PxU32(1<<((index-1)&31));
PxU32 _Count=32;
while(_Count--)
{
index--;
Ps::prefetch(nodeBase + index);
PX_ASSERT(size==index>>5);
PX_ASSERT(mask==PxU32(1<<(index&31)));
if(currentBits & mask)
{
refitNode(nodeBase + index, boxes, indices, nodeBase);
#ifdef _DEBUG
nbRefit++;
#endif
}
mask>>=1;
}
bits[size] = 0;
}
mRefitHighestSetWord = 0;
// mRefitBitmask.clearAll();
}
}
#endif
PX_FORCE_INLINE static void setLeafData(PxU32& leafData, const AABBTreeRuntimeNode& node, const PxU32 indicesOffset)
{
const PxU32 index = indicesOffset + (node.mData >> 5);
const PxU32 nbPrims = node.getNbPrimitives();
PX_ASSERT(nbPrims <= 16);
leafData = (index << 5) | ((nbPrims & 15) << 1) | 1;
}
// Copy the tree into nodes. Update node indices, leaf indices.
void AABBTree::addRuntimeChilds(PxU32& nodeIndex, const AABBTreeMergeData& treeParams)
{
PX_ASSERT(nodeIndex < mTotalNbNodes + treeParams.mNbNodes + 1);
const PxU32 baseNodeIndex = nodeIndex;
// copy the src tree into dest tree nodes, update its data
for (PxU32 i = 0; i < treeParams.mNbNodes; i++)
{
PX_ASSERT(nodeIndex < mTotalNbNodes + treeParams.mNbNodes + 1);
mRuntimePool[nodeIndex].mBV = treeParams.mNodes[i].mBV;
if (treeParams.mNodes[i].isLeaf())
{
setLeafData(mRuntimePool[nodeIndex].mData, treeParams.mNodes[i], mNbIndices);
}
else
{
const PxU32 srcNodeIndex = baseNodeIndex + (treeParams.mNodes[i].getPosIndex());
mRuntimePool[nodeIndex].mData = srcNodeIndex << 1;
mParentIndices[srcNodeIndex] = nodeIndex;
mParentIndices[srcNodeIndex + 1] = nodeIndex;
}
nodeIndex++;
}
}
// Merge tree into targetNode, where target node is a leaf
// 1. Allocate new nodes/parent, copy all the nodes/parents
// 2. Create new node at the end, copy the data from target node
// 3. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
// Schematic view:
// Target Nodes: ...Tn...
// Input tree: R1->Rc0, Rc1...
// Merged tree: ...Tnc->...->Nc0,R1->Rc0,Rc1...
// where new node: Nc0==Tn and Tnc is not a leaf anymore and points to Nc0
void AABBTree::mergeRuntimeLeaf(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex)
{
PX_ASSERT(mParentIndices);
PX_ASSERT(targetNode.isLeaf());
// 1. Allocate new nodes/parent, copy all the nodes/parents
// allocate new runtime pool with max combine number of nodes
// we allocate only 1 additional node each merge
AABBTreeRuntimeNode* newRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes + treeParams.mNbNodes + 1];
PxU32* newParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mTotalNbNodes + treeParams.mNbNodes + 1), "AABB parent indices"));
// copy the whole target nodes, we will add the new node at the end together with the merge tree
PxMemCopy(newRuntimePool, mRuntimePool, sizeof(AABBTreeRuntimeNode)*(mTotalNbNodes));
PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(mTotalNbNodes));
// 2. Create new node at the end, copy the data from target node
PxU32 nodeIndex = mTotalNbNodes;
// copy the targetNode at the end of the new nodes
newRuntimePool[nodeIndex].mBV = targetNode.mBV;
newRuntimePool[nodeIndex].mData = targetNode.mData;
// update the parent information
newParentIndices[nodeIndex] = targetMergeNodeIndex;
// mark for refit
if (mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex))
{
mRefitBitmask.setBit(nodeIndex);
const PxU32 currentMarkedWord = nodeIndex >> 5;
mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord);
}
// swap pointers
PX_DELETE_ARRAY(mRuntimePool);
mRuntimePool = newRuntimePool;
PX_FREE(mParentIndices);
mParentIndices = newParentIndices;
// 3. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
nodeIndex++;
addRuntimeChilds(nodeIndex, treeParams);
PX_ASSERT(nodeIndex == mTotalNbNodes + 1 + treeParams.mNbNodes);
// update the parent information for the input tree root node
mParentIndices[mTotalNbNodes + 1] = targetMergeNodeIndex;
// fix the child information for the target node, was a leaf before
mRuntimePool[targetMergeNodeIndex].mData = mTotalNbNodes << 1;
// update the total number of nodes
mTotalNbNodes = mTotalNbNodes + 1 + treeParams.mNbNodes;
}
// Merge tree into targetNode, where target node is not a leaf
// 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex
// 2. Create new node , copy the data from target node
// 3. Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes
// 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
// 5. Go through the nodes copied at the end and fix the parents/childs
// Schematic view:
// Target Nodes: ...Tn->...->Tc0,Tc1...
// Input tree: R1->Rc0, Rc1...
// Merged tree: ...Tn->...->Nc0,R1->Rc0,Rc1...,Tc0,Tc1...
// where new node: Nc0->...->Tc0,Tc1
void AABBTree::mergeRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex)
{
PX_ASSERT(mParentIndices);
PX_ASSERT(!targetNode.isLeaf());
// Get the target node child pos, this is where we insert the new node and the input tree
const PxU32 targetNodePosIndex = targetNode.getPosIndex();
// 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex
// allocate new runtime pool with max combine number of nodes
// we allocate only 1 additional node each merge
AABBTreeRuntimeNode* newRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes + treeParams.mNbNodes + 1];
PxU32* newParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mTotalNbNodes + treeParams.mNbNodes + 1), "AABB parent indices"));
// copy the untouched part of the nodes and parents
PxMemCopy(newRuntimePool, mRuntimePool, sizeof(AABBTreeRuntimeNode)*(targetNodePosIndex));
PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(targetNodePosIndex));
PxU32 nodeIndex = targetNodePosIndex;
// 2. Create new node , copy the data from target node
newRuntimePool[nodeIndex].mBV = targetNode.mBV;
newRuntimePool[nodeIndex].mData = ((targetNode.mData >> 1) + 1 + treeParams.mNbNodes) << 1;
// update parent information
newParentIndices[nodeIndex] = targetMergeNodeIndex;
// handle mark for refit
if(mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex))
{
mRefitBitmask.setBit(nodeIndex);
const PxU32 currentMarkedWord = nodeIndex >> 5;
mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord);
}
// 3. Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes
if(mTotalNbNodes - targetNodePosIndex)
{
PX_ASSERT(mTotalNbNodes - targetNodePosIndex > 0);
PxMemCopy(newRuntimePool + targetNodePosIndex + 1 + treeParams.mNbNodes, mRuntimePool + targetNodePosIndex, sizeof(AABBTreeRuntimeNode)*(mTotalNbNodes - targetNodePosIndex));
PxMemCopy(newParentIndices + targetNodePosIndex + 1 + treeParams.mNbNodes, mParentIndices + targetNodePosIndex, sizeof(PxU32)*(mTotalNbNodes - targetNodePosIndex));
}
// swap the pointers, release the old memory
PX_DELETE_ARRAY(mRuntimePool);
mRuntimePool = newRuntimePool;
PX_FREE(mParentIndices);
mParentIndices = newParentIndices;
// 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
nodeIndex++;
addRuntimeChilds(nodeIndex, treeParams);
PX_ASSERT(nodeIndex == targetNodePosIndex + 1 + treeParams.mNbNodes);
// update the total number of nodes
mTotalNbNodes = mTotalNbNodes + 1 + treeParams.mNbNodes;
// update the parent information for the input tree root node
mParentIndices[targetNodePosIndex + 1] = targetMergeNodeIndex;
// 5. Go through the nodes copied at the end and fix the parents/childs
for (PxU32 i = targetNodePosIndex + 1 + treeParams.mNbNodes; i < mTotalNbNodes; i++)
{
// check if the parent is the targetNode, if yes update the parent to new node
if(mParentIndices[i] == targetMergeNodeIndex)
{
mParentIndices[i] = targetNodePosIndex;
}
else
{
// if parent node has been moved, update the parent node
if(mParentIndices[i] >= targetNodePosIndex)
{
mParentIndices[i] = mParentIndices[i] + 1 + treeParams.mNbNodes;
}
else
{
// if parent has not been moved, update its child information
const PxU32 parentIndex = mParentIndices[i];
// update the child information to point to Pos child
if(i % 2 != 0)
{
const PxU32 srcNodeIndex = mRuntimePool[parentIndex].getPosIndex();
// if child index points to a node that has been moved, update the child index
PX_ASSERT(!mRuntimePool[parentIndex].isLeaf());
PX_ASSERT(srcNodeIndex > targetNodePosIndex);
mRuntimePool[parentIndex].mData = (1 + treeParams.mNbNodes + srcNodeIndex) << 1;
}
}
}
if(!mRuntimePool[i].isLeaf())
{
// update the child node index
const PxU32 srcNodeIndex = 1 + treeParams.mNbNodes + mRuntimePool[i].getPosIndex();
mRuntimePool[i].mData = srcNodeIndex << 1;
}
}
}
// traverse the target node, the tree is inside the targetNode, and find the best place where merge the tree
void AABBTree::traverseRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 nodeIndex)
{
const AABBTreeRuntimeNode& srcNode = treeParams.getRootNode();
PX_ASSERT(srcNode.mBV.isInside(targetNode.mBV));
// Check if the srcNode(tree) can fit inside any of the target childs. If yes, traverse the target tree child
AABBTreeRuntimeNode& targetPosChild = *targetNode.getPos(mRuntimePool);
if(srcNode.mBV.isInside(targetPosChild.mBV))
{
return traverseRuntimeNode(targetPosChild, treeParams, targetNode.getPosIndex());
}
AABBTreeRuntimeNode& targetNegChild = *targetNode.getNeg(mRuntimePool);
if (srcNode.mBV.isInside(targetNegChild.mBV))
{
return traverseRuntimeNode(targetNegChild, treeParams, targetNode.getNegIndex());
}
// we cannot traverse target anymore, lets add the srcTree to current target node
if(targetNode.isLeaf())
mergeRuntimeLeaf(targetNode, treeParams, nodeIndex);
else
mergeRuntimeNode(targetNode, treeParams, nodeIndex);
}
// Merge the input tree into current tree.
// Traverse the tree and find the smallest node, where the whole new tree fits. When we find the node
// we create one new node pointing to the original children and the to the input tree root.
void AABBTree::mergeTree(const AABBTreeMergeData& treeParams)
{
// allocate new indices buffer
PxU32* newIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mNbIndices + treeParams.mNbIndices), "AABB tree indices"));
PxMemCopy(newIndices, mIndices, sizeof(PxU32)*mNbIndices);
PX_FREE(mIndices);
mIndices = newIndices;
mTotalPrims += treeParams.mNbIndices;
// copy the new indices, re-index using the provided indicesOffset. Note that indicesOffset
// must be provided, as original mNbIndices can be different than indicesOffset dues to object releases.
for (PxU32 i = 0; i < treeParams.mNbIndices; i++)
{
mIndices[mNbIndices + i] = treeParams.mIndicesOffset + treeParams.mIndices[i];
}
// check the mRefitBitmask if we fit all the new nodes
mRefitBitmask.resize(mTotalNbNodes + treeParams.mNbNodes + 1);
// create the parent information so we can update it
if(!mParentIndices)
{
mParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mTotalNbNodes, "AABB parent indices"));
_createParentArray(mTotalNbNodes, mParentIndices, mRuntimePool, mRuntimePool, mRuntimePool);
}
// if new tree is inside the root AABB we will traverse the tree to find better node where to attach the tree subnodes
// if the root is a leaf we merge with the root.
if(treeParams.getRootNode().mBV.isInside(mRuntimePool[0].mBV) && !mRuntimePool[0].isLeaf())
{
traverseRuntimeNode(mRuntimePool[0], treeParams, 0);
}
else
{
if(mRuntimePool[0].isLeaf())
{
mergeRuntimeLeaf(mRuntimePool[0], treeParams, 0);
}
else
{
mergeRuntimeNode(mRuntimePool[0], treeParams, 0);
}
// increase the tree root AABB
mRuntimePool[0].mBV.include(treeParams.getRootNode().mBV);
}
#ifdef _DEBUG
//verify parent indices
for (PxU32 i = 0; i < mTotalNbNodes; i++)
{
if (i)
{
PX_ASSERT(mRuntimePool[mParentIndices[i]].getPosIndex() == i || mRuntimePool[mParentIndices[i]].getNegIndex() == i);
}
if (!mRuntimePool[i].isLeaf())
{
PX_ASSERT(mParentIndices[mRuntimePool[i].getPosIndex()] == i);
PX_ASSERT(mParentIndices[mRuntimePool[i].getNegIndex()] == i);
}
}
// verify the tree nodes, leafs
for (PxU32 i = 0; i < mTotalNbNodes; i++)
{
if (mRuntimePool[i].isLeaf())
{
const PxU32 index = mRuntimePool[i].mData >> 5;
const PxU32 nbPrim = mRuntimePool[i].getNbPrimitives();
PX_ASSERT(index + nbPrim <= mNbIndices + treeParams.mNbIndices);
}
else
{
const PxU32 nodeIndex = (mRuntimePool[i].getPosIndex());
PX_ASSERT(nodeIndex < mTotalNbNodes);
}
}
#endif // _DEBUG
mNbIndices += treeParams.mNbIndices;
}