1304 lines
46 KiB
C++
1304 lines
46 KiB
C++
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions
|
|
// are met:
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
// contributors may be used to endorse or promote products derived
|
|
// from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Copyright (c) 2008-2019 NVIDIA Corporation. All rights reserved.
|
|
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
|
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
|
|
|
|
|
#include "foundation/PxPreprocessor.h"
|
|
#include "PsVecMath.h"
|
|
|
|
#include "CmPhysXCommon.h"
|
|
#include "DySolverBody.h"
|
|
#include "DySolverContact.h"
|
|
#include "DySolverConstraint1D.h"
|
|
#include "DySolverConstraintDesc.h"
|
|
#include "DyThresholdTable.h"
|
|
#include "DySolverContext.h"
|
|
#include "PsUtilities.h"
|
|
#include "DyConstraint.h"
|
|
#include "PsAtomic.h"
|
|
#include "DySolverConstraintsShared.h"
|
|
|
|
namespace physx
|
|
{
|
|
|
|
namespace Dy
|
|
{
|
|
|
|
//Port of scalar implementation to SIMD maths with some interleaving of instructions
|
|
void solve1D(const PxSolverConstraintDesc& desc, SolverContext& cache)
|
|
{
|
|
PX_UNUSED(cache);
|
|
PxSolverBody& b0 = *desc.bodyA;
|
|
PxSolverBody& b1 = *desc.bodyB;
|
|
|
|
PxU8* PX_RESTRICT bPtr = desc.constraint;
|
|
if (bPtr == NULL)
|
|
return;
|
|
//PxU32 length = desc.constraintLength;
|
|
|
|
const SolverConstraint1DHeader* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr);
|
|
SolverConstraint1D* PX_RESTRICT base = reinterpret_cast<SolverConstraint1D*>(bPtr + sizeof(SolverConstraint1DHeader));
|
|
|
|
Vec3V linVel0 = V3LoadA(b0.linearVelocity);
|
|
Vec3V linVel1 = V3LoadA(b1.linearVelocity);
|
|
Vec3V angState0 = V3LoadA(b0.angularState);
|
|
Vec3V angState1 = V3LoadA(b1.angularState);
|
|
|
|
const FloatV invMass0 = FLoad(header->invMass0D0);
|
|
const FloatV invMass1 = FLoad(header->invMass1D1);
|
|
const FloatV invInertiaScale0 = FLoad(header->angularInvMassScale0);
|
|
const FloatV invInertiaScale1 = FLoad(header->angularInvMassScale1);
|
|
|
|
|
|
for(PxU32 i=0; i<header->count;++i, base++)
|
|
{
|
|
Ps::prefetchLine(base+1);
|
|
SolverConstraint1D& c = *base;
|
|
|
|
const Vec3V clinVel0 = V3LoadA(c.lin0);
|
|
const Vec3V clinVel1 = V3LoadA(c.lin1);
|
|
const Vec3V cangVel0 = V3LoadA(c.ang0);
|
|
const Vec3V cangVel1 = V3LoadA(c.ang1);
|
|
|
|
const FloatV constant = FLoad(c.constant);
|
|
const FloatV vMul = FLoad(c.velMultiplier);
|
|
const FloatV iMul = FLoad(c.impulseMultiplier);
|
|
const FloatV appliedForce = FLoad(c.appliedForce);
|
|
//const FloatV targetVel = FLoad(c.targetVelocity);
|
|
|
|
const FloatV maxImpulse = FLoad(c.maxImpulse);
|
|
const FloatV minImpulse = FLoad(c.minImpulse);
|
|
|
|
const Vec3V v0 = V3MulAdd(linVel0, clinVel0, V3Mul(angState0, cangVel0));
|
|
const Vec3V v1 = V3MulAdd(linVel1, clinVel1, V3Mul(angState1, cangVel1));
|
|
|
|
const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
|
|
const FloatV unclampedForce = FScaleAdd(iMul, appliedForce, FScaleAdd(vMul, normalVel, constant));
|
|
const FloatV clampedForce = FMin(maxImpulse, (FMax(minImpulse, unclampedForce)));
|
|
const FloatV deltaF = FSub(clampedForce, appliedForce);
|
|
|
|
FStore(clampedForce, &c.appliedForce);
|
|
linVel0 = V3ScaleAdd(clinVel0, FMul(deltaF, invMass0), linVel0);
|
|
linVel1 = V3NegScaleSub(clinVel1, FMul(deltaF, invMass1), linVel1);
|
|
angState0 = V3ScaleAdd(cangVel0, FMul(deltaF, invInertiaScale0), angState0);
|
|
//This should be negScaleSub but invInertiaScale1 is negated already
|
|
angState1 = V3ScaleAdd(cangVel1, FMul(deltaF, invInertiaScale1), angState1);
|
|
|
|
}
|
|
|
|
V3StoreA(linVel0, b0.linearVelocity);
|
|
V3StoreA(angState0, b0.angularState);
|
|
V3StoreA(linVel1, b1.linearVelocity);
|
|
V3StoreA(angState1, b1.angularState);
|
|
|
|
PX_ASSERT(b0.linearVelocity.isFinite());
|
|
PX_ASSERT(b0.angularState.isFinite());
|
|
PX_ASSERT(b1.linearVelocity.isFinite());
|
|
PX_ASSERT(b1.angularState.isFinite());
|
|
}
|
|
|
|
void conclude1D(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
|
|
{
|
|
SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
|
|
if (header == NULL)
|
|
return;
|
|
PxU8* base = desc.constraint + sizeof(SolverConstraint1DHeader);
|
|
PxU32 stride = header->type == DY_SC_TYPE_EXT_1D ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D);
|
|
|
|
for(PxU32 i=0; i<header->count; i++)
|
|
{
|
|
SolverConstraint1D& c = *reinterpret_cast<SolverConstraint1D*>(base);
|
|
|
|
c.constant = c.unbiasedConstant;
|
|
|
|
base += stride;
|
|
}
|
|
PX_ASSERT(desc.constraint + getConstraintLength(desc) == base);
|
|
}
|
|
|
|
// ==============================================================
|
|
|
|
void solveContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
|
|
{
|
|
PxSolverBody& b0 = *desc.bodyA;
|
|
PxSolverBody& b1 = *desc.bodyB;
|
|
|
|
Vec3V linVel0 = V3LoadA(b0.linearVelocity);
|
|
Vec3V linVel1 = V3LoadA(b1.linearVelocity);
|
|
Vec3V angState0 = V3LoadA(b0.angularState);
|
|
Vec3V angState1 = V3LoadA(b1.angularState);
|
|
|
|
const PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
|
|
|
|
//hopefully pointer aliasing doesn't bite.
|
|
PxU8* PX_RESTRICT currPtr = desc.constraint;
|
|
|
|
while(currPtr < last)
|
|
{
|
|
SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
|
|
currPtr += sizeof(SolverContactHeader);
|
|
|
|
const PxU32 numNormalConstr = hdr->numNormalConstr;
|
|
const PxU32 numFrictionConstr = hdr->numFrictionConstr;
|
|
|
|
SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
|
|
Ps::prefetchLine(contacts);
|
|
currPtr += numNormalConstr * sizeof(SolverContactPoint);
|
|
|
|
PxF32* forceBuffer = reinterpret_cast<PxF32*>(currPtr);
|
|
currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));
|
|
|
|
SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
|
|
currPtr += numFrictionConstr * sizeof(SolverContactFriction);
|
|
|
|
const FloatV invMassA = FLoad(hdr->invMass0);
|
|
const FloatV invMassB = FLoad(hdr->invMass1);
|
|
|
|
const FloatV angDom0 = FLoad(hdr->angDom0);
|
|
const FloatV angDom1 = FLoad(hdr->angDom1);
|
|
|
|
const Vec3V contactNormal = Vec3V_From_Vec4V_WUndefined(hdr->normal_minAppliedImpulseForFrictionW);
|
|
|
|
const FloatV accumulatedNormalImpulse = solveDynamicContacts(contacts, numNormalConstr, contactNormal, invMassA, invMassB,
|
|
angDom0, angDom1, linVel0, angState0, linVel1, angState1, forceBuffer);
|
|
|
|
if(cache.doFriction && numFrictionConstr)
|
|
{
|
|
const FloatV staticFrictionCof = hdr->getStaticFriction();
|
|
const FloatV dynamicFrictionCof = hdr->getDynamicFriction();
|
|
const FloatV maxFrictionImpulse = FMul(staticFrictionCof, accumulatedNormalImpulse);
|
|
const FloatV maxDynFrictionImpulse = FMul(dynamicFrictionCof, accumulatedNormalImpulse);
|
|
const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);
|
|
|
|
BoolV broken = BFFFF();
|
|
|
|
if(cache.writeBackIteration)
|
|
Ps::prefetchLine(hdr->frictionBrokenWritebackByte);
|
|
|
|
for(PxU32 i=0;i<numFrictionConstr;i++)
|
|
{
|
|
SolverContactFriction& f = frictions[i];
|
|
Ps::prefetchLine(&frictions[i],128);
|
|
|
|
|
|
const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
|
|
const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
|
|
const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;
|
|
|
|
const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
|
|
const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
|
|
const Vec3V rbXn = Vec3V_From_Vec4V(rbXnXYZ_biasW);
|
|
|
|
const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
|
|
const FloatV bias = V4GetW(rbXnXYZ_biasW);
|
|
const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
|
|
|
|
const FloatV targetVel = FLoad(f.targetVel);
|
|
|
|
const Vec3V delLinVel0 = V3Scale(normal, invMassA);
|
|
const Vec3V delLinVel1 = V3Scale(normal, invMassB);
|
|
|
|
const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angState0, raXn));
|
|
const Vec3V v1 = V3MulAdd(linVel1, normal, V3Mul(angState1, rbXn));
|
|
const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
|
|
|
|
|
|
|
|
// appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
|
|
const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce);
|
|
|
|
// Algorithm:
|
|
// if abs(appliedForce + deltaF) > maxFrictionImpulse
|
|
// clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
|
|
// (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
|
|
// set broken flag to true || broken flag
|
|
|
|
// FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
|
|
// FloatV potentialSumF = FAdd(appliedForce, deltaF);
|
|
|
|
const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
|
|
|
|
// On XBox this clamping code uses the vector simple pipe rather than vector float,
|
|
// which eliminates a lot of stall cycles
|
|
|
|
const BoolV clamp = FIsGrtr(FAbs(totalImpulse), maxFrictionImpulse);
|
|
|
|
const FloatV totalClamped = FMin(maxDynFrictionImpulse, FMax(negMaxDynFrictionImpulse, totalImpulse));
|
|
|
|
const FloatV newAppliedForce = FSel(clamp, totalClamped,totalImpulse);
|
|
|
|
broken = BOr(broken, clamp);
|
|
|
|
FloatV deltaF = FSub(newAppliedForce, appliedForce);
|
|
|
|
// we could get rid of the stall here by calculating and clamping delta separately, but
|
|
// the complexity isn't really worth it.
|
|
|
|
linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
|
|
linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1);
|
|
angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
|
|
angState1 = V3NegScaleSub(rbXn, FMul(deltaF, angDom1), angState1);
|
|
|
|
f.setAppliedForce(newAppliedForce);
|
|
|
|
|
|
}
|
|
Store_From_BoolV(broken, &hdr->broken);
|
|
}
|
|
|
|
}
|
|
|
|
PX_ASSERT(b0.linearVelocity.isFinite());
|
|
PX_ASSERT(b0.angularState.isFinite());
|
|
PX_ASSERT(b1.linearVelocity.isFinite());
|
|
PX_ASSERT(b1.angularState.isFinite());
|
|
|
|
// Write back
|
|
V3StoreU(linVel0, b0.linearVelocity);
|
|
V3StoreU(linVel1, b1.linearVelocity);
|
|
V3StoreU(angState0, b0.angularState);
|
|
V3StoreU(angState1, b1.angularState);
|
|
|
|
PX_ASSERT(b0.linearVelocity.isFinite());
|
|
PX_ASSERT(b0.angularState.isFinite());
|
|
PX_ASSERT(b1.linearVelocity.isFinite());
|
|
PX_ASSERT(b1.angularState.isFinite());
|
|
|
|
PX_ASSERT(currPtr == last);
|
|
}
|
|
|
|
void solveContact_BStatic(const PxSolverConstraintDesc& desc, SolverContext& cache)
|
|
{
|
|
PxSolverBody& b0 = *desc.bodyA;
|
|
//PxSolverBody& b1 = *desc.bodyB;
|
|
|
|
Vec3V linVel0 = V3LoadA(b0.linearVelocity);
|
|
Vec3V angState0 = V3LoadA(b0.angularState);
|
|
|
|
const PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
|
|
|
|
//hopefully pointer aliasing doesn't bite.
|
|
PxU8* PX_RESTRICT currPtr = desc.constraint;
|
|
|
|
while(currPtr < last)
|
|
{
|
|
SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
|
|
currPtr += sizeof(SolverContactHeader);
|
|
|
|
const PxU32 numNormalConstr = hdr->numNormalConstr;
|
|
const PxU32 numFrictionConstr = hdr->numFrictionConstr;
|
|
|
|
SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
|
|
//Ps::prefetchLine(contacts);
|
|
currPtr += numNormalConstr * sizeof(SolverContactPoint);
|
|
|
|
PxF32* forceBuffer = reinterpret_cast<PxF32*>(currPtr);
|
|
currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));
|
|
|
|
SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
|
|
currPtr += numFrictionConstr * sizeof(SolverContactFriction);
|
|
|
|
|
|
|
|
const FloatV invMassA = FLoad(hdr->invMass0);
|
|
|
|
const Vec3V contactNormal = Vec3V_From_Vec4V_WUndefined(hdr->normal_minAppliedImpulseForFrictionW);
|
|
const FloatV angDom0 = FLoad(hdr->angDom0);
|
|
|
|
|
|
const FloatV accumulatedNormalImpulse = solveStaticContacts(contacts, numNormalConstr, contactNormal,
|
|
invMassA, angDom0, linVel0, angState0, forceBuffer);
|
|
|
|
if(cache.doFriction && numFrictionConstr)
|
|
{
|
|
const FloatV maxFrictionImpulse = FMul(hdr->getStaticFriction(), accumulatedNormalImpulse);
|
|
const FloatV maxDynFrictionImpulse = FMul(hdr->getDynamicFriction(), accumulatedNormalImpulse);
|
|
|
|
BoolV broken = BFFFF();
|
|
if(cache.writeBackIteration)
|
|
Ps::prefetchLine(hdr->frictionBrokenWritebackByte);
|
|
|
|
for(PxU32 i=0;i<numFrictionConstr;i++)
|
|
{
|
|
SolverContactFriction& f = frictions[i];
|
|
Ps::prefetchLine(&frictions[i],128);
|
|
|
|
|
|
const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
|
|
const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
|
|
const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;
|
|
|
|
const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
|
|
const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
|
|
|
|
const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
|
|
const FloatV bias = V4GetW(rbXnXYZ_biasW);
|
|
const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
|
|
|
|
const FloatV targetVel = FLoad(f.targetVel);
|
|
|
|
const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);
|
|
|
|
const Vec3V delLinVel0 = V3Scale(normal, invMassA);
|
|
//const FloatV negMaxFrictionImpulse = FNeg(maxFrictionImpulse);
|
|
|
|
const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angState0, raXn));
|
|
const FloatV normalVel = V3SumElems(v0);
|
|
|
|
|
|
// appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
|
|
const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce);
|
|
|
|
// Algorithm:
|
|
// if abs(appliedForce + deltaF) > maxFrictionImpulse
|
|
// clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
|
|
// (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
|
|
// set broken flag to true || broken flag
|
|
|
|
// FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
|
|
// FloatV potentialSumF = FAdd(appliedForce, deltaF);
|
|
|
|
const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
|
|
|
|
// On XBox this clamping code uses the vector simple pipe rather than vector float,
|
|
// which eliminates a lot of stall cycles
|
|
|
|
const BoolV clamp = FIsGrtr(FAbs(totalImpulse), maxFrictionImpulse);
|
|
|
|
const FloatV totalClamped = FMin(maxDynFrictionImpulse, FMax(negMaxDynFrictionImpulse, totalImpulse));
|
|
|
|
broken = BOr(broken, clamp);
|
|
|
|
const FloatV newAppliedForce = FSel(clamp, totalClamped,totalImpulse);
|
|
|
|
FloatV deltaF = FSub(newAppliedForce, appliedForce);
|
|
|
|
// we could get rid of the stall here by calculating and clamping delta separately, but
|
|
// the complexity isn't really worth it.
|
|
|
|
linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
|
|
angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
|
|
|
|
f.setAppliedForce(newAppliedForce);
|
|
|
|
}
|
|
Store_From_BoolV(broken, &hdr->broken);
|
|
}
|
|
|
|
}
|
|
|
|
PX_ASSERT(b0.linearVelocity.isFinite());
|
|
PX_ASSERT(b0.angularState.isFinite());
|
|
|
|
// Write back
|
|
V3StoreA(linVel0, b0.linearVelocity);
|
|
V3StoreA(angState0, b0.angularState);
|
|
|
|
PX_ASSERT(b0.linearVelocity.isFinite());
|
|
PX_ASSERT(b0.angularState.isFinite());
|
|
|
|
PX_ASSERT(currPtr == last);
|
|
}
|
|
|
|
|
|
void concludeContact(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
|
|
{
|
|
PxU8* PX_RESTRICT cPtr = desc.constraint;
|
|
|
|
const FloatV zero = FZero();
|
|
|
|
PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
|
|
while(cPtr < last)
|
|
{
|
|
const SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<const SolverContactHeader*>(cPtr);
|
|
cPtr += sizeof(SolverContactHeader);
|
|
|
|
const PxU32 numNormalConstr = hdr->numNormalConstr;
|
|
const PxU32 numFrictionConstr = hdr->numFrictionConstr;
|
|
|
|
//if(cPtr < last)
|
|
//Ps::prefetchLine(cPtr, 512);
|
|
Ps::prefetchLine(cPtr,128);
|
|
Ps::prefetchLine(cPtr,256);
|
|
Ps::prefetchLine(cPtr,384);
|
|
|
|
const PxU32 pointStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactPointExt)
|
|
: sizeof(SolverContactPoint);
|
|
for(PxU32 i=0;i<numNormalConstr;i++)
|
|
{
|
|
SolverContactPoint *c = reinterpret_cast<SolverContactPoint*>(cPtr);
|
|
cPtr += pointStride;
|
|
//c->scaledBias = PxMin(c->scaledBias, 0.f);
|
|
c->biasedErr = c->unbiasedErr;
|
|
}
|
|
|
|
cPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3)); //Jump over force buffers
|
|
|
|
const PxU32 frictionStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactFrictionExt)
|
|
: sizeof(SolverContactFriction);
|
|
for(PxU32 i=0;i<numFrictionConstr;i++)
|
|
{
|
|
SolverContactFriction *f = reinterpret_cast<SolverContactFriction*>(cPtr);
|
|
cPtr += frictionStride;
|
|
f->setBias(zero);
|
|
}
|
|
}
|
|
PX_ASSERT(cPtr == last);
|
|
}
|
|
|
|
void writeBackContact(const PxSolverConstraintDesc& desc, SolverContext& cache,
|
|
PxSolverBodyData& bd0, PxSolverBodyData& bd1)
|
|
{
|
|
|
|
PxReal normalForce = 0;
|
|
|
|
PxU8* PX_RESTRICT cPtr = desc.constraint;
|
|
PxReal* PX_RESTRICT vForceWriteback = reinterpret_cast<PxReal*>(desc.writeBack);
|
|
PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
|
|
|
|
bool forceThreshold = false;
|
|
|
|
while(cPtr < last)
|
|
{
|
|
const SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<const SolverContactHeader*>(cPtr);
|
|
cPtr += sizeof(SolverContactHeader);
|
|
|
|
forceThreshold = hdr->flags & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
|
|
const PxU32 numNormalConstr = hdr->numNormalConstr;
|
|
const PxU32 numFrictionConstr = hdr->numFrictionConstr;
|
|
|
|
//if(cPtr < last)
|
|
Ps::prefetchLine(cPtr, 256);
|
|
Ps::prefetchLine(cPtr, 384);
|
|
|
|
const PxU32 pointStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactPointExt)
|
|
: sizeof(SolverContactPoint);
|
|
|
|
cPtr += pointStride * numNormalConstr;
|
|
PxF32* forceBuffer = reinterpret_cast<PxF32*>(cPtr);
|
|
cPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));
|
|
|
|
if(vForceWriteback!=NULL)
|
|
{
|
|
for(PxU32 i=0; i<numNormalConstr; i++)
|
|
{
|
|
PxReal appliedForce = forceBuffer[i];
|
|
*vForceWriteback++ = appliedForce;
|
|
normalForce += appliedForce;
|
|
}
|
|
}
|
|
|
|
const PxU32 frictionStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactFrictionExt)
|
|
: sizeof(SolverContactFriction);
|
|
|
|
if(hdr->broken && hdr->frictionBrokenWritebackByte != NULL)
|
|
{
|
|
*hdr->frictionBrokenWritebackByte = 1;
|
|
}
|
|
|
|
cPtr += frictionStride * numFrictionConstr;
|
|
|
|
}
|
|
PX_ASSERT(cPtr == last);
|
|
|
|
|
|
|
|
if(forceThreshold && desc.linkIndexA == PxSolverConstraintDesc::NO_LINK && desc.linkIndexB == PxSolverConstraintDesc::NO_LINK &&
|
|
normalForce !=0 && (bd0.reportThreshold < PX_MAX_REAL || bd1.reportThreshold < PX_MAX_REAL))
|
|
{
|
|
ThresholdStreamElement elt;
|
|
elt.normalForce = normalForce;
|
|
elt.threshold = PxMin<float>(bd0.reportThreshold, bd1.reportThreshold);
|
|
elt.nodeIndexA = IG::NodeIndex(bd0.nodeIndex);
|
|
elt.nodeIndexB = IG::NodeIndex(bd1.nodeIndex);
|
|
elt.shapeInteraction = reinterpret_cast<const SolverContactHeader*>(desc.constraint)->shapeInteraction;
|
|
Ps::order(elt.nodeIndexA, elt.nodeIndexB);
|
|
PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB);
|
|
PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength);
|
|
cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt;
|
|
}
|
|
}
|
|
|
|
// adjust from CoM to joint
|
|
|
|
void writeBack1D(const PxSolverConstraintDesc& desc, SolverContext&, PxSolverBodyData&, PxSolverBodyData&)
|
|
{
|
|
ConstraintWriteback* writeback = reinterpret_cast<ConstraintWriteback*>(desc.writeBack);
|
|
if(writeback)
|
|
{
|
|
SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
|
|
PxU8* base = desc.constraint + sizeof(SolverConstraint1DHeader);
|
|
PxU32 stride = header->type == DY_SC_TYPE_EXT_1D ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D);
|
|
|
|
PxVec3 lin(0), ang(0);
|
|
for(PxU32 i=0; i<header->count; i++)
|
|
{
|
|
const SolverConstraint1D* c = reinterpret_cast<SolverConstraint1D*>(base);
|
|
if(c->flags & DY_SC_FLAG_OUTPUT_FORCE)
|
|
{
|
|
lin += c->lin0 * c->appliedForce;
|
|
ang += c->ang0Writeback * c->appliedForce;
|
|
}
|
|
base += stride;
|
|
}
|
|
|
|
ang -= header->body0WorldOffset.cross(lin);
|
|
writeback->linearImpulse = lin;
|
|
writeback->angularImpulse = ang;
|
|
writeback->broken = header->breakable ? PxU32(lin.magnitude()>header->linBreakImpulse || ang.magnitude()>header->angBreakImpulse) : 0;
|
|
|
|
PX_ASSERT(desc.constraint + getConstraintLength(desc) == base);
|
|
}
|
|
}
|
|
|
|
|
|
void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 1; a < constraintCount; ++a)
|
|
{
|
|
Ps::prefetchLine(desc[a].constraint);
|
|
Ps::prefetchLine(desc[a].constraint, 128);
|
|
Ps::prefetchLine(desc[a].constraint, 256);
|
|
solve1D(desc[a-1], cache);
|
|
}
|
|
solve1D(desc[constraintCount-1], cache);
|
|
}
|
|
|
|
void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 1; a < constraintCount; ++a)
|
|
{
|
|
Ps::prefetchLine(desc[a].constraint);
|
|
Ps::prefetchLine(desc[a].constraint, 128);
|
|
Ps::prefetchLine(desc[a].constraint, 256);
|
|
solve1D(desc[a-1], cache);
|
|
conclude1D(desc[a-1], cache);
|
|
}
|
|
solve1D(desc[constraintCount-1], cache);
|
|
conclude1D(desc[constraintCount-1], cache);
|
|
}
|
|
|
|
void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 1; a < constraintCount; ++a)
|
|
{
|
|
Ps::prefetchLine(desc[a].constraint);
|
|
Ps::prefetchLine(desc[a].constraint, 128);
|
|
Ps::prefetchLine(desc[a].constraint, 256);
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex];
|
|
solve1D(desc[a-1], cache);
|
|
writeBack1D(desc[a-1], cache, bd0, bd1);
|
|
}
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex];
|
|
solve1D(desc[constraintCount-1], cache);
|
|
writeBack1D(desc[constraintCount-1], cache, bd0, bd1);
|
|
}
|
|
|
|
void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 1; a < constraintCount; ++a)
|
|
{
|
|
Ps::prefetchLine(desc[a].constraint);
|
|
Ps::prefetchLine(desc[a].constraint, 128);
|
|
Ps::prefetchLine(desc[a].constraint, 256);
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex];
|
|
writeBack1D(desc[a-1], cache, bd0, bd1);
|
|
}
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex];
|
|
writeBack1D(desc[constraintCount-1], cache, bd0, bd1);
|
|
}
|
|
|
|
void solveContactBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 1; a < constraintCount; ++a)
|
|
{
|
|
Ps::prefetchLine(desc[a].constraint);
|
|
Ps::prefetchLine(desc[a].constraint, 128);
|
|
Ps::prefetchLine(desc[a].constraint, 256);
|
|
solveContact(desc[a-1], cache);
|
|
}
|
|
solveContact(desc[constraintCount-1], cache);
|
|
}
|
|
|
|
void solveContactConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 1; a < constraintCount; ++a)
|
|
{
|
|
Ps::prefetchLine(desc[a].constraint);
|
|
Ps::prefetchLine(desc[a].constraint, 128);
|
|
Ps::prefetchLine(desc[a].constraint, 256);
|
|
solveContact(desc[a-1], cache);
|
|
concludeContact(desc[a-1], cache);
|
|
}
|
|
solveContact(desc[constraintCount-1], cache);
|
|
concludeContact(desc[constraintCount-1], cache);
|
|
}
|
|
|
|
void solveContactBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 1; a < constraintCount; ++a)
|
|
{
|
|
Ps::prefetchLine(desc[a].constraint);
|
|
Ps::prefetchLine(desc[a].constraint, 128);
|
|
Ps::prefetchLine(desc[a].constraint, 256);
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex];
|
|
solveContact(desc[a-1], cache);
|
|
writeBackContact(desc[a-1], cache, bd0, bd1);
|
|
}
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex];
|
|
solveContact(desc[constraintCount-1], cache);
|
|
writeBackContact(desc[constraintCount-1], cache, bd0, bd1);
|
|
|
|
if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
|
|
{
|
|
//Write back to global buffer
|
|
PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
|
|
for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
|
|
{
|
|
cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
|
|
}
|
|
cache.mThresholdStreamIndex = 0;
|
|
}
|
|
}
|
|
|
|
void solveContact_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 1; a < constraintCount; ++a)
|
|
{
|
|
Ps::prefetchLine(desc[a].constraint);
|
|
Ps::prefetchLine(desc[a].constraint, 128);
|
|
Ps::prefetchLine(desc[a].constraint, 256);
|
|
solveContact_BStatic(desc[a-1], cache);
|
|
}
|
|
solveContact_BStatic(desc[constraintCount-1], cache);
|
|
}
|
|
|
|
void solveContact_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 1; a < constraintCount; ++a)
|
|
{
|
|
Ps::prefetchLine(desc[a].constraint);
|
|
Ps::prefetchLine(desc[a].constraint, 128);
|
|
Ps::prefetchLine(desc[a].constraint, 256);
|
|
solveContact_BStatic(desc[a-1], cache);
|
|
concludeContact(desc[a-1], cache);
|
|
}
|
|
solveContact_BStatic(desc[constraintCount-1], cache);
|
|
concludeContact(desc[constraintCount-1], cache);
|
|
}
|
|
|
|
void solveContact_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 1; a < constraintCount; ++a)
|
|
{
|
|
Ps::prefetchLine(desc[a].constraint);
|
|
Ps::prefetchLine(desc[a].constraint, 128);
|
|
Ps::prefetchLine(desc[a].constraint, 256);
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex];
|
|
solveContact_BStatic(desc[a-1], cache);
|
|
writeBackContact(desc[a-1], cache, bd0, bd1);
|
|
}
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex];
|
|
solveContact_BStatic(desc[constraintCount-1], cache);
|
|
writeBackContact(desc[constraintCount-1], cache, bd0, bd1);
|
|
|
|
if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
|
|
{
|
|
//Not enough space to write 4 more thresholds back!
|
|
//Write back to global buffer
|
|
PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
|
|
for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
|
|
{
|
|
cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
|
|
}
|
|
cache.mThresholdStreamIndex = 0;
|
|
}
|
|
}
|
|
|
|
void clearExt1D(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
|
|
{
|
|
PxU8* PX_RESTRICT bPtr = desc.constraint;
|
|
const SolverConstraint1DHeader* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr);
|
|
SolverConstraint1DExt* PX_RESTRICT base = reinterpret_cast<SolverConstraint1DExt*>(bPtr + sizeof(SolverConstraint1DHeader));
|
|
|
|
for (PxU32 i = 0; i < header->count; ++i, base++)
|
|
{
|
|
base->appliedForce = 0.f;
|
|
}
|
|
}
|
|
|
|
void solveExt1D(const PxSolverConstraintDesc& desc, Vec3V& linVel0, Vec3V& linVel1, Vec3V& angVel0, Vec3V& angVel1,
|
|
Vec3V& li0, Vec3V& li1, Vec3V& ai0, Vec3V& ai1)
|
|
{
|
|
PxU8* PX_RESTRICT bPtr = desc.constraint;
|
|
const SolverConstraint1DHeader* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr);
|
|
SolverConstraint1DExt* PX_RESTRICT base = reinterpret_cast<SolverConstraint1DExt*>(bPtr + sizeof(SolverConstraint1DHeader));
|
|
|
|
|
|
|
|
for (PxU32 i = 0; i<header->count; ++i, base++)
|
|
{
|
|
Ps::prefetchLine(base + 1);
|
|
|
|
const Vec4V lin0XYZ_constantW = V4LoadA(&base->lin0.x);
|
|
const Vec4V lin1XYZ_unbiasedConstantW = V4LoadA(&base->lin1.x);
|
|
const Vec4V ang0XYZ_velMultiplierW = V4LoadA(&base->ang0.x);
|
|
const Vec4V ang1XYZ_impulseMultiplierW = V4LoadA(&base->ang1.x);
|
|
const Vec4V minImpulseX_maxImpulseY_appliedForceZ = V4LoadA(&base->minImpulse);
|
|
|
|
const Vec3V lin0 = Vec3V_From_Vec4V(lin0XYZ_constantW); FloatV constant = V4GetW(lin0XYZ_constantW);
|
|
const Vec3V lin1 = Vec3V_From_Vec4V(lin1XYZ_unbiasedConstantW);
|
|
const Vec3V ang0 = Vec3V_From_Vec4V(ang0XYZ_velMultiplierW); FloatV vMul = V4GetW(ang0XYZ_velMultiplierW);
|
|
const Vec3V ang1 = Vec3V_From_Vec4V(ang1XYZ_impulseMultiplierW); FloatV iMul = V4GetW(ang1XYZ_impulseMultiplierW);
|
|
|
|
const FloatV minImpulse = V4GetX(minImpulseX_maxImpulseY_appliedForceZ);
|
|
const FloatV maxImpulse = V4GetY(minImpulseX_maxImpulseY_appliedForceZ);
|
|
const FloatV appliedForce = V4GetZ(minImpulseX_maxImpulseY_appliedForceZ);
|
|
|
|
const Vec3V v0 = V3MulAdd(linVel0, lin0, V3Mul(angVel0, ang0));
|
|
const Vec3V v1 = V3MulAdd(linVel1, lin1, V3Mul(angVel1, ang1));
|
|
const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
|
|
|
|
const FloatV unclampedForce = FScaleAdd(iMul, appliedForce, FScaleAdd(vMul, normalVel, constant));
|
|
const FloatV clampedForce = FMin(maxImpulse, (FMax(minImpulse, unclampedForce)));
|
|
const FloatV deltaF = FSub(clampedForce, appliedForce);
|
|
|
|
FStore(clampedForce, &base->appliedForce);
|
|
li0 = V3ScaleAdd(lin0, deltaF, li0); ai0 = V3ScaleAdd(ang0, deltaF, ai0);
|
|
li1 = V3ScaleAdd(lin1, deltaF, li1); ai1 = V3ScaleAdd(ang1, deltaF, ai1);
|
|
|
|
linVel0 = V3ScaleAdd(base->deltaVA.linear, deltaF, linVel0); angVel0 = V3ScaleAdd(base->deltaVA.angular, deltaF, angVel0);
|
|
linVel1 = V3ScaleAdd(base->deltaVB.linear, deltaF, linVel1); angVel1 = V3ScaleAdd(base->deltaVB.angular, deltaF, angVel1);
|
|
|
|
#if 0
|
|
PxVec3 lv0, lv1, av0, av1;
|
|
V3StoreU(linVel0, lv0); V3StoreU(linVel1, lv1);
|
|
V3StoreU(angVel0, av0); V3StoreU(angVel1, av1);
|
|
|
|
PX_ASSERT(lv0.magnitude() < 30.f);
|
|
PX_ASSERT(lv1.magnitude() < 30.f);
|
|
PX_ASSERT(av0.magnitude() < 30.f);
|
|
PX_ASSERT(av1.magnitude() < 30.f);
|
|
#endif
|
|
|
|
}
|
|
|
|
li0 = V3Scale(li0, FLoad(header->linearInvMassScale0));
|
|
li1 = V3Scale(li1, FLoad(header->linearInvMassScale1));
|
|
ai0 = V3Scale(ai0, FLoad(header->angularInvMassScale0));
|
|
ai1 = V3Scale(ai1, FLoad(header->angularInvMassScale1));
|
|
}
|
|
|
|
//Port of scalar implementation to SIMD maths with some interleaving of instructions
|
|
void solveExt1D(const PxSolverConstraintDesc& desc, SolverContext& cache)
|
|
{
|
|
|
|
//PxU32 length = desc.constraintLength;
|
|
|
|
Vec3V linVel0, angVel0, linVel1, angVel1;
|
|
|
|
if (desc.articulationA == desc.articulationB)
|
|
{
|
|
Cm::SpatialVectorV v0, v1;
|
|
desc.articulationA->pxcFsGetVelocities(desc.linkIndexA, desc.linkIndexB, v0, v1);
|
|
linVel0 = v0.linear;
|
|
angVel0 = v0.angular;
|
|
linVel1 = v1.linear;
|
|
angVel1 = v1.angular;
|
|
}
|
|
else
|
|
{
|
|
|
|
if (desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
|
|
{
|
|
linVel0 = V3LoadA(desc.bodyA->linearVelocity);
|
|
angVel0 = V3LoadA(desc.bodyA->angularState);
|
|
}
|
|
else
|
|
{
|
|
Cm::SpatialVectorV v = desc.articulationA->pxcFsGetVelocity(desc.linkIndexA);
|
|
linVel0 = v.linear;
|
|
angVel0 = v.angular;
|
|
}
|
|
|
|
if (desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
|
|
{
|
|
linVel1 = V3LoadA(desc.bodyB->linearVelocity);
|
|
angVel1 = V3LoadA(desc.bodyB->angularState);
|
|
}
|
|
else
|
|
{
|
|
Cm::SpatialVectorV v = desc.articulationB->pxcFsGetVelocity(desc.linkIndexB);
|
|
linVel1 = v.linear;
|
|
angVel1 = v.angular;
|
|
}
|
|
}
|
|
|
|
Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();
|
|
|
|
solveExt1D(desc, linVel0, linVel1, angVel0, angVel1, li0, li1, ai0, ai1);
|
|
|
|
if (desc.articulationA == desc.articulationB)
|
|
{
|
|
desc.articulationA->pxcFsApplyImpulses(desc.linkIndexA, li0, ai0,
|
|
desc.linkIndexB, li1, ai1, cache.Z, cache.deltaV);
|
|
}
|
|
else
|
|
{
|
|
|
|
if (desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
|
|
{
|
|
V3StoreA(linVel0, desc.bodyA->linearVelocity);
|
|
V3StoreA(angVel0, desc.bodyA->angularState);
|
|
}
|
|
else
|
|
{
|
|
desc.articulationA->pxcFsApplyImpulse(desc.linkIndexA, li0,
|
|
ai0, cache.Z, cache.deltaV);
|
|
|
|
|
|
}
|
|
|
|
if (desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
|
|
{
|
|
V3StoreA(linVel1, desc.bodyB->linearVelocity);
|
|
V3StoreA(angVel1, desc.bodyB->angularState);
|
|
}
|
|
else
|
|
{
|
|
desc.articulationB->pxcFsApplyImpulse(desc.linkIndexB, li1,
|
|
ai1, cache.Z, cache.deltaV);
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
FloatV solveExtContacts(SolverContactPointExt* contacts, const PxU32 nbContactPoints, const Vec3VArg contactNormal,
|
|
Vec3V& linVel0, Vec3V& angVel0,
|
|
Vec3V& linVel1, Vec3V& angVel1,
|
|
Vec3V& li0, Vec3V& ai0,
|
|
Vec3V& li1, Vec3V& ai1,
|
|
PxF32* PX_RESTRICT appliedForceBuffer)
|
|
{
|
|
|
|
FloatV accumulatedNormalImpulse = FZero();
|
|
for (PxU32 i = 0; i<nbContactPoints; i++)
|
|
{
|
|
SolverContactPointExt& c = contacts[i];
|
|
Ps::prefetchLine(&contacts[i + 1]);
|
|
|
|
const Vec3V raXn = c.raXn;
|
|
const Vec3V rbXn = c.rbXn;
|
|
|
|
const FloatV appliedForce = FLoad(appliedForceBuffer[i]);
|
|
const FloatV velMultiplier = c.getVelMultiplier();
|
|
|
|
/*const FloatV targetVel = c.getTargetVelocity();
|
|
const FloatV scaledBias = c.getScaledBias();*/
|
|
|
|
//Compute the normal velocity of the constraint.
|
|
|
|
Vec3V v = V3MulAdd(linVel0, contactNormal, V3Mul(angVel0, raXn));
|
|
v = V3Sub(v, V3MulAdd(linVel1, contactNormal, V3Mul(angVel1, rbXn)));
|
|
const FloatV normalVel = V3SumElems(v);
|
|
|
|
const FloatV biasedErr = c.getBiasedErr();//FNeg(scaledBias);
|
|
|
|
// still lots to do here: using loop pipelining we can interweave this code with the
|
|
// above - the code here has a lot of stalls that we would thereby eliminate
|
|
|
|
const FloatV deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce));
|
|
|
|
linVel0 = V3ScaleAdd(c.linDeltaVA, deltaF, linVel0);
|
|
angVel0 = V3ScaleAdd(c.angDeltaVA, deltaF, angVel0);
|
|
linVel1 = V3ScaleAdd(c.linDeltaVB, deltaF, linVel1);
|
|
angVel1 = V3ScaleAdd(c.angDeltaVB, deltaF, angVel1);
|
|
|
|
li0 = V3ScaleAdd(contactNormal, deltaF, li0); ai0 = V3ScaleAdd(raXn, deltaF, ai0);
|
|
li1 = V3ScaleAdd(contactNormal, deltaF, li1); ai1 = V3ScaleAdd(rbXn, deltaF, ai1);
|
|
|
|
const FloatV newAppliedForce = FAdd(appliedForce, deltaF);
|
|
|
|
FStore(newAppliedForce, &appliedForceBuffer[i]);
|
|
|
|
accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newAppliedForce);
|
|
|
|
#if 0
|
|
PxVec3 lv0, lv1, av0, av1;
|
|
V3StoreU(linVel0, lv0); V3StoreU(linVel1, lv1);
|
|
V3StoreU(angVel0, av0); V3StoreU(angVel1, av1);
|
|
|
|
PX_ASSERT(lv0.magnitude() < 30.f);
|
|
PX_ASSERT(lv1.magnitude() < 30.f);
|
|
PX_ASSERT(av0.magnitude() < 30.f);
|
|
PX_ASSERT(av1.magnitude() < 30.f);
|
|
#endif
|
|
|
|
}
|
|
return accumulatedNormalImpulse;
|
|
}
|
|
|
|
|
|
void solveExtContact(const PxSolverConstraintDesc& desc, Vec3V& linVel0, Vec3V& linVel1, Vec3V& angVel0, Vec3V& angVel1,
|
|
Vec3V& linImpulse0, Vec3V& linImpulse1, Vec3V& angImpulse0, Vec3V& angImpulse1, bool doFriction)
|
|
{
|
|
const PxU8* PX_RESTRICT last = desc.constraint + desc.constraintLengthOver16 * 16;
|
|
|
|
//hopefully pointer aliasing doesn't bite.
|
|
PxU8* PX_RESTRICT currPtr = desc.constraint;
|
|
|
|
|
|
|
|
while (currPtr < last)
|
|
{
|
|
SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
|
|
currPtr += sizeof(SolverContactHeader);
|
|
|
|
const PxU32 numNormalConstr = hdr->numNormalConstr;
|
|
const PxU32 numFrictionConstr = hdr->numFrictionConstr;
|
|
|
|
SolverContactPointExt* PX_RESTRICT contacts = reinterpret_cast<SolverContactPointExt*>(currPtr);
|
|
Ps::prefetchLine(contacts);
|
|
currPtr += numNormalConstr * sizeof(SolverContactPointExt);
|
|
|
|
PxF32* appliedForceBuffer = reinterpret_cast<PxF32*>(currPtr);
|
|
currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));
|
|
|
|
SolverContactFrictionExt* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionExt*>(currPtr);
|
|
currPtr += numFrictionConstr * sizeof(SolverContactFrictionExt);
|
|
|
|
|
|
Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();
|
|
|
|
const Vec3V contactNormal = Vec3V_From_Vec4V(hdr->normal_minAppliedImpulseForFrictionW);
|
|
const FloatV minNorImpulse = V4GetW(hdr->normal_minAppliedImpulseForFrictionW);
|
|
|
|
const FloatV accumulatedNormalImpulse = FMax(solveExtContacts(contacts, numNormalConstr, contactNormal, linVel0, angVel0, linVel1,
|
|
angVel1, li0, ai0, li1, ai1, appliedForceBuffer), minNorImpulse);
|
|
|
|
|
|
if (doFriction && numFrictionConstr)
|
|
{
|
|
Ps::prefetchLine(frictions);
|
|
const FloatV maxFrictionImpulse = FMul(hdr->getStaticFriction(), accumulatedNormalImpulse);
|
|
const FloatV maxDynFrictionImpulse = FMul(hdr->getDynamicFriction(), accumulatedNormalImpulse);
|
|
|
|
BoolV broken = BFFFF();
|
|
|
|
for (PxU32 i = 0; i<numFrictionConstr; i++)
|
|
{
|
|
SolverContactFrictionExt& f = frictions[i];
|
|
Ps::prefetchLine(&frictions[i + 1]);
|
|
|
|
const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
|
|
const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
|
|
const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;
|
|
|
|
const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
|
|
/*const Vec3V normal0 = V3Scale(normal, sqrtInvMass0);
|
|
const Vec3V normal1 = V3Scale(normal, sqrtInvMass1);*/
|
|
const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
|
|
const Vec3V rbXn = Vec3V_From_Vec4V(rbXnXYZ_biasW);
|
|
|
|
const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
|
|
const FloatV bias = V4GetW(rbXnXYZ_biasW);
|
|
const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
|
|
|
|
const FloatV targetVel = FLoad(f.targetVel);
|
|
|
|
const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);
|
|
const FloatV negMaxFrictionImpulse = FNeg(maxFrictionImpulse);
|
|
|
|
const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angVel0, raXn));
|
|
const Vec3V v1 = V3MulAdd(linVel1, normal, V3Mul(angVel1, rbXn));
|
|
const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
|
|
|
|
// appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
|
|
const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel), velMultiplier, appliedForce);
|
|
|
|
// Algorithm:
|
|
// if abs(appliedForce + deltaF) > maxFrictionImpulse
|
|
// clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
|
|
// (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
|
|
// set broken flag to true || broken flag
|
|
|
|
// FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
|
|
// FloatV potentialSumF = FAdd(appliedForce, deltaF);
|
|
|
|
const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
|
|
|
|
// On XBox this clamping code uses the vector simple pipe rather than vector float,
|
|
// which eliminates a lot of stall cycles
|
|
|
|
const BoolV clampLow = FIsGrtr(negMaxFrictionImpulse, totalImpulse);
|
|
const BoolV clampHigh = FIsGrtr(totalImpulse, maxFrictionImpulse);
|
|
|
|
const FloatV totalClampedLow = FMax(negMaxDynFrictionImpulse, totalImpulse);
|
|
const FloatV totalClampedHigh = FMin(maxDynFrictionImpulse, totalImpulse);
|
|
|
|
const FloatV newAppliedForce = FSel(clampLow, totalClampedLow,
|
|
FSel(clampHigh, totalClampedHigh, totalImpulse));
|
|
|
|
broken = BOr(broken, BOr(clampLow, clampHigh));
|
|
|
|
FloatV deltaF = FSub(newAppliedForce, appliedForce);
|
|
|
|
linVel0 = V3ScaleAdd(f.linDeltaVA, deltaF, linVel0);
|
|
angVel0 = V3ScaleAdd(f.angDeltaVA, deltaF, angVel0);
|
|
linVel1 = V3ScaleAdd(f.linDeltaVB, deltaF, linVel1);
|
|
angVel1 = V3ScaleAdd(f.angDeltaVB, deltaF, angVel1);
|
|
|
|
li0 = V3ScaleAdd(normal, deltaF, li0); ai0 = V3ScaleAdd(raXn, deltaF, ai0);
|
|
li1 = V3ScaleAdd(normal, deltaF, li1); ai1 = V3ScaleAdd(rbXn, deltaF, ai1);
|
|
|
|
#if 0
|
|
PxVec3 lv0, lv1, av0, av1;
|
|
V3StoreU(linVel0, lv0); V3StoreU(linVel1, lv1);
|
|
V3StoreU(angVel0, av0); V3StoreU(angVel1, av1);
|
|
|
|
PX_ASSERT(lv0.magnitude() < 30.f);
|
|
PX_ASSERT(lv1.magnitude() < 30.f);
|
|
PX_ASSERT(av0.magnitude() < 30.f);
|
|
PX_ASSERT(av1.magnitude() < 30.f);
|
|
#endif
|
|
|
|
f.setAppliedForce(newAppliedForce);
|
|
}
|
|
Store_From_BoolV(broken, &hdr->broken);
|
|
}
|
|
|
|
linImpulse0 = V3ScaleAdd(li0, hdr->getDominance0(), linImpulse0);
|
|
angImpulse0 = V3ScaleAdd(ai0, FLoad(hdr->angDom0), angImpulse0);
|
|
linImpulse1 = V3NegScaleSub(li1, hdr->getDominance1(), linImpulse1);
|
|
angImpulse1 = V3NegScaleSub(ai1, FLoad(hdr->angDom1), angImpulse1);
|
|
}
|
|
}
|
|
|
|
|
|
void solveExtContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
|
|
{
|
|
Vec3V linVel0, angVel0, linVel1, angVel1;
|
|
|
|
if (desc.articulationA == desc.articulationB)
|
|
{
|
|
Cm::SpatialVectorV v0, v1;
|
|
desc.articulationA->pxcFsGetVelocities(desc.linkIndexA, desc.linkIndexB, v0, v1);
|
|
linVel0 = v0.linear;
|
|
angVel0 = v0.angular;
|
|
linVel1 = v1.linear;
|
|
angVel1 = v1.angular;
|
|
}
|
|
else
|
|
{
|
|
|
|
if (desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
|
|
{
|
|
linVel0 = V3LoadA(desc.bodyA->linearVelocity);
|
|
angVel0 = V3LoadA(desc.bodyA->angularState);
|
|
}
|
|
else
|
|
{
|
|
Cm::SpatialVectorV v = desc.articulationA->pxcFsGetVelocity(desc.linkIndexA);
|
|
linVel0 = v.linear;
|
|
angVel0 = v.angular;
|
|
}
|
|
|
|
if (desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
|
|
{
|
|
linVel1 = V3LoadA(desc.bodyB->linearVelocity);
|
|
angVel1 = V3LoadA(desc.bodyB->angularState);
|
|
}
|
|
else
|
|
{
|
|
Cm::SpatialVectorV v = desc.articulationB->pxcFsGetVelocity(desc.linkIndexB);
|
|
linVel1 = v.linear;
|
|
angVel1 = v.angular;
|
|
}
|
|
}
|
|
|
|
Vec3V linImpulse0 = V3Zero(), linImpulse1 = V3Zero(), angImpulse0 = V3Zero(), angImpulse1 = V3Zero();
|
|
|
|
//Vec3V origLin0 = linVel0, origAng0 = angVel0, origLin1 = linVel1, origAng1 = angVel1;
|
|
|
|
solveExtContact(desc, linVel0, linVel1, angVel0, angVel1, linImpulse0, linImpulse1, angImpulse0, angImpulse1, cache.doFriction);
|
|
|
|
if (desc.articulationA == desc.articulationB)
|
|
{
|
|
desc.articulationA->pxcFsApplyImpulses(desc.linkIndexA, linImpulse0, angImpulse0, desc.linkIndexB, linImpulse1, angImpulse1,
|
|
cache.Z, cache.deltaV);
|
|
}
|
|
else
|
|
{
|
|
if (desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
|
|
{
|
|
V3StoreA(linVel0, desc.bodyA->linearVelocity);
|
|
V3StoreA(angVel0, desc.bodyA->angularState);
|
|
}
|
|
else
|
|
{
|
|
desc.articulationA->pxcFsApplyImpulse(desc.linkIndexA,
|
|
linImpulse0, angImpulse0, cache.Z, cache.deltaV);
|
|
}
|
|
|
|
if (desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
|
|
{
|
|
V3StoreA(linVel1, desc.bodyB->linearVelocity);
|
|
V3StoreA(angVel1, desc.bodyB->angularState);
|
|
}
|
|
else
|
|
{
|
|
desc.articulationB->pxcFsApplyImpulse(desc.linkIndexB,
|
|
linImpulse1, angImpulse1, cache.Z, cache.deltaV);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void solveExtContactBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 0; a < constraintCount; ++a)
|
|
{
|
|
solveExtContact(desc[a], cache);
|
|
}
|
|
}
|
|
|
|
void solveExtContactConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 0; a < constraintCount; ++a)
|
|
{
|
|
solveExtContact(desc[a], cache);
|
|
concludeContact(desc[a], cache);
|
|
}
|
|
}
|
|
|
|
void solveExtContactBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 0; a < constraintCount; ++a)
|
|
{
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].linkIndexA != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].linkIndexB != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyBDataIndex];
|
|
|
|
solveExtContact(desc[a], cache);
|
|
writeBackContact(desc[a], cache, bd0, bd1);
|
|
}
|
|
if(cache.mThresholdStreamIndex > 0)
|
|
{
|
|
//Not enough space to write 4 more thresholds back!
|
|
//Write back to global buffer
|
|
PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
|
|
for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
|
|
{
|
|
cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
|
|
}
|
|
cache.mThresholdStreamIndex = 0;
|
|
}
|
|
}
|
|
|
|
void solveExt1DBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 0; a < constraintCount; ++a)
|
|
{
|
|
solveExt1D(desc[a], cache);
|
|
}
|
|
}
|
|
|
|
void solveExt1DConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 0; a < constraintCount; ++a)
|
|
{
|
|
solveExt1D(desc[a], cache);
|
|
conclude1D(desc[a], cache);
|
|
}
|
|
}
|
|
|
|
void solveExt1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 0; a < constraintCount; ++a)
|
|
{
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].linkIndexA != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].linkIndexB != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyBDataIndex];
|
|
solveExt1D(desc[a], cache);
|
|
writeBack1D(desc[a], cache, bd0, bd1);
|
|
}
|
|
}
|
|
|
|
void ext1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
|
|
{
|
|
for(PxU32 a = 0; a < constraintCount; ++a)
|
|
{
|
|
PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].linkIndexA != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyADataIndex];
|
|
PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].linkIndexB != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyBDataIndex];
|
|
writeBack1D(desc[a], cache, bd0, bd1);
|
|
}
|
|
}
|
|
|
|
void solveConcludeExtContact (const PxSolverConstraintDesc& desc, SolverContext& cache)
|
|
{
|
|
solveExtContact(desc, cache);
|
|
concludeContact(desc, cache);
|
|
}
|
|
|
|
void solveConcludeExt1D (const PxSolverConstraintDesc& desc, SolverContext& cache)
|
|
{
|
|
solveExt1D(desc, cache);
|
|
conclude1D(desc, cache);
|
|
}
|
|
|
|
|
|
void solveConclude1D(const PxSolverConstraintDesc& desc, SolverContext& cache)
|
|
{
|
|
solve1D(desc, cache);
|
|
conclude1D(desc, cache);
|
|
}
|
|
|
|
void solveConcludeContact (const PxSolverConstraintDesc& desc, SolverContext& cache)
|
|
{
|
|
solveContact(desc, cache);
|
|
concludeContact(desc, cache);
|
|
}
|
|
|
|
void solveConcludeContact_BStatic (const PxSolverConstraintDesc& desc, SolverContext& cache)
|
|
{
|
|
solveContact_BStatic(desc, cache);
|
|
concludeContact(desc, cache);
|
|
}
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|