Collision + Vec optimizations (liberty only).

This commit is contained in:
Falco Girgis
2025-04-27 14:40:30 -05:00
parent 1f2f270da9
commit 2361fcd882
4 changed files with 107 additions and 21 deletions

View File

@@ -1215,6 +1215,9 @@ CWorld::FindObjectsIntersectingAngledCollisionBox(const CColBox &boundingBox, co
const int32 nStartY = Max(GetSectorIndexY(fStartY), 0);
const int32 nEndX = Min(GetSectorIndexX(fEndX), NUMSECTORS_X - 1);
const int32 nEndY = Min(GetSectorIndexY(fEndY), NUMSECTORS_Y - 1);
#ifdef DC_SH4
mat_load_transpose(matrix);
#endif
for(int32 y = nStartY; y <= nEndY; y++) {
for(int32 x = nStartX; x <= nEndX; x++) {
CSector *pSector = GetSector(x, y);
@@ -1268,14 +1271,19 @@ CWorld::FindObjectsIntersectingAngledCollisionBoxSectorList(CPtrList &list, cons
int16 *nEntitiesFound, int16 maxEntitiesToFind,
CEntity **aEntities)
{
for(CPtrNode *pNode = list.first; pNode; pNode = pNode->next) {
for(CPtrNode *pNode = list.first; pNode; pNode = pNode->next) {
CEntity *pEntity = (CEntity *)pNode->item;
if(pEntity->m_scanCode != GetCurrentScanCode()) {
pEntity->m_scanCode = GetCurrentScanCode();
CColSphere sphere;
CVector vecDistance = pEntity->GetPosition() - position;
sphere.radius = pEntity->GetBoundRadius();
#ifndef DC_SH4
sphere.center = Multiply3x3(vecDistance, matrix);
#else // Transposed matrix was already loaded by the caller, so no need to reload!
mat_trans_normal3_nomod(vecDistance.x, vecDistance.y, vecDistance.z,
sphere.center.x, sphere.center.y, sphere.center.z);
#endif
if(CCollision::TestSphereBox(sphere, boundingBox) && *nEntitiesFound < maxEntitiesToFind) {
if(aEntities) aEntities[*nEntitiesFound] = pEntity;
++*nEntitiesFound;
@@ -1444,13 +1452,27 @@ CWorld::CallOffChaseForAreaSectorListVehicles(CPtrList &list, float x1, float y1
pVehicle->AutoPilot.m_nTimeTempAction = CTimer::GetTimeInMilliseconds() + 2000;
CColModel *pColModel = pVehicle->GetColModel();
bool bInsideSphere = false;
#ifdef DC_SH4
mat_load2(pVehicle->GetMatrix());
#endif
for(int32 i = 0; i < pColModel->numSpheres; i++) {
#ifndef DC_SH4
CVector pos = pVehicle->GetMatrix() * pColModel->spheres[i].center;
float fRadius = pColModel->spheres[i].radius;
#else
CVector pos;
auto &center = pColModel->spheres[i].center;
mat_trans_single3_nodiv_nomod(center.x, center.y, center.z,
pos.x, pos.y, pos.z);
#endif
float fRadius = pColModel->spheres[i].radius;
if(pos.x + fRadius > x1 && pos.x - fRadius < x2 && pos.y + fRadius > y1 &&
pos.y - fRadius < y2)
pos.y - fRadius < y2) {
bInsideSphere = true;
// Maybe break the loop when bInsideSphere is set to true?
#ifdef DC_SH4 // Don't see why not!
break;
#endif
}
}
if(bInsideSphere) {
if(pVehicle->GetPosition().x <= (x1 + x2) * 0.5f)

View File

@@ -949,11 +949,15 @@ CCullZone::FindTestPoints()
if(ElementsY > 32) ElementsY = 32;
if(ElementsZ > 32) ElementsZ = 32;
Memsize = ElementsX * ElementsY * ElementsZ;
StepX = (maxx-minx)/(ElementsX-1);
StepY = (maxy-miny)/(ElementsY-1);
StepZ = (maxz-minz)/(ElementsZ-1);
StepX = Div<true, false>(maxx-minx, ElementsX-1);
StepY = Div<true, false>(maxy-miny, ElementsY-1);
StepZ = Div<true, false>(maxz-minz, ElementsZ-1);
#ifndef DC_SH4
pMem = new uint8[Memsize];
#else
pMem = reinterpret_cast<uint8 *>(alloca(Memsize));
#endif
memset(pMem, 0, Memsize);
// indices of center
@@ -1496,13 +1500,28 @@ CCullZone::TestEntityVisibilityFromCullZone(CEntity *entity, float extraDist, CE
else
boundMaxZ += extraDist;
#ifndef DC_SH4
CVector vecMin = entity->GetMatrix() * CVector(boundMinX, boundMinY, boundMinZ);
CVector vecMaxX = entity->GetMatrix() * CVector(boundMaxX, boundMinY, boundMinZ);
CVector vecMaxY = entity->GetMatrix() * CVector(boundMinX, boundMaxY, boundMinZ);
CVector vecMaxZ = entity->GetMatrix() * CVector(boundMinX, boundMinY, boundMaxZ);
CVector dirx = vecMaxX - vecMin;
CVector diry = vecMaxY - vecMin;
CVector dirz = vecMaxZ - vecMin;
#else
mat_load2(entity->GetMatrix());
CVector vecMin, vecMaxX, vecMaxY, vecMaxZ;
mat_trans_single3_nodiv_nomod(boundMinX, boundMinY, boundMinZ,
vecMin.x, vecMin.y, vecMin.z);
mat_trans_single3_nodiv_nomod(boundMaxX, boundMinY, boundMinZ,
vecMaxX.x, vecMaxX.y, vecMaxX.z);
mat_trans_single3_nodiv_nomod(boundMinX, boundMaxY, boundMinZ,
vecMaxY.x, vecMaxY.y, vecMaxY.z);
mat_trans_single3_nodiv_nomod(boundMinX, boundMinY, boundMaxZ,
vecMaxZ.x, vecMaxZ.y, vecMaxZ.z);
#endif
CVector dirx = vecMaxX - vecMin;
CVector diry = vecMaxY - vecMin;
CVector dirz = vecMaxZ - vecMin;
// If building intersects zone at all, it's visible
int x, y, z;
@@ -1520,22 +1539,30 @@ CCullZone::TestEntityVisibilityFromCullZone(CEntity *entity, float extraDist, CE
float distToZone = CalcDistToCullZone(entity->GetPosition().x, entity->GetPosition().y)/15.0f;
distToZone = Max(distToZone, 7.0f);
int numX = (boundMaxX - boundMinX)/distToZone + 2.0f;
int numY = (boundMaxY - boundMinY)/distToZone + 2.0f;
int numZ = (boundMaxZ - boundMinZ)/distToZone + 2.0f;
float invDistToZone = Invert<true, false>(distToZone);
int numX = (boundMaxX - boundMinX)*invDistToZone + 2.0f;
int numY = (boundMaxY - boundMinY)*invDistToZone + 2.0f;
int numZ = (boundMaxZ - boundMinZ)*invDistToZone + 2.0f;
float stepX = 1.0f/(numX-1);
float stepY = 1.0f/(numY-1);
float stepZ = 1.0f/(numZ-1);
float stepX = Invert<true, false>(numX-1);
float stepY = Invert<true, false>(numY-1);
float stepZ = Invert<true, false>(numZ-1);
float midX = (boundMaxX + boundMinX)/2.0f;
float midY = (boundMaxY + boundMinY)/2.0f;
float midZ = (boundMaxZ + boundMinZ)/2.0f;
// check both xy planes
for(int i = 0; i < NumTestPoints; i++){
#ifndef DC_SH4
CVector mid = entity->GetMatrix() * CVector(midX, midY, midZ);
#else
CVector mid;
mat_trans_single3_nodiv_nomod(midX, midY, midZ,
mid.x, mid.y, mid.z);
#endif
mid.z += 0.1f;
for(int i = 0; i < NumTestPoints; i++){
CVector testPoint = aTestPoints[i];
CVector mid = entity->GetMatrix() * CVector(midX, midY, midZ);
mid.z += 0.1f;
if(DoThoroughLineTest(testPoint, mid, entity))
return true;

View File

@@ -50,8 +50,7 @@ Multiply3x3(const CVector &vec, const CMatrix &mat)
mat.ux * vec.x + mat.uy * vec.y + mat.uz * vec.z);
#else
CVector out;
dc::mat_load2(mat);
mat_transpose();
dc::mat_load_transpose(mat);
mat_trans_normal3_nomod(vec.x, vec.y, vec.z,
out.x, out.y, out.z);
return out;

View File

@@ -185,7 +185,7 @@ __always_inline __hot constexpr auto Norm(auto value, auto min, auto max) {
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
} while(false)
inline __hot __icache_aligned void mat_load2(const matrix_t* mtx) {
inline __hot __icache_aligned void mat_load2(const matrix_t *mtx) {
asm volatile(
R"(
fschg
@@ -208,6 +208,44 @@ inline __hot __icache_aligned void mat_load2(const matrix_t* mtx) {
);
}
/**
 * Loads the 16 consecutive floats at `mtx` into the SH4 back-bank FP
 * registers, transposing on the fly.
 *
 * The register bank is swapped with `frchg` so that writes to fr0..fr15 land
 * in the back bank; it is swapped back before returning, leaving the front
 * bank untouched. Consecutive floats in memory are scattered with a register
 * stride of four (fr0, fr4, fr8, fr12, fr1, fr5, ...), which is what makes
 * the loaded matrix the transpose of a straight sequential load.
 *
 * @param mtx  Source matrix; exactly 16 floats (64 bytes) are read from it.
 *
 * NOTE: the first load post-increments the pointer by 4, so after the
 * "+32 / pref" detour the pointer must be rewound by the full 32 (not 32 - 4)
 * to resume at element 1. Rewinding by only 28 skips element 1, shifts every
 * later element into the wrong register, and reads one float past the end of
 * the matrix.
 */
inline __hot __icache_aligned void mat_load_transpose(const matrix_t *mtx) {
    // The "m" input tells the compiler the asm reads the matrix contents, so
    // pending stores to *mtx are not reordered past (or dropped before) it.
    // NOTE(review): assumes matrix_t spans the full 64 bytes being read.
    asm volatile(
        R"(
            frchg

            fmov.s  @%[mtx]+, fr0

            add     #32, %[mtx]
            pref    @%[mtx]
            add     #-32, %[mtx]

            fmov.s  @%[mtx]+, fr4
            fmov.s  @%[mtx]+, fr8
            fmov.s  @%[mtx]+, fr12

            fmov.s  @%[mtx]+, fr1
            fmov.s  @%[mtx]+, fr5
            fmov.s  @%[mtx]+, fr9
            fmov.s  @%[mtx]+, fr13

            fmov.s  @%[mtx]+, fr2
            fmov.s  @%[mtx]+, fr6
            fmov.s  @%[mtx]+, fr10
            fmov.s  @%[mtx]+, fr14

            fmov.s  @%[mtx]+, fr3
            fmov.s  @%[mtx]+, fr7
            fmov.s  @%[mtx]+, fr11
            fmov.s  @%[mtx]+, fr15

            frchg
        )"
        : [mtx] "+r" (mtx)
        : "m" (*mtx)
        :
    );
}
inline __hot __icache_aligned void mat_store2(matrix_t *mtx) {
asm volatile(
R"(