From 2361fcd88298feeb114ee708140b7a6401ae62a6 Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Sun, 27 Apr 2025 14:40:30 -0500 Subject: [PATCH] Collision + Vec optimizations (liberty only). --- src/liberty/core/World.cpp | 28 +++++++++++++-- src/liberty/core/ZoneCull.cpp | 57 +++++++++++++++++++++++-------- src/liberty/math/Vector.cpp | 3 +- vendor/librw/src/dc/rwdc_common.h | 40 +++++++++++++++++++++- 4 files changed, 107 insertions(+), 21 deletions(-) diff --git a/src/liberty/core/World.cpp b/src/liberty/core/World.cpp index 1c34a633..17a3532f 100644 --- a/src/liberty/core/World.cpp +++ b/src/liberty/core/World.cpp @@ -1215,6 +1215,9 @@ CWorld::FindObjectsIntersectingAngledCollisionBox(const CColBox &boundingBox, co const int32 nStartY = Max(GetSectorIndexY(fStartY), 0); const int32 nEndX = Min(GetSectorIndexX(fEndX), NUMSECTORS_X - 1); const int32 nEndY = Min(GetSectorIndexY(fEndY), NUMSECTORS_Y - 1); +#ifdef DC_SH4 + mat_load_transpose(matrix); +#endif for(int32 y = nStartY; y <= nEndY; y++) { for(int32 x = nStartX; x <= nEndX; x++) { CSector *pSector = GetSector(x, y); @@ -1268,14 +1271,19 @@ CWorld::FindObjectsIntersectingAngledCollisionBoxSectorList(CPtrList &list, cons int16 *nEntitiesFound, int16 maxEntitiesToFind, CEntity **aEntities) { - for(CPtrNode *pNode = list.first; pNode; pNode = pNode->next) { + for(CPtrNode *pNode = list.first; pNode; pNode = pNode->next) { CEntity *pEntity = (CEntity *)pNode->item; if(pEntity->m_scanCode != GetCurrentScanCode()) { pEntity->m_scanCode = GetCurrentScanCode(); CColSphere sphere; CVector vecDistance = pEntity->GetPosition() - position; sphere.radius = pEntity->GetBoundRadius(); +#ifndef DC_SH4 sphere.center = Multiply3x3(vecDistance, matrix); +#else // Transposed matrix was already loaded by the caller, so no need to reload! 
+ mat_trans_normal3_nomod(vecDistance.x, vecDistance.y, vecDistance.z, + sphere.center.x, sphere.center.y, sphere.center.z); +#endif if(CCollision::TestSphereBox(sphere, boundingBox) && *nEntitiesFound < maxEntitiesToFind) { if(aEntities) aEntities[*nEntitiesFound] = pEntity; ++*nEntitiesFound; @@ -1444,13 +1452,27 @@ CWorld::CallOffChaseForAreaSectorListVehicles(CPtrList &list, float x1, float y1 pVehicle->AutoPilot.m_nTimeTempAction = CTimer::GetTimeInMilliseconds() + 2000; CColModel *pColModel = pVehicle->GetColModel(); bool bInsideSphere = false; +#ifdef DC_SH4 + mat_load2(pVehicle->GetMatrix()); +#endif for(int32 i = 0; i < pColModel->numSpheres; i++) { +#ifndef DC_SH4 CVector pos = pVehicle->GetMatrix() * pColModel->spheres[i].center; - float fRadius = pColModel->spheres[i].radius; +#else + CVector pos; + auto &center = pColModel->spheres[i].center; + mat_trans_single3_nodiv_nomod(center.x, center.y, center.z, + pos.x, pos.y, pos.z); +#endif + float fRadius = pColModel->spheres[i].radius; if(pos.x + fRadius > x1 && pos.x - fRadius < x2 && pos.y + fRadius > y1 && - pos.y - fRadius < y2) + pos.y - fRadius < y2) { bInsideSphere = true; // Maybe break the loop when bInsideSphere is set to true? +#ifdef DC_SH4 // Don't see why not! 
+ break; +#endif + } } if(bInsideSphere) { if(pVehicle->GetPosition().x <= (x1 + x2) * 0.5f) diff --git a/src/liberty/core/ZoneCull.cpp b/src/liberty/core/ZoneCull.cpp index 5a76e5ed..44e3d4b7 100644 --- a/src/liberty/core/ZoneCull.cpp +++ b/src/liberty/core/ZoneCull.cpp @@ -949,11 +949,15 @@ CCullZone::FindTestPoints() if(ElementsY > 32) ElementsY = 32; if(ElementsZ > 32) ElementsZ = 32; Memsize = ElementsX * ElementsY * ElementsZ; - StepX = (maxx-minx)/(ElementsX-1); - StepY = (maxy-miny)/(ElementsY-1); - StepZ = (maxz-minz)/(ElementsZ-1); + StepX = Div(maxx-minx, ElementsX-1); + StepY = Div(maxy-miny, ElementsY-1); + StepZ = Div(maxz-minz, ElementsZ-1); +#ifndef DC_SH4 pMem = new uint8[Memsize]; +#else + pMem = reinterpret_cast<uint8 *>(alloca(Memsize)); +#endif memset(pMem, 0, Memsize); // indices of center @@ -1496,13 +1500,28 @@ CCullZone::TestEntityVisibilityFromCullZone(CEntity *entity, float extraDist, CE else boundMaxZ += extraDist; +#ifndef DC_SH4 CVector vecMin = entity->GetMatrix() * CVector(boundMinX, boundMinY, boundMinZ); CVector vecMaxX = entity->GetMatrix() * CVector(boundMaxX, boundMinY, boundMinZ); CVector vecMaxY = entity->GetMatrix() * CVector(boundMinX, boundMaxY, boundMinZ); CVector vecMaxZ = entity->GetMatrix() * CVector(boundMinX, boundMinY, boundMaxZ); - CVector dirx = vecMaxX - vecMin; - CVector diry = vecMaxY - vecMin; - CVector dirz = vecMaxZ - vecMin; +#else + mat_load2(entity->GetMatrix()); + + CVector vecMin, vecMaxX, vecMaxY, vecMaxZ; + mat_trans_single3_nodiv_nomod(boundMinX, boundMinY, boundMinZ, + vecMin.x, vecMin.y, vecMin.z); + mat_trans_single3_nodiv_nomod(boundMaxX, boundMinY, boundMinZ, + vecMaxX.x, vecMaxX.y, vecMaxX.z); + mat_trans_single3_nodiv_nomod(boundMinX, boundMaxY, boundMinZ, + vecMaxY.x, vecMaxY.y, vecMaxY.z); + mat_trans_single3_nodiv_nomod(boundMinX, boundMinY, boundMaxZ, + vecMaxZ.x, vecMaxZ.y, vecMaxZ.z); +#endif + + CVector dirx = vecMaxX - vecMin; + CVector diry = vecMaxY - vecMin; + CVector dirz = vecMaxZ - vecMin; 
// If building intersects zone at all, it's visible int x, y, z; @@ -1520,22 +1539,30 @@ CCullZone::TestEntityVisibilityFromCullZone(CEntity *entity, float extraDist, CE float distToZone = CalcDistToCullZone(entity->GetPosition().x, entity->GetPosition().y)/15.0f; distToZone = Max(distToZone, 7.0f); - int numX = (boundMaxX - boundMinX)/distToZone + 2.0f; - int numY = (boundMaxY - boundMinY)/distToZone + 2.0f; - int numZ = (boundMaxZ - boundMinZ)/distToZone + 2.0f; + float invDistToZone = Invert(distToZone); + int numX = (boundMaxX - boundMinX)*invDistToZone + 2.0f; + int numY = (boundMaxY - boundMinY)*invDistToZone + 2.0f; + int numZ = (boundMaxZ - boundMinZ)*invDistToZone + 2.0f; - float stepX = 1.0f/(numX-1); - float stepY = 1.0f/(numY-1); - float stepZ = 1.0f/(numZ-1); + float stepX = Invert(numX-1); + float stepY = Invert(numY-1); + float stepZ = Invert(numZ-1); float midX = (boundMaxX + boundMinX)/2.0f; float midY = (boundMaxY + boundMinY)/2.0f; float midZ = (boundMaxZ + boundMinZ)/2.0f; // check both xy planes - for(int i = 0; i < NumTestPoints; i++){ +#ifndef DC_SH4 + CVector mid = entity->GetMatrix() * CVector(midX, midY, midZ); +#else + CVector mid; + mat_trans_single3_nodiv_nomod(midX, midY, midZ, + mid.x, mid.y, mid.z); +#endif + mid.z += 0.1f; + for(int i = 0; i < NumTestPoints; i++){ CVector testPoint = aTestPoints[i]; - CVector mid = entity->GetMatrix() * CVector(midX, midY, midZ); - mid.z += 0.1f; + if(DoThoroughLineTest(testPoint, mid, entity)) return true; diff --git a/src/liberty/math/Vector.cpp b/src/liberty/math/Vector.cpp index 4f5a21ed..797b22d0 100644 --- a/src/liberty/math/Vector.cpp +++ b/src/liberty/math/Vector.cpp @@ -50,8 +50,7 @@ Multiply3x3(const CVector &vec, const CMatrix &mat) mat.ux * vec.x + mat.uy * vec.y + mat.uz * vec.z); #else CVector out; - dc::mat_load2(mat); - mat_transpose(); + dc::mat_load_transpose(mat); mat_trans_normal3_nomod(vec.x, vec.y, vec.z, out.x, out.y, out.z); return out; diff --git 
a/vendor/librw/src/dc/rwdc_common.h b/vendor/librw/src/dc/rwdc_common.h index 9c8f673a..dc532776 100644 --- a/vendor/librw/src/dc/rwdc_common.h +++ b/vendor/librw/src/dc/rwdc_common.h @@ -185,7 +185,7 @@ __always_inline __hot constexpr auto Norm(auto value, auto min, auto max) { x2 = __x; y2 = __y; z2 = __z; w2 = __w; \ } while(false) -inline __hot __icache_aligned void mat_load2(const matrix_t* mtx) { +inline __hot __icache_aligned void mat_load2(const matrix_t *mtx) { asm volatile( R"( fschg @@ -208,6 +208,44 @@ inline __hot __icache_aligned void mat_load2(const matrix_t* mtx) { ); } +inline __hot __icache_aligned void mat_load_transpose(const matrix_t *mtx) { /* Reads the 16 consecutive floats at mtx into the FP register bank that is swapped in between the two frchg instructions, in the order fr0, fr4, fr8, fr12, fr1, ..., fr15, i.e. transposed relative to a straight fr0..fr15 load. After the prefetch the pointer is rewound by the full 32 bytes: the first load already post-incremented it, so rewinding by only 28 would skip element 1 and read one float past the end of the matrix. */ + asm volatile( + R"( + frchg + + fmov.s @%[mtx]+, fr0 + + add #32, %[mtx] + pref @%[mtx] + add #-32, %[mtx] + + fmov.s @%[mtx]+, fr4 + fmov.s @%[mtx]+, fr8 + fmov.s @%[mtx]+, fr12 + + fmov.s @%[mtx]+, fr1 + fmov.s @%[mtx]+, fr5 + fmov.s @%[mtx]+, fr9 + fmov.s @%[mtx]+, fr13 + + fmov.s @%[mtx]+, fr2 + fmov.s @%[mtx]+, fr6 + fmov.s @%[mtx]+, fr10 + fmov.s @%[mtx]+, fr14 + + fmov.s @%[mtx]+, fr3 + fmov.s @%[mtx]+, fr7 + fmov.s @%[mtx]+, fr11 + fmov.s @%[mtx]+, fr15 + + frchg + )" + : [mtx] "+r" (mtx) /* read-write: the post-increment loads advance the pointer */ + : /* no inputs */ + : "memory" /* the asm reads the matrix through mtx, so earlier stores to it must not be reordered past this statement */ + ); +} + inline __hot __icache_aligned void mat_store2(matrix_t *mtx) { asm volatile( R"(