From 0ba1c595cd8aac94d3312cd3aaa7825a1294c02e Mon Sep 17 00:00:00 2001 From: Falco Girgis Date: Fri, 28 Mar 2025 00:17:25 -0500 Subject: [PATCH] Acceleration working in miami. --- miami/Makefile | 1 + src/miami/collision/ColLine.h | 2 +- src/miami/collision/ColPoint.h | 2 +- src/miami/collision/ColSphere.h | 7 +- src/miami/collision/Collision.cpp | 133 +++++++++++++++++++++++++++-- src/miami/math/Matrix.cpp | 12 +-- src/miami/math/Matrix.h | 16 ++-- src/miami/math/VuVector.h | 134 ++++++++++++++++++++++++++++-- src/miami/math/math.cpp | 113 ------------------------- src/miami/math/maths.h | 26 ++++++ 10 files changed, 302 insertions(+), 144 deletions(-) diff --git a/miami/Makefile b/miami/Makefile index da7af878..45f11bbd 100644 --- a/miami/Makefile +++ b/miami/Makefile @@ -118,6 +118,7 @@ OBJS_NO_FAST_MATH = \ ../src/miami/core/Cam.o \ ../src/miami/core/Camera.o \ ../src/miami/vehicles/Bike.o \ + ../src/miami/vehicles/Boat.o \ ../src/miami/renderer/Particle.o KOS_CPPFLAGS += -fbuiltin -ffast-math -ffp-contract=fast \ diff --git a/src/miami/collision/ColLine.h b/src/miami/collision/ColLine.h index 21587a06..fc942143 100644 --- a/src/miami/collision/ColLine.h +++ b/src/miami/collision/ColLine.h @@ -1,6 +1,6 @@ #pragma once -struct CColLine +struct alignas(8) CColLine { // NB: this has to be compatible with two CVuVectors CVector p0; diff --git a/src/miami/collision/ColPoint.h b/src/miami/collision/ColPoint.h index a15b2345..31a9bde4 100644 --- a/src/miami/collision/ColPoint.h +++ b/src/miami/collision/ColPoint.h @@ -1,6 +1,6 @@ #pragma once -struct CColPoint +struct alignas(8) CColPoint { CVector point; int pad1; diff --git a/src/miami/collision/ColSphere.h b/src/miami/collision/ColSphere.h index f86b282a..906fc1a3 100644 --- a/src/miami/collision/ColSphere.h +++ b/src/miami/collision/ColSphere.h @@ -2,7 +2,7 @@ #include "SurfaceTable.h" -struct CSphere +struct alignas(8) CSphere { // NB: this has to be compatible with a CVuVector CVector center; @@ -15,6 +15,11 @@ 
struct CColSphere : public CSphere uint8 surface; uint8 piece; + void Set(float radius, uint8 surf = SURFACE_DEFAULT, uint8 piece = 0) { + this->radius = radius; + this->surface = surf; + this->piece = piece; + } void Set(float radius, const CVector &center, uint8 surf, uint8 piece); bool IntersectRay(CVector const &from, CVector const &dir, CVector &entry, CVector &exit); using CSphere::Set; diff --git a/src/miami/collision/Collision.cpp b/src/miami/collision/Collision.cpp index f39f3f35..f4627f8e 100644 --- a/src/miami/collision/Collision.cpp +++ b/src/miami/collision/Collision.cpp @@ -24,6 +24,10 @@ #include "Camera.h" #include "ColStore.h" +#ifdef DC_SH4 +#include "VuCollision.h" +#endif + #ifdef VU_COLLISION #include "VuCollision.h" @@ -572,7 +576,12 @@ CCollision::TestLineOfSight(const CColLine &line, const CMatrix &matrix, CColMod // transform line to model space Invert(matrix, matTransform); - CColLine newline(matTransform * line.p0, matTransform * line.p1); + CColLine newline; +#ifndef DC_SH4 + newline.Set(matTransform * line.p0, matTransform * line.p1); +#else + TransformPoints(reinterpret_cast(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2); +#endif // If we don't intersect with the bounding box, no chance on the rest if(!TestLineBox(newline, model.boundingBox)) @@ -1428,7 +1437,12 @@ CCollision::ProcessLineOfSight(const CColLine &line, // transform line to model space Invert(matrix, matTransform); - CColLine newline(matTransform * line.p0, matTransform * line.p1); + CColLine newline; +#ifdef DC_SH4 + TransformPoints(reinterpret_cast(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2); +#else + newline.Set(matTransform * line.p0, matTransform * line.p1); +#endif // If we don't intersect with the bounding box, no chance on the rest if(!TestLineBox(newline, model.boundingBox)) @@ -1455,9 +1469,19 @@ CCollision::ProcessLineOfSight(const CColLine &line, } if(coldist < mindist){ +#ifndef DC_SH4 point.point = matrix * point.point; point.normal =
Multiply3x3(matrix, point.normal); +#else + mat_load(reinterpret_cast(const_cast(&matrix))); + mat_trans_single3_nodiv(point.point.x, + point.point.y, + point.point.z); + mat_trans_normal3(point.normal.x, + point.normal.y, + point.normal.z); +#endif mindist = coldist; return true; } return false; @@ -1593,7 +1616,14 @@ CCollision::ProcessVerticalLine(const CColLine &line, // transform line to model space // Why does the game seem to do this differently than above? - CColLine newline(MultiplyInverse(matrix, line.p0), MultiplyInverse(matrix, line.p1)); + CMatrix matTransform; + Invert(matrix, matTransform); + CColLine newline; +#ifndef DC_SH4 + newline.Set(matTransform * line.p0, matTransform * line.p1); +#else + TransformPoints(reinterpret_cast(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2); +#endif if(!TestLineBox(newline, model.boundingBox)) return false; @@ -1618,13 +1648,29 @@ CCollision::ProcessVerticalLine(const CColLine &line, } if(coldist < mindist){ +#ifndef DC_SH4 point.point = matrix * point.point; point.normal = Multiply3x3(matrix, point.normal); +#else + mat_load(reinterpret_cast(const_cast(&matrix))); + mat_trans_single3_nodiv(point.point.x, + point.point.y, + point.point.z); + mat_trans_normal3(point.normal.x, + point.normal.y, + point.normal.z); +#endif if(TempStoredPoly.valid && poly){ *poly = TempStoredPoly; +#ifndef DC_SH4 poly->verts[0] = matrix * poly->verts[0]; poly->verts[1] = matrix * poly->verts[1]; poly->verts[2] = matrix * poly->verts[2]; +#else + mat_trans_single3_nodiv(poly->verts[0].x, poly->verts[0].y, poly->verts[0].z); + mat_trans_single3_nodiv(poly->verts[1].x, poly->verts[1].y, poly->verts[1].z); + mat_trans_single3_nodiv(poly->verts[2].x, poly->verts[2].y, poly->verts[2].z); +#endif } mindist = coldist; return true; @@ -1981,21 +2027,52 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA, CColSphere bsphereAB; // bounding sphere of A in B space bsphereAB.radius = modelA.boundingSphere.radius;
+#ifndef DC_SH4 bsphereAB.center = matAB * modelA.boundingSphere.center; +#else + /* No need to reload the matrix, since it's already banked. + mat_load(reinterpret_cast(&matAB)); */ + mat_trans_single3_nodiv_nomod(modelA.boundingSphere.center.x, + modelA.boundingSphere.center.y, + modelA.boundingSphere.center.z, + bsphereAB.center.x, + bsphereAB.center.y, + bsphereAB.center.z); +#endif if(!TestSphereBox(bsphereAB, modelB.boundingBox)) return 0; - // B to A space - matBA = Invert(matrixA, matBA); - matBA *= matrixB; // transform modelA's spheres and lines to B space for(i = 0; i < modelA.numSpheres; i++){ CColSphere &s = modelA.spheres[i]; + #ifndef DC_SH4 aSpheresA[i].Set(s.radius, matAB * s.center, s.surface, s.piece); +#else + auto &d = aSpheresA[i]; + mat_trans_single3_nodiv_nomod(s.center.x, s.center.y, s.center.z, + d.center.x, d.center.y, d.center.z); + d.Set(s.radius, s.surface, s.piece); +#endif } - for(i = 0; i < modelA.numLines; i++) - aLinesA[i].Set(matAB * modelA.lines[i].p0, matAB * modelA.lines[i].p1); + for(i = 0; i < modelA.numLines; i++) { +#ifndef DC_SH4 + aLinesA[i].Set(matAB * modelA.lines[i].p0, matAB * modelA.lines[i].p1); +#else + mat_trans_single3_nodiv_nomod(modelA.lines[i].p0.x, + modelA.lines[i].p0.y, + modelA.lines[i].p0.z, + aLinesA[i].p0.x, + aLinesA[i].p0.y, + aLinesA[i].p0.z); + mat_trans_single3_nodiv_nomod(modelA.lines[i].p1.x, + modelA.lines[i].p1.y, + modelA.lines[i].p1.z, + aLinesA[i].p1.x, + aLinesA[i].p1.y, + aLinesA[i].p1.z); +#endif + } // Test them against model B's bounding volumes int numSpheresA = 0; int numLinesA = 0; @@ -2013,9 +2090,25 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA, int numSpheresB = 0; int numBoxesB = 0; int numTrianglesB = 0; + // B to A space + matBA = Invert(matrixA, matBA); + matBA *= matrixB; +#ifdef DC_SH4 + /* No need to reload the matrix, since it's already banked. 
+ mat_load(reinterpret_cast(&matBA)); */ +#endif for(i = 0; i < modelB.numSpheres; i++){ s.radius = modelB.spheres[i].radius; +#ifndef DC_SH4 s.center = matBA * modelB.spheres[i].center; +#else + mat_trans_single3_nodiv_nomod(modelB.spheres[i].center.x, + modelB.spheres[i].center.y, + modelB.spheres[i].center.z, + s.center.x, + s.center.y, + s.center.z); +#endif if(TestSphereBox(s, modelA.boundingBox)) aSphereIndicesB[numSpheresB++] = i; } @@ -2062,9 +2155,22 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA, if(hasCollided) numCollisions++; } + +#ifdef DC_SH4 + mat_load(reinterpret_cast(const_cast(&matrixB))); +#endif for(i = 0; i < numCollisions; i++){ +#ifndef DC_SH4 spherepoints[i].point = matrixB * spherepoints[i].point; spherepoints[i].normal = Multiply3x3(matrixB, spherepoints[i].normal); +#else + mat_trans_single3_nodiv(spherepoints[i].point.x, + spherepoints[i].point.y, + spherepoints[i].point.z); + mat_trans_normal3(spherepoints[i].normal.x, + spherepoints[i].normal.y, + spherepoints[i].normal.z); +#endif } // And the same thing for the lines in A @@ -2095,8 +2201,17 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA, for(i = 0; i < numLinesA; i++) if(aCollided[i]){ j = aLineIndicesA[i]; +#ifndef DC_SH4 linepoints[j].point = matrixB * linepoints[j].point; linepoints[j].normal = Multiply3x3(matrixB, linepoints[j].normal); +#else + mat_trans_single3_nodiv(linepoints[j].point.x, + linepoints[j].point.y, + linepoints[j].point.z); + mat_trans_normal3(linepoints[j].normal.x, + linepoints[j].normal.y, + linepoints[j].normal.z); +#endif } return numCollisions; // sphere collisions diff --git a/src/miami/math/Matrix.cpp b/src/miami/math/Matrix.cpp index c0d909cb..62b7029c 100644 --- a/src/miami/math/Matrix.cpp +++ b/src/miami/math/Matrix.cpp @@ -1,11 +1,5 @@ #include "common.h" -CMatrix::CMatrix(void) -{ - m_attachment = nil; - m_hasRwMatrix = false; -} - CMatrix::CMatrix(CMatrix const &m) { m_attachment = nil; @@ 
-434,6 +428,11 @@ operator*(const CMatrix &m1, const CMatrix &m2) { // TODO: VU0 code CMatrix out; +#if defined(RW_DC) + mat_load(reinterpret_cast(&m1)); + mat_apply(reinterpret_cast(&m2)); + mat_store(reinterpret_cast(&out)); +#else out.rx = m1.rx * m2.rx + m1.fx * m2.ry + m1.ux * m2.rz; out.ry = m1.ry * m2.rx + m1.fy * m2.ry + m1.uy * m2.rz; out.rz = m1.rz * m2.rx + m1.fz * m2.ry + m1.uz * m2.rz; @@ -446,6 +445,7 @@ operator*(const CMatrix &m1, const CMatrix &m2) out.px = m1.rx * m2.px + m1.fx * m2.py + m1.ux * m2.pz + m1.px; out.py = m1.ry * m2.px + m1.fy * m2.py + m1.uy * m2.pz + m1.py; out.pz = m1.rz * m2.px + m1.fz * m2.py + m1.uz * m2.pz + m1.pz; +#endif return out; } diff --git a/src/miami/math/Matrix.h b/src/miami/math/Matrix.h index 0adcf32c..39a476d6 100644 --- a/src/miami/math/Matrix.h +++ b/src/miami/math/Matrix.h @@ -1,6 +1,6 @@ #pragma once -class CMatrix +class alignas(8) CMatrix { public: #ifdef GTA_PS2 @@ -23,18 +23,18 @@ public: float f[4][4]; struct { - float rx, ry, rz, rw; - float fx, fy, fz, fw; - float ux, uy, uz, uw; - float px, py, pz, pw; + float rx, ry, rz, rw=0.0f; + float fx, fy, fz, fw=0.0f; + float ux, uy, uz, uw=0.0f; + float px, py, pz, pw=1.0f; }; }; - RwMatrix *m_attachment; - bool m_hasRwMatrix; // are we the owner? + RwMatrix *m_attachment=nil; + bool m_hasRwMatrix=false; // are we the owner? 
#endif - CMatrix(void); + CMatrix(void)=default; CMatrix(CMatrix const &m); CMatrix(RwMatrix *matrix, bool owner = false); CMatrix(float scale){ diff --git a/src/miami/math/VuVector.h b/src/miami/math/VuVector.h index 41584095..2e9fbc28 100644 --- a/src/miami/math/VuVector.h +++ b/src/miami/math/VuVector.h @@ -1,6 +1,14 @@ #pragma once -class TYPEALIGN(16) CVuVector : public CVector +#include "maths.h" + +#ifdef RW_DC +#define VECTOR_ALIGN 8 +#else +#define VECTOR_ALIGN 16 +#endif + +class TYPEALIGN(VECTOR_ALIGN) CVuVector : public CVector { public: float w; @@ -26,7 +34,123 @@ public: // TODO: operator- }; -void TransformPoint(CVuVector &out, const CMatrix &mat, const CVuVector &in); -void TransformPoint(CVuVector &out, const CMatrix &mat, const RwV3d &in); -void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride); -void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const CVuVector *in); +__always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const CVuVector &in) +{ +#ifdef GTA_PS2 + __asm__ __volatile__("\n\ + lqc2 vf01,0x0(%2)\n\ + lqc2 vf02,0x0(%1)\n\ + lqc2 vf03,0x10(%1)\n\ + lqc2 vf04,0x20(%1)\n\ + lqc2 vf05,0x30(%1)\n\ + vmulax.xyz ACC, vf02,vf01\n\ + vmadday.xyz ACC, vf03,vf01\n\ + vmaddaz.xyz ACC, vf04,vf01\n\ + vmaddw.xyz vf06,vf05,vf00\n\ + sqc2 vf06,0x0(%0)\n\ + ": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory"); +#elif defined(DC_SH4) + mat_load(reinterpret_cast(const_cast(&mat))); + mat_trans_nodiv_nomod(in.x, in.y, in.z, out.x, out.y, out.z, out.w); +#else + out = mat * in; +#endif +} + +__always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const RwV3d &in) +{ +#ifdef GTA_PS2 + __asm__ __volatile__("\n\ + ldr $8,0x0(%2)\n\ + ldl $8,0x7(%2)\n\ + lw $9,0x8(%2)\n\ + pcpyld $10,$9,$8\n\ + qmtc2 $10,vf01\n\ + lqc2 vf02,0x0(%1)\n\ + lqc2 vf03,0x10(%1)\n\ + lqc2 vf04,0x20(%1)\n\ + lqc2 vf05,0x30(%1)\n\ + vmulax.xyz ACC, vf02,vf01\n\ + vmadday.xyz ACC, vf03,vf01\n\ +
vmaddaz.xyz ACC, vf04,vf01\n\ + vmaddw.xyz vf06,vf05,vf00\n\ + sqc2 vf06,0x0(%0)\n\ + ": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory"); +#elif defined(DC_SH4) + mat_load(reinterpret_cast(const_cast(&mat))); + mat_trans_nodiv_nomod(in.x, in.y, in.z, out.x, out.y, out.z, out.w); +#else + out = mat * in; +#endif +} + +__always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride) +{ +#ifdef GTA_PS3 + __asm__ __volatile__("\n\ + paddub $3,%4,$0\n\ + lqc2 vf02,0x0(%2)\n\ + lqc2 vf03,0x10(%2)\n\ + lqc2 vf04,0x20(%2)\n\ + lqc2 vf05,0x30(%2)\n\ + ldr $8,0x0(%3)\n\ + ldl $8,0x7(%3)\n\ + lw $9,0x8(%3)\n\ + pcpyld $10,$9,$8\n\ + qmtc2 $10,vf01\n\ + 1: vmulax.xyz ACC, vf02,vf01\n\ + vmadday.xyz ACC, vf03,vf01\n\ + vmaddaz.xyz ACC, vf04,vf01\n\ + vmaddw.xyz vf06,vf05,vf00\n\ + add %3,%3,$3\n\ + ldr $8,0x0(%3)\n\ + ldl $8,0x7(%3)\n\ + lw $9,0x8(%3)\n\ + pcpyld $10,$9,$8\n\ + qmtc2 $10,vf01\n\ + addi %1,%1,-1\n\ + addiu %0,%0,0x10\n\ + sqc2 vf06,-0x10(%0)\n\ + bnez %1,1b\n\ + ": : "r" (out) , "r" (n), "r" (&mat), "r" (in), "r" (stride): "memory"); +#elif defined(DC_SH4) + mat_load(reinterpret_cast(const_cast(&mat))); + while(n--) { + mat_trans_single3_nodiv_nomod(in->x, in->y, in->z, out->x, out->y, out->z); + in = reinterpret_cast(reinterpret_cast(in) + stride); + ++out; + } +#else + while(n--){ + *out = mat * *in; + in = (RwV3d*)((uint8*)in + stride); + out++; + } +#endif +} + +__always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const CVuVector *in) +{ +#ifdef GTA_PS2 + __asm__ __volatile__("\n\ + lqc2 vf02,0x0(%2)\n\ + lqc2 vf03,0x10(%2)\n\ + lqc2 vf04,0x20(%2)\n\ + lqc2 vf05,0x30(%2)\n\ + lqc2 vf01,0x0(%3)\n\ + nop\n\ + 1: vmulax.xyz ACC, vf02,vf01\n\ + vmadday.xyz ACC, vf03,vf01\n\ + vmaddaz.xyz ACC, vf04,vf01\n\ + vmaddw.xyz vf06,vf05,vf00\n\ + lqc2 vf01,0x10(%3)\n\ + addiu %3,%3,0x10\n\ + addi %1,%1,-1\n\ + addiu %0,%0,0x10\n\ + sqc2 vf06,-0x10(%0)\n\ + bnez %1,1b\n\ + ": : "r" (out) , "r" (n), "r"
(&mat) ,"r" (in): "memory"); +#else + TransformPoints(out, n, mat, in, sizeof(CVuVector)); +#endif +} diff --git a/src/miami/math/math.cpp b/src/miami/math/math.cpp index 8cb56dab..75b6def8 100644 --- a/src/miami/math/math.cpp +++ b/src/miami/math/math.cpp @@ -3,116 +3,3 @@ #include "VuVector.h" // TODO: move more stuff into here - - -void TransformPoint(CVuVector &out, const CMatrix &mat, const CVuVector &in) -{ -#ifdef GTA_PS2 - __asm__ __volatile__("\n\ - lqc2 vf01,0x0(%2)\n\ - lqc2 vf02,0x0(%1)\n\ - lqc2 vf03,0x10(%1)\n\ - lqc2 vf04,0x20(%1)\n\ - lqc2 vf05,0x30(%1)\n\ - vmulax.xyz ACC, vf02,vf01\n\ - vmadday.xyz ACC, vf03,vf01\n\ - vmaddaz.xyz ACC, vf04,vf01\n\ - vmaddw.xyz vf06,vf05,vf00\n\ - sqc2 vf06,0x0(%0)\n\ - ": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory"); -#else - out = mat * in; -#endif -} - -void TransformPoint(CVuVector &out, const CMatrix &mat, const RwV3d &in) -{ -#ifdef GTA_PS2 - __asm__ __volatile__("\n\ - ldr $8,0x0(%2)\n\ - ldl $8,0x7(%2)\n\ - lw $9,0x8(%2)\n\ - pcpyld $10,$9,$8\n\ - qmtc2 $10,vf01\n\ - lqc2 vf02,0x0(%1)\n\ - lqc2 vf03,0x10(%1)\n\ - lqc2 vf04,0x20(%1)\n\ - lqc2 vf05,0x30(%1)\n\ - vmulax.xyz ACC, vf02,vf01\n\ - vmadday.xyz ACC, vf03,vf01\n\ - vmaddaz.xyz ACC, vf04,vf01\n\ - vmaddw.xyz vf06,vf05,vf00\n\ - sqc2 vf06,0x0(%0)\n\ - ": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory"); -#else - out = mat * in; -#endif -} - -void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride) -{ -#ifdef GTA_PS3 - __asm__ __volatile__("\n\ - paddub $3,%4,$0\n\ - lqc2 vf02,0x0(%2)\n\ - lqc2 vf03,0x10(%2)\n\ - lqc2 vf04,0x20(%2)\n\ - lqc2 vf05,0x30(%2)\n\ - ldr $8,0x0(%3)\n\ - ldl $8,0x7(%3)\n\ - lw $9,0x8(%3)\n\ - pcpyld $10,$9,$8\n\ - qmtc2 $10,vf01\n\ - 1: vmulax.xyz ACC, vf02,vf01\n\ - vmadday.xyz ACC, vf03,vf01\n\ - vmaddaz.xyz ACC, vf04,vf01\n\ - vmaddw.xyz vf06,vf05,vf00\n\ - add %3,%3,$3\n\ - ldr $8,0x0(%3)\n\ - ldl $8,0x7(%3)\n\ - lw $9,0x8(%3)\n\ - pcpyld $10,$9,$8\n\ - qmtc2 $10,vf01\n\ - addi 
%1,%1,-1\n\ - addiu %0,%0,0x10\n\ - sqc2 vf06,-0x10(%0)\n\ - bnez %1,1b\n\ - ": : "r" (out) , "r" (n), "r" (&mat), "r" (in), "r" (stride): "memory"); -#else - while(n--){ - *out = mat * *in; - in = (RwV3d*)((uint8*)in + stride); - out++; - } -#endif -} - -void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const CVuVector *in) -{ -#ifdef GTA_PS2 - __asm__ __volatile__("\n\ - lqc2 vf02,0x0(%2)\n\ - lqc2 vf03,0x10(%2)\n\ - lqc2 vf04,0x20(%2)\n\ - lqc2 vf05,0x30(%2)\n\ - lqc2 vf01,0x0(%3)\n\ - nop\n\ - 1: vmulax.xyz ACC, vf02,vf01\n\ - vmadday.xyz ACC, vf03,vf01\n\ - vmaddaz.xyz ACC, vf04,vf01\n\ - vmaddw.xyz vf06,vf05,vf00\n\ - lqc2 vf01,0x10(%3)\n\ - addiu %3,%3,0x10\n\ - addi %1,%1,-1\n\ - addiu %0,%0,0x10\n\ - sqc2 vf06,-0x10(%0)\n\ - bnez %1,1b\n\ - ": : "r" (out) , "r" (n), "r" (&mat) ,"r" (in): "memory"); -#else - while(n--){ - *out = mat * *in; - in++; - out++; - } -#endif -} diff --git a/src/miami/math/maths.h b/src/miami/math/maths.h index 6a228036..a4701d3e 100644 --- a/src/miami/math/maths.h +++ b/src/miami/math/maths.h @@ -1,5 +1,31 @@ #pragma once +#include "src/common_defines.h" + + +#ifdef DC_SH4 + +#define mat_trans_nodiv_nomod(x, y, z, x2, y2, z2, w2) do { \ + register float __x __asm__("fr12") = (x); \ + register float __y __asm__("fr13") = (y); \ + register float __z __asm__("fr14") = (z); \ + register float __w __asm__("fr15") = 1.0f; \ + __asm__ __volatile__( "ftrv xmtrx, fv12\n" \ + : "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \ + : "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \ + x2 = __x; y2 = __y; z2 = __z; w2 = __w; \ + } while(false) + +#else + +#define mat_trans_nodiv_nomod(x_, y_, z_, x2, y2, z2, w2) do { \ + vector_t tmp = { x_, y_, z_, 1.0f }; \ + mat_transform(&tmp, &tmp, 1, 0); \ + x2 = tmp.x; y2 = tmp.y; z2 = tmp.z; w2 = tmp.w; \ + } while(false) +#endif + + // wrapper around float versions of functions // in gta they are in CMaths but that makes the code rather noisy