Acceleration working in miami.

This commit is contained in:
Falco Girgis
2025-03-28 00:17:25 -05:00
parent 02799207ed
commit 0ba1c595cd
10 changed files with 302 additions and 144 deletions

View File

@@ -118,6 +118,7 @@ OBJS_NO_FAST_MATH = \
../src/miami/core/Cam.o \ ../src/miami/core/Cam.o \
../src/miami/core/Camera.o \ ../src/miami/core/Camera.o \
../src/miami/vehicles/Bike.o \ ../src/miami/vehicles/Bike.o \
../src/miami/vehicles/Boat.o \
../src/miami/renderer/Particle.o ../src/miami/renderer/Particle.o
KOS_CPPFLAGS += -fbuiltin -ffast-math -ffp-contract=fast \ KOS_CPPFLAGS += -fbuiltin -ffast-math -ffp-contract=fast \

View File

@@ -1,6 +1,6 @@
#pragma once #pragma once
struct CColLine struct alignas(8) CColLine
{ {
// NB: this has to be compatible with two CVuVectors // NB: this has to be compatible with two CVuVectors
CVector p0; CVector p0;

View File

@@ -1,6 +1,6 @@
#pragma once #pragma once
struct CColPoint struct alignas(8) CColPoint
{ {
CVector point; CVector point;
int pad1; int pad1;

View File

@@ -2,7 +2,7 @@
#include "SurfaceTable.h" #include "SurfaceTable.h"
struct CSphere struct alignas(8) CSphere
{ {
// NB: this has to be compatible with a CVuVector // NB: this has to be compatible with a CVuVector
CVector center; CVector center;
@@ -15,6 +15,11 @@ struct CColSphere : public CSphere
uint8 surface; uint8 surface;
uint8 piece; uint8 piece;
void Set(float radius, uint8 surf = SURFACE_DEFAULT, uint8 piece = 0) {
this->radius = radius;
this->surface = surf;
this->piece = piece;
}
void Set(float radius, const CVector &center, uint8 surf, uint8 piece); void Set(float radius, const CVector &center, uint8 surf, uint8 piece);
bool IntersectRay(CVector const &from, CVector const &dir, CVector &entry, CVector &exit); bool IntersectRay(CVector const &from, CVector const &dir, CVector &entry, CVector &exit);
using CSphere::Set; using CSphere::Set;

View File

@@ -24,6 +24,10 @@
#include "Camera.h" #include "Camera.h"
#include "ColStore.h" #include "ColStore.h"
#ifdef DC_SH4
#include "VuCollision.h"
#endif
#ifdef VU_COLLISION #ifdef VU_COLLISION
#include "VuCollision.h" #include "VuCollision.h"
@@ -572,7 +576,12 @@ CCollision::TestLineOfSight(const CColLine &line, const CMatrix &matrix, CColMod
// transform line to model space // transform line to model space
Invert(matrix, matTransform); Invert(matrix, matTransform);
CColLine newline(matTransform * line.p0, matTransform * line.p1); CColLine newline;
#ifndef DC_SH4
newline.Set(matTransform * line.p0, matTransform * line.p1);
#else
TransformPoints(reinterpret_cast<CVuVector*>(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2);
#endif
// If we don't intersect with the bounding box, no chance on the rest // If we don't intersect with the bounding box, no chance on the rest
if(!TestLineBox(newline, model.boundingBox)) if(!TestLineBox(newline, model.boundingBox))
@@ -1428,7 +1437,12 @@ CCollision::ProcessLineOfSight(const CColLine &line,
// transform line to model space // transform line to model space
Invert(matrix, matTransform); Invert(matrix, matTransform);
CColLine newline(matTransform * line.p0, matTransform * line.p1); CColLine newline;
#ifdef DC_SH4
TransformPoints(reinterpret_cast<CVuVector*>(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2);
#else
newline.Set(matTransform * line.p0, matTransform * line.p1);
#endif
// If we don't intersect with the bounding box, no chance on the rest // If we don't intersect with the bounding box, no chance on the rest
if(!TestLineBox(newline, model.boundingBox)) if(!TestLineBox(newline, model.boundingBox))
@@ -1455,9 +1469,18 @@ CCollision::ProcessLineOfSight(const CColLine &line,
} }
if(coldist < mindist){ if(coldist < mindist){
#ifndef DC_SH4
point.point = matrix * point.point; point.point = matrix * point.point;
point.normal = Multiply3x3(matrix, point.normal); point.normal = Multiply3x3(matrix, point.normal);
mindist = coldist; #else
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrix)));
mat_trans_single3_nodiv(point.point.x,
point.point.y,
point.point.z);
mat_trans_normal3(point.normal.x,
point.normal.y,
point.normal.z);
#endif
return true; return true;
} }
return false; return false;
@@ -1593,7 +1616,14 @@ CCollision::ProcessVerticalLine(const CColLine &line,
// transform line to model space // transform line to model space
// Why does the game seem to do this differently than above? // Why does the game seem to do this differently than above?
CColLine newline(MultiplyInverse(matrix, line.p0), MultiplyInverse(matrix, line.p1)); CMatrix matTransform;
Invert(matrix, matTransform);
CColLine newline;
#ifndef DC_SH4
newline.Set(matTransform * line.p0, matTransform * line.p1);
#else
TransformPoints(reinterpret_cast<CVuVector*>(&newline), 2, matTransform, &line.p0, sizeof(CColLine)/2);
#endif
if(!TestLineBox(newline, model.boundingBox)) if(!TestLineBox(newline, model.boundingBox))
return false; return false;
@@ -1618,13 +1648,29 @@ CCollision::ProcessVerticalLine(const CColLine &line,
} }
if(coldist < mindist){ if(coldist < mindist){
#ifndef DC_SH4
point.point = matrix * point.point; point.point = matrix * point.point;
point.normal = Multiply3x3(matrix, point.normal); point.normal = Multiply3x3(matrix, point.normal);
#else
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrix)));
mat_trans_single3_nodiv(point.point.x,
point.point.y,
point.point.z);
mat_trans_normal3(point.normal.x,
point.normal.y,
point.normal.z);
#endif
if(TempStoredPoly.valid && poly){ if(TempStoredPoly.valid && poly){
*poly = TempStoredPoly; *poly = TempStoredPoly;
#ifndef DC_SH4
poly->verts[0] = matrix * poly->verts[0]; poly->verts[0] = matrix * poly->verts[0];
poly->verts[1] = matrix * poly->verts[1]; poly->verts[1] = matrix * poly->verts[1];
poly->verts[2] = matrix * poly->verts[2]; poly->verts[2] = matrix * poly->verts[2];
#else
mat_trans_single3_nodiv(poly->verts[0].x, poly->verts[0].y, poly->verts[0].z);
mat_trans_single3_nodiv(poly->verts[1].x, poly->verts[1].y, poly->verts[1].z);
mat_trans_single3_nodiv(poly->verts[2].x, poly->verts[2].y, poly->verts[2].z);
#endif
} }
mindist = coldist; mindist = coldist;
return true; return true;
@@ -1981,21 +2027,52 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
CColSphere bsphereAB; // bounding sphere of A in B space CColSphere bsphereAB; // bounding sphere of A in B space
bsphereAB.radius = modelA.boundingSphere.radius; bsphereAB.radius = modelA.boundingSphere.radius;
#ifndef DC_SH4
bsphereAB.center = matAB * modelA.boundingSphere.center; bsphereAB.center = matAB * modelA.boundingSphere.center;
#else
/* No need to reload the matrix, since it's already banked.
mat_load(reinterpret_cast<const matrix_t *>(&matAB)); */
mat_trans_single3_nodiv_nomod(modelA.boundingSphere.center.x,
modelA.boundingSphere.center.y,
modelA.boundingSphere.center.z,
bsphereAB.center.x,
bsphereAB.center.y,
bsphereAB.center.z);
#endif
if(!TestSphereBox(bsphereAB, modelB.boundingBox)) if(!TestSphereBox(bsphereAB, modelB.boundingBox))
return 0; return 0;
// B to A space
matBA = Invert(matrixA, matBA);
matBA *= matrixB;
// transform modelA's spheres and lines to B space // transform modelA's spheres and lines to B space
for(i = 0; i < modelA.numSpheres; i++){ for(i = 0; i < modelA.numSpheres; i++){
CColSphere &s = modelA.spheres[i]; CColSphere &s = modelA.spheres[i];
#ifndef DC_SH4
aSpheresA[i].Set(s.radius, matAB * s.center, s.surface, s.piece); aSpheresA[i].Set(s.radius, matAB * s.center, s.surface, s.piece);
#else
auto &d = aSpheresA[i];
mat_trans_single3_nodiv_nomod(s.center.x, s.center.y, s.center.z,
d.center.x, d.center.y, d.center.z);
d.Set(s.radius, s.surface, s.piece);
#endif
} }
for(i = 0; i < modelA.numLines; i++)
aLinesA[i].Set(matAB * modelA.lines[i].p0, matAB * modelA.lines[i].p1);
for(i = 0; i < modelA.numLines; i++) {
#ifndef DC_SH4
aLinesA[i].Set(matAB * modelA.lines[i].p0, matAB * modelA.lines[i].p1);
#else
mat_trans_single3_nodiv_nomod(modelA.lines[i].p0.x,
modelA.lines[i].p0.y,
modelA.lines[i].p0.z,
aLinesA[i].p0.x,
aLinesA[i].p0.y,
aLinesA[i].p0.z);
mat_trans_single3_nodiv_nomod(modelA.lines[i].p1.x,
modelA.lines[i].p1.y,
modelA.lines[i].p1.z,
aLinesA[i].p1.x,
aLinesA[i].p1.y,
aLinesA[i].p1.z);
#endif
}
// Test them against model B's bounding volumes // Test them against model B's bounding volumes
int numSpheresA = 0; int numSpheresA = 0;
int numLinesA = 0; int numLinesA = 0;
@@ -2013,9 +2090,25 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
int numSpheresB = 0; int numSpheresB = 0;
int numBoxesB = 0; int numBoxesB = 0;
int numTrianglesB = 0; int numTrianglesB = 0;
// B to A space
matBA = Invert(matrixA, matBA);
matBA *= matrixB;
#ifdef DC_SH4
/* No need to reload the matrix, since it's already banked.
mat_load(reinterpret_cast<const matrix_t *>(&matBA)); */
#endif
for(i = 0; i < modelB.numSpheres; i++){ for(i = 0; i < modelB.numSpheres; i++){
s.radius = modelB.spheres[i].radius; s.radius = modelB.spheres[i].radius;
#ifndef DC_SH4
s.center = matBA * modelB.spheres[i].center; s.center = matBA * modelB.spheres[i].center;
#else
mat_trans_single3_nodiv_nomod(modelB.spheres[i].center.x,
modelB.spheres[i].center.y,
modelB.spheres[i].center.z,
s.center.x,
s.center.y,
s.center.z);
#endif
if(TestSphereBox(s, modelA.boundingBox)) if(TestSphereBox(s, modelA.boundingBox))
aSphereIndicesB[numSpheresB++] = i; aSphereIndicesB[numSpheresB++] = i;
} }
@@ -2062,9 +2155,22 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
if(hasCollided) if(hasCollided)
numCollisions++; numCollisions++;
} }
#ifdef DC_SH4
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&matrixB)));
#endif
for(i = 0; i < numCollisions; i++){ for(i = 0; i < numCollisions; i++){
#ifndef DC_SH4
spherepoints[i].point = matrixB * spherepoints[i].point; spherepoints[i].point = matrixB * spherepoints[i].point;
spherepoints[i].normal = Multiply3x3(matrixB, spherepoints[i].normal); spherepoints[i].normal = Multiply3x3(matrixB, spherepoints[i].normal);
#else
mat_trans_single3_nodiv(spherepoints[i].point.x,
spherepoints[i].point.y,
spherepoints[i].point.z);
mat_trans_normal3(spherepoints[i].normal.x,
spherepoints[i].normal.y,
spherepoints[i].normal.z);
#endif
} }
// And the same thing for the lines in A // And the same thing for the lines in A
@@ -2095,8 +2201,17 @@ CCollision::ProcessColModels(const CMatrix &matrixA, CColModel &modelA,
for(i = 0; i < numLinesA; i++) for(i = 0; i < numLinesA; i++)
if(aCollided[i]){ if(aCollided[i]){
j = aLineIndicesA[i]; j = aLineIndicesA[i];
#ifndef DC_SH4
linepoints[j].point = matrixB * linepoints[j].point; linepoints[j].point = matrixB * linepoints[j].point;
linepoints[j].normal = Multiply3x3(matrixB, linepoints[j].normal); linepoints[j].normal = Multiply3x3(matrixB, linepoints[j].normal);
#else
mat_trans_single3_nodiv(linepoints[j].point.x,
linepoints[j].point.y,
linepoints[j].point.z);
mat_trans_normal3(linepoints[j].normal.x,
linepoints[j].normal.y,
linepoints[j].normal.z);
#endif
} }
return numCollisions; // sphere collisions return numCollisions; // sphere collisions

View File

@@ -1,11 +1,5 @@
#include "common.h" #include "common.h"
CMatrix::CMatrix(void)
{
m_attachment = nil;
m_hasRwMatrix = false;
}
CMatrix::CMatrix(CMatrix const &m) CMatrix::CMatrix(CMatrix const &m)
{ {
m_attachment = nil; m_attachment = nil;
@@ -434,6 +428,11 @@ operator*(const CMatrix &m1, const CMatrix &m2)
{ {
// TODO: VU0 code // TODO: VU0 code
CMatrix out; CMatrix out;
#if defined(RW_DC)
mat_load(reinterpret_cast<const matrix_t *>(&m1));
mat_apply(reinterpret_cast<const matrix_t *>(&m2));
mat_store(reinterpret_cast<matrix_t *>(&out));
#else
out.rx = m1.rx * m2.rx + m1.fx * m2.ry + m1.ux * m2.rz; out.rx = m1.rx * m2.rx + m1.fx * m2.ry + m1.ux * m2.rz;
out.ry = m1.ry * m2.rx + m1.fy * m2.ry + m1.uy * m2.rz; out.ry = m1.ry * m2.rx + m1.fy * m2.ry + m1.uy * m2.rz;
out.rz = m1.rz * m2.rx + m1.fz * m2.ry + m1.uz * m2.rz; out.rz = m1.rz * m2.rx + m1.fz * m2.ry + m1.uz * m2.rz;
@@ -446,6 +445,7 @@ operator*(const CMatrix &m1, const CMatrix &m2)
out.px = m1.rx * m2.px + m1.fx * m2.py + m1.ux * m2.pz + m1.px; out.px = m1.rx * m2.px + m1.fx * m2.py + m1.ux * m2.pz + m1.px;
out.py = m1.ry * m2.px + m1.fy * m2.py + m1.uy * m2.pz + m1.py; out.py = m1.ry * m2.px + m1.fy * m2.py + m1.uy * m2.pz + m1.py;
out.pz = m1.rz * m2.px + m1.fz * m2.py + m1.uz * m2.pz + m1.pz; out.pz = m1.rz * m2.px + m1.fz * m2.py + m1.uz * m2.pz + m1.pz;
#endif
return out; return out;
} }

View File

@@ -1,6 +1,6 @@
#pragma once #pragma once
class CMatrix class alignas(8) CMatrix
{ {
public: public:
#ifdef GTA_PS2 #ifdef GTA_PS2
@@ -23,18 +23,18 @@ public:
float f[4][4]; float f[4][4];
struct struct
{ {
float rx, ry, rz, rw; float rx, ry, rz, rw=0.0f;
float fx, fy, fz, fw; float fx, fy, fz, fw=0.0f;
float ux, uy, uz, uw; float ux, uy, uz, uw=0.0f;
float px, py, pz, pw; float px, py, pz, pw=1.0f;
}; };
}; };
RwMatrix *m_attachment; RwMatrix *m_attachment=nil;
bool m_hasRwMatrix; // are we the owner? bool m_hasRwMatrix=false; // are we the owner?
#endif #endif
CMatrix(void); CMatrix(void)=default;
CMatrix(CMatrix const &m); CMatrix(CMatrix const &m);
CMatrix(RwMatrix *matrix, bool owner = false); CMatrix(RwMatrix *matrix, bool owner = false);
CMatrix(float scale){ CMatrix(float scale){

View File

@@ -1,6 +1,14 @@
#pragma once #pragma once
class TYPEALIGN(16) CVuVector : public CVector #include "maths.h"
#ifdef RW_DC
#define VECTOR_ALIGN 8
#else
#define VECTOR_ALIGN 16
#endif
class TYPEALIGN(VECTOR_ALIGN) CVuVector : public CVector
{ {
public: public:
float w; float w;
@@ -26,7 +34,123 @@ public:
// TODO: operator- // TODO: operator-
}; };
void TransformPoint(CVuVector &out, const CMatrix &mat, const CVuVector &in); __always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const CVuVector &in)
void TransformPoint(CVuVector &out, const CMatrix &mat, const RwV3d &in); {
void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride); #ifdef GTA_PS2
void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const CVuVector *in); __asm__ __volatile__("\n\
lqc2 vf01,0x0(%2)\n\
lqc2 vf02,0x0(%1)\n\
lqc2 vf03,0x10(%1)\n\
lqc2 vf04,0x20(%1)\n\
lqc2 vf05,0x30(%1)\n\
vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
sqc2 vf06,0x0(%0)\n\
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
#elif defined(DC_SH4)
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
mat_trans_nodiv_nomod(in.x, in.y, in.z, out.x, out.y, out.z, out.y);
#else
out = mat * in;
#endif
}
__always_inline void TransformPoint(CVuVector &out, const CMatrix &mat, const RwV3d &in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
ldr $8,0x0(%2)\n\
ldl $8,0x7(%2)\n\
lw $9,0x8(%2)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
lqc2 vf02,0x0(%1)\n\
lqc2 vf03,0x10(%1)\n\
lqc2 vf04,0x20(%1)\n\
lqc2 vf05,0x30(%1)\n\
vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
sqc2 vf06,0x0(%0)\n\
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
#elif defined(DC_SH4)
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
mat_trans_nodiv_nomod(in.x, in.y, in.z, out.x, out.y, out.z, out.y);
#else
out = mat * in;
#endif
}
__always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride)
{
#ifdef GTA_PS3
__asm__ __volatile__("\n\
paddub $3,%4,$0\n\
lqc2 vf02,0x0(%2)\n\
lqc2 vf03,0x10(%2)\n\
lqc2 vf04,0x20(%2)\n\
lqc2 vf05,0x30(%2)\n\
ldr $8,0x0(%3)\n\
ldl $8,0x7(%3)\n\
lw $9,0x8(%3)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
1: vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
add %3,%3,$3\n\
ldr $8,0x0(%3)\n\
ldl $8,0x7(%3)\n\
lw $9,0x8(%3)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
addi %1,%1,-1\n\
addiu %0,%0,0x10\n\
sqc2 vf06,-0x10(%0)\n\
bnez %1,1b\n\
": : "r" (out) , "r" (n), "r" (&mat), "r" (in), "r" (stride): "memory");
#elif defined(DC_SH4)
mat_load(reinterpret_cast<matrix_t *>(const_cast<CMatrix *>(&mat)));
while(n--) {
mat_trans_single3_nodiv_nomod(in->x, in->y, in->z, out->x, out->y, out->z);
in = reinterpret_cast<const RwV3d *>(reinterpret_cast<const uint8_t *>(in) + stride);
++out;
}
#else
while(n--){
*out = mat * *in;
in = (RwV3d*)((uint8*)in + stride);
out++;
}
#endif
}
__always_inline void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const CVuVector *in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
lqc2 vf02,0x0(%2)\n\
lqc2 vf03,0x10(%2)\n\
lqc2 vf04,0x20(%2)\n\
lqc2 vf05,0x30(%2)\n\
lqc2 vf01,0x0(%3)\n\
nop\n\
1: vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
lqc2 vf01,0x10(%3)\n\
addiu %3,%3,0x10\n\
addi %1,%1,-1\n\
addiu %0,%0,0x10\n\
sqc2 vf06,-0x10(%0)\n\
bnez %1,1b\n\
": : "r" (out) , "r" (n), "r" (&mat) ,"r" (in): "memory");
#else
TransformPoints(out, n, mat, in, sizeof(CVuVector));
#endif
}

View File

@@ -3,116 +3,3 @@
#include "VuVector.h" #include "VuVector.h"
// TODO: move more stuff into here // TODO: move more stuff into here
void TransformPoint(CVuVector &out, const CMatrix &mat, const CVuVector &in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
lqc2 vf01,0x0(%2)\n\
lqc2 vf02,0x0(%1)\n\
lqc2 vf03,0x10(%1)\n\
lqc2 vf04,0x20(%1)\n\
lqc2 vf05,0x30(%1)\n\
vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
sqc2 vf06,0x0(%0)\n\
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
#else
out = mat * in;
#endif
}
void TransformPoint(CVuVector &out, const CMatrix &mat, const RwV3d &in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
ldr $8,0x0(%2)\n\
ldl $8,0x7(%2)\n\
lw $9,0x8(%2)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
lqc2 vf02,0x0(%1)\n\
lqc2 vf03,0x10(%1)\n\
lqc2 vf04,0x20(%1)\n\
lqc2 vf05,0x30(%1)\n\
vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
sqc2 vf06,0x0(%0)\n\
": : "r" (&out) , "r" (&mat) ,"r" (&in): "memory");
#else
out = mat * in;
#endif
}
void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const RwV3d *in, int stride)
{
#ifdef GTA_PS3
__asm__ __volatile__("\n\
paddub $3,%4,$0\n\
lqc2 vf02,0x0(%2)\n\
lqc2 vf03,0x10(%2)\n\
lqc2 vf04,0x20(%2)\n\
lqc2 vf05,0x30(%2)\n\
ldr $8,0x0(%3)\n\
ldl $8,0x7(%3)\n\
lw $9,0x8(%3)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
1: vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
add %3,%3,$3\n\
ldr $8,0x0(%3)\n\
ldl $8,0x7(%3)\n\
lw $9,0x8(%3)\n\
pcpyld $10,$9,$8\n\
qmtc2 $10,vf01\n\
addi %1,%1,-1\n\
addiu %0,%0,0x10\n\
sqc2 vf06,-0x10(%0)\n\
bnez %1,1b\n\
": : "r" (out) , "r" (n), "r" (&mat), "r" (in), "r" (stride): "memory");
#else
while(n--){
*out = mat * *in;
in = (RwV3d*)((uint8*)in + stride);
out++;
}
#endif
}
void TransformPoints(CVuVector *out, int n, const CMatrix &mat, const CVuVector *in)
{
#ifdef GTA_PS2
__asm__ __volatile__("\n\
lqc2 vf02,0x0(%2)\n\
lqc2 vf03,0x10(%2)\n\
lqc2 vf04,0x20(%2)\n\
lqc2 vf05,0x30(%2)\n\
lqc2 vf01,0x0(%3)\n\
nop\n\
1: vmulax.xyz ACC, vf02,vf01\n\
vmadday.xyz ACC, vf03,vf01\n\
vmaddaz.xyz ACC, vf04,vf01\n\
vmaddw.xyz vf06,vf05,vf00\n\
lqc2 vf01,0x10(%3)\n\
addiu %3,%3,0x10\n\
addi %1,%1,-1\n\
addiu %0,%0,0x10\n\
sqc2 vf06,-0x10(%0)\n\
bnez %1,1b\n\
": : "r" (out) , "r" (n), "r" (&mat) ,"r" (in): "memory");
#else
while(n--){
*out = mat * *in;
in++;
out++;
}
#endif
}

View File

@@ -1,5 +1,31 @@
#pragma once #pragma once
#include "src/common_defines.h"
#ifdef DC_SH4
#define mat_trans_nodiv_nomod(x, y, z, x2, y2, z2, w2) do { \
register float __x __asm__("fr12") = (x); \
register float __y __asm__("fr13") = (y); \
register float __z __asm__("fr14") = (z); \
register float __w __asm__("fr15") = 1.0f; \
__asm__ __volatile__( "ftrv xmtrx, fv12\n" \
: "=f" (__x), "=f" (__y), "=f" (__z), "=f" (__w) \
: "0" (__x), "1" (__y), "2" (__z), "3" (__w) ); \
x2 = __x; y2 = __y; z2 = __z; w2 = __w; \
} while(false)
#else
#define mat_trans_nodiv_nomod(x_, y_, z_, x2, y2, z2, w2) do { \
vector_t tmp = { x_, y_, z_, 1.0f }; \
mat_transform(&tmp, &tmp, 1, 0); \
x2 = tmp.x; y2 = tmp.y; z2 = tmp.z; w2 = tmp.w; \
} while(false)
#endif
// wrapper around float versions of functions // wrapper around float versions of functions
// in gta they are in CMaths but that makes the code rather noisy // in gta they are in CMaths but that makes the code rather noisy