mirror of
https://gitlab.com/skmp/dca3-game.git
synced 2025-09-01 10:42:34 +02:00
Fixed rw::Matrix inversion routines for 4x4s.
This commit is contained in:
211
vendor/librw/src/base.cpp
vendored
211
vendor/librw/src/base.cpp
vendored
@@ -530,79 +530,11 @@ void
|
||||
Matrix::mult_(Matrix *dst, const Matrix *src1, const Matrix *src2)
|
||||
{
|
||||
|
||||
#if !defined(DC_TEXCONV) && !defined(DC_SIM)
|
||||
#if 1
|
||||
#if 1
|
||||
#ifdef RW_DC
|
||||
mat_mult(reinterpret_cast<matrix_t *>(dst),
|
||||
reinterpret_cast<const matrix_t *>(src2),
|
||||
reinterpret_cast<const matrix_t *>(src1));
|
||||
|
||||
#else
|
||||
unsigned int prefetch_scratch;
|
||||
|
||||
asm volatile (
|
||||
"mov %[bmtrx], %[pref_scratch]\n\t" // (MT)
|
||||
"add #32, %[pref_scratch]\n\t" // offset by 32 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
|
||||
"fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) (FE)
|
||||
"pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
|
||||
// back matrix
|
||||
"fmov.d @%[bmtrx]+, XD0\n\t" // (LS)
|
||||
"fmov.d @%[bmtrx]+, XD2\n\t"
|
||||
"fmov.d @%[bmtrx]+, XD4\n\t"
|
||||
"fmov.d @%[bmtrx]+, XD6\n\t"
|
||||
"pref @%[fmtrx]\n\t" // prefetch fmtrx now while we wait (LS)
|
||||
"fmov.d @%[bmtrx]+, XD8\n\t" // bmtrx prefetch should work for here
|
||||
"fmov.d @%[bmtrx]+, XD10\n\t"
|
||||
"fmov.d @%[bmtrx]+, XD12\n\t"
|
||||
"mov %[fmtrx], %[pref_scratch]\n\t" // (MT)
|
||||
"add #32, %[pref_scratch]\n\t" // store offset by 32 in r0 (EX - flow dependency, but 'add' is actually parallelized since 'mov Rm, Rn' is 0-cycle)
|
||||
"fmov.d @%[bmtrx], XD14\n\t"
|
||||
"pref @%[pref_scratch]\n\t" // Get a head start prefetching the second half of the 64-byte data (LS)
|
||||
// front matrix
|
||||
// interleave loads and matrix multiply 4x4
|
||||
"fmov.d @%[fmtrx]+, DR0\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR2\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR4\n\t" // (LS) want to issue the next one before 'ftrv' for parallel exec
|
||||
"fldi0 FR3\n\t"
|
||||
"ftrv XMTRX, FV0\n\t" // (FE)
|
||||
|
||||
"fmov.d @%[fmtrx]+, DR6\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR8\n\t"
|
||||
"fldi0 FR7\n\t"
|
||||
"ftrv XMTRX, FV4\n\t"
|
||||
|
||||
"fmov.d @%[fmtrx]+, DR10\n\t"
|
||||
"fldi0 FR11\n\t"
|
||||
"ftrv XMTRX, FV8\n\t"
|
||||
|
||||
"fmov.d @%[fmtrx]+, DR12\n\t"
|
||||
"fmov.d @%[fmtrx]+, DR14\n\t"
|
||||
"fldi1 FR15\n\t"
|
||||
"fschg\n\t" // switch back to single moves (and avoid stalling 'ftrv') (FE)
|
||||
"ftrv XMTRX, FV12\n\t" // (FE)
|
||||
// Save output in XF regs
|
||||
"frchg\n"
|
||||
: [bmtrx] "+&r" ((unsigned int)src2), [fmtrx] "+r" ((unsigned int)src1), [pref_scratch] "=&r" (prefetch_scratch) // outputs, "+" means r/w, "&" means it's written to before all inputs are consumed
|
||||
: // no inputs
|
||||
: "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15" // clobbers (GCC doesn't know about back bank, so writing to it isn't clobbered)
|
||||
);
|
||||
mat_store(reinterpret_cast<matrix_t *>(dst));
|
||||
#endif
|
||||
#else
|
||||
dst->right.x = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
|
||||
dst->right.y = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
|
||||
dst->right.z = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
|
||||
dst->up.x = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
|
||||
dst->up.y = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
|
||||
dst->up.z = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
|
||||
dst->at.x = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
|
||||
dst->at.y = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
|
||||
dst->at.z = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
|
||||
dst->pos.x = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.x, src2->up.x, src2->at.x, src2->pos.x);
|
||||
dst->pos.y = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.y, src2->up.y, src2->at.y, src2->pos.y);
|
||||
dst->pos.z = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.z, src2->up.z, src2->at.z, src2->pos.z);
|
||||
#endif
|
||||
#else
|
||||
dst->right.x = src1->right.x*src2->right.x + src1->right.y*src2->up.x + src1->right.z*src2->at.x;
|
||||
dst->right.y = src1->right.x*src2->right.y + src1->right.y*src2->up.y + src1->right.z*src2->at.y;
|
||||
dst->right.z = src1->right.x*src2->right.z + src1->right.y*src2->up.z + src1->right.z*src2->at.z;
|
||||
@@ -615,13 +547,12 @@ Matrix::mult_(Matrix *dst, const Matrix *src1, const Matrix *src2)
|
||||
dst->pos.x = src1->pos.x*src2->right.x + src1->pos.y*src2->up.x + src1->pos.z*src2->at.x + src2->pos.x;
|
||||
dst->pos.y = src1->pos.x*src2->right.y + src1->pos.y*src2->up.y + src1->pos.z*src2->at.y + src2->pos.y;
|
||||
dst->pos.z = src1->pos.x*src2->right.z + src1->pos.y*src2->up.z + src1->pos.z*src2->at.z + src2->pos.z;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
|
||||
{
|
||||
#if 0
|
||||
dst->right.x = src->right.x;
|
||||
dst->right.y = src->up.x;
|
||||
dst->right.z = src->at.x;
|
||||
@@ -645,15 +576,11 @@ Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
|
||||
src->pos.y*src->at.y +
|
||||
src->pos.z*src->at.z);
|
||||
dst->posw = 1.0f;
|
||||
#else
|
||||
invertGeneral(dst, src);
|
||||
#endif
|
||||
}
|
||||
|
||||
Matrix*
|
||||
Matrix::invertGeneral(Matrix *dst, const Matrix *src)
|
||||
{
|
||||
#if 0
|
||||
float32 det, invdet;
|
||||
// calculate a few cofactors
|
||||
dst->right.x = src->up.y*src->at.z - src->up.z*src->at.y;
|
||||
@@ -677,136 +604,10 @@ Matrix::invertGeneral(Matrix *dst, const Matrix *src)
|
||||
dst->pos.y = -(src->pos.x*dst->right.y + src->pos.y*dst->up.y + src->pos.z*dst->at.y);
|
||||
dst->pos.z = -(src->pos.x*dst->right.z + src->pos.y*dst->up.z + src->pos.z*dst->at.z);
|
||||
dst->flags &= ~IDENTITY;
|
||||
#else
|
||||
float inv[16], det;
|
||||
const float *m = reinterpret_cast<const float*>(src);
|
||||
float *out = reinterpret_cast<float*>(dst);
|
||||
int i;
|
||||
|
||||
inv[0] = m[5] * m[10] * m[15] -
|
||||
m[5] * m[11] * m[14] -
|
||||
m[9] * m[6] * m[15] +
|
||||
m[9] * m[7] * m[14] +
|
||||
m[13] * m[6] * m[11] -
|
||||
m[13] * m[7] * m[10];
|
||||
|
||||
inv[4] = -m[4] * m[10] * m[15] +
|
||||
m[4] * m[11] * m[14] +
|
||||
m[8] * m[6] * m[15] -
|
||||
m[8] * m[7] * m[14] -
|
||||
m[12] * m[6] * m[11] +
|
||||
m[12] * m[7] * m[10];
|
||||
|
||||
inv[8] = m[4] * m[9] * m[15] -
|
||||
m[4] * m[11] * m[13] -
|
||||
m[8] * m[5] * m[15] +
|
||||
m[8] * m[7] * m[13] +
|
||||
m[12] * m[5] * m[11] -
|
||||
m[12] * m[7] * m[9];
|
||||
|
||||
inv[12] = -m[4] * m[9] * m[14] +
|
||||
m[4] * m[10] * m[13] +
|
||||
m[8] * m[5] * m[14] -
|
||||
m[8] * m[6] * m[13] -
|
||||
m[12] * m[5] * m[10] +
|
||||
m[12] * m[6] * m[9];
|
||||
|
||||
inv[1] = -m[1] * m[10] * m[15] +
|
||||
m[1] * m[11] * m[14] +
|
||||
m[9] * m[2] * m[15] -
|
||||
m[9] * m[3] * m[14] -
|
||||
m[13] * m[2] * m[11] +
|
||||
m[13] * m[3] * m[10];
|
||||
|
||||
inv[5] = m[0] * m[10] * m[15] -
|
||||
m[0] * m[11] * m[14] -
|
||||
m[8] * m[2] * m[15] +
|
||||
m[8] * m[3] * m[14] +
|
||||
m[12] * m[2] * m[11] -
|
||||
m[12] * m[3] * m[10];
|
||||
|
||||
inv[9] = -m[0] * m[9] * m[15] +
|
||||
m[0] * m[11] * m[13] +
|
||||
m[8] * m[1] * m[15] -
|
||||
m[8] * m[3] * m[13] -
|
||||
m[12] * m[1] * m[11] +
|
||||
m[12] * m[3] * m[9];
|
||||
|
||||
inv[13] = m[0] * m[9] * m[14] -
|
||||
m[0] * m[10] * m[13] -
|
||||
m[8] * m[1] * m[14] +
|
||||
m[8] * m[2] * m[13] +
|
||||
m[12] * m[1] * m[10] -
|
||||
m[12] * m[2] * m[9];
|
||||
|
||||
inv[2] = m[1] * m[6] * m[15] -
|
||||
m[1] * m[7] * m[14] -
|
||||
m[5] * m[2] * m[15] +
|
||||
m[5] * m[3] * m[14] +
|
||||
m[13] * m[2] * m[7] -
|
||||
m[13] * m[3] * m[6];
|
||||
|
||||
inv[6] = -m[0] * m[6] * m[15] +
|
||||
m[0] * m[7] * m[14] +
|
||||
m[4] * m[2] * m[15] -
|
||||
m[4] * m[3] * m[14] -
|
||||
m[12] * m[2] * m[7] +
|
||||
m[12] * m[3] * m[6];
|
||||
|
||||
inv[10] = m[0] * m[5] * m[15] -
|
||||
m[0] * m[7] * m[13] -
|
||||
m[4] * m[1] * m[15] +
|
||||
m[4] * m[3] * m[13] +
|
||||
m[12] * m[1] * m[7] -
|
||||
m[12] * m[3] * m[5];
|
||||
|
||||
inv[14] = -m[0] * m[5] * m[14] +
|
||||
m[0] * m[6] * m[13] +
|
||||
m[4] * m[1] * m[14] -
|
||||
m[4] * m[2] * m[13] -
|
||||
m[12] * m[1] * m[6] +
|
||||
m[12] * m[2] * m[5];
|
||||
|
||||
inv[3] = -m[1] * m[6] * m[11] +
|
||||
m[1] * m[7] * m[10] +
|
||||
m[5] * m[2] * m[11] -
|
||||
m[5] * m[3] * m[10] -
|
||||
m[9] * m[2] * m[7] +
|
||||
m[9] * m[3] * m[6];
|
||||
|
||||
inv[7] = m[0] * m[6] * m[11] -
|
||||
m[0] * m[7] * m[10] -
|
||||
m[4] * m[2] * m[11] +
|
||||
m[4] * m[3] * m[10] +
|
||||
m[8] * m[2] * m[7] -
|
||||
m[8] * m[3] * m[6];
|
||||
|
||||
inv[11] = -m[0] * m[5] * m[11] +
|
||||
m[0] * m[7] * m[9] +
|
||||
m[4] * m[1] * m[11] -
|
||||
m[4] * m[3] * m[9] -
|
||||
m[8] * m[1] * m[7] +
|
||||
m[8] * m[3] * m[5];
|
||||
|
||||
inv[15] = m[0] * m[5] * m[10] -
|
||||
m[0] * m[6] * m[9] -
|
||||
m[4] * m[1] * m[10] +
|
||||
m[4] * m[2] * m[9] +
|
||||
m[8] * m[1] * m[6] -
|
||||
m[8] * m[2] * m[5];
|
||||
|
||||
det = m[0] * inv[0] + m[1] * inv[4] + m[2] * inv[8] + m[3] * inv[12];
|
||||
|
||||
if (det == 0.0f)
|
||||
det = 1.0f;
|
||||
else
|
||||
det = 1.0 / det;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
out[i] = inv[i] * det;
|
||||
|
||||
dst->flags &= IDENTITY;
|
||||
#endif
|
||||
dst->pad0 = 0;
|
||||
dst->upw = 0.0f;
|
||||
dst->atw = 0.0f;
|
||||
dst->posw = 1.0f;
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
57
vendor/librw/src/dc/rwdc.cpp
vendored
57
vendor/librw/src/dc/rwdc.cpp
vendored
@@ -3041,7 +3041,6 @@ static constexpr void(*tnlMeshletSkinVerticesSelector[4])(uint8_t *OCR, uint8_t
|
||||
&tnlMeshletSkinVertices<true , true >,
|
||||
};
|
||||
|
||||
#if 1
|
||||
bool
|
||||
uploadSkinMatrices(Atomic *a, Matrix* skinMatrices)
|
||||
{
|
||||
@@ -3057,17 +3056,20 @@ uploadSkinMatrices(Atomic *a, Matrix* skinMatrices)
|
||||
|
||||
__builtin_prefetch(hier->matrices);
|
||||
if(hier->flags & HAnimHierarchy::LOCALSPACEMATRICES){
|
||||
for(i = 0; i < hier->numNodes; i++) {
|
||||
for(i = 0; i < hier->numNodes - 1; i++) {
|
||||
__builtin_prefetch(&hier->matrices[i + 1]);
|
||||
mat_mult(reinterpret_cast<matrix_t*>(m),
|
||||
reinterpret_cast<const matrix_t*>(&hier->matrices[i]),
|
||||
reinterpret_cast<const matrix_t*>(&invMats[i]));
|
||||
m++;
|
||||
}
|
||||
mat_mult(reinterpret_cast<matrix_t*>(m),
|
||||
reinterpret_cast<const matrix_t*>(&hier->matrices[i]),
|
||||
reinterpret_cast<const matrix_t*>(&invMats[i]));
|
||||
}else{
|
||||
Matrix invAtmMat;
|
||||
Matrix::invert(&invAtmMat, a->getFrame()->getLTM());
|
||||
for(i = 0; i < hier->numNodes; i++){
|
||||
for(i = 0; i < hier->numNodes - 1; i++){
|
||||
__builtin_prefetch(&hier->matrices[i + 1]);
|
||||
mat_load_apply(reinterpret_cast<const matrix_t *>(&invAtmMat),
|
||||
reinterpret_cast<const matrix_t *>(&hier->matrices[i]));
|
||||
@@ -3075,49 +3077,10 @@ uploadSkinMatrices(Atomic *a, Matrix* skinMatrices)
|
||||
mat_store(reinterpret_cast<matrix_t *>(m));
|
||||
m++;
|
||||
}
|
||||
}
|
||||
}else{
|
||||
for(i = 0; i < skin->numBones; i++){
|
||||
m->setIdentity();
|
||||
m++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// optimization if the first matrix is identity
|
||||
return skinMatrices[0].identityError() < 0.01f;
|
||||
}
|
||||
#else
|
||||
|
||||
bool
|
||||
uploadSkinMatrices(Atomic *a, Matrix* skinMatrices)
|
||||
{
|
||||
int i;
|
||||
Skin *skin = Skin::get(a->geometry);
|
||||
Matrix *m = (Matrix*)skinMatrices;
|
||||
HAnimHierarchy *hier = Skin::getHierarchy(a);
|
||||
|
||||
if(hier){
|
||||
Matrix *invMats = (Matrix*)skin->inverseMatrices;
|
||||
Matrix tmp;
|
||||
|
||||
assert(skin->numBones == hier->numNodes);
|
||||
if(hier->flags & HAnimHierarchy::LOCALSPACEMATRICES){
|
||||
for(i = 0; i < hier->numNodes; i++){
|
||||
invMats[i].flags = 0;
|
||||
Matrix::mult(m, &invMats[i], &hier->matrices[i]);
|
||||
m++;
|
||||
}
|
||||
}else{
|
||||
Matrix invAtmMat;
|
||||
Matrix::invert(&invAtmMat, a->getFrame()->getLTM());
|
||||
for(i = 0; i < hier->numNodes; i++){
|
||||
invMats[i].flags = 0;
|
||||
Matrix::mult(&tmp, &hier->matrices[i], &invAtmMat);
|
||||
Matrix::mult(m, &invMats[i], &tmp);
|
||||
m++;
|
||||
}
|
||||
mat_load_apply(reinterpret_cast<const matrix_t *>(&invAtmMat),
|
||||
reinterpret_cast<const matrix_t *>(&hier->matrices[i]));
|
||||
mat_apply(reinterpret_cast<const matrix_t *>(&invMats[i]));
|
||||
mat_store(reinterpret_cast<matrix_t *>(m));
|
||||
}
|
||||
}else{
|
||||
for(i = 0; i < skin->numBones; i++){
|
||||
@@ -3133,8 +3096,6 @@ uploadSkinMatrices(Atomic *a, Matrix* skinMatrices)
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
static RawMatrix normal2texcoord = {{
|
||||
{ 0.5f / 127, 0.0f, 0.0f }, 0.0f,
|
||||
{ 0.0f, -0.5f / 127, 0.0f }, 0.0f,
|
||||
|
Reference in New Issue
Block a user