mirror of
https://gitlab.com/skmp/dca3-game.git
synced 2025-09-01 10:42:34 +02:00
Accelerated lots of RW math + Coronas (liberty)
- A lot of the RW matrix code is now accelerated. - Went through and accelerated liberty's coronas/reflections. Apparently this introduced a bug somewhere along the line that causes boats to freak out and do somersaults when you try to drive them. Will resolve later.
This commit is contained in:
@@ -300,9 +300,9 @@ CCoronas::Render(void)
|
||||
|
||||
|
||||
if(aCoronas[i].fadeAlpha && spriteCoors.z < aCoronas[i].drawDist){
|
||||
float recipz = 1.0f/spriteCoors.z;
|
||||
float recipz = dc::Invert<true, false>(spriteCoors.z);
|
||||
float fadeDistance = aCoronas[i].drawDist / 2.0f;
|
||||
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - (spriteCoors.z - fadeDistance)/fadeDistance;
|
||||
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - dc::Div<true, false>((spriteCoors.z - fadeDistance), fadeDistance);
|
||||
int totalFade = aCoronas[i].fadeAlpha * distanceFade;
|
||||
|
||||
if(aCoronas[i].LOScheck)
|
||||
@@ -313,6 +313,7 @@ CCoronas::Render(void)
|
||||
// render corona itself
|
||||
if(aCoronas[i].texture){
|
||||
float fogscale = CWeather::Foggyness*Min(spriteCoors.z, 40.0f)/40.0f + 1.0f;
|
||||
float invFogScale = dc::Invert<true, false>(fogscale);
|
||||
if(CCoronas::aCoronas[i].id == SUN_CORE)
|
||||
spriteCoors.z = 0.95f * RwCameraGetFarClipPlane(Scene.camera);
|
||||
RwRenderStateSet(rwRENDERSTATETEXTURERASTER, RwTextureGetRaster(aCoronas[i].texture));
|
||||
@@ -328,9 +329,9 @@ CCoronas::Render(void)
|
||||
CSprite::RenderOneXLUSprite(spriteCoors.x, spriteCoors.y, spriteCoors.z,
|
||||
spritew * aCoronas[i].size * wscale,
|
||||
spriteh * aCoronas[i].size * fogscale * hscale,
|
||||
CCoronas::aCoronas[i].red / fogscale,
|
||||
CCoronas::aCoronas[i].green / fogscale,
|
||||
CCoronas::aCoronas[i].blue / fogscale,
|
||||
CCoronas::aCoronas[i].red * invFogScale,
|
||||
CCoronas::aCoronas[i].green * invFogScale,
|
||||
CCoronas::aCoronas[i].blue * invFogScale,
|
||||
totalFade,
|
||||
recipz,
|
||||
255);
|
||||
@@ -339,9 +340,9 @@ CCoronas::Render(void)
|
||||
spriteCoors.x, spriteCoors.y, spriteCoors.z,
|
||||
spritew * aCoronas[i].size * fogscale,
|
||||
spriteh * aCoronas[i].size * fogscale,
|
||||
CCoronas::aCoronas[i].red / fogscale,
|
||||
CCoronas::aCoronas[i].green / fogscale,
|
||||
CCoronas::aCoronas[i].blue / fogscale,
|
||||
CCoronas::aCoronas[i].red * invFogScale,
|
||||
CCoronas::aCoronas[i].green * invFogScale,
|
||||
CCoronas::aCoronas[i].blue * invFogScale,
|
||||
totalFade,
|
||||
recipz,
|
||||
20.0f * recipz,
|
||||
@@ -365,7 +366,7 @@ CCoronas::Render(void)
|
||||
(spriteCoors.x - (screenw/2)) * flare->position + (screenw/2),
|
||||
(spriteCoors.y - (screenh/2)) * flare->position + (screenh/2),
|
||||
spriteCoors.z,
|
||||
4.0f*flare->size * spritew/spriteh,
|
||||
4.0f*flare->size * dc::Div<true, false>(spritew, spriteh),
|
||||
4.0f*flare->size,
|
||||
(flare->red * aCoronas[i].red)>>8,
|
||||
(flare->green * aCoronas[i].green)>>8,
|
||||
@@ -480,9 +481,9 @@ CCoronas::RenderReflections(void)
|
||||
drawDist = Min(drawDist, 55.0f);
|
||||
if(spriteCoors.z < drawDist){
|
||||
float fadeDistance = drawDist / 2.0f;
|
||||
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - (spriteCoors.z - fadeDistance)/fadeDistance;
|
||||
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - Div<true, false>((spriteCoors.z - fadeDistance), fadeDistance);
|
||||
distanceFade = Clamp(distanceFade, 0.0f, 1.0f);
|
||||
float recipz = 1.0f/RwCameraGetNearClipPlane(Scene.camera);
|
||||
float recipz = dc::Invert<true, false>(RwCameraGetNearClipPlane(Scene.camera));
|
||||
float heightFade = (20.0f - aCoronas[i].heightAboveRoad)/20.0f;
|
||||
int intensity = distanceFade*heightFade * 230.0 * CWeather::WetRoads;
|
||||
|
||||
@@ -606,7 +607,9 @@ CEntity::ProcessLightsForEntity(void)
|
||||
flashTimer1 = 0;
|
||||
flashTimer2 = 0;
|
||||
flashTimer3 = 0;
|
||||
|
||||
#ifdef DC_SH4
	// Load the entity matrix once before iterating the 2d effects; the
	// SH4 branch inside the loop transforms each effect position with
	// mat_trans_single3_nodiv_nomod and relies on this load.
	// Note the scope operator: "dc:" would declare a label named dc and
	// leave the call unqualified.
	dc::mat_load2(GetMatrix());
#endif
|
||||
n = CModelInfo::GetModelInfo(GetModelIndex())->GetNum2dEffects();
|
||||
for(i = 0; i < n; i++, flashTimer1 += 0x80, flashTimer2 += 0x100, flashTimer3 += 0x200){
|
||||
effect = CModelInfo::GetModelInfo(GetModelIndex())->Get2dEffect(i);
|
||||
@@ -614,8 +617,12 @@ CEntity::ProcessLightsForEntity(void)
|
||||
if(effect->type != EFFECT_LIGHT)
|
||||
continue;
|
||||
|
||||
#ifndef DC_SH4
|
||||
pos = GetMatrix() * effect->pos;
|
||||
|
||||
#else
|
||||
mat_trans_single3_nodiv_nomod(effect->pos.x, effect->pos.y, effect->pos.z,
|
||||
pos.x, pos.y, pos.z);
|
||||
#endif
|
||||
lightOn = false;
|
||||
lightFlickering = false;
|
||||
switch(effect->light.lightType){
|
||||
|
232
vendor/librw/src/base.cpp
vendored
232
vendor/librw/src/base.cpp
vendored
@@ -91,24 +91,6 @@ strncmp_ci(const char *s1, const char *s2, int n)
|
||||
return 0;
|
||||
}
|
||||
|
||||
Quat
|
||||
mult(const Quat &q, const Quat &p)
|
||||
{
|
||||
#ifndef DC_SH4
|
||||
return makeQuat(q.w*p.w - q.x*p.x - q.y*p.y - q.z*p.z,
|
||||
q.w*p.x + q.x*p.w + q.y*p.z - q.z*p.y,
|
||||
q.w*p.y + q.y*p.w + q.z*p.x - q.x*p.z,
|
||||
q.w*p.z + q.z*p.w + q.x*p.y - q.y*p.x);
|
||||
#else
|
||||
Quat o;
|
||||
dc::quat_mult(reinterpret_cast<dc::quaternion_t *>(&o),
|
||||
reinterpret_cast<const dc::quaternion_t &>(q),
|
||||
reinterpret_cast<const dc::quaternion_t &>(p));
|
||||
return o;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
Quat*
|
||||
Quat::rotate(const V3d *axis, float32 angle, CombineOp op)
|
||||
{
|
||||
@@ -166,53 +148,39 @@ slerp(const Quat &q, const Quat &p, float32 a)
|
||||
//
|
||||
// V3d
|
||||
//
|
||||
|
||||
V3d
|
||||
cross(const V3d &a, const V3d &b)
|
||||
{
|
||||
return makeV3d(a.y*b.z - a.z*b.y,
|
||||
a.z*b.x - a.x*b.z,
|
||||
a.x*b.y - a.y*b.x);
|
||||
void V3d::transformPoints(V3d *out, const V3d *in, int32 n, const Matrix *m) {
|
||||
int32 i;
|
||||
#ifndef DC_SH4
|
||||
V3d tmp;
|
||||
for(i = 0; i < n; i++){
|
||||
tmp.x = in[i].x*m->right.x + in[i].y*m->up.x + in[i].z*m->at.x + m->pos.x;
|
||||
tmp.y = in[i].x*m->right.y + in[i].y*m->up.y + in[i].z*m->at.y + m->pos.y;
|
||||
tmp.z = in[i].x*m->right.z + in[i].y*m->up.z + in[i].z*m->at.z + m->pos.z;
|
||||
out[i] = tmp;
|
||||
}
|
||||
#else
|
||||
dc::mat_load2(*m);
|
||||
for(i = 0; i < n; i++)
|
||||
mat_trans_single3_nodiv_nomod(in[i].x, in[i].y, in[i].z,
|
||||
out[i].x, out[i].y, out[i].z);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
V3d::transformPoints(V3d *out, const V3d *in, int32 n, const Matrix *m)
|
||||
{
|
||||
int32 i;
|
||||
#ifndef DC_SH4
|
||||
V3d tmp;
|
||||
for(i = 0; i < n; i++){
|
||||
tmp.x = in[i].x*m->right.x + in[i].y*m->up.x + in[i].z*m->at.x + m->pos.x;
|
||||
tmp.y = in[i].x*m->right.y + in[i].y*m->up.y + in[i].z*m->at.y + m->pos.y;
|
||||
tmp.z = in[i].x*m->right.z + in[i].y*m->up.z + in[i].z*m->at.z + m->pos.z;
|
||||
out[i] = tmp;
|
||||
}
|
||||
#else
|
||||
dc::mat_load2(*m);
|
||||
for(i = 0; i < n; i++)
|
||||
mat_trans_single3_nodiv_nomod(in[i].x, in[i].y, in[i].z,
|
||||
out[i].x, out[i].y, out[i].z);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
V3d::transformVectors(V3d *out, const V3d *in, int32 n, const Matrix *m)
|
||||
{
|
||||
int32 i;
|
||||
#ifndef DC_SH4
|
||||
V3d tmp;
|
||||
for(i = 0; i < n; i++){
|
||||
tmp.x = in[i].x*m->right.x + in[i].y*m->up.x + in[i].z*m->at.x;
|
||||
tmp.y = in[i].x*m->right.y + in[i].y*m->up.y + in[i].z*m->at.y;
|
||||
tmp.z = in[i].x*m->right.z + in[i].y*m->up.z + in[i].z*m->at.z;
|
||||
out[i] = tmp;
|
||||
}
|
||||
#else
|
||||
dc::mat_load2(*m);
|
||||
for(i = 0; i < n; i++)
|
||||
mat_trans_normal3_nomod(in[i].x, in[i].y, in[i].z,
|
||||
out[i].x, out[i].y, out[i].z);
|
||||
#endif
|
||||
void V3d::transformVectors(V3d *out, const V3d *in, int32 n, const Matrix *m) {
|
||||
int32 i;
|
||||
#ifndef DC_SH4
|
||||
V3d tmp;
|
||||
for(i = 0; i < n; i++){
|
||||
tmp.x = in[i].x*m->right.x + in[i].y*m->up.x + in[i].z*m->at.x;
|
||||
tmp.y = in[i].x*m->right.y + in[i].y*m->up.y + in[i].z*m->at.y;
|
||||
tmp.z = in[i].x*m->right.z + in[i].y*m->up.z + in[i].z*m->at.z;
|
||||
out[i] = tmp;
|
||||
}
|
||||
#else
|
||||
dc::mat_load2(*m);
|
||||
for(i = 0; i < n; i++)
|
||||
mat_trans_normal3_nomod(in[i].x, in[i].y, in[i].z,
|
||||
out[i].x, out[i].y, out[i].z);
|
||||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
@@ -343,9 +311,10 @@ Matrix::mult(Matrix *dst, const Matrix *src1, const Matrix *src2)
|
||||
*dst = *src2;
|
||||
else if(src2->flags & IDENTITY)
|
||||
*dst = *src1;
|
||||
else{
|
||||
else {
|
||||
uint8_t flags = src1->flags & src2->flags;
|
||||
mult_(dst, src1, src2);
|
||||
dst->flags = src1->flags & src2->flags;
|
||||
dst->flags = flags;
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
@@ -366,7 +335,8 @@ Matrix::invert(Matrix *dst, const Matrix *src)
|
||||
Matrix*
|
||||
Matrix::transpose(Matrix *dst, const Matrix *src)
|
||||
{
|
||||
if(src->flags & IDENTITY)
|
||||
#ifndef DC_SH4
|
||||
if(src->flags & IDENTITY)
|
||||
*dst = *src;
|
||||
dst->right.x = src->right.x;
|
||||
dst->up.x = src->right.y;
|
||||
@@ -380,25 +350,31 @@ Matrix::transpose(Matrix *dst, const Matrix *src)
|
||||
dst->pos.x = 0.0;
|
||||
dst->pos.y = 0.0;
|
||||
dst->pos.z = 0.0;
|
||||
#else
|
||||
if(src->flags & IDENTITY)
|
||||
*dst = *src;
|
||||
else {
|
||||
dc::mat_load_transpose(*src);
|
||||
dc::mat_store2(*dst);
|
||||
}
|
||||
#endif
|
||||
return dst;
|
||||
}
|
||||
|
||||
Matrix*
|
||||
Matrix::rotate(const V3d *axis, float32 angle, CombineOp op)
|
||||
{
|
||||
Matrix tmp, rot;
|
||||
makeRotation(&rot, axis, angle);
|
||||
Matrix rot;
|
||||
makeRotation(&rot, axis, angle);
|
||||
switch(op){
|
||||
case COMBINEREPLACE:
|
||||
*this = rot;
|
||||
break;
|
||||
case COMBINEPRECONCAT:
|
||||
mult(&tmp, &rot, this);
|
||||
*this = tmp;
|
||||
mult(this, &rot, this);
|
||||
break;
|
||||
case COMBINEPOSTCONCAT:
|
||||
mult(&tmp, this, &rot);
|
||||
*this = tmp;
|
||||
mult(this, this, &rot);
|
||||
break;
|
||||
}
|
||||
return this;
|
||||
@@ -407,27 +383,25 @@ Matrix::rotate(const V3d *axis, float32 angle, CombineOp op)
|
||||
// Apply the rotation described by quaternion q to this matrix.
//   COMBINEREPLACE    - overwrite this matrix with the rotation
//   COMBINEPRECONCAT  - this = rot * this
//   COMBINEPOSTCONCAT - this = this * rot
// Returns this.
Matrix*
Matrix::rotate(const Quat &q, CombineOp op)
{
	// The product is written to a temporary: mult() forwards to mult_(),
	// whose dst/src1/src2 pointers are __restrict__ and whose portable
	// (non-SH4) body stores dst fields while still reading the sources,
	// so dst must never alias either operand.
	Matrix tmp, rot;
	makeRotation(&rot, q);
	switch(op){
	case COMBINEREPLACE:
		*this = rot;
		break;
	case COMBINEPRECONCAT:
		mult(&tmp, &rot, this);
		*this = tmp;
		break;
	case COMBINEPOSTCONCAT:
		mult(&tmp, this, &rot);
		*this = tmp;
		break;
	}
	return this;
}
|
||||
|
||||
Matrix*
|
||||
Matrix::translate(const V3d *translation, CombineOp op)
|
||||
{
|
||||
Matrix tmp;
|
||||
Matrix trans = identMat;
|
||||
trans.pos = *translation;
|
||||
trans.flags &= ~IDENTITY;
|
||||
@@ -436,12 +410,10 @@ Matrix::translate(const V3d *translation, CombineOp op)
|
||||
*this = trans;
|
||||
break;
|
||||
case COMBINEPRECONCAT:
|
||||
mult(&tmp, &trans, this);
|
||||
*this = tmp;
|
||||
mult(this, &trans, this);
|
||||
break;
|
||||
case COMBINEPOSTCONCAT:
|
||||
mult(&tmp, this, &trans);
|
||||
*this = tmp;
|
||||
mult(this, this, &trans);
|
||||
break;
|
||||
}
|
||||
return this;
|
||||
@@ -450,7 +422,6 @@ Matrix::translate(const V3d *translation, CombineOp op)
|
||||
Matrix*
|
||||
Matrix::scale(const V3d *scale, CombineOp op)
|
||||
{
|
||||
Matrix tmp;
|
||||
Matrix scl = identMat;
|
||||
scl.right.x = scale->x;
|
||||
scl.up.y = scale->y;
|
||||
@@ -461,12 +432,10 @@ Matrix::scale(const V3d *scale, CombineOp op)
|
||||
*this = scl;
|
||||
break;
|
||||
case COMBINEPRECONCAT:
|
||||
mult(&tmp, &scl, this);
|
||||
*this = tmp;
|
||||
mult(this, &scl, this);
|
||||
break;
|
||||
case COMBINEPOSTCONCAT:
|
||||
mult(&tmp, this, &scl);
|
||||
*this = tmp;
|
||||
mult(this, this, &scl);
|
||||
break;
|
||||
}
|
||||
return this;
|
||||
@@ -475,18 +444,15 @@ Matrix::scale(const V3d *scale, CombineOp op)
|
||||
Matrix*
|
||||
Matrix::transform(const Matrix *mat, CombineOp op)
|
||||
{
|
||||
Matrix tmp;
|
||||
switch(op){
|
||||
case COMBINEREPLACE:
|
||||
*this = *mat;
|
||||
break;
|
||||
case COMBINEPRECONCAT:
|
||||
mult(&tmp, mat, this);
|
||||
*this = tmp;
|
||||
mult(this, mat, this);
|
||||
break;
|
||||
case COMBINEPOSTCONCAT:
|
||||
mult(&tmp, this, mat);
|
||||
*this = tmp;
|
||||
mult(this, this, mat);
|
||||
break;
|
||||
}
|
||||
return this;
|
||||
@@ -501,27 +467,31 @@ Matrix::getRotation(void)
|
||||
if(tr > 0.0f){
|
||||
s = sqrtf(1.0f + tr) * 2.0f;
|
||||
q.w = s / 4.0f;
|
||||
q.x = (up.z - at.y) / s;
|
||||
q.y = (at.x - right.z) / s;
|
||||
q.z = (right.y - up.x) / s;
|
||||
float invS = dc::Invert<true, false>(s);
|
||||
q.x = (up.z - at.y) * invS;
|
||||
q.y = (at.x - right.z) * invS;
|
||||
q.z = (right.y - up.x) * invS;
|
||||
}else if(right.x > up.y && right.x > at.z){
|
||||
s = sqrtf(1.0f + right.x - up.y - at.z) * 2.0f;
|
||||
q.w = (up.z - at.y) / s;
|
||||
q.x = s / 4.0f;
|
||||
q.y = (up.x + right.y) / s;
|
||||
q.z = (at.x + right.z) / s;
|
||||
q.x = s / 4.0f;
|
||||
float invS = dc::Invert<true, false>(s);
|
||||
q.w = (up.z - at.y) * invS;
|
||||
q.y = (up.x + right.y) * invS;
|
||||
q.z = (at.x + right.z) * invS;
|
||||
}else if(up.y > at.z){
|
||||
s = sqrtf(1.0f + up.y - right.x - at.z) * 2.0f;
|
||||
q.w = (at.x - right.z) / s;
|
||||
q.x = (up.x + right.y) / s;
|
||||
q.y = s / 4.0f;
|
||||
q.z = (at.y + up.z) / s;
|
||||
q.y = s / 4.0f;
|
||||
float invS = dc::Invert<true, false>(s);
|
||||
q.w = (at.x - right.z) * invS;
|
||||
q.x = (up.x + right.y) * invS;
|
||||
q.z = (at.y + up.z) * invS;
|
||||
}else{
|
||||
s = sqrtf(1.0f + at.z - right.x - up.y) * 2.0f;
|
||||
q.w = (right.y - up.x) / s;
|
||||
q.x = (at.x + right.z) / s;
|
||||
q.y = (at.y + up.z) / s;
|
||||
q.z = s / 4.0f;
|
||||
q.z = s / 4.0f;
|
||||
float invS = dc::Invert<true, false>(s);
|
||||
q.w = (right.y - up.x) * invS;
|
||||
q.x = (at.x + right.z) * invS;
|
||||
q.y = (at.y + up.z) * invS;
|
||||
}
|
||||
return q;
|
||||
}
|
||||
@@ -543,20 +513,7 @@ Matrix::lookAt(const V3d &dir, const V3d &up)
|
||||
void
|
||||
Matrix::mult_(Matrix *__restrict__ dst, const Matrix *__restrict__ src1, const Matrix *__restrict__ src2)
|
||||
{
|
||||
#if !defined(DC_TEXCONV) && !defined(DC_SIM)
|
||||
dst->right.x = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
|
||||
dst->right.y = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
|
||||
dst->right.z = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
|
||||
dst->up.x = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
|
||||
dst->up.y = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
|
||||
dst->up.z = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
|
||||
dst->at.x = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
|
||||
dst->at.y = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
|
||||
dst->at.z = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
|
||||
dst->pos.x = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.x, src2->up.x, src2->at.x, src2->pos.x);
|
||||
dst->pos.y = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.y, src2->up.y, src2->at.y, src2->pos.y);
|
||||
dst->pos.z = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.z, src2->up.z, src2->at.z, src2->pos.z);
|
||||
#else
|
||||
#ifndef DC_SH4
|
||||
dst->right.x = src1->right.x*src2->right.x + src1->right.y*src2->up.x + src1->right.z*src2->at.x;
|
||||
dst->right.y = src1->right.x*src2->right.y + src1->right.y*src2->up.y + src1->right.z*src2->at.y;
|
||||
dst->right.z = src1->right.x*src2->right.z + src1->right.y*src2->up.z + src1->right.z*src2->at.z;
|
||||
@@ -569,12 +526,15 @@ Matrix::mult_(Matrix *__restrict__ dst, const Matrix *__restrict__ src1, const M
|
||||
dst->pos.x = src1->pos.x*src2->right.x + src1->pos.y*src2->up.x + src1->pos.z*src2->at.x + src2->pos.x;
|
||||
dst->pos.y = src1->pos.x*src2->right.y + src1->pos.y*src2->up.y + src1->pos.z*src2->at.y + src2->pos.y;
|
||||
dst->pos.z = src1->pos.x*src2->right.z + src1->pos.y*src2->up.z + src1->pos.z*src2->at.z + src2->pos.z;
|
||||
#endif
|
||||
#else
|
||||
dc::mat_mult(*dst, *src2, *src1);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
|
||||
{
|
||||
#if 1
|
||||
dst->right.x = src->right.x;
|
||||
dst->right.y = src->up.x;
|
||||
dst->right.z = src->at.x;
|
||||
@@ -593,7 +553,12 @@ Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
|
||||
dst->pos.z = -(src->pos.x*src->at.x +
|
||||
src->pos.y*src->at.y +
|
||||
src->pos.z*src->at.z);
|
||||
dst->flags = TYPEORTHONORMAL;
|
||||
#else
|
||||
dc::mat_load_transpose(*src);
|
||||
dc::mat_invert_tranpose();
|
||||
dc::mat_store2(*dst);
|
||||
#endif
|
||||
dst->flags = TYPEORTHONORMAL;
|
||||
}
|
||||
|
||||
Matrix*
|
||||
@@ -688,7 +653,11 @@ Matrix::normalError(void)
|
||||
x = dot(right, right) - 1.0f;
|
||||
y = dot(up, up) - 1.0f;
|
||||
z = dot(at, at) - 1.0f;
|
||||
#ifndef DC_SH4
|
||||
return x*x + y*y + z*z;
|
||||
#else
|
||||
return fipr_magnitude_sqr(x, y, z, 0.0f);
|
||||
#endif
|
||||
}
|
||||
|
||||
float32
|
||||
@@ -698,16 +667,27 @@ Matrix::orthogonalError(void)
|
||||
x = dot(at, up);
|
||||
y = dot(at, right);
|
||||
z = dot(up, right);
|
||||
#ifndef DC_SH4
|
||||
return x*x + y*y + z*z;
|
||||
#else
|
||||
return fipr_magnitude_sqr(x, y, z, 0.0f);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Sum of squared differences between this matrix and the identity:
// the three basis vectors are compared against (1,0,0), (0,1,0), (0,0,1)
// and the position against the origin. Returns 0 for an exact identity.
float32
Matrix::identityError(void)
{
	V3d r = { right.x-1.0f, right.y, right.z };
	V3d u = { up.x, up.y-1.0f, up.z };
	V3d a = { at.x, at.y, at.z-1.0f };
#ifndef DC_SH4
	return dot(r,r) + dot(u,u) + dot(a,a) + dot(pos,pos);
#else
	// Use the offset vector a (not the raw at row) so that both build
	// configurations compute the same value.
	return fipr_magnitude_sqr(r.x, r.y, r.z, 0.0f) +
	       fipr_magnitude_sqr(u.x, u.y, u.z, 0.0f) +
	       fipr_magnitude_sqr(a.x, a.y, a.z, 0.0f) +
	       fipr_magnitude_sqr(pos.x, pos.y, pos.z, 0.0f);
#endif
}
|
||||
|
||||
void
|
||||
|
272
vendor/librw/src/dc/rwdc_common.h
vendored
272
vendor/librw/src/dc/rwdc_common.h
vendored
@@ -246,6 +246,83 @@ inline __hot __icache_aligned void mat_load_transpose(const matrix_t *mtx) {
|
||||
);
|
||||
}
|
||||
|
||||
// Loads the upper-left 3x3 of *mtx, transposed, into the floating-point
// register bank selected by frchg (the back bank / XMTRX, assuming
// FPSCR.FR is 0 on entry - TODO confirm against callers).
//
// Each source row is read as three consecutive floats and the fourth
// float of the row is skipped. Row 0 lands in fr0/fr4/fr8, row 1 in
// fr1/fr5/fr9, row 2 in fr2/fr6/fr10. fr12..fr14, fr3, fr7 and fr11 are
// zeroed and fr15 is set to 1.
inline __hot __icache_aligned void mat_load_3x3_transpose(const matrix_t *mtx) {
    asm volatile(
        R"(
            frchg

            fmov.s  @%[mtx]+, fr0

            ! Prefetch further into the matrix, then step back by the
            ! full 32 bytes: the post-increment above already advanced
            ! the pointer to the second float of row 0.
            add     #32, %[mtx]
            pref    @%[mtx]
            add     #-32, %[mtx]

            fmov.s  @%[mtx]+, fr4
            fmov.s  @%[mtx]+, fr8
            fldi0   fr12
            add     #4, %[mtx]

            fmov.s  @%[mtx]+, fr1
            fmov.s  @%[mtx]+, fr5
            fmov.s  @%[mtx]+, fr9
            fldi0   fr13
            add     #4, %[mtx]

            fmov.s  @%[mtx]+, fr2
            fmov.s  @%[mtx]+, fr6
            fmov.s  @%[mtx]+, fr10
            fldi0   fr14

            fldi0   fr3
            fldi0   fr7
            fmov    fr3, fr11
            fldi1   fr15

            frchg
        )"
        : [mtx] "+r" (mtx)
        :
        :
    );
}
|
||||
|
||||
// Operates in place on the register bank selected by frchg (the back
// bank / XMTRX, assuming FPSCR.FR is 0 on entry - TODO confirm).
//
// Steps, as performed by the instructions below:
//   1. negate fr12..fr14 and clear fr15;
//   2. with fr3/fr7/fr11 cleared, take the inner product of fv12 with
//      fv0, fv4 and fv8 (results land in fr3, fr7, fr11) and move those
//      three results into fr12..fr14;
//   3. swap the off-diagonal pairs fr1<->fr4, fr2<->fr8, fr6<->fr9,
//      using fr15 as scratch;
//   4. reset fr3/fr7/fr11 to 0 and fr15 to 1.
// The (misspelled) name is kept because callers reference it.
inline __hot __icache_aligned void mat_invert_tranpose() {
    asm volatile(
        "frchg\n\t"
        "fneg	fr12\n\t"
        "fneg	fr13\n\t"
        "fneg	fr14\n\t"
        "fldi0	fr15\n\t"
        "fldi0	fr3\n\t"
        "fipr	fv12, fv0\n\t"
        "fldi0	fr7\n\t"
        "fipr	fv12, fv4\n\t"
        "fldi0	fr11\n\t"
        "fipr	fv12, fv8\n\t"

        "fmov	fr3, fr12\n\t"
        "fmov	fr7, fr13\n\t"
        "fmov	fr11, fr14\n\t"

        /* swap fr1 <-> fr4 */
        "fmov	fr1, fr15\n\t"
        "fmov	fr4, fr1\n\t"
        "fmov	fr15, fr4\n\t"

        /* swap fr2 <-> fr8 (the saved value must go to fr8, not back to fr2) */
        "fmov	fr2, fr15\n\t"
        "fmov	fr8, fr2\n\t"
        "fmov	fr15, fr8\n\t"

        /* swap fr6 <-> fr9 */
        "fmov	fr6, fr15\n\t"
        "fmov	fr9, fr6\n\t"
        "fmov	fr15, fr9\n\t"

        "fldi0	fr3\n\t"
        "fldi0	fr7\n\t"
        "fldi0	fr11\n\t"
        "fldi1	fr15\n\t"
        "frchg\n"
        :
        :
        :);
}
|
||||
|
||||
inline __hot __icache_aligned void mat_store2(matrix_t *mtx) {
|
||||
asm volatile(
|
||||
R"(
|
||||
@@ -449,103 +526,6 @@ __hot __icache_aligned inline void mat_copy(matrix_t *dst, const matrix_t *src)
|
||||
:);
|
||||
}
|
||||
|
||||
//TODO: FIXME FOR VC (AND USE FTRV)
|
||||
template<bool FAST_APPROX=false>
|
||||
__hot constexpr inline void quat_mult(quaternion_t *r, const quaternion_t &q1, const quaternion_t &q2) {
|
||||
if(FAST_APPROX && !std::is_constant_evaluated()) {
|
||||
/*
|
||||
// reorder the coefficients so that q1 stays in constant order {x,y,z,w}
|
||||
// q2 then needs to be rotated after each inner product
|
||||
x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
|
||||
y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
|
||||
z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
|
||||
w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
|
||||
*/
|
||||
// keep q1 in fv4
|
||||
register float q1x __asm__ ("fr4") = (q1.x);
|
||||
register float q1y __asm__ ("fr5") = (q1.y);
|
||||
register float q1z __asm__ ("fr6") = (q1.z);
|
||||
register float q1w __asm__ ("fr7") = (q1.w);
|
||||
|
||||
// load q2 into fv8, use it to get the shuffled reorder into fv0
|
||||
register float q2x __asm__ ("fr8") = (q2.x);
|
||||
register float q2y __asm__ ("fr9") = (q2.y);
|
||||
register float q2z __asm__ ("fr10") = (q2.z);
|
||||
register float q2w __asm__ ("fr11") = (q2.w);
|
||||
|
||||
// temporary operand / result in fv0
|
||||
register float t1x __asm__ ("fr0");
|
||||
register float t1y __asm__ ("fr1");
|
||||
register float t1z __asm__ ("fr2");
|
||||
register float t1w __asm__ ("fr3");
|
||||
|
||||
// x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
|
||||
t1x = q2w;
|
||||
t1y = q2z;
|
||||
t1z = -q2y;
|
||||
t1w = q2w;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv0\n"
|
||||
: "+f" (t1w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||
);
|
||||
// x = t1w; try to avoid the stall by not reading the fipr result immediately
|
||||
|
||||
// y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
|
||||
t1x = -q2z;
|
||||
t1y = q2w;
|
||||
t1z = q2x;
|
||||
__atomic_thread_fence(1);
|
||||
r->x = t1w; // get previous result
|
||||
t1w = q2y;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv0\n"
|
||||
: "+f" (t1w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||
);
|
||||
//y = t1w;
|
||||
|
||||
// z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
|
||||
t1x = q2y;
|
||||
t1y = -q2x;
|
||||
t1z = q2w;
|
||||
__atomic_thread_fence(1);
|
||||
r->y = t1w; // get previous result
|
||||
t1w = q2z;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv0\n"
|
||||
: "+f" (t1w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||
);
|
||||
//z = t1w;
|
||||
__atomic_thread_fence(1);
|
||||
|
||||
// w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
|
||||
q2x = -q2x;
|
||||
q2y = -q2y;
|
||||
q2z = -q2z;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv8\n"
|
||||
: "+f" (q2w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (q2x), "f" (q2y), "f" (q2z)
|
||||
);
|
||||
|
||||
__atomic_thread_fence(1);
|
||||
r->z = t1w;
|
||||
__atomic_thread_fence(1);
|
||||
r->w = q2w;
|
||||
} else {
|
||||
r->x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
|
||||
r->y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
|
||||
r->z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
|
||||
r->w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
|
||||
}
|
||||
}
|
||||
|
||||
__hot inline void mat_load_apply(const matrix_t* matrix1, const matrix_t* matrix2) {
|
||||
unsigned int prefetch_scratch;
|
||||
|
||||
@@ -669,6 +649,104 @@ __hot inline void mat_apply_rotate_z(float z) {
|
||||
: "fpul", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11");
|
||||
}
|
||||
|
||||
|
||||
//TODO: FIXME FOR VC (AND USE FTRV)
|
||||
template<bool FAST_APPROX=false>
|
||||
__hot constexpr inline void quat_mult(quaternion_t *r, const quaternion_t &q1, const quaternion_t &q2) {
|
||||
if(FAST_APPROX && !std::is_constant_evaluated()) {
|
||||
/*
|
||||
// reorder the coefficients so that q1 stays in constant order {x,y,z,w}
|
||||
// q2 then needs to be rotated after each inner product
|
||||
x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
|
||||
y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
|
||||
z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
|
||||
w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
|
||||
*/
|
||||
// keep q1 in fv4
|
||||
register float q1x __asm__ ("fr4") = (q1.x);
|
||||
register float q1y __asm__ ("fr5") = (q1.y);
|
||||
register float q1z __asm__ ("fr6") = (q1.z);
|
||||
register float q1w __asm__ ("fr7") = (q1.w);
|
||||
|
||||
// load q2 into fv8, use it to get the shuffled reorder into fv0
|
||||
register float q2x __asm__ ("fr8") = (q2.x);
|
||||
register float q2y __asm__ ("fr9") = (q2.y);
|
||||
register float q2z __asm__ ("fr10") = (q2.z);
|
||||
register float q2w __asm__ ("fr11") = (q2.w);
|
||||
|
||||
// temporary operand / result in fv0
|
||||
register float t1x __asm__ ("fr0");
|
||||
register float t1y __asm__ ("fr1");
|
||||
register float t1z __asm__ ("fr2");
|
||||
register float t1w __asm__ ("fr3");
|
||||
|
||||
// x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
|
||||
t1x = q2w;
|
||||
t1y = q2z;
|
||||
t1z = -q2y;
|
||||
t1w = q2w;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv0\n"
|
||||
: "+f" (t1w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||
);
|
||||
// x = t1w; try to avoid the stall by not reading the fipr result immediately
|
||||
|
||||
// y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
|
||||
t1x = -q2z;
|
||||
t1y = q2w;
|
||||
t1z = q2x;
|
||||
__atomic_thread_fence(1);
|
||||
r->x = t1w; // get previous result
|
||||
t1w = q2y;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv0\n"
|
||||
: "+f" (t1w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||
);
|
||||
//y = t1w;
|
||||
|
||||
// z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
|
||||
t1x = q2y;
|
||||
t1y = -q2x;
|
||||
t1z = q2w;
|
||||
__atomic_thread_fence(1);
|
||||
r->y = t1w; // get previous result
|
||||
t1w = q2z;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv0\n"
|
||||
: "+f" (t1w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||
);
|
||||
//z = t1w;
|
||||
__atomic_thread_fence(1);
|
||||
|
||||
// w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
|
||||
q2x = -q2x;
|
||||
q2y = -q2y;
|
||||
q2z = -q2z;
|
||||
__asm__ ("\n"
|
||||
" fipr fv4,fv8\n"
|
||||
: "+f" (q2w)
|
||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||
"f" (q2x), "f" (q2y), "f" (q2z)
|
||||
);
|
||||
|
||||
__atomic_thread_fence(1);
|
||||
r->z = t1w;
|
||||
__atomic_thread_fence(1);
|
||||
r->w = q2w;
|
||||
} else {
|
||||
r->x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
|
||||
r->y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
|
||||
r->z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
|
||||
r->w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
|
||||
}
|
||||
}
|
||||
|
||||
# else
|
||||
# ifdef DC_TEXCONV
|
||||
# define mat_apply(a)
|
||||
|
51
vendor/librw/src/rwbase.h
vendored
51
vendor/librw/src/rwbase.h
vendored
@@ -238,8 +238,8 @@ inline V2d neg(const V2d &a) { return makeV2d(-a.x, -a.y); }
|
||||
inline V2d add(const V2d &a, const V2d &b) { return makeV2d(a.x+b.x, a.y+b.y); }
|
||||
inline V2d sub(const V2d &a, const V2d &b) { return makeV2d(a.x-b.x, a.y-b.y); }
|
||||
inline V2d scale(const V2d &a, float32 r) { return makeV2d(a.x*r, a.y*r); }
|
||||
inline float32 length(const V2d &v) { return sqrtf(v.x*v.x + v.y*v.y); }
|
||||
inline V2d normalize(const V2d &v) { return scale(v, 1.0f/length(v)); }
|
||||
inline float32 length(const V2d &v) { return dc::Sqrt(v.x*v.x + v.y*v.y); }
|
||||
inline V2d normalize(const V2d &v) { return scale(v, dc::RecipSqrt(v.x*v.x + v.y*v.y)); }
|
||||
|
||||
struct V3d
|
||||
{
|
||||
@@ -265,10 +265,22 @@ inline float32 length(const V3d &v) {
|
||||
return len;
|
||||
#endif
|
||||
}
|
||||
inline V3d normalize(const V3d &v) { return scale(v, 1.0f/length(v)); }
|
||||
inline V3d setlength(const V3d &v, float32 l) { return scale(v, l/length(v)); }
|
||||
V3d cross(const V3d &a, const V3d &b);
|
||||
inline __attribute__((always_inline)) float32 dot(const V3d &a, const V3d &b) {
|
||||
inline V3d normalize(const V3d &v) {
|
||||
float invLen;
|
||||
#ifndef DC_SH4
|
||||
invLen = 1.0f / length(v);
|
||||
#else
|
||||
invLen = dc::RecipSqrt(fipr_magnitude_sqr(v.x, v.y, v.z, 0.0f));
|
||||
#endif
|
||||
return scale(v, invLen);
|
||||
}
|
||||
inline V3d setlength(const V3d &v, float32 l) { return scale(v, dc::Div<true, false>(l, length(v))); }
|
||||
inline V3d cross(const V3d &a, const V3d &b) {
|
||||
return makeV3d(a.y*b.z - a.z*b.y,
|
||||
a.z*b.x - a.x*b.z,
|
||||
a.x*b.y - a.y*b.x);
|
||||
}
|
||||
inline float32 dot(const V3d &a, const V3d &b) {
|
||||
#ifdef DC_SH4
|
||||
return fipr(a.x, a.y, a.z, 0.0f, b.x, b.y, b.z, 0.0f);
|
||||
#else
|
||||
@@ -329,12 +341,33 @@ inline float32 length(const Quat &q) {
|
||||
#ifndef DC_SH4
|
||||
return sqrtf(q.w*q.w + q.x*q.x + q.y*q.y + q.z*q.z);
|
||||
#else
|
||||
return dc::Sqrt(fipr_magnitude_sqr(q.x, q.y, q.z, q.w));
|
||||
return dc::Sqrt(fipr_magnitude_sqr(q.x, q.y, q.z, 0.0f));
|
||||
#endif
|
||||
}
|
||||
inline Quat normalize(const Quat &q) { return scale(q, 1.0f/length(q)); }
|
||||
// Returns q scaled to unit length. No check is made for a zero-length
// quaternion.
inline Quat normalize(const Quat &q) {
	float invLen;
#ifndef DC_SH4
	invLen = 1.0f / length(q);
#else
	// The squared magnitude must include w, otherwise the result is
	// not unit length and differs from the portable branch.
	invLen = dc::RecipSqrt(fipr_magnitude_sqr(q.x, q.y, q.z, q.w));
#endif
	return scale(q, invLen);
}
|
||||
inline Quat conj(const Quat &q) { return makeQuat(q.w, -q.x, -q.y, -q.z); }
|
||||
Quat mult(const Quat &q, const Quat &p);
|
||||
inline Quat mult(const Quat &q, const Quat &p) {
|
||||
#ifndef DC_SH4
|
||||
return makeQuat(q.w*p.w - q.x*p.x - q.y*p.y - q.z*p.z,
|
||||
q.w*p.x + q.x*p.w + q.y*p.z - q.z*p.y,
|
||||
q.w*p.y + q.y*p.w + q.z*p.x - q.x*p.z,
|
||||
q.w*p.z + q.z*p.w + q.x*p.y - q.y*p.x);
|
||||
#else
|
||||
Quat o;
|
||||
dc::quat_mult(reinterpret_cast<dc::quaternion_t *>(&o),
|
||||
reinterpret_cast<const dc::quaternion_t &>(q),
|
||||
reinterpret_cast<const dc::quaternion_t &>(p));
|
||||
return o;
|
||||
#endif
|
||||
}
|
||||
inline V3d rotate(const V3d &v, const Quat &q) { return mult(mult(q, makeQuat(0.0f, v)), conj(q)).vec(); }
|
||||
Quat lerp(const Quat &q, const Quat &p, float32 r);
|
||||
Quat slerp(const Quat &q, const Quat &p, float32 a);
|
||||
|
Reference in New Issue
Block a user