mirror of
https://gitlab.com/skmp/dca3-game.git
synced 2025-09-01 18:52:58 +02:00
Accelerated lots of RW math + Coronas (liberty)
- A lot of the RW matrix code is now accelerated. - Went through and accelerated liberty's coronas/reflections. ! This apparently introduced a bug somewhere along the line that causes boats to freak out and do somersaults when driven. Will resolve later.
This commit is contained in:
@@ -300,9 +300,9 @@ CCoronas::Render(void)
|
|||||||
|
|
||||||
|
|
||||||
if(aCoronas[i].fadeAlpha && spriteCoors.z < aCoronas[i].drawDist){
|
if(aCoronas[i].fadeAlpha && spriteCoors.z < aCoronas[i].drawDist){
|
||||||
float recipz = 1.0f/spriteCoors.z;
|
float recipz = dc::Invert<true, false>(spriteCoors.z);
|
||||||
float fadeDistance = aCoronas[i].drawDist / 2.0f;
|
float fadeDistance = aCoronas[i].drawDist / 2.0f;
|
||||||
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - (spriteCoors.z - fadeDistance)/fadeDistance;
|
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - dc::Div<true, false>((spriteCoors.z - fadeDistance), fadeDistance);
|
||||||
int totalFade = aCoronas[i].fadeAlpha * distanceFade;
|
int totalFade = aCoronas[i].fadeAlpha * distanceFade;
|
||||||
|
|
||||||
if(aCoronas[i].LOScheck)
|
if(aCoronas[i].LOScheck)
|
||||||
@@ -313,6 +313,7 @@ CCoronas::Render(void)
|
|||||||
// render corona itself
|
// render corona itself
|
||||||
if(aCoronas[i].texture){
|
if(aCoronas[i].texture){
|
||||||
float fogscale = CWeather::Foggyness*Min(spriteCoors.z, 40.0f)/40.0f + 1.0f;
|
float fogscale = CWeather::Foggyness*Min(spriteCoors.z, 40.0f)/40.0f + 1.0f;
|
||||||
|
float invFogScale = dc::Invert<true, false>(fogscale);
|
||||||
if(CCoronas::aCoronas[i].id == SUN_CORE)
|
if(CCoronas::aCoronas[i].id == SUN_CORE)
|
||||||
spriteCoors.z = 0.95f * RwCameraGetFarClipPlane(Scene.camera);
|
spriteCoors.z = 0.95f * RwCameraGetFarClipPlane(Scene.camera);
|
||||||
RwRenderStateSet(rwRENDERSTATETEXTURERASTER, RwTextureGetRaster(aCoronas[i].texture));
|
RwRenderStateSet(rwRENDERSTATETEXTURERASTER, RwTextureGetRaster(aCoronas[i].texture));
|
||||||
@@ -328,9 +329,9 @@ CCoronas::Render(void)
|
|||||||
CSprite::RenderOneXLUSprite(spriteCoors.x, spriteCoors.y, spriteCoors.z,
|
CSprite::RenderOneXLUSprite(spriteCoors.x, spriteCoors.y, spriteCoors.z,
|
||||||
spritew * aCoronas[i].size * wscale,
|
spritew * aCoronas[i].size * wscale,
|
||||||
spriteh * aCoronas[i].size * fogscale * hscale,
|
spriteh * aCoronas[i].size * fogscale * hscale,
|
||||||
CCoronas::aCoronas[i].red / fogscale,
|
CCoronas::aCoronas[i].red * invFogScale,
|
||||||
CCoronas::aCoronas[i].green / fogscale,
|
CCoronas::aCoronas[i].green * invFogScale,
|
||||||
CCoronas::aCoronas[i].blue / fogscale,
|
CCoronas::aCoronas[i].blue * invFogScale,
|
||||||
totalFade,
|
totalFade,
|
||||||
recipz,
|
recipz,
|
||||||
255);
|
255);
|
||||||
@@ -339,9 +340,9 @@ CCoronas::Render(void)
|
|||||||
spriteCoors.x, spriteCoors.y, spriteCoors.z,
|
spriteCoors.x, spriteCoors.y, spriteCoors.z,
|
||||||
spritew * aCoronas[i].size * fogscale,
|
spritew * aCoronas[i].size * fogscale,
|
||||||
spriteh * aCoronas[i].size * fogscale,
|
spriteh * aCoronas[i].size * fogscale,
|
||||||
CCoronas::aCoronas[i].red / fogscale,
|
CCoronas::aCoronas[i].red * invFogScale,
|
||||||
CCoronas::aCoronas[i].green / fogscale,
|
CCoronas::aCoronas[i].green * invFogScale,
|
||||||
CCoronas::aCoronas[i].blue / fogscale,
|
CCoronas::aCoronas[i].blue * invFogScale,
|
||||||
totalFade,
|
totalFade,
|
||||||
recipz,
|
recipz,
|
||||||
20.0f * recipz,
|
20.0f * recipz,
|
||||||
@@ -365,7 +366,7 @@ CCoronas::Render(void)
|
|||||||
(spriteCoors.x - (screenw/2)) * flare->position + (screenw/2),
|
(spriteCoors.x - (screenw/2)) * flare->position + (screenw/2),
|
||||||
(spriteCoors.y - (screenh/2)) * flare->position + (screenh/2),
|
(spriteCoors.y - (screenh/2)) * flare->position + (screenh/2),
|
||||||
spriteCoors.z,
|
spriteCoors.z,
|
||||||
4.0f*flare->size * spritew/spriteh,
|
4.0f*flare->size * dc::Div<true, false>(spritew, spriteh),
|
||||||
4.0f*flare->size,
|
4.0f*flare->size,
|
||||||
(flare->red * aCoronas[i].red)>>8,
|
(flare->red * aCoronas[i].red)>>8,
|
||||||
(flare->green * aCoronas[i].green)>>8,
|
(flare->green * aCoronas[i].green)>>8,
|
||||||
@@ -480,9 +481,9 @@ CCoronas::RenderReflections(void)
|
|||||||
drawDist = Min(drawDist, 55.0f);
|
drawDist = Min(drawDist, 55.0f);
|
||||||
if(spriteCoors.z < drawDist){
|
if(spriteCoors.z < drawDist){
|
||||||
float fadeDistance = drawDist / 2.0f;
|
float fadeDistance = drawDist / 2.0f;
|
||||||
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - (spriteCoors.z - fadeDistance)/fadeDistance;
|
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - Div<true, false>((spriteCoors.z - fadeDistance), fadeDistance);
|
||||||
distanceFade = Clamp(distanceFade, 0.0f, 1.0f);
|
distanceFade = Clamp(distanceFade, 0.0f, 1.0f);
|
||||||
float recipz = 1.0f/RwCameraGetNearClipPlane(Scene.camera);
|
float recipz = dc::Invert<true, false>(RwCameraGetNearClipPlane(Scene.camera));
|
||||||
float heightFade = (20.0f - aCoronas[i].heightAboveRoad)/20.0f;
|
float heightFade = (20.0f - aCoronas[i].heightAboveRoad)/20.0f;
|
||||||
int intensity = distanceFade*heightFade * 230.0 * CWeather::WetRoads;
|
int intensity = distanceFade*heightFade * 230.0 * CWeather::WetRoads;
|
||||||
|
|
||||||
@@ -606,7 +607,9 @@ CEntity::ProcessLightsForEntity(void)
|
|||||||
flashTimer1 = 0;
|
flashTimer1 = 0;
|
||||||
flashTimer2 = 0;
|
flashTimer2 = 0;
|
||||||
flashTimer3 = 0;
|
flashTimer3 = 0;
|
||||||
|
#ifdef DC_SH4
// Load the entity matrix once up front; the per-effect position transform in the
// loop below uses mat_trans_single3_nodiv_nomod instead of GetMatrix() * effect->pos.
// Note the scope operator: a single colon would turn "dc" into a statement label.
dc::mat_load2(GetMatrix());
#endif
|
||||||
n = CModelInfo::GetModelInfo(GetModelIndex())->GetNum2dEffects();
|
n = CModelInfo::GetModelInfo(GetModelIndex())->GetNum2dEffects();
|
||||||
for(i = 0; i < n; i++, flashTimer1 += 0x80, flashTimer2 += 0x100, flashTimer3 += 0x200){
|
for(i = 0; i < n; i++, flashTimer1 += 0x80, flashTimer2 += 0x100, flashTimer3 += 0x200){
|
||||||
effect = CModelInfo::GetModelInfo(GetModelIndex())->Get2dEffect(i);
|
effect = CModelInfo::GetModelInfo(GetModelIndex())->Get2dEffect(i);
|
||||||
@@ -614,8 +617,12 @@ CEntity::ProcessLightsForEntity(void)
|
|||||||
if(effect->type != EFFECT_LIGHT)
|
if(effect->type != EFFECT_LIGHT)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
#ifndef DC_SH4
|
||||||
pos = GetMatrix() * effect->pos;
|
pos = GetMatrix() * effect->pos;
|
||||||
|
#else
|
||||||
|
mat_trans_single3_nodiv_nomod(effect->pos.x, effect->pos.y, effect->pos.z,
|
||||||
|
pos.x, pos.y, pos.z);
|
||||||
|
#endif
|
||||||
lightOn = false;
|
lightOn = false;
|
||||||
lightFlickering = false;
|
lightFlickering = false;
|
||||||
switch(effect->light.lightType){
|
switch(effect->light.lightType){
|
||||||
|
232
vendor/librw/src/base.cpp
vendored
232
vendor/librw/src/base.cpp
vendored
@@ -91,24 +91,6 @@ strncmp_ci(const char *s1, const char *s2, int n)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Quaternion product of q and p.
// The SH4 build forwards to dc::quat_mult; every other build expands the
// product component by component.
Quat
mult(const Quat &q, const Quat &p)
{
#ifdef DC_SH4
	Quat product;
	dc::quat_mult(reinterpret_cast<dc::quaternion_t *>(&product),
	              reinterpret_cast<const dc::quaternion_t &>(q),
	              reinterpret_cast<const dc::quaternion_t &>(p));
	return product;
#else
	// Components are handed to makeQuat in the same order as before: w, x, y, z.
	const float32 w = q.w*p.w - q.x*p.x - q.y*p.y - q.z*p.z;
	const float32 x = q.w*p.x + q.x*p.w + q.y*p.z - q.z*p.y;
	const float32 y = q.w*p.y + q.y*p.w + q.z*p.x - q.x*p.z;
	const float32 z = q.w*p.z + q.z*p.w + q.x*p.y - q.y*p.x;
	return makeQuat(w, x, y, z);
#endif
}
|
|
||||||
|
|
||||||
|
|
||||||
Quat*
|
Quat*
|
||||||
Quat::rotate(const V3d *axis, float32 angle, CombineOp op)
|
Quat::rotate(const V3d *axis, float32 angle, CombineOp op)
|
||||||
{
|
{
|
||||||
@@ -166,53 +148,39 @@ slerp(const Quat &q, const Quat &p, float32 a)
|
|||||||
//
|
//
|
||||||
// V3d
|
// V3d
|
||||||
//
|
//
|
||||||
|
// Transforms n points from `in` into `out` by matrix m: the right/up/at rows
// are applied and the pos row is added as translation.
void V3d::transformPoints(V3d *out, const V3d *in, int32 n, const Matrix *m) {
#ifndef DC_SH4
	for(int32 idx = 0; idx < n; idx++){
		const V3d &src = in[idx];
		// Build the result in a local first so that out[idx] may be the same
		// element as in[idx].
		V3d result;
		result.x = src.x*m->right.x + src.y*m->up.x + src.z*m->at.x + m->pos.x;
		result.y = src.x*m->right.y + src.y*m->up.y + src.z*m->at.y + m->pos.y;
		result.z = src.x*m->right.z + src.y*m->up.z + src.z*m->at.z + m->pos.z;
		out[idx] = result;
	}
#else
	// SH4: load the matrix once, then transform every point with it.
	dc::mat_load2(*m);
	for(int32 idx = 0; idx < n; idx++)
		mat_trans_single3_nodiv_nomod(in[idx].x, in[idx].y, in[idx].z,
		                              out[idx].x, out[idx].y, out[idx].z);
#endif
}
|
||||||
|
// Transforms n direction vectors from `in` into `out` by matrix m.
// Only the right/up/at rows are applied; the pos row is not added.
void V3d::transformVectors(V3d *out, const V3d *in, int32 n, const Matrix *m) {
#ifndef DC_SH4
	for(int32 idx = 0; idx < n; idx++){
		const V3d &src = in[idx];
		// Build the result in a local first so that out[idx] may be the same
		// element as in[idx].
		V3d result;
		result.x = src.x*m->right.x + src.y*m->up.x + src.z*m->at.x;
		result.y = src.x*m->right.y + src.y*m->up.y + src.z*m->at.y;
		result.z = src.x*m->right.z + src.y*m->up.z + src.z*m->at.z;
		out[idx] = result;
	}
#else
	// SH4: load the matrix once, then run the normal transform on every vector.
	dc::mat_load2(*m);
	for(int32 idx = 0; idx < n; idx++)
		mat_trans_normal3_nomod(in[idx].x, in[idx].y, in[idx].z,
		                        out[idx].x, out[idx].y, out[idx].z);
#endif
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -343,9 +311,10 @@ Matrix::mult(Matrix *dst, const Matrix *src1, const Matrix *src2)
|
|||||||
*dst = *src2;
|
*dst = *src2;
|
||||||
else if(src2->flags & IDENTITY)
|
else if(src2->flags & IDENTITY)
|
||||||
*dst = *src1;
|
*dst = *src1;
|
||||||
else{
|
else {
|
||||||
|
uint8_t flags = src1->flags & src2->flags;
|
||||||
mult_(dst, src1, src2);
|
mult_(dst, src1, src2);
|
||||||
dst->flags = src1->flags & src2->flags;
|
dst->flags = flags;
|
||||||
}
|
}
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
@@ -366,7 +335,8 @@ Matrix::invert(Matrix *dst, const Matrix *src)
|
|||||||
Matrix*
|
Matrix*
|
||||||
Matrix::transpose(Matrix *dst, const Matrix *src)
|
Matrix::transpose(Matrix *dst, const Matrix *src)
|
||||||
{
|
{
|
||||||
if(src->flags & IDENTITY)
|
#ifndef DC_SH4
|
||||||
|
if(src->flags & IDENTITY)
|
||||||
*dst = *src;
|
*dst = *src;
|
||||||
dst->right.x = src->right.x;
|
dst->right.x = src->right.x;
|
||||||
dst->up.x = src->right.y;
|
dst->up.x = src->right.y;
|
||||||
@@ -380,25 +350,31 @@ Matrix::transpose(Matrix *dst, const Matrix *src)
|
|||||||
dst->pos.x = 0.0;
|
dst->pos.x = 0.0;
|
||||||
dst->pos.y = 0.0;
|
dst->pos.y = 0.0;
|
||||||
dst->pos.z = 0.0;
|
dst->pos.z = 0.0;
|
||||||
|
#else
|
||||||
|
if(src->flags & IDENTITY)
|
||||||
|
*dst = *src;
|
||||||
|
else {
|
||||||
|
dc::mat_load_transpose(*src);
|
||||||
|
dc::mat_store2(*dst);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
Matrix*
|
Matrix*
|
||||||
Matrix::rotate(const V3d *axis, float32 angle, CombineOp op)
|
Matrix::rotate(const V3d *axis, float32 angle, CombineOp op)
|
||||||
{
|
{
|
||||||
Matrix tmp, rot;
|
Matrix rot;
|
||||||
makeRotation(&rot, axis, angle);
|
makeRotation(&rot, axis, angle);
|
||||||
switch(op){
|
switch(op){
|
||||||
case COMBINEREPLACE:
|
case COMBINEREPLACE:
|
||||||
*this = rot;
|
*this = rot;
|
||||||
break;
|
break;
|
||||||
case COMBINEPRECONCAT:
|
case COMBINEPRECONCAT:
|
||||||
mult(&tmp, &rot, this);
|
mult(this, &rot, this);
|
||||||
*this = tmp;
|
|
||||||
break;
|
break;
|
||||||
case COMBINEPOSTCONCAT:
|
case COMBINEPOSTCONCAT:
|
||||||
mult(&tmp, this, &rot);
|
mult(this, this, &rot);
|
||||||
*this = tmp;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
@@ -407,27 +383,25 @@ Matrix::rotate(const V3d *axis, float32 angle, CombineOp op)
|
|||||||
// Combines this matrix with the rotation described by quaternion q.
//   COMBINEREPLACE    - this = rot
//   COMBINEPRECONCAT  - this = rot * this
//   COMBINEPOSTCONCAT - this = this * rot
// Returns this.
Matrix*
Matrix::rotate(const Quat &q, CombineOp op)
{
	Matrix tmp, rot;
	makeRotation(&rot, q);
	switch(op){
	case COMBINEREPLACE:
		*this = rot;
		break;
	case COMBINEPRECONCAT:
		// mult() forwards to mult_(), whose dst/src1/src2 are all declared
		// __restrict__ and whose scalar path writes dst fields while it is
		// still reading the sources. The product therefore has to go into a
		// separate matrix before being copied back.
		mult(&tmp, &rot, this);
		*this = tmp;
		break;
	case COMBINEPOSTCONCAT:
		// Same aliasing constraint as above.
		mult(&tmp, this, &rot);
		*this = tmp;
		break;
	}
	return this;
}
|
||||||
|
|
||||||
Matrix*
|
Matrix*
|
||||||
Matrix::translate(const V3d *translation, CombineOp op)
|
Matrix::translate(const V3d *translation, CombineOp op)
|
||||||
{
|
{
|
||||||
Matrix tmp;
|
|
||||||
Matrix trans = identMat;
|
Matrix trans = identMat;
|
||||||
trans.pos = *translation;
|
trans.pos = *translation;
|
||||||
trans.flags &= ~IDENTITY;
|
trans.flags &= ~IDENTITY;
|
||||||
@@ -436,12 +410,10 @@ Matrix::translate(const V3d *translation, CombineOp op)
|
|||||||
*this = trans;
|
*this = trans;
|
||||||
break;
|
break;
|
||||||
case COMBINEPRECONCAT:
|
case COMBINEPRECONCAT:
|
||||||
mult(&tmp, &trans, this);
|
mult(this, &trans, this);
|
||||||
*this = tmp;
|
|
||||||
break;
|
break;
|
||||||
case COMBINEPOSTCONCAT:
|
case COMBINEPOSTCONCAT:
|
||||||
mult(&tmp, this, &trans);
|
mult(this, this, &trans);
|
||||||
*this = tmp;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
@@ -450,7 +422,6 @@ Matrix::translate(const V3d *translation, CombineOp op)
|
|||||||
Matrix*
|
Matrix*
|
||||||
Matrix::scale(const V3d *scale, CombineOp op)
|
Matrix::scale(const V3d *scale, CombineOp op)
|
||||||
{
|
{
|
||||||
Matrix tmp;
|
|
||||||
Matrix scl = identMat;
|
Matrix scl = identMat;
|
||||||
scl.right.x = scale->x;
|
scl.right.x = scale->x;
|
||||||
scl.up.y = scale->y;
|
scl.up.y = scale->y;
|
||||||
@@ -461,12 +432,10 @@ Matrix::scale(const V3d *scale, CombineOp op)
|
|||||||
*this = scl;
|
*this = scl;
|
||||||
break;
|
break;
|
||||||
case COMBINEPRECONCAT:
|
case COMBINEPRECONCAT:
|
||||||
mult(&tmp, &scl, this);
|
mult(this, &scl, this);
|
||||||
*this = tmp;
|
|
||||||
break;
|
break;
|
||||||
case COMBINEPOSTCONCAT:
|
case COMBINEPOSTCONCAT:
|
||||||
mult(&tmp, this, &scl);
|
mult(this, this, &scl);
|
||||||
*this = tmp;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
@@ -475,18 +444,15 @@ Matrix::scale(const V3d *scale, CombineOp op)
|
|||||||
Matrix*
|
Matrix*
|
||||||
Matrix::transform(const Matrix *mat, CombineOp op)
|
Matrix::transform(const Matrix *mat, CombineOp op)
|
||||||
{
|
{
|
||||||
Matrix tmp;
|
|
||||||
switch(op){
|
switch(op){
|
||||||
case COMBINEREPLACE:
|
case COMBINEREPLACE:
|
||||||
*this = *mat;
|
*this = *mat;
|
||||||
break;
|
break;
|
||||||
case COMBINEPRECONCAT:
|
case COMBINEPRECONCAT:
|
||||||
mult(&tmp, mat, this);
|
mult(this, mat, this);
|
||||||
*this = tmp;
|
|
||||||
break;
|
break;
|
||||||
case COMBINEPOSTCONCAT:
|
case COMBINEPOSTCONCAT:
|
||||||
mult(&tmp, this, mat);
|
mult(this, this, mat);
|
||||||
*this = tmp;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
@@ -501,27 +467,31 @@ Matrix::getRotation(void)
|
|||||||
if(tr > 0.0f){
|
if(tr > 0.0f){
|
||||||
s = sqrtf(1.0f + tr) * 2.0f;
|
s = sqrtf(1.0f + tr) * 2.0f;
|
||||||
q.w = s / 4.0f;
|
q.w = s / 4.0f;
|
||||||
q.x = (up.z - at.y) / s;
|
float invS = dc::Invert<true, false>(s);
|
||||||
q.y = (at.x - right.z) / s;
|
q.x = (up.z - at.y) * invS;
|
||||||
q.z = (right.y - up.x) / s;
|
q.y = (at.x - right.z) * invS;
|
||||||
|
q.z = (right.y - up.x) * invS;
|
||||||
}else if(right.x > up.y && right.x > at.z){
|
}else if(right.x > up.y && right.x > at.z){
|
||||||
s = sqrtf(1.0f + right.x - up.y - at.z) * 2.0f;
|
s = sqrtf(1.0f + right.x - up.y - at.z) * 2.0f;
|
||||||
q.w = (up.z - at.y) / s;
|
q.x = s / 4.0f;
|
||||||
q.x = s / 4.0f;
|
float invS = dc::Invert<true, false>(s);
|
||||||
q.y = (up.x + right.y) / s;
|
q.w = (up.z - at.y) * invS;
|
||||||
q.z = (at.x + right.z) / s;
|
q.y = (up.x + right.y) * invS;
|
||||||
|
q.z = (at.x + right.z) * invS;
|
||||||
}else if(up.y > at.z){
|
}else if(up.y > at.z){
|
||||||
s = sqrtf(1.0f + up.y - right.x - at.z) * 2.0f;
|
s = sqrtf(1.0f + up.y - right.x - at.z) * 2.0f;
|
||||||
q.w = (at.x - right.z) / s;
|
q.y = s / 4.0f;
|
||||||
q.x = (up.x + right.y) / s;
|
float invS = dc::Invert<true, false>(s);
|
||||||
q.y = s / 4.0f;
|
q.w = (at.x - right.z) * invS;
|
||||||
q.z = (at.y + up.z) / s;
|
q.x = (up.x + right.y) * invS;
|
||||||
|
q.z = (at.y + up.z) * invS;
|
||||||
}else{
|
}else{
|
||||||
s = sqrtf(1.0f + at.z - right.x - up.y) * 2.0f;
|
s = sqrtf(1.0f + at.z - right.x - up.y) * 2.0f;
|
||||||
q.w = (right.y - up.x) / s;
|
q.z = s / 4.0f;
|
||||||
q.x = (at.x + right.z) / s;
|
float invS = dc::Invert<true, false>(s);
|
||||||
q.y = (at.y + up.z) / s;
|
q.w = (right.y - up.x) * invS;
|
||||||
q.z = s / 4.0f;
|
q.x = (at.x + right.z) * invS;
|
||||||
|
q.y = (at.y + up.z) * invS;
|
||||||
}
|
}
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
@@ -543,20 +513,7 @@ Matrix::lookAt(const V3d &dir, const V3d &up)
|
|||||||
void
|
void
|
||||||
Matrix::mult_(Matrix *__restrict__ dst, const Matrix *__restrict__ src1, const Matrix *__restrict__ src2)
|
Matrix::mult_(Matrix *__restrict__ dst, const Matrix *__restrict__ src1, const Matrix *__restrict__ src2)
|
||||||
{
|
{
|
||||||
#if !defined(DC_TEXCONV) && !defined(DC_SIM)
|
#ifndef DC_SH4
|
||||||
dst->right.x = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
|
|
||||||
dst->right.y = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
|
|
||||||
dst->right.z = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
|
|
||||||
dst->up.x = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
|
|
||||||
dst->up.y = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
|
|
||||||
dst->up.z = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
|
|
||||||
dst->at.x = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
|
|
||||||
dst->at.y = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
|
|
||||||
dst->at.z = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
|
|
||||||
dst->pos.x = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.x, src2->up.x, src2->at.x, src2->pos.x);
|
|
||||||
dst->pos.y = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.y, src2->up.y, src2->at.y, src2->pos.y);
|
|
||||||
dst->pos.z = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.z, src2->up.z, src2->at.z, src2->pos.z);
|
|
||||||
#else
|
|
||||||
dst->right.x = src1->right.x*src2->right.x + src1->right.y*src2->up.x + src1->right.z*src2->at.x;
|
dst->right.x = src1->right.x*src2->right.x + src1->right.y*src2->up.x + src1->right.z*src2->at.x;
|
||||||
dst->right.y = src1->right.x*src2->right.y + src1->right.y*src2->up.y + src1->right.z*src2->at.y;
|
dst->right.y = src1->right.x*src2->right.y + src1->right.y*src2->up.y + src1->right.z*src2->at.y;
|
||||||
dst->right.z = src1->right.x*src2->right.z + src1->right.y*src2->up.z + src1->right.z*src2->at.z;
|
dst->right.z = src1->right.x*src2->right.z + src1->right.y*src2->up.z + src1->right.z*src2->at.z;
|
||||||
@@ -569,12 +526,15 @@ Matrix::mult_(Matrix *__restrict__ dst, const Matrix *__restrict__ src1, const M
|
|||||||
dst->pos.x = src1->pos.x*src2->right.x + src1->pos.y*src2->up.x + src1->pos.z*src2->at.x + src2->pos.x;
|
dst->pos.x = src1->pos.x*src2->right.x + src1->pos.y*src2->up.x + src1->pos.z*src2->at.x + src2->pos.x;
|
||||||
dst->pos.y = src1->pos.x*src2->right.y + src1->pos.y*src2->up.y + src1->pos.z*src2->at.y + src2->pos.y;
|
dst->pos.y = src1->pos.x*src2->right.y + src1->pos.y*src2->up.y + src1->pos.z*src2->at.y + src2->pos.y;
|
||||||
dst->pos.z = src1->pos.x*src2->right.z + src1->pos.y*src2->up.z + src1->pos.z*src2->at.z + src2->pos.z;
|
dst->pos.z = src1->pos.x*src2->right.z + src1->pos.y*src2->up.z + src1->pos.z*src2->at.z + src2->pos.z;
|
||||||
#endif
|
#else
|
||||||
|
dc::mat_mult(*dst, *src2, *src1);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
|
Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
|
||||||
{
|
{
|
||||||
|
#if 1
|
||||||
dst->right.x = src->right.x;
|
dst->right.x = src->right.x;
|
||||||
dst->right.y = src->up.x;
|
dst->right.y = src->up.x;
|
||||||
dst->right.z = src->at.x;
|
dst->right.z = src->at.x;
|
||||||
@@ -593,7 +553,12 @@ Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
|
|||||||
dst->pos.z = -(src->pos.x*src->at.x +
|
dst->pos.z = -(src->pos.x*src->at.x +
|
||||||
src->pos.y*src->at.y +
|
src->pos.y*src->at.y +
|
||||||
src->pos.z*src->at.z);
|
src->pos.z*src->at.z);
|
||||||
dst->flags = TYPEORTHONORMAL;
|
#else
|
||||||
|
dc::mat_load_transpose(*src);
|
||||||
|
dc::mat_invert_tranpose();
|
||||||
|
dc::mat_store2(*dst);
|
||||||
|
#endif
|
||||||
|
dst->flags = TYPEORTHONORMAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
Matrix*
|
Matrix*
|
||||||
@@ -688,7 +653,11 @@ Matrix::normalError(void)
|
|||||||
x = dot(right, right) - 1.0f;
|
x = dot(right, right) - 1.0f;
|
||||||
y = dot(up, up) - 1.0f;
|
y = dot(up, up) - 1.0f;
|
||||||
z = dot(at, at) - 1.0f;
|
z = dot(at, at) - 1.0f;
|
||||||
|
#ifndef DC_SH4
|
||||||
return x*x + y*y + z*z;
|
return x*x + y*y + z*z;
|
||||||
|
#else
|
||||||
|
return fipr_magnitude_sqr(x, y, z, 0.0f);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
float32
|
float32
|
||||||
@@ -698,16 +667,27 @@ Matrix::orthogonalError(void)
|
|||||||
x = dot(at, up);
|
x = dot(at, up);
|
||||||
y = dot(at, right);
|
y = dot(at, right);
|
||||||
z = dot(up, right);
|
z = dot(up, right);
|
||||||
|
#ifndef DC_SH4
|
||||||
return x*x + y*y + z*z;
|
return x*x + y*y + z*z;
|
||||||
|
#else
|
||||||
|
return fipr_magnitude_sqr(x, y, z, 0.0f);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the summed squared difference between this matrix and the identity:
// each basis row minus its unit axis, plus the squared length of pos.
float32
Matrix::identityError(void)
{
	V3d r = { right.x-1.0f, right.y, right.z };
	V3d u = { up.x, up.y-1.0f, up.z };
	V3d a = { at.x, at.y, at.z-1.0f };
#ifndef DC_SH4
	return dot(r,r) + dot(u,u) + dot(a,a) + dot(pos,pos);
#else
	// The third term must use the local difference vector `a`, not the member
	// `at`; otherwise the result disagrees with the portable branch above.
	return fipr_magnitude_sqr(r.x, r.y, r.z, 0.0f) +
	       fipr_magnitude_sqr(u.x, u.y, u.z, 0.0f) +
	       fipr_magnitude_sqr(a.x, a.y, a.z, 0.0f) +
	       fipr_magnitude_sqr(pos.x, pos.y, pos.z, 0.0f);
#endif
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
272
vendor/librw/src/dc/rwdc_common.h
vendored
272
vendor/librw/src/dc/rwdc_common.h
vendored
@@ -246,6 +246,83 @@ inline __hot __icache_aligned void mat_load_transpose(const matrix_t *mtx) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Loads nine floats from *mtx into the other floating-point register bank
// (selected by the surrounding frchg pair) and fills the remaining lanes with
// constants: fr12/fr13/fr14 = 0, fr3/fr7/fr11 = 0, fr15 = 1.
// Memory is read in ascending address order into fr0,fr4,fr8 then fr1,fr5,fr9
// then fr2,fr6,fr10.
inline __hot __icache_aligned void mat_load_3x3_transpose(const matrix_t *mtx) {
    asm volatile(
        "frchg\n\t"

        "fmov.s @%[mtx]+, fr0\n\t"

        // Prefetch 32 bytes ahead, then step the pointer back by 28.
        // NOTE(review): the load above already post-incremented mtx by 4, so the
        // next load reads from byte offset 8 of the original pointer, not 4.
        // Confirm this is the intended element.
        "add #32, %[mtx]\n\t"
        "pref @%[mtx]\n\t"
        "add #-(32 - 4), %[mtx]\n\t"

        "fmov.s @%[mtx]+, fr4\n\t"
        "fmov.s @%[mtx]+, fr8\n\t"
        "fldi0 fr12\n\t"
        "add #4, %[mtx]\n\t"        // skip one float

        "fmov.s @%[mtx]+, fr1\n\t"
        "fmov.s @%[mtx]+, fr5\n\t"
        "fmov.s @%[mtx]+, fr9\n\t"
        "fldi0 fr13\n\t"
        "add #4, %[mtx]\n\t"        // skip one float

        "fmov.s @%[mtx]+, fr2\n\t"
        "fmov.s @%[mtx]+, fr6\n\t"
        "fmov.s @%[mtx]+, fr10\n\t"
        "fldi0 fr14\n\t"

        "fldi0 fr3\n\t"
        "fldi0 fr7\n\t"
        "fmov fr3, fr11\n\t"        // fr11 = 0, copied from fr3
        "fldi1 fr15\n\t"

        "frchg\n"
        : [mtx] "+r" (mtx)
        :
        :
    );
}
|
||||||
|
|
||||||
|
// Operates in place on the sixteen registers of the other floating-point bank
// (selected by the surrounding frchg pair):
//   1. negates fr12..fr14, zeroes fr15, and replaces fr12..fr14 with the inner
//      products of that vector with fv0, fv4 and fv8;
//   2. transposes the upper-left 3x3 by swapping fr1<->fr4, fr2<->fr8, fr6<->fr9;
//   3. resets fr3, fr7, fr11 to 0 and fr15 to 1.
// NOTE(review): the only visible caller (a disabled branch of
// Matrix::invertOrthonormal) runs mat_load_transpose first; confirm that
// precondition before enabling this path.
inline __hot __icache_aligned void mat_invert_tranpose() {
    asm volatile(
        "frchg\n\t"
        "fneg fr12\n\t"
        "fneg fr13\n\t"
        "fneg fr14\n\t"
        "fldi0 fr15\n\t"
        // fipr leaves each inner product in the last lane of its destination
        // vector, so that lane is zeroed first to keep it out of the sum.
        "fldi0 fr3\n\t"
        "fipr fv12, fv0\n\t"
        "fldi0 fr7\n\t"
        "fipr fv12, fv4\n\t"
        "fldi0 fr11\n\t"
        "fipr fv12, fv8\n\t"

        // Move the three inner products into fr12..fr14.
        "fmov fr3, fr12\n\t"
        "fmov fr7, fr13\n\t"
        "fmov fr11, fr14\n\t"
        // Swap fr1 <-> fr4, using fr15 as scratch.
        "fmov fr1, fr15\n\t"
        "fmov fr4, fr1\n\t"
        "fmov fr15, fr4\n\t"
        // Swap fr2 <-> fr8. The saved value must land in fr8; writing it back
        // to fr2 would undo the swap.
        "fmov fr2, fr15\n\t"
        "fmov fr8, fr2\n\t"
        "fmov fr15, fr8\n\t"
        // Swap fr6 <-> fr9.
        "fmov fr6, fr15\n\t"
        "fmov fr9, fr6\n\t"
        "fmov fr15, fr9\n\t"

        "fldi0 fr3\n\t"
        "fldi0 fr7\n\t"
        "fldi0 fr11\n\t"
        "fldi1 fr15\n\t"
        "frchg\n"
        :
        :
        :);
}
|
||||||
|
|
||||||
inline __hot __icache_aligned void mat_store2(matrix_t *mtx) {
|
inline __hot __icache_aligned void mat_store2(matrix_t *mtx) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
R"(
|
R"(
|
||||||
@@ -449,103 +526,6 @@ __hot __icache_aligned inline void mat_copy(matrix_t *dst, const matrix_t *src)
|
|||||||
:);
|
:);
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO: FIXME FOR VC (AND USE FTRV)
|
|
||||||
template<bool FAST_APPROX=false>
|
|
||||||
__hot constexpr inline void quat_mult(quaternion_t *r, const quaternion_t &q1, const quaternion_t &q2) {
|
|
||||||
if(FAST_APPROX && !std::is_constant_evaluated()) {
|
|
||||||
/*
|
|
||||||
// reorder the coefficients so that q1 stays in constant order {x,y,z,w}
|
|
||||||
// q2 then needs to be rotated after each inner product
|
|
||||||
x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
|
|
||||||
y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
|
|
||||||
z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
|
|
||||||
w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
|
|
||||||
*/
|
|
||||||
// keep q1 in fv4
|
|
||||||
register float q1x __asm__ ("fr4") = (q1.x);
|
|
||||||
register float q1y __asm__ ("fr5") = (q1.y);
|
|
||||||
register float q1z __asm__ ("fr6") = (q1.z);
|
|
||||||
register float q1w __asm__ ("fr7") = (q1.w);
|
|
||||||
|
|
||||||
// load q2 into fv8, use it to get the shuffled reorder into fv0
|
|
||||||
register float q2x __asm__ ("fr8") = (q2.x);
|
|
||||||
register float q2y __asm__ ("fr9") = (q2.y);
|
|
||||||
register float q2z __asm__ ("fr10") = (q2.z);
|
|
||||||
register float q2w __asm__ ("fr11") = (q2.w);
|
|
||||||
|
|
||||||
// temporary operand / result in fv0
|
|
||||||
register float t1x __asm__ ("fr0");
|
|
||||||
register float t1y __asm__ ("fr1");
|
|
||||||
register float t1z __asm__ ("fr2");
|
|
||||||
register float t1w __asm__ ("fr3");
|
|
||||||
|
|
||||||
// x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
|
|
||||||
t1x = q2w;
|
|
||||||
t1y = q2z;
|
|
||||||
t1z = -q2y;
|
|
||||||
t1w = q2w;
|
|
||||||
__asm__ ("\n"
|
|
||||||
" fipr fv4,fv0\n"
|
|
||||||
: "+f" (t1w)
|
|
||||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
|
||||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
|
||||||
);
|
|
||||||
// x = t1w; try to avoid the stall by not reading the fipr result immediately
|
|
||||||
|
|
||||||
// y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
|
|
||||||
t1x = -q2z;
|
|
||||||
t1y = q2w;
|
|
||||||
t1z = q2x;
|
|
||||||
__atomic_thread_fence(1);
|
|
||||||
r->x = t1w; // get previous result
|
|
||||||
t1w = q2y;
|
|
||||||
__asm__ ("\n"
|
|
||||||
" fipr fv4,fv0\n"
|
|
||||||
: "+f" (t1w)
|
|
||||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
|
||||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
|
||||||
);
|
|
||||||
//y = t1w;
|
|
||||||
|
|
||||||
// z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
|
|
||||||
t1x = q2y;
|
|
||||||
t1y = -q2x;
|
|
||||||
t1z = q2w;
|
|
||||||
__atomic_thread_fence(1);
|
|
||||||
r->y = t1w; // get previous result
|
|
||||||
t1w = q2z;
|
|
||||||
__asm__ ("\n"
|
|
||||||
" fipr fv4,fv0\n"
|
|
||||||
: "+f" (t1w)
|
|
||||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
|
||||||
"f" (t1x), "f" (t1y), "f" (t1z)
|
|
||||||
);
|
|
||||||
//z = t1w;
|
|
||||||
__atomic_thread_fence(1);
|
|
||||||
|
|
||||||
// w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
|
|
||||||
q2x = -q2x;
|
|
||||||
q2y = -q2y;
|
|
||||||
q2z = -q2z;
|
|
||||||
__asm__ ("\n"
|
|
||||||
" fipr fv4,fv8\n"
|
|
||||||
: "+f" (q2w)
|
|
||||||
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
|
||||||
"f" (q2x), "f" (q2y), "f" (q2z)
|
|
||||||
);
|
|
||||||
|
|
||||||
__atomic_thread_fence(1);
|
|
||||||
r->z = t1w;
|
|
||||||
__atomic_thread_fence(1);
|
|
||||||
r->w = q2w;
|
|
||||||
} else {
|
|
||||||
r->x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
|
|
||||||
r->y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
|
|
||||||
r->z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
|
|
||||||
r->w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__hot inline void mat_load_apply(const matrix_t* matrix1, const matrix_t* matrix2) {
|
__hot inline void mat_load_apply(const matrix_t* matrix1, const matrix_t* matrix2) {
|
||||||
unsigned int prefetch_scratch;
|
unsigned int prefetch_scratch;
|
||||||
|
|
||||||
@@ -669,6 +649,104 @@ __hot inline void mat_apply_rotate_z(float z) {
|
|||||||
: "fpul", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11");
|
: "fpul", "fr5", "fr6", "fr7", "fr8", "fr9", "fr10", "fr11");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//TODO: FIXME FOR VC (AND USE FTRV)
|
||||||
|
template<bool FAST_APPROX=false>
|
||||||
|
__hot constexpr inline void quat_mult(quaternion_t *r, const quaternion_t &q1, const quaternion_t &q2) {
|
||||||
|
if(FAST_APPROX && !std::is_constant_evaluated()) {
|
||||||
|
/*
|
||||||
|
// reorder the coefficients so that q1 stays in constant order {x,y,z,w}
|
||||||
|
// q2 then needs to be rotated after each inner product
|
||||||
|
x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
|
||||||
|
y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
|
||||||
|
z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
|
||||||
|
w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
|
||||||
|
*/
|
||||||
|
// keep q1 in fv4
|
||||||
|
register float q1x __asm__ ("fr4") = (q1.x);
|
||||||
|
register float q1y __asm__ ("fr5") = (q1.y);
|
||||||
|
register float q1z __asm__ ("fr6") = (q1.z);
|
||||||
|
register float q1w __asm__ ("fr7") = (q1.w);
|
||||||
|
|
||||||
|
// load q2 into fv8, use it to get the shuffled reorder into fv0
|
||||||
|
register float q2x __asm__ ("fr8") = (q2.x);
|
||||||
|
register float q2y __asm__ ("fr9") = (q2.y);
|
||||||
|
register float q2z __asm__ ("fr10") = (q2.z);
|
||||||
|
register float q2w __asm__ ("fr11") = (q2.w);
|
||||||
|
|
||||||
|
// temporary operand / result in fv0
|
||||||
|
register float t1x __asm__ ("fr0");
|
||||||
|
register float t1y __asm__ ("fr1");
|
||||||
|
register float t1z __asm__ ("fr2");
|
||||||
|
register float t1w __asm__ ("fr3");
|
||||||
|
|
||||||
|
// x = (q1.x * q2.w) + (q1.y * q2.z) - (q1.z * q2.y) + (q1.w * q2.x);
|
||||||
|
t1x = q2w;
|
||||||
|
t1y = q2z;
|
||||||
|
t1z = -q2y;
|
||||||
|
t1w = q2w;
|
||||||
|
__asm__ ("\n"
|
||||||
|
" fipr fv4,fv0\n"
|
||||||
|
: "+f" (t1w)
|
||||||
|
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||||
|
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||||
|
);
|
||||||
|
// x = t1w; try to avoid the stall by not reading the fipr result immediately
|
||||||
|
|
||||||
|
// y = -(q1.x * q2.z) + (q1.y * q2.w) + (q1.z * q2.x) + (q1.w * q2.y);
|
||||||
|
t1x = -q2z;
|
||||||
|
t1y = q2w;
|
||||||
|
t1z = q2x;
|
||||||
|
__atomic_thread_fence(1);
|
||||||
|
r->x = t1w; // get previous result
|
||||||
|
t1w = q2y;
|
||||||
|
__asm__ ("\n"
|
||||||
|
" fipr fv4,fv0\n"
|
||||||
|
: "+f" (t1w)
|
||||||
|
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||||
|
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||||
|
);
|
||||||
|
//y = t1w;
|
||||||
|
|
||||||
|
// z = (q1.x * q2.y) - (q1.y * q2.x) + (q1.z * q2.w) + (q1.w * q2.z);
|
||||||
|
t1x = q2y;
|
||||||
|
t1y = -q2x;
|
||||||
|
t1z = q2w;
|
||||||
|
__atomic_thread_fence(1);
|
||||||
|
r->y = t1w; // get previous result
|
||||||
|
t1w = q2z;
|
||||||
|
__asm__ ("\n"
|
||||||
|
" fipr fv4,fv0\n"
|
||||||
|
: "+f" (t1w)
|
||||||
|
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||||
|
"f" (t1x), "f" (t1y), "f" (t1z)
|
||||||
|
);
|
||||||
|
//z = t1w;
|
||||||
|
__atomic_thread_fence(1);
|
||||||
|
|
||||||
|
// w = -(q1.x * q2.x) - (q1.y * q2.y) - (q1.z * q2.z) + (q1.w * q2.w);
|
||||||
|
q2x = -q2x;
|
||||||
|
q2y = -q2y;
|
||||||
|
q2z = -q2z;
|
||||||
|
__asm__ ("\n"
|
||||||
|
" fipr fv4,fv8\n"
|
||||||
|
: "+f" (q2w)
|
||||||
|
: "f" (q1x), "f" (q1y), "f" (q1z), "f" (q1w),
|
||||||
|
"f" (q2x), "f" (q2y), "f" (q2z)
|
||||||
|
);
|
||||||
|
|
||||||
|
__atomic_thread_fence(1);
|
||||||
|
r->z = t1w;
|
||||||
|
__atomic_thread_fence(1);
|
||||||
|
r->w = q2w;
|
||||||
|
} else {
|
||||||
|
r->x = (q2.z * q1.y) - (q1.z * q2.y) + (q1.x * q2.w) + (q2.x * q1.w);
|
||||||
|
r->y = (q2.x * q1.z) - (q1.x * q2.z) + (q1.y * q2.w) + (q2.y * q1.w);
|
||||||
|
r->z = (q2.y * q1.x) - (q1.y * q2.x) + (q1.z * q2.w) + (q2.z * q1.w);
|
||||||
|
r->w = (q2.w * q1.w) - (q2.x * q1.x) - (q2.y * q1.y) - (q2.z * q1.z);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
# else
|
# else
|
||||||
# ifdef DC_TEXCONV
|
# ifdef DC_TEXCONV
|
||||||
# define mat_apply(a)
|
# define mat_apply(a)
|
||||||
|
51
vendor/librw/src/rwbase.h
vendored
51
vendor/librw/src/rwbase.h
vendored
@@ -238,8 +238,8 @@ inline V2d neg(const V2d &a) { return makeV2d(-a.x, -a.y); }
|
|||||||
inline V2d add(const V2d &a, const V2d &b) { return makeV2d(a.x+b.x, a.y+b.y); }
|
inline V2d add(const V2d &a, const V2d &b) { return makeV2d(a.x+b.x, a.y+b.y); }
|
||||||
inline V2d sub(const V2d &a, const V2d &b) { return makeV2d(a.x-b.x, a.y-b.y); }
|
inline V2d sub(const V2d &a, const V2d &b) { return makeV2d(a.x-b.x, a.y-b.y); }
|
||||||
inline V2d scale(const V2d &a, float32 r) { return makeV2d(a.x*r, a.y*r); }
|
inline V2d scale(const V2d &a, float32 r) { return makeV2d(a.x*r, a.y*r); }
|
||||||
inline float32 length(const V2d &v) { return sqrtf(v.x*v.x + v.y*v.y); }
|
// Length of a 2D vector: dc::Sqrt applied to the sum of the squared components.
inline float32 length(const V2d &v) {
	const float32 lenSq = v.x*v.x + v.y*v.y;
	return dc::Sqrt(lenSq);
}
|
||||||
inline V2d normalize(const V2d &v) { return scale(v, 1.0f/length(v)); }
|
// Scales a 2D vector by dc::RecipSqrt of its squared length.
inline V2d normalize(const V2d &v) {
	const float32 invLen = dc::RecipSqrt(v.x*v.x + v.y*v.y);
	return scale(v, invLen);
}
|
||||||
|
|
||||||
struct V3d
|
struct V3d
|
||||||
{
|
{
|
||||||
@@ -265,10 +265,22 @@ inline float32 length(const V3d &v) {
|
|||||||
return len;
|
return len;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
inline V3d normalize(const V3d &v) { return scale(v, 1.0f/length(v)); }
|
inline V3d normalize(const V3d &v) {
|
||||||
inline V3d setlength(const V3d &v, float32 l) { return scale(v, l/length(v)); }
|
float invLen;
|
||||||
V3d cross(const V3d &a, const V3d &b);
|
#ifndef DC_SH4
|
||||||
inline __attribute__((always_inline)) float32 dot(const V3d &a, const V3d &b) {
|
invLen = 1.0f / length(v);
|
||||||
|
#else
|
||||||
|
invLen = dc::RecipSqrt(fipr_magnitude_sqr(v.x, v.y, v.z, 0.0f));
|
||||||
|
#endif
|
||||||
|
return scale(v, invLen);
|
||||||
|
}
|
||||||
|
// Returns v scaled by l / length(v), with the quotient taken via dc::Div.
inline V3d setlength(const V3d &v, float32 l) {
	const float32 factor = dc::Div<true, false>(l, length(v));
	return scale(v, factor);
}
|
||||||
|
// Cross product a x b of two 3D vectors.
inline V3d cross(const V3d &a, const V3d &b) {
	const float32 cx = a.y*b.z - a.z*b.y;
	const float32 cy = a.z*b.x - a.x*b.z;
	const float32 cz = a.x*b.y - a.y*b.x;
	return makeV3d(cx, cy, cz);
}
|
||||||
|
inline float32 dot(const V3d &a, const V3d &b) {
|
||||||
#ifdef DC_SH4
|
#ifdef DC_SH4
|
||||||
return fipr(a.x, a.y, a.z, 0.0f, b.x, b.y, b.z, 0.0f);
|
return fipr(a.x, a.y, a.z, 0.0f, b.x, b.y, b.z, 0.0f);
|
||||||
#else
|
#else
|
||||||
@@ -329,12 +341,33 @@ inline float32 length(const Quat &q) {
|
|||||||
#ifndef DC_SH4
|
#ifndef DC_SH4
|
||||||
return sqrtf(q.w*q.w + q.x*q.x + q.y*q.y + q.z*q.z);
|
return sqrtf(q.w*q.w + q.x*q.x + q.y*q.y + q.z*q.z);
|
||||||
#else
|
#else
|
||||||
return dc::Sqrt(fipr_magnitude_sqr(q.x, q.y, q.z, q.w));
|
return dc::Sqrt(fipr_magnitude_sqr(q.x, q.y, q.z, 0.0f));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
inline Quat normalize(const Quat &q) { return scale(q, 1.0f/length(q)); }
|
// Returns q scaled by the reciprocal of its length.
// All four components (x, y, z, w) contribute to the length; leaving w out
// only normalises the vector part and the result is no longer a unit
// quaternion.
inline Quat normalize(const Quat &q) {
	float invLen;
#ifndef DC_SH4
	invLen = 1.0f / length(q);
#else
	// Squared magnitude over x, y, z and w, then reciprocal square root.
	invLen = dc::RecipSqrt(fipr_magnitude_sqr(q.x, q.y, q.z, q.w));
#endif
	return scale(q, invLen);
}
|
||||||
inline Quat conj(const Quat &q) { return makeQuat(q.w, -q.x, -q.y, -q.z); }
|
inline Quat conj(const Quat &q) { return makeQuat(q.w, -q.x, -q.y, -q.z); }
|
||||||
Quat mult(const Quat &q, const Quat &p);
|
// Quaternion product q * p.
// DC_SH4 builds forward to dc::quat_mult; every other build evaluates the
// product component by component.
inline Quat mult(const Quat &q, const Quat &p) {
#ifdef DC_SH4
	// NOTE(review): the casts assume Quat and dc::quaternion_t share the same
	// member layout - confirm against both declarations.
	Quat product;
	dc::quat_mult(reinterpret_cast<dc::quaternion_t *>(&product),
	              reinterpret_cast<const dc::quaternion_t &>(q),
	              reinterpret_cast<const dc::quaternion_t &>(p));
	return product;
#else
	return makeQuat(
		q.w*p.w - q.x*p.x - q.y*p.y - q.z*p.z,
		q.w*p.x + q.x*p.w + q.y*p.z - q.z*p.y,
		q.w*p.y + q.y*p.w + q.z*p.x - q.x*p.z,
		q.w*p.z + q.z*p.w + q.x*p.y - q.y*p.x);
#endif
}
|
||||||
inline V3d rotate(const V3d &v, const Quat &q) { return mult(mult(q, makeQuat(0.0f, v)), conj(q)).vec(); }
|
inline V3d rotate(const V3d &v, const Quat &q) { return mult(mult(q, makeQuat(0.0f, v)), conj(q)).vec(); }
|
||||||
Quat lerp(const Quat &q, const Quat &p, float32 r);
|
Quat lerp(const Quat &q, const Quat &p, float32 r);
|
||||||
Quat slerp(const Quat &q, const Quat &p, float32 a);
|
Quat slerp(const Quat &q, const Quat &p, float32 a);
|
||||||
|
Reference in New Issue
Block a user