Merge branch 'falco/gainz_phase_4' into 'main'
Some checks failed
re3 conan+cmake / build-cmake (openal, glfw, macos-latest, gl3) (push) Has been cancelled
re3 conan+cmake / build-cmake (openal, glfw, ubuntu-18.04, gl3) (push) Has been cancelled
re3 conan+cmake / build-cmake (openal, glfw, windows-latest, gl3) (push) Has been cancelled
re3 conan+cmake / build-cmake (openal, windows-latest, d3d9) (push) Has been cancelled
re3 cmake devkitA64 (Nintendo Switch) / build-nintendo-switch (push) Has been cancelled
re3 premake amd64 / build (Debug, win-amd64-librw_d3d9-oal) (push) Has been cancelled
re3 premake amd64 / build (Debug, win-amd64-librw_gl3_glfw-oal) (push) Has been cancelled
re3 premake amd64 / build (Release, win-amd64-librw_d3d9-oal) (push) Has been cancelled
re3 premake amd64 / build (Release, win-amd64-librw_gl3_glfw-oal) (push) Has been cancelled
re3 premake x86 / build (Debug, win-x86-librw_d3d9-mss) (push) Has been cancelled
re3 premake x86 / build (Debug, win-x86-librw_d3d9-oal) (push) Has been cancelled
re3 premake x86 / build (Debug, win-x86-librw_gl3_glfw-mss) (push) Has been cancelled
re3 premake x86 / build (Debug, win-x86-librw_gl3_glfw-oal) (push) Has been cancelled
re3 premake x86 / build (Release, win-x86-librw_d3d9-mss) (push) Has been cancelled
re3 premake x86 / build (Release, win-x86-librw_d3d9-oal) (push) Has been cancelled
re3 premake x86 / build (Release, win-x86-librw_gl3_glfw-mss) (push) Has been cancelled
re3 premake x86 / build (Release, win-x86-librw_gl3_glfw-oal) (push) Has been cancelled
re3 premake x86 / build (Vanilla, win-x86-librw_d3d9-mss) (push) Has been cancelled
re3 premake x86 / build (Vanilla, win-x86-librw_d3d9-oal) (push) Has been cancelled
re3 premake x86 / build (Vanilla, win-x86-librw_gl3_glfw-mss) (push) Has been cancelled
re3 premake x86 / build (Vanilla, win-x86-librw_gl3_glfw-oal) (push) Has been cancelled

Draft: Liberty/Miami Performance Gainz Phase 4

See merge request skmp/dca3-game!98
This commit is contained in:
Falco Girgis
2025-06-16 07:36:36 +00:00
19 changed files with 1322 additions and 678 deletions

View File

@@ -104,6 +104,8 @@ OBJS_TEXCONV += \
OBJS_O3 = \
../vendor/librw/src/dc/rwdc.o \
../src/liberty/core/World.o \
../src/liberty/core/Zones.o \
../src/liberty/core/ZoneCull.o \
../src/liberty/collision/Collision.o \
../src/liberty/math/math.o \
../src/liberty/math/Matrix.o \
@@ -111,11 +113,22 @@ OBJS_O3 = \
../src/liberty/math/Rect.o \
../src/liberty/math/Vector.o \
../vendor/librw/src/base.o \
../src/liberty/renderer/Shadows.o
OBJS_NO_FAST_MATH = \
../src/liberty/renderer/Shadows.o \
../src/liberty/renderer/Renderer.o \
../src/liberty/animation/FrameUpdate.o \
../src/liberty/animation/RpAnimblend.o \
../src/liberty/control/PathFind.o \
../src/liberty/core/Cam.o \
../src/liberty/core/Camera.o
../src/liberty/peds/Ped.o \
../src/liberty/peds/PedAI.o \
../src/liberty/vehicles/Automobile.o
# ICE list with -O3
OBJS_O2 = \
../src/liberty/animation/AnimBlendNode.o
# ICE list with -ffast-math
OBJS_NO_FAST_MATH =
KOS_CPPFLAGS += -fbuiltin -ffast-math -ffp-contract=fast \
-mfsrra -mfsca
@@ -169,6 +182,9 @@ clean:
$(OBJS_O3): %.o: %.cpp
kos-c++ $(CXXFLAGS) $(CPPFLAGS) -O3 -c $< -o $@
$(OBJS_O2): %.o: %.cpp
kos-c++ $(CXXFLAGS) $(CPPFLAGS) -O2 -c $< -o $@
$(OBJS_NO_FAST_MATH): %.o: %.cpp
kos-c++ $(CXXFLAGS) $(CPPFLAGS) -O3 -c $< -o $@ -fno-fast-math

View File

@@ -39,7 +39,7 @@ CAnimBlendNode::Update(CVector &trans, CQuaternion &rot, float weight)
float blend = association->GetBlendAmount(weight);
if(blend > 0.0f){
float kfAdt = player->GetNextTimeDelta();
float t = kfAdt == 0.0f ? 0.0f : (kfAdt - remainingTime)/kfAdt;
float t = kfAdt == 0.0f ? 0.0f : dc::Div<true, false>(kfAdt - remainingTime, kfAdt);
if(player->type & CAnimBlendSequence::KF_TRANS){
auto kfdAt = player->GetNextTranslationDelta();
auto kfBt = player->GetPrevTranslation();
@@ -153,7 +153,7 @@ CAnimBlendNode::CalcDeltas(void)
if(cos > 1.0f)
cos = 1.0f;
theta = Acos(cos);
invSin = theta == 0.0f ? 0.0f : 1.0f/Sin(theta);
invSin = theta == 0.0f ? 0.0f : dc::Invert<true, false>(Sin(theta));
}
void
@@ -164,7 +164,7 @@ CAnimBlendNode::GetCurrentTranslation(CVector &trans, float weight)
float blend = association->GetBlendAmount(weight);
if(blend > 0.0f){
auto kfAdt = player->GetNextTimeDelta();
float t = kfAdt == 0.0f ? 0.0f : (kfAdt - remainingTime)/kfAdt;
float t = kfAdt == 0.0f ? 0.0f : dc::Div<true, false>(kfAdt - remainingTime, kfAdt);
if(player->type & CAnimBlendSequence::KF_TRANS){
auto kfdAt = player->GetNextTranslationDelta();
auto kfBt = player->GetPrevTranslation();

View File

@@ -3267,24 +3267,24 @@ CCam::Process_BehindBoat(const CVector &CameraTarget, float TargetOrientation, f
if(m_bCollisionChecksOn || ResetStatics){
CVector TestPoint;
// Weird calculations here, also casting bool to float...
c = Cos(TargetOrientation);
s = Sin(TargetOrientation);
c = Cos<false>(TargetOrientation);
s = Sin<false>(TargetOrientation);
TestPoint = TheCamera.CarZoomValueSmooth * CVector(-c, -s, 0.0f) +
(TheCamera.CarZoomValueSmooth+7.0f) * CVector(-c, -s, 0.0f) +
TargetCoors;
TestPoint.z = WaterLevel + TheCamera.CarZoomValueSmooth;
float Test1 = CWorld::GetIsLineOfSightClear(TestPoint, TargetCoors, true, false, false, true, false, true, true);
c = Cos(TargetOrientation + 0.8f);
s = Sin(TargetOrientation + DEGTORAD(40.0f));
c = Cos<false>(TargetOrientation + 0.8f);
s = Sin<false>(TargetOrientation + DEGTORAD(40.0f));
TestPoint = TheCamera.CarZoomValueSmooth * CVector(-c, -s, 0.0f) +
(TheCamera.CarZoomValueSmooth+7.0f) * CVector(-c, -s, 0.0f) +
TargetCoors;
TestPoint.z = WaterLevel + TheCamera.CarZoomValueSmooth;
float Test2 = CWorld::GetIsLineOfSightClear(TestPoint, TargetCoors, true, false, false, true, false, true, true);
c = Cos(TargetOrientation - 0.8);
s = Sin(TargetOrientation - DEGTORAD(40.0f));
c = Cos<false>(TargetOrientation - 0.8);
s = Sin<false>(TargetOrientation - DEGTORAD(40.0f));
TestPoint = TheCamera.CarZoomValueSmooth * CVector(-c, -s, 0.0f) +
(TheCamera.CarZoomValueSmooth+7.0f) * CVector(-c, -s, 0.0f) +
TargetCoors;
@@ -3307,8 +3307,7 @@ CCam::Process_BehindBoat(const CVector &CameraTarget, float TargetOrientation, f
DeltaBeta = TargetOrientation - Beta;
}
c = Cos(Beta);
s = Sin(Beta);
auto [s, c] = SinCos<false>(Beta);
TestPoint.x = TheCamera.CarZoomValueSmooth * -c +
(TheCamera.CarZoomValueSmooth + 7.0f) * -c +
TargetCoors.x;
@@ -3333,8 +3332,7 @@ CCam::Process_BehindBoat(const CVector &CameraTarget, float TargetOrientation, f
// inlined
WellBufferMe(TargetWhenChecksWereOn, &Beta, &BetaSpeed, 0.07f, 0.015f, true);
s = Sin(Beta);
c = Cos(Beta);
auto [s, c] = SinCos<false>(Beta);
Source = TheCamera.CarZoomValueSmooth * CVector(-c, -s, 0.0f) +
(TheCamera.CarZoomValueSmooth+7.0f) * CVector(-c, -s, 0.0f) +
TargetCoors;

View File

@@ -3694,7 +3694,7 @@ CCamera::IsBoxVisible(CVUVECTOR *box, const CMatrix *mat)
#ifdef GTA_PS2
TransformPoints(box, 8, *mat, box);
#else
#ifdef FIX_BUGS
#if defined(FIX_BUGS) && !defined(DC_SH4)
for (i = 0; i < 8; i++)
box[i] = *mat * box[i];
#else

View File

@@ -1215,6 +1215,9 @@ CWorld::FindObjectsIntersectingAngledCollisionBox(const CColBox &boundingBox, co
const int32 nStartY = Max(GetSectorIndexY(fStartY), 0);
const int32 nEndX = Min(GetSectorIndexX(fEndX), NUMSECTORS_X - 1);
const int32 nEndY = Min(GetSectorIndexY(fEndY), NUMSECTORS_Y - 1);
#ifdef DC_SH4
mat_load_transpose(matrix);
#endif
for(int32 y = nStartY; y <= nEndY; y++) {
for(int32 x = nStartX; x <= nEndX; x++) {
CSector *pSector = GetSector(x, y);
@@ -1268,14 +1271,19 @@ CWorld::FindObjectsIntersectingAngledCollisionBoxSectorList(CPtrList &list, cons
int16 *nEntitiesFound, int16 maxEntitiesToFind,
CEntity **aEntities)
{
for(CPtrNode *pNode = list.first; pNode; pNode = pNode->next) {
for(CPtrNode *pNode = list.first; pNode; pNode = pNode->next) {
CEntity *pEntity = (CEntity *)pNode->item;
if(pEntity->m_scanCode != GetCurrentScanCode()) {
pEntity->m_scanCode = GetCurrentScanCode();
CColSphere sphere;
CVector vecDistance = pEntity->GetPosition() - position;
sphere.radius = pEntity->GetBoundRadius();
#ifndef DC_SH4
sphere.center = Multiply3x3(vecDistance, matrix);
#else // Transposed matrix was already loaded by the caller, so no need to reload!
// NOTE(review): this relies on nothing replacing the loaded matrix between the caller's
// mat_load_transpose(matrix) and this point -- confirm for the calls made inside this loop.
mat_trans_normal3_nomod(vecDistance.x, vecDistance.y, vecDistance.z,
sphere.center.x, sphere.center.y, sphere.center.z);
#endif
if(CCollision::TestSphereBox(sphere, boundingBox) && *nEntitiesFound < maxEntitiesToFind) {
if(aEntities) aEntities[*nEntitiesFound] = pEntity;
++*nEntitiesFound;
@@ -1444,13 +1452,27 @@ CWorld::CallOffChaseForAreaSectorListVehicles(CPtrList &list, float x1, float y1
pVehicle->AutoPilot.m_nTimeTempAction = CTimer::GetTimeInMilliseconds() + 2000;
CColModel *pColModel = pVehicle->GetColModel();
bool bInsideSphere = false;
#ifdef DC_SH4
mat_load2(pVehicle->GetMatrix());
#endif
for(int32 i = 0; i < pColModel->numSpheres; i++) {
#ifndef DC_SH4
CVector pos = pVehicle->GetMatrix() * pColModel->spheres[i].center;
float fRadius = pColModel->spheres[i].radius;
#else
CVector pos;
auto &center = pColModel->spheres[i].center;
mat_trans_single3_nodiv_nomod(center.x, center.y, center.z,
pos.x, pos.y, pos.z);
#endif
float fRadius = pColModel->spheres[i].radius;
if(pos.x + fRadius > x1 && pos.x - fRadius < x2 && pos.y + fRadius > y1 &&
pos.y - fRadius < y2)
pos.y - fRadius < y2) {
bInsideSphere = true;
// Maybe break the loop when bInsideSphere is set to true?
#ifdef DC_SH4 // Don't see why not!
break;
#endif
}
}
if(bInsideSphere) {
if(pVehicle->GetPosition().x <= (x1 + x2) * 0.5f)

View File

@@ -949,11 +949,15 @@ CCullZone::FindTestPoints()
if(ElementsY > 32) ElementsY = 32;
if(ElementsZ > 32) ElementsZ = 32;
Memsize = ElementsX * ElementsY * ElementsZ;
StepX = (maxx-minx)/(ElementsX-1);
StepY = (maxy-miny)/(ElementsY-1);
StepZ = (maxz-minz)/(ElementsZ-1);
StepX = Div<true, false>(maxx-minx, ElementsX-1);
StepY = Div<true, false>(maxy-miny, ElementsY-1);
StepZ = Div<true, false>(maxz-minz, ElementsZ-1);
#ifndef DC_SH4
pMem = new uint8[Memsize];
#else
pMem = reinterpret_cast<uint8 *>(alloca(Memsize));
#endif
memset(pMem, 0, Memsize);
// indices of center
@@ -1496,13 +1500,28 @@ CCullZone::TestEntityVisibilityFromCullZone(CEntity *entity, float extraDist, CE
else
boundMaxZ += extraDist;
#ifndef DC_SH4
CVector vecMin = entity->GetMatrix() * CVector(boundMinX, boundMinY, boundMinZ);
CVector vecMaxX = entity->GetMatrix() * CVector(boundMaxX, boundMinY, boundMinZ);
CVector vecMaxY = entity->GetMatrix() * CVector(boundMinX, boundMaxY, boundMinZ);
CVector vecMaxZ = entity->GetMatrix() * CVector(boundMinX, boundMinY, boundMaxZ);
CVector dirx = vecMaxX - vecMin;
CVector diry = vecMaxY - vecMin;
CVector dirz = vecMaxZ - vecMin;
#else
mat_load2(entity->GetMatrix());
CVector vecMin, vecMaxX, vecMaxY, vecMaxZ;
mat_trans_single3_nodiv_nomod(boundMinX, boundMinY, boundMinZ,
vecMin.x, vecMin.y, vecMin.z);
mat_trans_single3_nodiv_nomod(boundMaxX, boundMinY, boundMinZ,
vecMaxX.x, vecMaxX.y, vecMaxX.z);
mat_trans_single3_nodiv_nomod(boundMinX, boundMaxY, boundMinZ,
vecMaxY.x, vecMaxY.y, vecMaxY.z);
mat_trans_single3_nodiv_nomod(boundMinX, boundMinY, boundMaxZ,
vecMaxZ.x, vecMaxZ.y, vecMaxZ.z);
#endif
CVector dirx = vecMaxX - vecMin;
CVector diry = vecMaxY - vecMin;
CVector dirz = vecMaxZ - vecMin;
// If building intersects zone at all, it's visible
int x, y, z;
@@ -1520,22 +1539,30 @@ CCullZone::TestEntityVisibilityFromCullZone(CEntity *entity, float extraDist, CE
float distToZone = CalcDistToCullZone(entity->GetPosition().x, entity->GetPosition().y)/15.0f;
distToZone = Max(distToZone, 7.0f);
int numX = (boundMaxX - boundMinX)/distToZone + 2.0f;
int numY = (boundMaxY - boundMinY)/distToZone + 2.0f;
int numZ = (boundMaxZ - boundMinZ)/distToZone + 2.0f;
float invDistToZone = Invert<true, false>(distToZone);
int numX = (boundMaxX - boundMinX)*invDistToZone + 2.0f;
int numY = (boundMaxY - boundMinY)*invDistToZone + 2.0f;
int numZ = (boundMaxZ - boundMinZ)*invDistToZone + 2.0f;
float stepX = 1.0f/(numX-1);
float stepY = 1.0f/(numY-1);
float stepZ = 1.0f/(numZ-1);
float stepX = Invert<true, false>(numX-1);
float stepY = Invert<true, false>(numY-1);
float stepZ = Invert<true, false>(numZ-1);
float midX = (boundMaxX + boundMinX)/2.0f;
float midY = (boundMaxY + boundMinY)/2.0f;
float midZ = (boundMaxZ + boundMinZ)/2.0f;
// check both xy planes
for(int i = 0; i < NumTestPoints; i++){
#ifndef DC_SH4
CVector mid = entity->GetMatrix() * CVector(midX, midY, midZ);
#else
CVector mid;
mat_trans_single3_nodiv_nomod(midX, midY, midZ,
mid.x, mid.y, mid.z);
#endif
mid.z += 0.1f;
for(int i = 0; i < NumTestPoints; i++){
CVector testPoint = aTestPoints[i];
CVector mid = entity->GetMatrix() * CVector(midX, midY, midZ);
mid.z += 0.1f;
if(DoThoroughLineTest(testPoint, mid, entity))
return true;

View File

@@ -54,21 +54,29 @@ CMatrix::Detach(void)
// Refresh this CMatrix from the RenderWare matrix it is attached to.
// m_attachment is dereferenced unconditionally, so it must be non-null here.
void
CMatrix::Update(void)
{
#ifdef DC_SH4
	// Dreamcast build: hand the copy off to mat_copy().
	mat_copy(*this, *m_attachment);
#else
	// Portable build: copy the three axes and the position one by one.
	// RW's `up` becomes our forward axis and RW's `at` becomes our up axis.
	const auto &rw = *m_attachment;
	GetRight() = rw.right;
	GetForward() = rw.up;
	GetUp() = rw.at;
	GetPosition() = rw.pos;
#endif
}
// Write this CMatrix back into the attached RenderWare matrix, if there is one.
void
CMatrix::UpdateRW(void)
{
	// Nothing to synchronise when no RW matrix is attached.
	if (!m_attachment)
		return;

#ifdef DC_SH4
	// Dreamcast build: hand the copy off to mat_copy().
	// NOTE(review): unlike the portable path, RwMatrixUpdate() is not called here -- confirm that is intended.
	mat_copy(*m_attachment, *this);
#else
	// Portable build: store the axes and position, then let RW refresh the matrix.
	// Our forward axis maps to RW's `up`, and our up axis maps to RW's `at`.
	m_attachment->right = GetRight();
	m_attachment->up = GetForward();
	m_attachment->at = GetUp();
	m_attachment->pos = GetPosition();
	RwMatrixUpdate(m_attachment);
#endif
}
@@ -76,8 +84,10 @@ void
CMatrix::operator=(CMatrix const &rhs)
{
mat_copy(*this, rhs);
#ifndef DC_SH4
if (m_attachment)
UpdateRW();
#endif
}
void
@@ -99,6 +109,7 @@ CMatrix::operator+=(CMatrix const &rhs)
void
CMatrix::SetUnity(void)
{
#ifndef DC_SH4
rx = 1.0f;
ry = 0.0f;
rz = 0.0f;
@@ -111,6 +122,10 @@ CMatrix::SetUnity(void)
px = 0.0f;
py = 0.0f;
pz = 0.0f;
#else
dc::mat_identity2();
dc::mat_store2(*this);
#endif
}
void
@@ -130,6 +145,7 @@ CMatrix::ResetOrientation(void)
void
CMatrix::SetScale(float s)
{
#ifndef DC_SH4
rx = s;
ry = 0.0f;
rz = 0.0f;
@@ -145,11 +161,16 @@ CMatrix::SetScale(float s)
px = 0.0f;
py = 0.0f;
pz = 0.0f;
#else
mat_set_scale(s);
mat_store2(*this);
#endif
}
void
CMatrix::SetTranslate(float x, float y, float z)
{
#ifndef DC_SH4
rx = 1.0f;
ry = 0.0f;
rz = 0.0f;
@@ -165,6 +186,10 @@ CMatrix::SetTranslate(float x, float y, float z)
px = x;
py = y;
pz = z;
#else
mat_set_translation(x, y, z);
mat_store2(*this);
#endif
}
void
@@ -224,34 +249,52 @@ CMatrix::SetRotateZOnly(float angle)
// Replace this matrix with a rotation of `angle` about the X axis and a zero translation.
void
CMatrix::SetRotateX(float angle)
{
#ifdef DC_SH4
	// Dreamcast build: start from identity, apply the X rotation, store the result here.
	dc::mat_identity2();
	dc::mat_apply_rotate_x(angle);
	dc::mat_store2(*this);
#else
	// Portable build: set the rotation part, then clear the position.
	SetRotateXOnly(angle);
	pz = 0.0f;
	py = 0.0f;
	px = 0.0f;
#endif
}
// Replace this matrix with a rotation of `angle` about the Y axis and a zero translation.
void
CMatrix::SetRotateY(float angle)
{
#ifdef DC_SH4
	// Dreamcast build: start from identity, apply the Y rotation, store the result here.
	dc::mat_identity2();
	dc::mat_apply_rotate_y(angle);
	dc::mat_store2(*this);
#else
	// Portable build: set the rotation part, then clear the position.
	SetRotateYOnly(angle);
	pz = 0.0f;
	py = 0.0f;
	px = 0.0f;
#endif
}
// Replace this matrix with a rotation of `angle` about the Z axis and a zero translation.
void
CMatrix::SetRotateZ(float angle)
{
#ifdef DC_SH4
	// Dreamcast build: start from identity, apply the Z rotation, store the result here.
	dc::mat_identity2();
	dc::mat_apply_rotate_z(angle);
	dc::mat_store2(*this);
#else
	// Portable build: set the rotation part, then clear the position.
	SetRotateZOnly(angle);
	pz = 0.0f;
	py = 0.0f;
	px = 0.0f;
#endif
}
void
CMatrix::SetRotate(float xAngle, float yAngle, float zAngle)
{
#if 1
auto [sX, cX] = SinCos(xAngle);
auto [sY, cY] = SinCos(yAngle);
auto [sZ, cZ] = SinCos(zAngle);
@@ -271,15 +314,19 @@ CMatrix::SetRotate(float xAngle, float yAngle, float zAngle)
px = 0.0f;
py = 0.0f;
pz = 0.0f;
#else
dc::mat_set_rotate(xAngle, yAngle, zAngle);
dc::mat_store2(*this);
#endif
}
void
CMatrix::RotateX(float x)
{
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
dc::mat_load2(reinterpret_cast<matrix_t *>(this));
mat_rotate_x(x);
dc::mat_store2(reinterpret_cast<matrix_t *>(this));
#if 0// this is bugged and does not yield correct results
dc::mat_set_rotate_x(x);
mat_apply(*this);
dc::mat_store2(*this);
#else
auto [s, c] = SinCos(x);
@@ -306,10 +353,10 @@ CMatrix::RotateX(float x)
void
CMatrix::RotateY(float y)
{
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
dc::mat_load2(reinterpret_cast<matrix_t *>(this));
mat_rotate_y(y);
dc::mat_store2(reinterpret_cast<matrix_t *>(this));
#if 0 // this is bugged and does not yield correct results
dc::mat_set_rotate_y(y);
mat_apply(*this);
dc::mat_store2(*this);
#else
auto [s, c] = SinCos(y);
@@ -336,10 +383,10 @@ CMatrix::RotateY(float y)
void
CMatrix::RotateZ(float z)
{
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
dc::mat_load2(reinterpret_cast<matrix_t *>(this));
mat_rotate_z(z);
dc::mat_store2(reinterpret_cast<matrix_t *>(this));
#if 0// this is bugged and does not yield correct results
dc::mat_set_rotate_z(z);
mat_apply(*this);
dc::mat_store2(*this);
#else
auto [s, c] = SinCos(z);
@@ -366,10 +413,10 @@ CMatrix::RotateZ(float z)
void
CMatrix::Rotate(float x, float y, float z)
{
#if 0 && defined(DC_SH4) // this is bugged and does not yield correct results
dc::mat_load2(reinterpret_cast<matrix_t *>(this));
mat_rotate(x, y, z);
dc::mat_store2(reinterpret_cast<matrix_t *>(this));
#if 0 // this is bugged and does not yield correct results
dc::mat_set_rotate(x, y, z);
mat_apply(*this);
dc::mat_store2(*this);
#else
auto [sX, cX] = SinCos(x);
auto [sY, cY] = SinCos(y);

View File

@@ -50,8 +50,7 @@ Multiply3x3(const CVector &vec, const CMatrix &mat)
mat.ux * vec.x + mat.uy * vec.y + mat.uz * vec.z);
#else
CVector out;
dc::mat_load2(mat);
mat_transpose();
dc::mat_load_transpose(mat);
mat_trans_normal3_nomod(vec.x, vec.y, vec.z,
out.x, out.y, out.z);
return out;

View File

@@ -1,5 +1,7 @@
#pragma once
#include "maths.h"
class CVector : public RwV3d
{
public:
@@ -30,7 +32,7 @@ public:
}
__always_inline float MagnitudeSqr(void) const {
#ifdef DC_SH4
return fipr_magnitude_sqr(x, y,z, 0.0f);
return fipr_magnitude_sqr(x, y, z, 0.0f);
#else
return x*x + y*y + z*z;
#endif
@@ -68,7 +70,7 @@ public:
}
const CVector &operator/=(float right) {
right = Invert<false>(right);
right = Invert<true, true>(right);
x *= right;
y *= right;
z *= right;
@@ -112,7 +114,7 @@ inline CVector operator*(float left, const CVector &right)
inline CVector operator/(const CVector &left, float right)
{
right = Invert<false>(right);
right = Invert<true, true>(right);
return CVector(left.x * right, left.y * right, left.z * right);
}

View File

@@ -300,9 +300,9 @@ CCoronas::Render(void)
if(aCoronas[i].fadeAlpha && spriteCoors.z < aCoronas[i].drawDist){
float recipz = 1.0f/spriteCoors.z;
float recipz = dc::Invert<true, false>(spriteCoors.z);
float fadeDistance = aCoronas[i].drawDist / 2.0f;
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - (spriteCoors.z - fadeDistance)/fadeDistance;
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - dc::Div<true, false>((spriteCoors.z - fadeDistance), fadeDistance);
int totalFade = aCoronas[i].fadeAlpha * distanceFade;
if(aCoronas[i].LOScheck)
@@ -313,6 +313,7 @@ CCoronas::Render(void)
// render corona itself
if(aCoronas[i].texture){
float fogscale = CWeather::Foggyness*Min(spriteCoors.z, 40.0f)/40.0f + 1.0f;
float invFogScale = dc::Invert<true, false>(fogscale);
if(CCoronas::aCoronas[i].id == SUN_CORE)
spriteCoors.z = 0.95f * RwCameraGetFarClipPlane(Scene.camera);
RwRenderStateSet(rwRENDERSTATETEXTURERASTER, RwTextureGetRaster(aCoronas[i].texture));
@@ -328,9 +329,9 @@ CCoronas::Render(void)
CSprite::RenderOneXLUSprite(spriteCoors.x, spriteCoors.y, spriteCoors.z,
spritew * aCoronas[i].size * wscale,
spriteh * aCoronas[i].size * fogscale * hscale,
CCoronas::aCoronas[i].red / fogscale,
CCoronas::aCoronas[i].green / fogscale,
CCoronas::aCoronas[i].blue / fogscale,
CCoronas::aCoronas[i].red * invFogScale,
CCoronas::aCoronas[i].green * invFogScale,
CCoronas::aCoronas[i].blue * invFogScale,
totalFade,
recipz,
255);
@@ -339,9 +340,9 @@ CCoronas::Render(void)
spriteCoors.x, spriteCoors.y, spriteCoors.z,
spritew * aCoronas[i].size * fogscale,
spriteh * aCoronas[i].size * fogscale,
CCoronas::aCoronas[i].red / fogscale,
CCoronas::aCoronas[i].green / fogscale,
CCoronas::aCoronas[i].blue / fogscale,
CCoronas::aCoronas[i].red * invFogScale,
CCoronas::aCoronas[i].green * invFogScale,
CCoronas::aCoronas[i].blue * invFogScale,
totalFade,
recipz,
20.0f * recipz,
@@ -365,7 +366,7 @@ CCoronas::Render(void)
(spriteCoors.x - (screenw/2)) * flare->position + (screenw/2),
(spriteCoors.y - (screenh/2)) * flare->position + (screenh/2),
spriteCoors.z,
4.0f*flare->size * spritew/spriteh,
4.0f*flare->size * dc::Div<true, false>(spritew, spriteh),
4.0f*flare->size,
(flare->red * aCoronas[i].red)>>8,
(flare->green * aCoronas[i].green)>>8,
@@ -480,9 +481,9 @@ CCoronas::RenderReflections(void)
drawDist = Min(drawDist, 55.0f);
if(spriteCoors.z < drawDist){
float fadeDistance = drawDist / 2.0f;
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - (spriteCoors.z - fadeDistance)/fadeDistance;
float distanceFade = spriteCoors.z < fadeDistance ? 1.0f : 1.0f - Div<true, false>((spriteCoors.z - fadeDistance), fadeDistance);
distanceFade = Clamp(distanceFade, 0.0f, 1.0f);
float recipz = 1.0f/RwCameraGetNearClipPlane(Scene.camera);
float recipz = dc::Invert<true, false>(RwCameraGetNearClipPlane(Scene.camera));
float heightFade = (20.0f - aCoronas[i].heightAboveRoad)/20.0f;
int intensity = distanceFade*heightFade * 230.0 * CWeather::WetRoads;
@@ -606,7 +607,9 @@ CEntity::ProcessLightsForEntity(void)
flashTimer1 = 0;
flashTimer2 = 0;
flashTimer3 = 0;
#ifdef DC_SH4
// Load this entity's matrix once, before the 2d-effect loop; the
// mat_trans_single3_nodiv_nomod() call inside the loop transforms each
// effect position without reloading it.
// This used to read `dc:mat_load2(...)`, which the compiler parses as a goto
// label named `dc` followed by an unqualified call. Use the namespace
// qualifier as intended.
dc::mat_load2(GetMatrix());
#endif
n = CModelInfo::GetModelInfo(GetModelIndex())->GetNum2dEffects();
for(i = 0; i < n; i++, flashTimer1 += 0x80, flashTimer2 += 0x100, flashTimer3 += 0x200){
effect = CModelInfo::GetModelInfo(GetModelIndex())->Get2dEffect(i);
@@ -614,8 +617,12 @@ CEntity::ProcessLightsForEntity(void)
if(effect->type != EFFECT_LIGHT)
continue;
#ifndef DC_SH4
pos = GetMatrix() * effect->pos;
#else
mat_trans_single3_nodiv_nomod(effect->pos.x, effect->pos.y, effect->pos.z,
pos.x, pos.y, pos.z);
#endif
lightOn = false;
lightFlickering = false;
switch(effect->light.lightType){

View File

@@ -80,7 +80,7 @@ CPointLights::GenerateLightsAffectingObject(Const CVector *objCoors)
distance = dist.Magnitude();
if(distance < radius){
float distNorm = distance/radius;
float distNorm = Div<true, false>(distance, radius);
if(aLights[i].type == LIGHT_DARKEN){
// darken the object the closer it is
ret *= distNorm;

View File

@@ -1315,8 +1315,9 @@ CalcNewDelta(RwV2d *a, RwV2d *b)
#define TOINT(x) ((int)(x))
#endif
template<typename F>
void
CRenderer::ScanSectorPoly(RwV2d *poly, int32 numVertices, void (*scanfunc)(CPtrList *))
CRenderer::ScanSectorPoly(RwV2d *poly, int32 numVertices, F&& scanfunc)
{
float miny, maxy;
int y, yend;

View File

@@ -90,7 +90,8 @@ public:
static void ConstructRenderList(void);
static void ScanWorld(void);
static void RequestObjectsInFrustum(void);
static void ScanSectorPoly(RwV2d *poly, int32 numVertices, void (*scanfunc)(CPtrList *));
template<typename F>
static void ScanSectorPoly(RwV2d *poly, int32 numVertices, F &&scanfunc);
static void ScanBigBuildingList(CPtrList &list);
static void ScanSectorList(CPtrList *lists);
static void ScanSectorList_Priority(CPtrList *lists);

View File

@@ -1,5 +1,7 @@
#pragma once
#include "maths.h"
class CVector : public RwV3d
{
public:
@@ -68,9 +70,16 @@ __always_inline float Heading(void) const { return Atan2(-x, y); }
}
// Divide every component of this vector by `right`, in place, and return *this.
const CVector &operator/=(float right) {
#ifdef DC_SH4
	// Dreamcast build: compute the reciprocal once with dc::Invert and multiply by it.
	const float inv = dc::Invert<true, true>(right);
	x *= inv;
	y *= inv;
	z *= inv;
#else
	// Portable build: plain per-component division.
	x /= right;
	y /= right;
	z /= right;
#endif
	return *this;
}
@@ -111,7 +120,12 @@ inline CVector operator*(float left, const CVector &right)
// Return a new vector holding each component of `left` divided by `right`.
inline CVector operator/(const CVector &left, float right)
{
#ifdef DC_SH4
	// Dreamcast build: compute the reciprocal once with dc::Invert and multiply by it.
	const float inv = dc::Invert<true, true>(right);
	return CVector(left.x * inv, left.y * inv, left.z * inv);
#else
	// Portable build: plain per-component division.
	return CVector(left.x / right, left.y / right, left.z / right);
#endif
}
inline float

View File

@@ -45,12 +45,17 @@ int32 build = 0xFFFF;
bool32 streamAppendFrames = 0;
char *debugFile = nil;
static Matrix identMat = {
{ 1.0f, 0.0f, 0.0f }, Matrix::IDENTITY|Matrix::TYPEORTHONORMAL,
{ 0.0f, 1.0f, 0.0f }, 0,
{ 0.0f, 0.0f, 1.0f }, 0,
{ 0.0f, 0.0f, 0.0f }, 0
};
static Matrix identMat = {{
.right = { 1.0f, 0.0f, 0.0f },
.flags = Matrix::IDENTITY|Matrix::TYPEORTHONORMAL,
.pad0 = 0,
.up = { 0.0f, 1.0f, 0.0f },
.upw = 0.0f,
.at = { 0.0f, 0.0f, 1.0f },
.atw = 0.0f,
.pos = { 0.0f, 0.0f, 0.0f },
.posw = 1.0f
}};
// lazy implementation
int
@@ -86,24 +91,6 @@ strncmp_ci(const char *s1, const char *s2, int n)
return 0;
}
Quat
mult(const Quat &q, const Quat &p)
{
#ifndef DC_SH4
return makeQuat(q.w*p.w - q.x*p.x - q.y*p.y - q.z*p.z,
q.w*p.x + q.x*p.w + q.y*p.z - q.z*p.y,
q.w*p.y + q.y*p.w + q.z*p.x - q.x*p.z,
q.w*p.z + q.z*p.w + q.x*p.y - q.y*p.x);
#else
Quat o;
dc::quat_mult(reinterpret_cast<dc::quaternion_t *>(&o),
reinterpret_cast<const dc::quaternion_t &>(q),
reinterpret_cast<const dc::quaternion_t &>(p));
return o;
#endif
}
Quat*
Quat::rotate(const V3d *axis, float32 angle, CombineOp op)
{
@@ -151,8 +138,9 @@ slerp(const Quat &q, const Quat &p, float32 a)
float32 phi = acosf(c);
if(phi > 0.00001f){
float32 s = sinf(phi);
return add(scale(q1, sinf((1.0f-a)*phi)/s),
scale(p, sinf(a*phi)/s));
float invS = dc::Invert<true, false>(s);
return add(scale(q1, sinf((1.0f-a)*phi) * invS),
scale(p, sinf(a*phi) * invS));
}
return q1;
}
@@ -160,39 +148,39 @@ slerp(const Quat &q, const Quat &p, float32 a)
//
// V3d
//
V3d
cross(const V3d &a, const V3d &b)
{
return makeV3d(a.y*b.z - a.z*b.y,
a.z*b.x - a.x*b.z,
a.x*b.y - a.y*b.x);
// Transform `n` points from `in` by matrix `m` and write the results to `out`.
// Each point is multiplied by the right/up/at rows of `m` and then offset by m->pos.
void V3d::transformPoints(V3d *out, const V3d *in, int32 n, const Matrix *m) {
int32 i;
#ifndef DC_SH4
// Portable path: compute into a temporary before storing, so out[i] may be
// the same object as in[i].
V3d tmp;
for(i = 0; i < n; i++){
tmp.x = in[i].x*m->right.x + in[i].y*m->up.x + in[i].z*m->at.x + m->pos.x;
tmp.y = in[i].x*m->right.y + in[i].y*m->up.y + in[i].z*m->at.y + m->pos.y;
tmp.z = in[i].x*m->right.z + in[i].y*m->up.z + in[i].z*m->at.z + m->pos.z;
out[i] = tmp;
}
#else
// Dreamcast path: load `m` once, then transform every point against it.
// NOTE(review): confirm mat_trans_single3_nodiv_nomod() tolerates out[i]
// aliasing in[i], as the portable path does.
dc::mat_load2(*m);
for(i = 0; i < n; i++)
mat_trans_single3_nodiv_nomod(in[i].x, in[i].y, in[i].z,
out[i].x, out[i].y, out[i].z);
#endif
}
void
V3d::transformPoints(V3d *out, const V3d *in, int32 n, const Matrix *m)
{
int32 i;
V3d tmp;
for(i = 0; i < n; i++){
tmp.x = in[i].x*m->right.x + in[i].y*m->up.x + in[i].z*m->at.x + m->pos.x;
tmp.y = in[i].x*m->right.y + in[i].y*m->up.y + in[i].z*m->at.y + m->pos.y;
tmp.z = in[i].x*m->right.z + in[i].y*m->up.z + in[i].z*m->at.z + m->pos.z;
out[i] = tmp;
}
}
void
V3d::transformVectors(V3d *out, const V3d *in, int32 n, const Matrix *m)
{
int32 i;
V3d tmp;
for(i = 0; i < n; i++){
tmp.x = in[i].x*m->right.x + in[i].y*m->up.x + in[i].z*m->at.x;
tmp.y = in[i].x*m->right.y + in[i].y*m->up.y + in[i].z*m->at.y;
tmp.z = in[i].x*m->right.z + in[i].y*m->up.z + in[i].z*m->at.z;
out[i] = tmp;
}
// Transform `n` direction vectors from `in` by matrix `m` and write the results to `out`.
// Only the right/up/at rows of `m` are applied; m->pos is not added.
void V3d::transformVectors(V3d *out, const V3d *in, int32 n, const Matrix *m) {
int32 i;
#ifndef DC_SH4
// Portable path: compute into a temporary before storing, so out[i] may be
// the same object as in[i].
V3d tmp;
for(i = 0; i < n; i++){
tmp.x = in[i].x*m->right.x + in[i].y*m->up.x + in[i].z*m->at.x;
tmp.y = in[i].x*m->right.y + in[i].y*m->up.y + in[i].z*m->at.y;
tmp.z = in[i].x*m->right.z + in[i].y*m->up.z + in[i].z*m->at.z;
out[i] = tmp;
}
#else
// Dreamcast path: load `m` once, then transform every vector against it.
// NOTE(review): confirm mat_trans_normal3_nomod() tolerates out[i]
// aliasing in[i], as the portable path does.
dc::mat_load2(*m);
for(i = 0; i < n; i++)
mat_trans_normal3_nomod(in[i].x, in[i].y, in[i].z,
out[i].x, out[i].y, out[i].z);
#endif
}
//
@@ -202,6 +190,7 @@ V3d::transformVectors(V3d *out, const V3d *in, int32 n, const Matrix *m)
void
RawMatrix::mult(RawMatrix *dst, RawMatrix *src1, RawMatrix *src2)
{
#ifndef DC_SH4
dst->right.x = src1->right.x*src2->right.x + src1->right.y*src2->up.x + src1->right.z*src2->at.x + src1->rightw*src2->pos.x;
dst->right.y = src1->right.x*src2->right.y + src1->right.y*src2->up.y + src1->right.z*src2->at.y + src1->rightw*src2->pos.y;
dst->right.z = src1->right.x*src2->right.z + src1->right.y*src2->up.z + src1->right.z*src2->at.z + src1->rightw*src2->pos.z;
@@ -218,11 +207,15 @@ RawMatrix::mult(RawMatrix *dst, RawMatrix *src1, RawMatrix *src2)
dst->pos.y = src1->pos.x*src2->right.y + src1->pos.y*src2->up.y + src1->pos.z*src2->at.y + src1->posw*src2->pos.y;
dst->pos.z = src1->pos.x*src2->right.z + src1->pos.y*src2->up.z + src1->pos.z*src2->at.z + src1->posw*src2->pos.z;
dst->posw = src1->pos.x*src2->rightw + src1->pos.y*src2->upw + src1->pos.z*src2->atw + src1->posw*src2->posw;
#else
dc::mat_mult(*dst, *src2, *src1);
#endif
}
void
RawMatrix::transpose(RawMatrix *dst, RawMatrix *src)
{
#ifndef DC_SH4
dst->right.x = src->right.x;
dst->up.x = src->right.y;
dst->at.x = src->right.z;
@@ -239,18 +232,27 @@ RawMatrix::transpose(RawMatrix *dst, RawMatrix *src)
dst->upw = src->pos.y;
dst->atw = src->pos.z;
dst->posw = src->posw;
#else
dc::mat_load_transpose(*src);
dc::mat_store2(*dst);
#endif
}
void
RawMatrix::setIdentity(RawMatrix *dst)
{
static RawMatrix identity = {
#ifndef DC_SH4
static RawMatrix identity = {{
{ 1.0f, 0.0f, 0.0f }, 0.0f,
{ 0.0f, 1.0f, 0.0f }, 0.0f,
{ 0.0f, 0.0f, 1.0f }, 0.0f,
{ 0.0f, 0.0f, 0.0f }, 1.0f
};
}};
*dst = identity;
#else
dc::mat_identity2();
dc::mat_store2(*dst);
#endif
}
//
@@ -309,9 +311,10 @@ Matrix::mult(Matrix *dst, const Matrix *src1, const Matrix *src2)
*dst = *src2;
else if(src2->flags & IDENTITY)
*dst = *src1;
else{
else {
uint8_t flags = src1->flags & src2->flags;
mult_(dst, src1, src2);
dst->flags = src1->flags & src2->flags;
dst->flags = flags;
}
return dst;
}
@@ -332,7 +335,8 @@ Matrix::invert(Matrix *dst, const Matrix *src)
Matrix*
Matrix::transpose(Matrix *dst, const Matrix *src)
{
if(src->flags & IDENTITY)
#ifndef DC_SH4
if(src->flags & IDENTITY)
*dst = *src;
dst->right.x = src->right.x;
dst->up.x = src->right.y;
@@ -346,25 +350,31 @@ Matrix::transpose(Matrix *dst, const Matrix *src)
dst->pos.x = 0.0;
dst->pos.y = 0.0;
dst->pos.z = 0.0;
#else
if(src->flags & IDENTITY)
*dst = *src;
else {
dc::mat_load_transpose(*src);
dc::mat_store2(*dst);
}
#endif
return dst;
}
Matrix*
Matrix::rotate(const V3d *axis, float32 angle, CombineOp op)
{
Matrix tmp, rot;
makeRotation(&rot, axis, angle);
Matrix rot;
makeRotation(&rot, axis, angle);
switch(op){
case COMBINEREPLACE:
*this = rot;
break;
case COMBINEPRECONCAT:
mult(&tmp, &rot, this);
*this = tmp;
mult(this, &rot, this);
break;
case COMBINEPOSTCONCAT:
mult(&tmp, this, &rot);
*this = tmp;
mult(this, this, &rot);
break;
}
return this;
@@ -373,41 +383,44 @@ Matrix::rotate(const V3d *axis, float32 angle, CombineOp op)
Matrix*
Matrix::rotate(const Quat &q, CombineOp op)
{
Matrix tmp, rot;
makeRotation(&rot, q);
Matrix rot;
makeRotation(&rot, q);
switch(op){
case COMBINEREPLACE:
*this = rot;
break;
case COMBINEPRECONCAT:
mult(&tmp, &rot, this);
*this = tmp;
mult(this, &rot, this);
break;
case COMBINEPOSTCONCAT:
mult(&tmp, this, &rot);
*this = tmp;
mult(this, this, &rot);
break;
}
return this;
}
Matrix*
Matrix::translate(const V3d *translation, CombineOp op)
{
Matrix tmp;
#if 1
Matrix trans = identMat;
trans.pos = *translation;
trans.flags &= ~IDENTITY;
#else
Matrix trans;
dc::mat_set_translation(translation->x, translation->y, translation->z);
dc::mat_store2(trans);
trans.flags = TYPEORTHONORMAL;
#endif
switch(op){
case COMBINEREPLACE:
*this = trans;
break;
case COMBINEPRECONCAT:
mult(&tmp, &trans, this);
*this = tmp;
mult(this, &trans, this);
break;
case COMBINEPOSTCONCAT:
mult(&tmp, this, &trans);
*this = tmp;
mult(this, this, &trans);
break;
}
return this;
@@ -416,23 +429,27 @@ Matrix::translate(const V3d *translation, CombineOp op)
Matrix*
Matrix::scale(const V3d *scale, CombineOp op)
{
Matrix tmp;
#ifndef DC_SH4
Matrix scl = identMat;
scl.right.x = scale->x;
scl.up.y = scale->y;
scl.at.z = scale->z;
scl.flags &= ~IDENTITY;
#else
Matrix scl;
dc::mat_set_scale(scale->x, scale->y, scale->z);
dc::mat_store2(scl);
scl.flags = TYPEORTHONORMAL;
#endif
switch(op){
case COMBINEREPLACE:
*this = scl;
break;
case COMBINEPRECONCAT:
mult(&tmp, &scl, this);
*this = tmp;
mult(this, &scl, this);
break;
case COMBINEPOSTCONCAT:
mult(&tmp, this, &scl);
*this = tmp;
mult(this, this, &scl);
break;
}
return this;
@@ -441,18 +458,15 @@ Matrix::scale(const V3d *scale, CombineOp op)
Matrix*
Matrix::transform(const Matrix *mat, CombineOp op)
{
Matrix tmp;
switch(op){
case COMBINEREPLACE:
*this = *mat;
break;
case COMBINEPRECONCAT:
mult(&tmp, mat, this);
*this = tmp;
mult(this, mat, this);
break;
case COMBINEPOSTCONCAT:
mult(&tmp, this, mat);
*this = tmp;
mult(this, this, mat);
break;
}
return this;
@@ -467,27 +481,31 @@ Matrix::getRotation(void)
if(tr > 0.0f){
s = sqrtf(1.0f + tr) * 2.0f;
q.w = s / 4.0f;
q.x = (up.z - at.y) / s;
q.y = (at.x - right.z) / s;
q.z = (right.y - up.x) / s;
float invS = dc::Invert<true, false>(s);
q.x = (up.z - at.y) * invS;
q.y = (at.x - right.z) * invS;
q.z = (right.y - up.x) * invS;
}else if(right.x > up.y && right.x > at.z){
s = sqrtf(1.0f + right.x - up.y - at.z) * 2.0f;
q.w = (up.z - at.y) / s;
q.x = s / 4.0f;
q.y = (up.x + right.y) / s;
q.z = (at.x + right.z) / s;
q.x = s / 4.0f;
float invS = dc::Invert<true, false>(s);
q.w = (up.z - at.y) * invS;
q.y = (up.x + right.y) * invS;
q.z = (at.x + right.z) * invS;
}else if(up.y > at.z){
s = sqrtf(1.0f + up.y - right.x - at.z) * 2.0f;
q.w = (at.x - right.z) / s;
q.x = (up.x + right.y) / s;
q.y = s / 4.0f;
q.z = (at.y + up.z) / s;
q.y = s / 4.0f;
float invS = dc::Invert<true, false>(s);
q.w = (at.x - right.z) * invS;
q.x = (up.x + right.y) * invS;
q.z = (at.y + up.z) * invS;
}else{
s = sqrtf(1.0f + at.z - right.x - up.y) * 2.0f;
q.w = (right.y - up.x) / s;
q.x = (at.x + right.z) / s;
q.y = (at.y + up.z) / s;
q.z = s / 4.0f;
q.z = s / 4.0f;
float invS = dc::Invert<true, false>(s);
q.w = (right.y - up.x) * invS;
q.x = (at.x + right.z) * invS;
q.y = (at.y + up.z) * invS;
}
return q;
}
@@ -509,20 +527,7 @@ Matrix::lookAt(const V3d &dir, const V3d &up)
void
Matrix::mult_(Matrix *__restrict__ dst, const Matrix *__restrict__ src1, const Matrix *__restrict__ src2)
{
#if !defined(DC_TEXCONV) && !defined(DC_SIM)
dst->right.x = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
dst->right.y = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
dst->right.z = fipr(src1->right.x, src1->right.y, src1->right.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
dst->up.x = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
dst->up.y = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
dst->up.z = fipr(src1->up.x, src1->up.y, src1->up.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
dst->at.x = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.x, src2->up.x, src2->at.x, 0);
dst->at.y = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.y, src2->up.y, src2->at.y, 0);
dst->at.z = fipr(src1->at.x, src1->at.y, src1->at.z, 0, src2->right.z, src2->up.z, src2->at.z, 0);
dst->pos.x = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.x, src2->up.x, src2->at.x, src2->pos.x);
dst->pos.y = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.y, src2->up.y, src2->at.y, src2->pos.y);
dst->pos.z = fipr(src1->pos.x, src1->pos.y, src1->pos.z, 1, src2->right.z, src2->up.z, src2->at.z, src2->pos.z);
#else
#ifndef DC_SH4
dst->right.x = src1->right.x*src2->right.x + src1->right.y*src2->up.x + src1->right.z*src2->at.x;
dst->right.y = src1->right.x*src2->right.y + src1->right.y*src2->up.y + src1->right.z*src2->at.y;
dst->right.z = src1->right.x*src2->right.z + src1->right.y*src2->up.z + src1->right.z*src2->at.z;
@@ -535,12 +540,15 @@ Matrix::mult_(Matrix *__restrict__ dst, const Matrix *__restrict__ src1, const M
dst->pos.x = src1->pos.x*src2->right.x + src1->pos.y*src2->up.x + src1->pos.z*src2->at.x + src2->pos.x;
dst->pos.y = src1->pos.x*src2->right.y + src1->pos.y*src2->up.y + src1->pos.z*src2->at.y + src2->pos.y;
dst->pos.z = src1->pos.x*src2->right.z + src1->pos.y*src2->up.z + src1->pos.z*src2->at.z + src2->pos.z;
#endif
#else
dc::mat_mult(*dst, *src2, *src1);
#endif
}
void
Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
{
#if 1
dst->right.x = src->right.x;
dst->right.y = src->up.x;
dst->right.z = src->at.x;
@@ -559,7 +567,12 @@ Matrix::invertOrthonormal(Matrix *dst, const Matrix *src)
dst->pos.z = -(src->pos.x*src->at.x +
src->pos.y*src->at.y +
src->pos.z*src->at.z);
dst->flags = TYPEORTHONORMAL;
#else
dc::mat_load_transpose(*src);
dc::mat_invert_tranpose();
dc::mat_store2(*dst);
#endif
dst->flags = TYPEORTHONORMAL;
}
Matrix*
@@ -595,8 +608,13 @@ void
Matrix::makeRotation(Matrix *dst, const V3d *axis, float32 angle)
{
// V3d v = normalize(*axis);
#ifndef DC_SH4
float32 len = dot(*axis, *axis);
if(len != 0.0f) len = 1.0f/sqrtf(len);
#else
float len = fipr_magnitude_sqr(axis->x, axis->y, axis->z, 0.0f);
if(len != 0.0f) len = dc::RecipSqrt(len);
#endif
V3d v = rw::scale(*axis, len);
angle = angle*(float)M_PI/180.0f;
float32 s = sinf(angle);
@@ -654,7 +672,11 @@ Matrix::normalError(void)
x = dot(right, right) - 1.0f;
y = dot(up, up) - 1.0f;
z = dot(at, at) - 1.0f;
#ifndef DC_SH4
return x*x + y*y + z*z;
#else
return fipr_magnitude_sqr(x, y, z, 0.0f);
#endif
}
float32
@@ -664,16 +686,27 @@ Matrix::orthogonalError(void)
x = dot(at, up);
y = dot(at, right);
z = dot(up, right);
#ifndef DC_SH4
return x*x + y*y + z*z;
#else
return fipr_magnitude_sqr(x, y, z, 0.0f);
#endif
}
// Sum of squared differences between this matrix and the identity:
// the three basis rows are compared against (1,0,0), (0,1,0), (0,0,1)
// and the position row against (0,0,0). Returns 0 for an exact identity.
float32
Matrix::identityError(void)
{
	V3d r = { right.x-1.0f, right.y, right.z };
	V3d u = { up.x, up.y-1.0f, up.z };
	V3d a = { at.x, at.y, at.z-1.0f };
#ifndef DC_SH4
	return dot(r,r) + dot(u,u) + dot(a,a) + dot(pos,pos);
#else
	// Use the local difference vector `a`, not the member `at`: feeding the
	// raw `at` row in would add |at|^2 (about 1 for any rotation) to the
	// result, so an identity matrix would never score close to 0.
	return fipr_magnitude_sqr(r.x, r.y, r.z, 0.0f) +
	       fipr_magnitude_sqr(u.x, u.y, u.z, 0.0f) +
	       fipr_magnitude_sqr(a.x, a.y, a.z, 0.0f) +
	       fipr_magnitude_sqr(pos.x, pos.y, pos.z, 0.0f);
#endif
}
void

View File

@@ -493,33 +493,7 @@ Camera::frustumTestSphere(const Sphere *s) const
static_assert(offsetof (decltype (p[0].plane), distance)
-offsetof (decltype (p[0].plane.normal), z) == sizeof (float));
asm volatile (R"(
frchg
fmov.s @%0+,fr0
fmov.s @%1+,fr1
fmov.s @%2+,fr2
fmov.s @%3+,fr3
fmov.s @%0+,fr4
fmov.s @%1+,fr5
fmov.s @%2+,fr6
fmov.s @%3+,fr7
fmov.s @%0+,fr8
fmov.s @%1+,fr9
fmov.s @%2+,fr10
fmov.s @%3+,fr11
fmov.s @%0,fr12
fmov.s @%1,fr13
fmov.s @%2,fr14
fmov.s @%3,fr15
frchg
)" : "+&r" (base_ptr0), "+&r" (base_ptr1), "+&r" (base_ptr2), "+&r" (base_ptr3)
:
: );
mat_load_rows(base_ptr0, base_ptr1, base_ptr2, base_ptr3);
float dists[4];
mat_trans_vec4_nodiv_nomod(sx, sy, sz, sw,
@@ -643,33 +617,7 @@ Camera::frustumTestSphereNear(const Sphere *s) const
static_assert(offsetof (decltype (p[0].plane), distance)
-offsetof (decltype (p[0].plane.normal), z) == sizeof (float));
asm volatile (R"(
frchg
fmov.s @%0+,fr0
fmov.s @%1+,fr1
fmov.s @%2+,fr2
fmov.s @%3+,fr3
fmov.s @%0+,fr4
fmov.s @%1+,fr5
fmov.s @%2+,fr6
fmov.s @%3+,fr7
fmov.s @%0+,fr8
fmov.s @%1+,fr9
fmov.s @%2+,fr10
fmov.s @%3+,fr11
fmov.s @%0,fr12
fmov.s @%1,fr13
fmov.s @%2,fr14
fmov.s @%3,fr15
frchg
)" : "+&r" (base_ptr0), "+&r" (base_ptr1), "+&r" (base_ptr2), "+&r" (base_ptr3)
:
: );
mat_load_rows(base_ptr0, base_ptr1, base_ptr2, base_ptr3);
float dists[4];
mat_trans_vec4_nodiv_nomod(sx, sy, sz, sw,

View File

@@ -563,7 +563,7 @@ Camera* rwdcCam;
void beginUpdate(Camera* cam) {
rwdcCam = cam;
float view[16], proj[16];
alignas(8) float view[16], proj[16];
// View Matrix
Matrix inv;
@@ -586,7 +586,7 @@ void beginUpdate(Camera* cam) {
view[13] = -inv.pos.y;
view[14] = inv.pos.z;
view[15] = 1.0f;
memcpy4(&cam->devView, view, sizeof(RawMatrix));
mat_copy(cam->devView, reinterpret_cast<const matrix_t *>(view));
// d3ddevice->SetTransform(D3DTS_VIEW, (D3DMATRIX*)view);
// Projection Matrix
@@ -620,8 +620,8 @@ void beginUpdate(Camera* cam) {
proj[15] = 1.0f;
}
proj[14] = -cam->nearPlane*proj[10];
memcpy4(&cam->devProj, proj, sizeof(RawMatrix));
mat_copy(cam->devProj, reinterpret_cast<const matrix_t *>(proj));
DCE_MatrixViewport(0, 0, cam->frameBuffer->width * VIDEO_MODE_SCALE_X, cam->frameBuffer->height);
mat_load_apply((matrix_t*)&DCE_MAT_SCREENVIEW, (matrix_t*)&cam->devProj);
@@ -811,7 +811,8 @@ struct chunked_vector {
}
// Iterate over each element and invoke the callback.
void forEach(void(*cb)(T&)) {
template<typename F>
void forEach(F&& cb) {
for (chunk* curr = first; curr; curr = curr->header.next) {
for (size_t i = 0; i < curr->header.used; ++i) {
cb(curr->items[i]);
@@ -945,8 +946,6 @@ void dcMotionBlur_v1(uint8_t a, uint8_t r, uint8_t g, uint8_t b) {
auto addr2 = (pvr_ptr_t)&emu_vram[addr64b + 640 * 2];
#endif
PVR_SET(PVR_TEXTURE_MODULO, 640/32);
auto doquad = [=](float x, float y, float w, float h, float tx, float ty, float tw, float th) {
@@ -1666,13 +1665,13 @@ void im2DRenderPrimitive(PrimitiveType primType, void *vertices, int32_t numVert
switch(primType) {
case PRIMTYPETRILIST:
pvrHeaderSubmit();
dcache_pref_block(vtx);
__builtin_prefetch(vtx);
for(int i = 0; i < numVertices; i += 3) [[likely]] {
dcache_pref_block(&vtx[i + 1]);
__builtin_prefetch(&vtx[i + 1]);
pvrVertexSubmit(vtx[i + 0], PVR_CMD_VERTEX);
dcache_pref_block(&vtx[i + 2]);
__builtin_prefetch(&vtx[i + 2]);
pvrVertexSubmit(vtx[i + 1], PVR_CMD_VERTEX);
dcache_pref_block(&vtx[i + 3]);
__builtin_prefetch(&vtx[i + 3]);
pvrVertexSubmit(vtx[i + 2], PVR_CMD_VERTEX_EOL);
}
break;
@@ -1680,14 +1679,11 @@ void im2DRenderPrimitive(PrimitiveType primType, void *vertices, int32_t numVert
pvrHeaderSubmit();
const auto *vtxA = vtx + 0;
const auto *vtxB = vtx + 1;
dcache_pref_block(vtxA);
__builtin_prefetch(vtxA);
for(int i = 2; i < numVertices; ++i) [[likely]] {
const auto *vtxC = vtx + i;
dcache_pref_block(vtxB);
pvrVertexSubmit(*vtxA, PVR_CMD_VERTEX);
dcache_pref_block(vtxC);
pvrVertexSubmit(*vtxB, PVR_CMD_VERTEX);
dcache_pref_block(&vtx[i]);
pvrVertexSubmit(*vtxC, PVR_CMD_VERTEX_EOL);
vtxB = vtxC;
}
@@ -1787,13 +1783,13 @@ void im2DRenderIndexedPrimitive(PrimitiveType primType, void *vertices, int32 nu
switch(primType) {
case PRIMTYPETRILIST:
pvrHeaderSubmit();
dcache_pref_block(vtx);
__builtin_prefetch(vtx);
for(int i = 0; i < numIndices; i += 3) [[likely]] {
dcache_pref_block(&vtx[idx[i + 1]]);
__builtin_prefetch(&vtx[idx[i + 1]]);
pvrVertexSubmit(vtx[idx[i + 0]], PVR_CMD_VERTEX);
dcache_pref_block(&vtx[idx[i + 2]]);
__builtin_prefetch(&vtx[idx[i + 2]]);
pvrVertexSubmit(vtx[idx[i + 1]], PVR_CMD_VERTEX);
dcache_pref_block(&vtx[idx[i + 3]]);
__builtin_prefetch(&vtx[idx[i + 3]]);
pvrVertexSubmit(vtx[idx[i + 2]], PVR_CMD_VERTEX_EOL);
}
break;
@@ -1830,15 +1826,21 @@ void im3DTransform(void *vertices, int32 numVertices, Matrix *worldMat, uint32 f
worldMat = &ident;
}
rw::RawMatrix mtx, proj, world, worldview;
rw::Camera *cam = engine->currentCamera;
rw::convMatrix(&world, worldMat);
#ifndef DC_SH4
rw::RawMatrix mtx, proj, world, worldview;
rw::convMatrix(&world, worldMat);
rw::RawMatrix::mult(&worldview, &world, &cam->devView);
rw::RawMatrix::mult(&proj, &worldview, &cam->devProj);
rw::RawMatrix::mult(&mtx, &proj, (RawMatrix*)&DCE_MAT_SCREENVIEW);
// mat_load2(&DCE_MAT_SCREENVIEW); // ~11 cycles.
mat_load2(( matrix_t*)&mtx.right); // Number of cycles: ~32.
#else
mat_load_apply(&DCE_MAT_SCREENVIEW, cam->devProj);
mat_apply(cam->devView);
mat_apply(*worldMat);
#endif
if (im3dVertices) {
free(im3dVertices);
}
@@ -2136,39 +2138,36 @@ static_assert(sizeof(MeshletInfo) == 40); // or 32 if !skin
// Fill `uniformObject` with the ambient colour and with up to MAX_LIGHTS
// directional lights taken from `lightData`. Each light's `at` vector is
// transformed by the inverse of the atomic's LTM (translation zeroed first),
// then negated and divided by 127 before being stored.
// NOTE(review): this text comes from a diff view without +/- markers, so two
// variants of the loop are interleaved below: one indexing with `n` plus a
// `goto out`, and one indexing with `i`. Presumably the `i` variant is the
// current one -- confirm against the real file.
inline __attribute__((always_inline)) void setLights(Atomic *atomic, WorldLights *lightData, UniformObject &uniformObject)
{
int n = 0;
int i = 0;
uniformObject.ambLight = lightData->ambient;
if (lightData->numDirectionals) {
Matrix mat;
Matrix matsrc = *atomic->getFrame()->getLTM();
// Drop the translation so that only the rotational part gets inverted.
matsrc.pos = V3d {0,0,0};
matsrc.pos = V3d {0.0f, 0.0f, 0.0f};
matsrc.posw = 0.0f;
Matrix::invert(&mat, &matsrc);
n = 0;
for(int i = 0; i < lightData->numDirectionals && i < MAX_LIGHTS; i++){
// presumably makes `mat` the current transform used by
// mat_trans_normal3_nomod below; verify that nothing called inside the loop
// (e.g. getLTM()) replaces it -- TODO confirm
mat_load2(mat);
for(; i < lightData->numDirectionals && i < MAX_LIGHTS; i++){
Light *l = lightData->directionals[i];
uniformObject.col[n] = scale(l->color, 255);
uniformObject.col[i] = scale(l->color, 255);
V3d at = l->getFrame()->getLTM()->at;
V3d dir;
V3d::transformVectors(&dir, &at, 1, &mat);
mat_trans_normal3_nomod(at.x, at.y, at.z,
dir.x, dir.y, dir.z);
// Directions are stored in groups of four lights: the first index is the
// group (light index / 4), the second the component, the third the light's
// slot within the group (light index % 4).
uniformObject.dir[n>>2][0][n&3] = -dir.x / 127.0f;
uniformObject.dir[n>>2][1][n&3] = -dir.y / 127.0f;
uniformObject.dir[n>>2][2][n&3] = -dir.z / 127.0f;
uniformObject.dir[n>>2][3][n&3] = 0;
n++;
if(n >= MAX_LIGHTS)
goto out;
uniformObject.dir[i>>2][0][i&3] = -dir.x / 127.0f;
uniformObject.dir[i>>2][1][i&3] = -dir.y / 127.0f;
uniformObject.dir[i>>2][2][i&3] = -dir.z / 127.0f;
uniformObject.dir[i>>2][3][i&3] = 0;
}
}
out:
// Number of directional lights actually stored.
uniformObject.lightCount = n;
uniformObject.lightCount = i;
}
@@ -2835,7 +2834,7 @@ void* interpolateAndSubmit(void* dst, const void* src1, const void* src2, uint32
float y = v1->o_r + t * (v2->o_r - v1->o_r);
float w = v1->o_g + t * (v2->o_g - v1->o_g);
w = frsqrt(w * w);
w = Invert<true, false>(w);
v->x = x * w;
v->y = y * w;
@@ -3209,15 +3208,14 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
auto skinningIndexData = (int16_t*)skinIndexes;
auto skinningWeightData = (uint8_t*)skinWeights;
if (!matrix0Identity) {
mat_load_4x4(&skinMatrices[0]);
if (small_xyz) {
mat_apply(&DCE_MESHLET_MAT_DECODE);
}
} else {
if (small_xyz) {
mat_load2(&DCE_MESHLET_MAT_DECODE);
}
if constexpr (!matrix0Identity) {
if (!small_xyz)
mat_load2(skinMatrices[0]);
else
mat_load_apply(skinMatrices[0], &DCE_MESHLET_MAT_DECODE);
} else if constexpr (small_xyz) {
mat_load2(&DCE_MESHLET_MAT_DECODE);
}
for(;;) {
@@ -3227,7 +3225,7 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
int count = *skinningIndexData++;
uint8_t* dstVertexBytes = dest + *skinningIndexData++;
if (matrix0Identity && !small_xyz) {
if constexpr (matrix0Identity && !small_xyz) {
do {
const V3d* srcVtx = (const V3d*)(srcVtxBytes);
srcVtxBytes += vertexSize;
@@ -3246,11 +3244,8 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
srcVtxBytes += vertexSize;
V3d* dstVertex = (V3d*)(dstVertexBytes);
dstVertexBytes += 64;
float x, y, z, w;
mat_trans_nodiv_nomod(srcVtx->x, srcVtx->y, srcVtx->z, x, y, z, w);
dstVertex->x = x;
dstVertex->y = y;
dstVertex->z = z;
mat_trans_single3_nodiv_nomod(srcVtx->x, srcVtx->y, srcVtx->z,
dstVertex->x, dstVertex->y, dstVertex->z);
} while(--count != 0);
}
} else if (!(flags & 0x80)) {
@@ -3280,10 +3275,10 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
break;
}
mat_load_4x4(currentMatrix);
if (small_xyz){
mat_apply(&DCE_MESHLET_MAT_DECODE);
}
if constexpr(!small_xyz)
mat_load2(*currentMatrix);
else
mat_load_apply(*currentMatrix, &DCE_MESHLET_MAT_DECODE);
do {
auto srcOffset = *skinningIndexData++;
@@ -3297,9 +3292,9 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
srcVtx = &tmpSrc;
}
auto dstVtx = (V3d*)(dest + dstOffset);
float x, y, z, w;
mat_trans_nodiv_nomod(srcVtx->x, srcVtx->y, srcVtx->z, x, y, z, w);
V3d tmp = { x, y, z };
V3d tmp;
mat_trans_single3_nodiv_nomod(srcVtx->x, srcVtx->y, srcVtx->z,
tmp.x, tmp.y, tmp.z);
tmp = scale(tmp, *skinningWeightData++ / 255.0f);
*dstVtx = add(*dstVtx, tmp);
} while (--count != 0);
@@ -3312,8 +3307,8 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
auto skinningIndexData = (int16_t*)skinIndexes;
auto skinningWeightData = (uint8_t*)skinWeights;
if (!matrix0Identity) {
mat_load_3x3(&skinMatrices[0]);
if constexpr(!matrix0Identity) {
mat_load2(skinMatrices[0]);
}
for(;;) {
@@ -3323,7 +3318,7 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
int count = *skinningIndexData++;
uint8_t* dstNormalBytes = destNormal + *skinningIndexData++;
if (matrix0Identity) {
if constexpr (matrix0Identity) {
do {
V3d srcNormal = { static_cast<float32>(srcNormalBytes[0]), static_cast<float32>(srcNormalBytes[1]), static_cast<float32>(srcNormalBytes[2]) };
@@ -3339,9 +3334,9 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
srcNormalBytes += vertexSize;
V3d* dstNormal = (V3d*)(dstNormalBytes);
dstNormalBytes += 64;
float x, y, z, w;
mat_trans_nodiv_nomod_zerow(srcNormal.x, srcNormal.y, srcNormal.z, x, y, z, w);
*dstNormal = { x, y, z };
float x, y, z;
mat_trans_normal3_nomod(srcNormal.x, srcNormal.y, srcNormal.z,
dstNormal->x, dstNormal->y, dstNormal->z);
} while(--count != 0);
}
} else if (!(flags & 0x80)) {
@@ -3371,7 +3366,7 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
break;
}
mat_load_3x3(currentMatrix);
mat_load2(*currentMatrix);
do {
auto srcOffset = *skinningIndexData++;
@@ -3383,8 +3378,8 @@ void tnlMeshletSkinVertices(uint8_t *OCR, uint8_t *OCR_normal, const uint8_t* ve
auto dstNormal = (V3d*)(destNormal + dstOffset);
V3d tmp;
float w;
mat_trans_nodiv_nomod_zerow(srcNormal.x, srcNormal.y, srcNormal.z, tmp.x, tmp.y, tmp.z, w);
mat_trans_normal3_nomod(srcNormal.x, srcNormal.y, srcNormal.z,
tmp.x, tmp.y, tmp.z);
tmp = scale(tmp, *skinningWeightData++ / 255.0f);
*dstNormal = add(*dstNormal, tmp);
} while (--count != 0);
@@ -3498,22 +3493,20 @@ uploadSkinMatrices(Atomic *a, Matrix* skinMatrices)
if(hier){
Matrix *invMats = (Matrix*)skin->inverseMatrices;
Matrix tmp;
assert(skin->numBones == hier->numNodes);
if(hier->flags & HAnimHierarchy::LOCALSPACEMATRICES){
for(i = 0; i < hier->numNodes; i++){
invMats[i].flags = 0;
Matrix::mult(m, &invMats[i], &hier->matrices[i]);
mat_mult(*m, invMats[i], hier->matrices[i]);
m++;
}
}else{
Matrix invAtmMat;
Matrix::invert(&invAtmMat, a->getFrame()->getLTM());
for(i = 0; i < hier->numNodes; i++){
invMats[i].flags = 0;
Matrix::mult(&tmp, &hier->matrices[i], &invAtmMat);
Matrix::mult(m, &invMats[i], &tmp);
mat_load_apply(invAtmMat, hier->matrices[i]);
mat_apply(invMats[i]);
mat_store2(*m);
m++;
}
}
@@ -3530,12 +3523,12 @@ uploadSkinMatrices(Atomic *a, Matrix* skinMatrices)
return skinMatrices[0].identityError() < 0.01f;
}
// Matrix that scales x by 0.5/127 and y by -0.5/127 and then adds 0.5 to
// both, leaving z unchanged. Presumably this remaps normal components in the
// range [-127, 127] to [0, 1] texture coordinates (suggested by the name) --
// TODO confirm. The sign of right.x is rewritten at run time by
// uploadEnvMatrix depending on MatFX::envMapFlipU.
// NOTE(review): both the single-brace and the double-brace form of the
// initializer appear below because this text comes from a diff view without
// +/- markers.
static RawMatrix normal2texcoord = {
static RawMatrix normal2texcoord = {{
{ 0.5f / 127, 0.0f, 0.0f }, 0.0f,
{ 0.0f, -0.5f / 127, 0.0f }, 0.0f,
{ 0.0f, 0.0f, 1.0f }, 0.0f,
{ 0.5f, 0.5f, 0.0f }, 1.0f
};
}};
void
uploadEnvMatrix(Frame *frame, RawMatrix *world, matrix_t* envMatrix)
@@ -3547,13 +3540,13 @@ uploadEnvMatrix(Frame *frame, RawMatrix *world, matrix_t* envMatrix)
RawMatrix *envMtx = (RawMatrix*)envMatrix;
{
RawMatrix invMtx;
//RawMatrix invMtx;
Matrix::invert(&invMat, frame->getLTM());
convMatrix(&invMtx, &invMat);
invMtx.pos.set(0.0f, 0.0f, 0.0f);
//convMatrix(&invMtx, &invMat);
//invMtx.pos.set(0.0f, 0.0f, 0.0f);
float uscale = fabs(normal2texcoord.right.x);
normal2texcoord.right.x = MatFX::envMapFlipU ? -uscale : uscale;
#if 0
RawMatrix tmpMtx;
RawMatrix::mult(&tmpMtx, &invMtx, &normal2texcoord);
@@ -3563,6 +3556,11 @@ uploadEnvMatrix(Frame *frame, RawMatrix *world, matrix_t* envMatrix)
world->upw = 0;
world->atw = 0;
RawMatrix::mult(envMtx, world, &tmpMtx);
#else
mat_load_apply(normal2texcoord, invMat);
mat_apply(*world);
mat_store2(envMatrix);
#endif
}
}
@@ -3812,12 +3810,9 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
ac->skinMatrix0Identity = skinMatrix0Identity;
lightingCB(atomic, ac->uniform);
rw::RawMatrix world;
rw::convMatrix(&world, atomic->getFrame()->getLTM());
mat_load_apply((matrix_t*)&cam->devProjScreen, (matrix_t*)&cam->devView);
mat_apply((matrix_t*)&world);
mat_apply(*atomic->getFrame()->getLTM());
mat_store2((matrix_t*)&atomicContexts.back().mtx);
auto meshes = geo->meshHeader->getMeshes();
@@ -3841,7 +3836,7 @@ void defaultRenderCB(ObjPipeline *pipe, Atomic *atomic) {
matfxContextPointer = &matfxContexts.back();
// N.B. world here gets converted to a 3x3 matrix
// this is fine, as we only use it for env mapping from now on
uploadEnvMatrix(matfx->fx[0].env.frame, &world, &matfxContexts.back().mtx);
uploadEnvMatrix(matfx->fx[0].env.frame, reinterpret_cast<rw::RawMatrix*>(atomic->getFrame()->getLTM()), &matfxContexts.back().mtx);
matfxContextPointer->coefficient = matfxCoefficient;
pvr_poly_cxt_t cxt;
@@ -6247,6 +6242,18 @@ writeNativeSkin(Stream *stream, int32 len, void *object, int32 offset)
stream->write8(&skin->numBones, 4);
// Normalise every inverse bone matrix in place before it is written out:
// translate the legacy identity bit into the new flag value, clear the rest
// of the flag word and reset the w column to (flags, 0, 0, 1).
for(int32 i = 0; i < skin->numBones; i++){
	Matrix &m = *reinterpret_cast<Matrix *>(
		&skin->inverseMatrices[i * 16]);
	// `flags` is only 3 bits wide, so it can never contain IDENTITY_OLD
	// (0x20000); testing `m.flags` against it is always false. The legacy
	// bit lives in `pad0`, which holds the remaining 29 bits of the same
	// word, i.e. the word shifted right by 3.
	// assumes `flags` occupies the low 3 bits of the word (the layout the
	// new IDENTITY = 0x4 value already relies on) -- TODO confirm per target
	if(m.pad0 & (MatrixBase::IDENTITY_OLD >> 3))
		m.flags |= MatrixBase::IDENTITY;
	m.pad0 = 0;
	m.upw = 0.0f;
	m.atw = 0.0f;
	m.posw = 1.0f;
}
stream->write32(skin->inverseMatrices, skin->numBones*64);
return stream;
}

File diff suppressed because it is too large Load Diff

View File

@@ -238,8 +238,8 @@ inline V2d neg(const V2d &a) { return makeV2d(-a.x, -a.y); }
// Component-wise sum of two 2D vectors.
inline V2d add(const V2d &a, const V2d &b)
{
	return makeV2d(a.x + b.x,
	               a.y + b.y);
}
// Component-wise difference a - b of two 2D vectors.
inline V2d sub(const V2d &a, const V2d &b)
{
	return makeV2d(a.x - b.x,
	               a.y - b.y);
}
// 2D vector `a` with both components multiplied by `r`.
inline V2d scale(const V2d &a, float32 r)
{
	return makeV2d(a.x * r,
	               a.y * r);
}
inline float32 length(const V2d &v) { return sqrtf(v.x*v.x + v.y*v.y); }
inline V2d normalize(const V2d &v) { return scale(v, 1.0f/length(v)); }
// Length of a 2D vector: dc::Sqrt of the sum of the squared components.
inline float32 length(const V2d &v)
{
	return dc::Sqrt(v.x*v.x + v.y*v.y);
}
// `v` scaled by dc::RecipSqrt of its squared length. There is no guard for a
// zero-length input.
inline V2d normalize(const V2d &v)
{
	return scale(v, dc::RecipSqrt(v.x*v.x + v.y*v.y));
}
struct V3d
{
@@ -265,10 +265,22 @@ inline float32 length(const V3d &v) {
return len;
#endif
}
inline V3d normalize(const V3d &v) { return scale(v, 1.0f/length(v)); }
inline V3d setlength(const V3d &v, float32 l) { return scale(v, l/length(v)); }
V3d cross(const V3d &a, const V3d &b);
inline __attribute__((always_inline)) float32 dot(const V3d &a, const V3d &b) {
// `v` scaled by the reciprocal of its length. DC_SH4 builds obtain the
// reciprocal directly from the squared magnitude; other builds divide 1 by
// length(v). There is no guard for a zero-length input.
inline V3d normalize(const V3d &v)
{
#ifdef DC_SH4
	float rcpLen = dc::RecipSqrt(fipr_magnitude_sqr(v.x, v.y, v.z, 0.0f));
#else
	float rcpLen = 1.0f / length(v);
#endif
	return scale(v, rcpLen);
}
// `v` rescaled by the factor l / length(v), computed through dc::Div.
inline V3d setlength(const V3d &v, float32 l)
{
	const auto factor = dc::Div<true, false>(l, length(v));
	return scale(v, factor);
}
// Cross product a x b of two 3D vectors.
inline V3d cross(const V3d &a, const V3d &b)
{
	return makeV3d(
		a.y*b.z - a.z*b.y,	// x
		a.z*b.x - a.x*b.z,	// y
		a.x*b.y - a.y*b.x);	// z
}
inline float32 dot(const V3d &a, const V3d &b) {
#ifdef DC_SH4
return fipr(a.x, a.y, a.z, 0.0f, b.x, b.y, b.z, 0.0f);
#else
@@ -329,19 +341,40 @@ inline float32 length(const Quat &q) {
// Quaternion norm: square root of the sum of all four squared components.
#ifndef DC_SH4
	return sqrtf(q.w*q.w + q.x*q.x + q.y*q.y + q.z*q.z);
#else
	// The fourth argument must be q.w so that this path matches the portable
	// one above; passing 0.0f would return the length of the vector part only.
	return dc::Sqrt(fipr_magnitude_sqr(q.x, q.y, q.z, q.w));
#endif
}
inline Quat normalize(const Quat &q) { return scale(q, 1.0f/length(q)); }
// Return `q` scaled by the reciprocal of its norm. There is no guard for a
// zero quaternion.
inline Quat normalize(const Quat &q) {
	float invLen;
#ifndef DC_SH4
	invLen = 1.0f / length(q);
#else
	// All four components contribute to the norm, exactly as in the portable
	// branch; passing 0.0f instead of q.w would normalise by the vector part
	// only and leave the result non-unit whenever q.w != 0.
	invLen = dc::RecipSqrt(fipr_magnitude_sqr(q.x, q.y, q.z, q.w));
#endif
	return scale(q, invLen);
}
// Conjugate of `q`: same w, negated x, y and z.
inline Quat conj(const Quat &q)
{
	return makeQuat(q.w,
	                -q.x, -q.y, -q.z);
}
Quat mult(const Quat &q, const Quat &p);
// Quaternion product of `q` and `p`. DC_SH4 builds hand the work to
// dc::quat_mult; other builds expand the product component by component.
inline Quat mult(const Quat &q, const Quat &p)
{
#ifdef DC_SH4
	Quat product;
	dc::quat_mult(reinterpret_cast<dc::quaternion_t *>(&product),
	              reinterpret_cast<const dc::quaternion_t &>(q),
	              reinterpret_cast<const dc::quaternion_t &>(p));
	return product;
#else
	return makeQuat(
		q.w*p.w - q.x*p.x - q.y*p.y - q.z*p.z,	// w
		q.w*p.x + q.x*p.w + q.y*p.z - q.z*p.y,	// x
		q.w*p.y + q.y*p.w + q.z*p.x - q.x*p.z,	// y
		q.w*p.z + q.z*p.w + q.x*p.y - q.y*p.x);	// z
#endif
}
// Rotate `v` by `q`: computes q * (0, v) * conj(q) and returns the vector
// part of the result.
inline V3d rotate(const V3d &v, const Quat &q)
{
	Quat rotated = mult(mult(q, makeQuat(0.0f, v)), conj(q));
	return rotated.vec();
}
Quat lerp(const Quat &q, const Quat &p, float32 r);
Quat slerp(const Quat &q, const Quat &p, float32 a);
struct __attribute__((aligned(8))) RawMatrix
struct alignas(8) RawMatrixBase
{
V3d right;
V3d right;
float32 rightw;
V3d up;
float32 upw;
@@ -349,6 +382,32 @@ struct __attribute__((aligned(8))) RawMatrix
float32 atw;
V3d pos;
float32 posw;
};
struct RawMatrix: public RawMatrixBase
{
RawMatrix() {}
RawMatrix(RawMatrixBase &&aggregate):
RawMatrixBase{aggregate}
{}
RawMatrix(const RawMatrix &rhs) {
*this = rhs;
}
operator matrix_t *() {
return reinterpret_cast<matrix_t *>(this);
}
operator const matrix_t *() const {
return reinterpret_cast<const matrix_t *>(this);
}
RawMatrix &operator=(const RawMatrix &rhs) {
dc::mat_copy(*this, rhs);
return *this;
}
// NB: this is dst = src2*src1, i.e. src1 is applied first, then src2
static void mult(RawMatrix *dst, RawMatrix *src1, RawMatrix *src2);
@@ -356,7 +415,7 @@ struct __attribute__((aligned(8))) RawMatrix
static void setIdentity(RawMatrix *dst);
};
struct Matrix
struct alignas(8) MatrixBase
{
enum Type {
TYPENORMAL = 1,
@@ -365,22 +424,65 @@ struct Matrix
TYPEMASK = 3
};
enum Flags {
IDENTITY = 0x20000
IDENTITY = 0x4,
IDENTITY_OLD = 0x20000
};
V3d right;
union {
struct {
uint32_t flags: 3 = TYPEORTHONORMAL|IDENTITY;
uint32_t pad0: 29 = 0;
};
float rightw;
};
V3d up;
union {
uint32 pad1;
float upw = 0.0f;
};
V3d at;
union {
uint32 pad2;
float atw = 0.0f;
};
V3d pos;
union {
uint32 pad3;
float posw = 1.0f;
};
operator matrix_t *() { return reinterpret_cast<matrix_t *>(this); }
operator const matrix_t *() const { return reinterpret_cast<const matrix_t *>(this); }
};
struct Matrix: public MatrixBase
{
struct Tolerance {
float32 normal;
float32 orthogonal;
float32 identity;
};
V3d right;
uint32 flags;
V3d up;
uint32 pad1;
V3d at;
uint32 pad2;
V3d pos;
uint32 pad3;
Matrix() {}
Matrix(MatrixBase &&aggregate){
*this = aggregate;
}
Matrix(const Matrix &rhs) {
*this = rhs;
}
Matrix &operator=(const RawMatrix &rhs) {
dc::mat_copy(*this, rhs);
return *this;
}
Matrix &operator=(const MatrixBase &rhs) {
dc::mat_copy(*this, rhs);
return *this;
}
static Matrix *create(void);
void destroy(void);
@@ -417,10 +519,12 @@ inline void convMatrix(Matrix *dst, RawMatrix *src){
// Copy `src` into `dst`, reinterpreting the Matrix storage as a RawMatrix.
// Builds without DC_SH4 then overwrite the w column with (0, 0, 0, 1);
// DC_SH4 builds keep whatever the copy produced.
inline void convMatrix(RawMatrix *dst, Matrix *src)
{
	*dst = *reinterpret_cast<RawMatrix*>(src);
#if !defined(DC_SH4)
	dst->rightw = 0.0f;
	dst->upw    = 0.0f;
	dst->atw    = 0.0f;
	dst->posw   = 1.0f;
#endif
}
struct Line