From efde681ab5aa630a9d8c1b2a67c0f43b3fb9394e Mon Sep 17 00:00:00 2001 From: XProger Date: Wed, 17 Nov 2021 08:27:13 +0300 Subject: [PATCH] #370 3DO 16-bit room vertices (reduce level data size), use MulManyVec4Mat44_F16 instead of MulManyVec3Mat33DivZ_F16, minor fix for z-clipping and gaps, fix animations and aiming, remove debug data from executable --- src/fixed/common.cpp | 35 +++- src/fixed/common.h | 108 +++++----- src/fixed/draw.h | 10 +- src/fixed/room.h | 193 ++---------------- src/platform/3do/Makefile | 2 +- src/platform/3do/matrixLerp.s | 132 ++++++++++++ src/platform/3do/render_cel.cpp | 343 +++++++++++++++++++++----------- src/platform/3do/unpackMesh.s | 72 +++---- src/platform/3do/unpackRoom.s | 79 +++++--- 9 files changed, 535 insertions(+), 439 deletions(-) create mode 100644 src/platform/3do/matrixLerp.s diff --git a/src/fixed/common.cpp b/src/fixed/common.cpp index 01cc632..830d338 100644 --- a/src/fixed/common.cpp +++ b/src/fixed/common.cpp @@ -847,34 +847,35 @@ void matrixFrame(const vec3s &pos, const uint32* angles) matrixRotateYXZ(aX, aY, aZ); } +#ifndef USE_MATRIX_ASM #ifdef IWRAM_MATRIX_LERP -void matrixLerp(const Matrix &n, int32 multiplier, int32 divider); +void matrixLerp_c(const Matrix &n, int32 pmul, int32 pdiv); #else -void matrixLerp(const Matrix &n, int32 multiplier, int32 divider) +void matrixLerp_c(const Matrix &n, int32 pmul, int32 pdiv) { Matrix &m = matrixGet(); - if ((divider == 2) || ((divider == 4) && (multiplier == 2))) { + if ((pdiv == 2) || ((pdiv == 4) && (pmul == 2))) { LERP_MATRIX(LERP_1_2); - } else if (divider == 4) { + } else if (pdiv == 4) { - if (multiplier == 1) { + if (pmul == 1) { LERP_MATRIX(LERP_1_4); } else { LERP_MATRIX(LERP_3_4); } - } else if (divider == 3) { + } else if (pdiv == 3) { - if (multiplier == 1) { + if (pmul == 1) { LERP_MATRIX(LERP_1_3); } else { LERP_MATRIX(LERP_2_3); } - } else if (divider == 5) { + } else if (pdiv == 5) { - switch (multiplier) + switch (pmul) { case 4 : 
LERP_MATRIX(LERP_4_5); break; case 3 : LERP_MATRIX(LERP_3_5); break; @@ -883,10 +884,12 @@ void matrixLerp(const Matrix &n, int32 multiplier, int32 divider) } } else { + int32 t = pmul * FixedInvU(pdiv) >> 8; LERP_MATRIX(LERP_SLOW); } } #endif +#endif void matrixFrameLerp(const vec3s &pos, const uint32* anglesA, const uint32* anglesB, int32 delta, int32 rate) { @@ -942,6 +945,13 @@ void matrixSetIdentity() m.e21 = 0; m.e22 = 0x4000; m.e23 = 0; + +#ifdef __3DO__ + m.e30 = 0; + m.e31 = 0; + m.e32 = 0; + m.e33 = 0x4000; +#endif } void matrixSetView(const vec3i &pos, int32 angleX, int32 angleY) @@ -968,6 +978,13 @@ void matrixSetView(const vec3i &pos, int32 angleX, int32 angleY) m.e22 = (cx * cy) >> FIXED_SHIFT; m.e23 = 0; +#ifdef __3DO__ + m.e30 = 0; + m.e31 = 0; + m.e32 = 0; + m.e33 = 0x4000; +#endif + cameraViewPos = pos; cameraViewOffset = _vec3i(0, 0, 0); } diff --git a/src/fixed/common.h b/src/fixed/common.h index 37fdeec..c9dd182 100644 --- a/src/fixed/common.h +++ b/src/fixed/common.h @@ -37,7 +37,7 @@ #include #elif defined(__3DO__) #define MODEHW - //#define USE_DIV_TABLE // 4k of DRAM + #define USE_DIV_TABLE // 4k of DRAM #define CPU_BIG_ENDIAN #define BLOCK_SIZE_DRAM (32 * 1024) @@ -47,7 +47,7 @@ #define SND_BUFFER_SIZE (4 * BLOCK_SIZE_CD) #define SND_BUFFERS 4 - #define MAX_RAM_LVL (BLOCK_SIZE_DRAM * 31) // 35 for LEVEL10C! >_< + #define MAX_RAM_LVL (BLOCK_SIZE_DRAM * 31) // 34 for LEVEL10C! 
>_< #define MAX_RAM_TEX (BLOCK_SIZE_VRAM * 44) #define MAX_RAM_CEL (MAX_FACES * sizeof(CCB)) #define MAX_RAM_SND (SND_BUFFERS * SND_BUFFER_SIZE) @@ -76,7 +76,6 @@ #include #include #include - //#include #else #error unsupported platform #endif @@ -125,6 +124,7 @@ #define IWRAM_MATRIX_LERP // the maximum of active enemies // #define MAX_ENEMIES 3 + #define VIS_DIST (1024 * 10) #endif #ifdef __3DO__ @@ -134,13 +134,13 @@ #define LOD_TRAP_FLOOR // disable matrix interpolation //#define NO_ANIM_LERP - #define ANIM_LERP_ANGLE // the maximum navigation iterations per simulation tick - #define NAV_STEPS 1 + #define NAV_STEPS 1 // the maximum of active enemies #define MAX_ENEMIES 3 // set the maximum number of simultaneously played channels #define SND_CHANNELS 4 + #define VIS_DIST (1024 * 10) #endif #ifndef NAV_STEPS @@ -428,16 +428,20 @@ extern int32 fps; #define MAX_DYN_SECTORS (1024*3) #define MAX_SAMPLES 180 +#ifndef VIS_DIST + #define VIS_DIST (1024 * 16) +#endif + #define FOV_SHIFT 3 #define FOG_SHIFT 1 -#define FOG_MAX (10 * 1024) +#define FOG_MAX VIS_DIST #define FOG_MIN (FOG_MAX - (8192 >> FOG_SHIFT)) -#define VIEW_MIN_F (32 << FIXED_SHIFT) +#define VIEW_MIN_F (256 << FIXED_SHIFT) #define VIEW_MAX_F (FOG_MAX << FIXED_SHIFT) -#define FRUSTUM_FAR_X 5 << 10 -#define FRUSTUM_FAR_Y 3 << 10 -#define FRUSTUM_FAR_Z 9 << 10 +#define FRUSTUM_FAR_X (5 << 10) +#define FRUSTUM_FAR_Y (3 << 10) +#define FRUSTUM_FAR_Z (9 << 10) #define FACE_TRIANGLE 0x8000 #define FACE_COLORED 0x4000 @@ -592,10 +596,10 @@ struct vec4i { struct Matrix { #ifdef __3DO__ - int32 e00, e10, e20; - int32 e01, e11, e21; - int32 e02, e12, e22; - int32 e03, e13, e23; + int32 e00, e10, e20, e30; + int32 e01, e11, e21, e31; + int32 e02, e12, e22, e32; + int32 e03, e13, e23, e33; #else int32 e00, e01, e02, e03; int32 e10, e11, e12, e13; @@ -850,11 +854,8 @@ struct Room { const Sector* getWaterSector(int32 x, int32 z) const; Room* getRoom(int32 x, int32 y, int32 z); bool collideStatic(CollisionInfo &cinfo, 
const vec3i &p, int32 height); -#ifdef __3DO__ - bool checkPortal(const Portal* portal, vec3i* points); -#else bool checkPortal(const Portal* portal); -#endif + Room** addVisibleRoom(Room** list); Room** addNearRoom(Room** list, int32 x, int32 y, int32 z); Room** getNearRooms(const vec3i &pos, int32 radius, int32 height); @@ -1910,8 +1911,8 @@ struct CollisionInfo struct Box { - int8 minZ, maxZ; - int8 minX, maxX; + uint8 minZ, maxZ; + uint8 minX, maxX; int16 floor; uint16 overlap; }; @@ -1996,16 +1997,13 @@ struct IMA_STATE int32 idx; }; -/* -#define PERSPECTIVE(x, y, z) {\ - x = (x / (z >> 7));\ - y = (y / (z >> 6));\ -} -*/ #if defined(MODEHW) #define PERSPECTIVE(x, y, z) {\ - x = (x << 6) / (z >> 2);\ - y = (y << 6) / (z >> 2);\ + int32 dz = z >> 4;\ + if (dz >= DIV_TABLE_SIZE) dz = DIV_TABLE_SIZE - 1;\ + int32 d = FixedInvU(dz);\ + x = (x * d) >> 12;\ + y = (y * d) >> 12;\ } #elif defined(MODE13) #define PERSPECTIVE(x, y, z) {\ @@ -2116,27 +2114,39 @@ vec3i boxPushOut(const AABBs &a, const AABBs &b); y = (((uint16*)(a))[1] & 0x000F) << 12 | (((uint16*)(a))[0] & 0xFC00) >> 4;\ z = (((uint16*)(a))[0] & 0x03FF) << 6; -#define LERP_1_2(a, b, mul, div) a = (b + a) >> 1 -#define LERP_1_3(a, b, mul, div) a = a + ((b - a) / 3) -#define LERP_2_3(a, b, mul, div) a = b - ((b - a) / 3) -#define LERP_1_4(a, b, mul, div) a = a + ((b - a) >> 2) -#define LERP_3_4(a, b, mul, div) a = b - ((b - a) >> 2) -#define LERP_1_5(a, b, mul, div) a = a + ((b - a) / 5) -#define LERP_2_5(a, b, mul, div) a = a + ((b - a) * 2 / 5) -#define LERP_3_5(a, b, mul, div) a = b - ((b - a) * 2 / 5) -#define LERP_4_5(a, b, mul, div) a = b - ((b - a) / 5) -#define LERP_SLOW(a, b, mul, div) a = a + ((b - a) * mul / div) +#define USE_MATRIX_ASM -#define LERP_ROW(lerp_func, a, b, mul, div) \ - lerp_func(a##0, b##0, mul, div); \ - lerp_func(a##1, b##1, mul, div); \ - lerp_func(a##2, b##2, mul, div); \ - lerp_func(a##3, b##3, mul, div); +#ifdef USE_MATRIX_ASM + extern "C" void matrixLerp_asm(const Matrix 
&n, int32 pmul, int32 pdiv); -#define LERP_MATRIX(lerp_func) \ - LERP_ROW(lerp_func, m.e0, n.e0, multiplier, divider); \ - LERP_ROW(lerp_func, m.e1, n.e1, multiplier, divider); \ - LERP_ROW(lerp_func, m.e2, n.e2, multiplier, divider); + #define matrixLerp matrixLerp_asm +#else + #define LERP_1_2(a, b) a = (b + a) >> 1 + #define LERP_1_3(a, b) a = a + (b - a) / 3 + #define LERP_2_3(a, b) a = b - (b - a) / 3 + #define LERP_1_4(a, b) a = a + ((b - a) >> 2) + #define LERP_3_4(a, b) a = b - ((b - a) >> 2) + #define LERP_1_5(a, b) a = a + (b - a) / 5 + #define LERP_2_5(a, b) a = a + ((b - a) << 1) / 5 + #define LERP_3_5(a, b) a = b - ((b - a) << 1) / 5 + #define LERP_4_5(a, b) a = b - (b - a) / 5 + #define LERP_SLOW(a, b) a = a + ((b - a) * t >> 8) + + #define LERP_ROW(lerp_func, a, b, row) \ + lerp_func(a.e##row##0, b.e##row##0); \ + lerp_func(a.e##row##1, b.e##row##1); \ + lerp_func(a.e##row##2, b.e##row##2); \ + lerp_func(a.e##row##3, b.e##row##3); + + #define LERP_MATRIX(lerp_func) \ + LERP_ROW(lerp_func, m, n, 0); \ + LERP_ROW(lerp_func, m, n, 1); \ + LERP_ROW(lerp_func, m, n, 2); + + void matrixLerp_c(const Matrix &n, int32 pmul, int32 pdiv); + + #define matrixLerp matrixLerp_c +#endif X_INLINE Matrix& matrixGet() { @@ -2181,7 +2191,6 @@ void matrixRotateY(int32 angle); void matrixRotateZ(int32 angle); void matrixRotateYXZ(int32 angleX, int32 angleY, int32 angleZ); void matrixRotateZXY(int32 angleX, int32 angleY, int32 angleZ); -void matrixLerp(const Matrix &n, int32 multiplier, int32 divider); void matrixFrame(const vec3s &pos, const uint32* angles); void matrixFrameLerp(const vec3s &pos, const uint32* anglesA, const uint32* anglesB, int32 delta, int32 rate); void matrixSetIdentity(); @@ -2196,9 +2205,8 @@ void setPaletteIndex(int32 index); void clear(); int32 rectIsVisible(const RectMinMax* rect); int32 boxIsVisible(const AABBs* box); -void transform(vec3i* points, int32 count); bool transformBoxRect(const AABBs* box, RectMinMax* rect); -void transformRoom(const 
RoomVertex* vertices, int32 vCount, bool underwater); +void transformRoom(const Room* room); void transformMesh(const MeshVertex* vertices, int32 vCount, const uint16* vIntensity, const vec3s* vNormal); void faceAddQuad(uint32 flags, const Index* indices); void faceAddTriangle(uint32 flags, const Index* indices); diff --git a/src/fixed/draw.h b/src/fixed/draw.h index 954e510..7786722 100644 --- a/src/fixed/draw.h +++ b/src/fixed/draw.h @@ -494,9 +494,7 @@ void drawLaraNodes(const ItemObj* lara, const AnimFrame* frameA) matrixTranslate(node->pos.x, node->pos.y, node->pos.z); node++; if (arm->useBasis) { // hands are rotated relative to the basis - #ifndef __3DO__ // TODO_3DO matrixSetBasis(matrixGet(), basis); - #endif matrixRotateYXZ(arm->angle.x, arm->angle.y, arm->angle.z); } matrixFrame(_vec3s(0, 0, 0), anglesArm[i]++); @@ -628,9 +626,7 @@ void drawLaraNodesLerp(const ItemObj* lara, const AnimFrame* frameA, const AnimF matrixTranslate(node->pos.x, node->pos.y, node->pos.z); node++; if (arm->useBasis) { // hands are rotated relative to the basis - #ifndef __3DO__ // TODO_3DO matrixSetBasis(matrixGet(), basis); - #endif matrixRotateYXZ(arm->angle.x, arm->angle.y, arm->angle.z); } @@ -769,9 +765,9 @@ void drawRoom(const Room* room, Camera* camera) int32 rz = info->z << 8; matrixPush(); - matrixTranslateAbs(info->x << 8, 0, info->z << 8); + matrixTranslateAbs(info->x << 8, info->yTop, info->z << 8); - camera->updateFrustum(info->x << 8, 0, info->z << 8); + camera->updateFrustum(info->x << 8, info->yTop, info->z << 8); setPaletteIndex(ROOM_FLAG_WATER(info->flags) ? 
1 : 0); @@ -802,7 +798,7 @@ void drawRoom(const Room* room, Camera* camera) { PROFILE(CNT_TRANSFORM); - transformRoom(data.vertices, info->verticesCount, ROOM_FLAG_WATER(info->flags)); + transformRoom(room); } { diff --git a/src/fixed/room.h b/src/fixed/room.h index 839b07e..655ac9a 100644 --- a/src/fixed/room.h +++ b/src/fixed/room.h @@ -368,180 +368,6 @@ bool Room::collideStatic(CollisionInfo &cinfo, const vec3i &p, int32 height) return false; } -#ifdef __3DO__ -bool Room::checkPortal(const Portal* portal, vec3i* points) -{ - int32 x0 = clip.x1; - int32 y0 = clip.y1; - int32 x1 = clip.x0; - int32 y1 = clip.y0; - - int32 znear = 0, zfar = 0; - - for (int32 i = 0; i < 4; i++) - { - int32 x = points[i].x; - int32 y = points[i].y; - int32 z = points[i].z; - - if (z <= 0) { - points[i].x = -points[i].x; - points[i].y = -points[i].y; - znear++; - continue; - } - - if (z >= (VIEW_MAX_F >> FIXED_SHIFT)) { - zfar++; - } - - x += FRAME_WIDTH >> 1; - y += FRAME_HEIGHT >> 1; - - if (x < x0) x0 = x; - if (x > x1) x1 = x; - if (y < y0) y0 = y; - if (y > y1) y1 = y; - } - - if (znear == 4 || zfar == 4) - return false; - - if (znear) - { - const vec3i *a = points; - const vec3i *b = points + 3; - for (int32 i = 0; i < 4; i++) - { - if ((a->z <= 0) ^ (b->z <= 0)) - { - if (a->x < 0 && b->x < 0) { - x0 = 0; - } else if (a->x > 0 && b->x > 0) { - x1 = FRAME_WIDTH; - } else { - x0 = 0; - x1 = FRAME_WIDTH; - } - - if (a->y < 0 && b->y < 0) { - y0 = 0; - } else if (a->y > 0 && b->y > 0) { - y1 = FRAME_HEIGHT; - } else { - y0 = 0; - y1 = FRAME_HEIGHT; - } - } - b = a; - a++; - } - } - - if (x0 < clip.x0) x0 = clip.x0; - if (x1 > clip.x1) x1 = clip.x1; - if (y0 < clip.y0) y0 = clip.y0; - if (y1 > clip.y1) y1 = clip.y1; - - if (x0 >= x1 || y0 >= y1) - return false; - - Room* nextRoom = rooms + portal->roomIndex; - - if (x0 < nextRoom->clip.x0) nextRoom->clip.x0 = x0; - if (x1 > nextRoom->clip.x1) nextRoom->clip.x1 = x1; - if (y0 < nextRoom->clip.y0) nextRoom->clip.y0 = y0; - if (y1 > 
nextRoom->clip.y1) nextRoom->clip.y1 = y1; - - return true; -} - -Room** Room::addVisibleRoom(Room** list) -{ - uint32 vis[MAX_PORTALS]; - vec3i vertices[MAX_PORTALS * 4]; - - int32 cx = cameraViewPos.x - (info->x << 8); - int32 cy = cameraViewPos.y; - int32 cz = cameraViewPos.z - (info->z << 8); - - const Portal* portal = data.portals; - vec3i* v = (vec3i*)vertices; - int32 vCount = 0; - - for (int32 i = 0; i < info->portalsCount; i++, portal++) - { - int32 axis = 0; - int32 x = (portal->v[0].x - cx) << F16_SHIFT; - if (x >= 0) axis |= (2 << 0); - if (x < 0) axis |= (1 << 0); - int32 y = (portal->v[0].y - cy) << F16_SHIFT; - if (y >= 0) axis |= (2 << 2); - if (y < 0) axis |= (1 << 2); - int32 z = (portal->v[0].z - cz) << F16_SHIFT; - if (z >= 0) axis |= (2 << 4); - if (z < 0) axis |= (1 << 4); - - vis[i] = (portal->normalMask & axis); - - if (!vis[i]) - continue; - - v->x = x; - v->y = y; - v->z = z; - v++; - - v->x = (portal->v[1].x - cx) << F16_SHIFT; - v->y = (portal->v[1].y - cy) << F16_SHIFT; - v->z = (portal->v[1].z - cz) << F16_SHIFT; - v++; - - v->x = (portal->v[2].x - cx) << F16_SHIFT; - v->y = (portal->v[2].y - cy) << F16_SHIFT; - v->z = (portal->v[2].z - cz) << F16_SHIFT; - v++; - - v->x = (portal->v[3].x - cx) << F16_SHIFT; - v->y = (portal->v[3].y - cy) << F16_SHIFT; - v->z = (portal->v[3].z - cz) << F16_SHIFT; - v++; - - vCount += 4; - } - - if (!vCount) - return list; - - transform((vec3i*)vertices, vCount); - - portal = data.portals; - v = (vec3i*)vertices; - - for (int32 i = 0; i < info->portalsCount; i++, portal++) - { - if (!vis[i]) - continue; - - if (checkPortal(portal, v)) - { - Room* nextRoom = rooms + portal->roomIndex; - - list = nextRoom->addVisibleRoom(list); - - if (!nextRoom->visible) - { - nextRoom->visible = true; - *list++ = nextRoom; - } - } - - v += 4; - } - - return list; -} -#else bool Room::checkPortal(const Portal* portal) { vec3i d; @@ -555,12 +381,12 @@ bool Room::checkPortal(const Portal* portal) #ifdef __3DO__ int32 axis = 
0; - if (d.x > 0) axis |= (2 << 0); - if (d.x < 0) axis |= (1 << 0); - if (d.y > 0) axis |= (2 << 2); - if (d.y < 0) axis |= (1 << 2); - if (d.z > 0) axis |= (2 << 4); - if (d.z < 0) axis |= (1 << 4); + if (d.x >= 0) axis |= (2 << 0); + if (d.x < 0) axis |= (1 << 0); + if (d.y >= 0) axis |= (2 << 2); + if (d.y < 0) axis |= (1 << 2); + if (d.z >= 0) axis |= (2 << 4); + if (d.z < 0) axis |= (1 << 4); if (!(portal->normalMask & axis)) return false; @@ -602,7 +428,13 @@ bool Room::checkPortal(const Portal* portal) } int32 dz = (z >> (FIXED_SHIFT + FOV_SHIFT + 1)); + if (dz > 0) { + #ifdef __3DO__ + x >>= FIXED_SHIFT; + y >>= FIXED_SHIFT; + z >>= FIXED_SHIFT; + #endif PERSPECTIVE(x, y, z); x += FRAME_WIDTH >> 1; y += FRAME_HEIGHT >> 1; @@ -696,7 +528,6 @@ Room** Room::addVisibleRoom(Room** list) return list; } -#endif Room** Room::getVisibleRooms() { diff --git a/src/platform/3do/Makefile b/src/platform/3do/Makefile index 5b14b62..a72018f 100644 --- a/src/platform/3do/Makefile +++ b/src/platform/3do/Makefile @@ -21,7 +21,7 @@ CXXFLAGS = $(CFLAGS) ASFLAGS = -BI -i $(SDK)/include/3do INCPATH = -I $(SDK)/include/3do -I $(SDK)/include/ttl -I ../../fixed LIBPATH = $(SDK)/lib -LDFLAGS = -aif -reloc -ro-base 0 -sym $(EXENAME).sym -libpath $(LIBPATH) +LDFLAGS = -aif -reloc -ro-base 0 -libpath $(LIBPATH) -nodebug -remove -info Sizes STARTUP = $(LIBPATH)/cstartup.o LIBS = \ diff --git a/src/platform/3do/matrixLerp.s b/src/platform/3do/matrixLerp.s new file mode 100644 index 0000000..5c0ce97 --- /dev/null +++ b/src/platform/3do/matrixLerp.s @@ -0,0 +1,132 @@ + AREA |C$$code|, CODE, READONLY +|x$codeseg| + + IMPORT matrixPtr + IMPORT divTable + EXPORT matrixLerp_asm + +n RN r0 +pmul RN r1 +pdiv RN r2 +m0 RN r3 +m1 RN r4 +m2 RN r5 +n0 RN r6 +n1 RN r12 +n2 RN lr +tmp RN r4 +m RN r7 + + MACRO + next_row + add m, m, #16 + add n, n, #16 + MEND + + MACRO + load + ldmia m, {m0, m1, m2} + ldmia n, {n0, n1, n2} + MEND + + MACRO + store + stmia m, {m0, m1, m2} + MEND + + MACRO ; a = (a + b) 
/ 2 + _1_2 + load + add m0, m0, n0 + add m1, m1, n1 + add m2, m2, n2 + mov m0, m0, asr #1 + mov m1, m1, asr #1 + mov m2, m2, asr #1 + store + MEND + + MACRO ; a = a + (b - a) / 4 + _1_4 + load + sub n0, n0, m0 + sub n1, n1, m1 + sub n2, n2, m2 + add m0, m0, n0, asr #2 + add m1, m1, n1, asr #2 + add m2, m2, n2, asr #2 + store + MEND + + MACRO ; a = b - (b - a) / 4 + _3_4 + load + sub m0, n0, m0 + sub m1, n1, m1 + sub m2, n2, m2 + sub m0, n0, m0, asr #2 + sub m1, n1, m1, asr #2 + sub m2, n2, m2, asr #2 + store + MEND + + MACRO ; a = a + (b - a) * mul / div + _X_Y + load + sub n0, n0, m0 + sub n1, n1, m1 + sub n2, n2, m2 + mul n0, pmul, n0 + mul n1, pmul, n1 + mul n2, pmul, n2 + add m0, m0, n0, asr #8 + add m1, m1, n1, asr #8 + add m2, m2, n2, asr #8 + store + MEND + + MACRO ; transposed (3x4) + lerp $func + $func ; e00, e10, e20 + next_row + $func ; e01, e11, e21 + next_row + $func ; e02, e12, e22 + next_row + $func ; e03, e13, e23 + b done + MEND + +matrixLerp_asm + stmfd sp!, {r4-r7, lr} + ldr m, =matrixPtr + ldr m, [m] + +check_2 + cmp pdiv, #2 + beq m1_d2 + +check_4 + cmp pdiv, #4 + bne mX_dY + cmp pmul, #1 + beq m1_d4 + cmp pmul, #2 + beq m1_d2 ; 2/4 = 1/2 + b m3_d4 + +mX_dY + ldr tmp, =divTable + ldr tmp, [tmp, pdiv, lsl #2] + mul tmp, pmul, tmp + mov pmul, tmp, asr #8 + lerp _X_Y +m1_d2 + lerp _1_2 +m1_d4 + lerp _1_4 +m3_d4 + lerp _3_4 + +done ldmfd sp!, {r4-r7, pc} + END diff --git a/src/platform/3do/render_cel.cpp b/src/platform/3do/render_cel.cpp index 7341c32..4641705 100644 --- a/src/platform/3do/render_cel.cpp +++ b/src/platform/3do/render_cel.cpp @@ -1,8 +1,10 @@ #include "common.h" +//#define DEBUG_CLIPPING + struct Vertex { - int32 x, y, z; // for rooms z = (depth << CLIP_SHIFT) | ClipFlags + int32 x, y, z, w; // for rooms z = (depth << CLIP_SHIFT) | ClipFlags }; uint16* gPalette; @@ -28,9 +30,7 @@ int32 otMax = 0; RectMinMax viewportRel; -bool enableAlphaTest; bool enableClipping; -bool secondPalette; #define SHADOW_OPACITY 3 // 50% #define MIP_DIST 
(1024 * 5) @@ -174,24 +174,29 @@ enum ClipFlags { CLIP_NEAR = 1 << 5 }; -X_INLINE int32 classify(const Vertex* v, int32 x0, int32 y0, int32 x1, int32 y1) +X_INLINE int32 classify(int32 x, int32 y, int32 x0, int32 y0, int32 x1, int32 y1) { - return (v->x < x0 ? CLIP_LEFT : 0) | - (v->x > x1 ? CLIP_RIGHT : 0) | - (v->y < y0 ? CLIP_TOP : 0) | - (v->y > y1 ? CLIP_BOTTOM : 0); + return (x < x0 ? CLIP_LEFT : 0) | + (x > x1 ? CLIP_RIGHT : 0) | + (y < y0 ? CLIP_TOP : 0) | + (y > y1 ? CLIP_BOTTOM : 0); } -void transform(vec3i* points, int32 count) +X_INLINE void transform(Vertex* points, int32 count) { - mmv3m33d desc; - desc.dest = - desc.src = (vec3f16*)points; - desc.mat = (mat33f16*)&matrixGet(); - desc.n = 1 << 8; - desc.count = count; + Matrix& m = matrixGet(); + int32 mx = m.e03; + int32 my = m.e13; + int32 mz = m.e23; + m.e03 >>= FIXED_SHIFT; + m.e13 >>= FIXED_SHIFT; + m.e23 >>= FIXED_SHIFT; - MulManyVec3Mat33DivZ_F16(&desc); + MulManyVec4Mat44_F16((vec4f16*)points, (vec4f16*)points, *(mat44f16*)&matrixGet(), count); + + m.e03 = mx; + m.e13 = my; + m.e23 = mz; } bool transformBoxRect(const AABBs* box, RectMinMax* rect) @@ -202,27 +207,23 @@ bool transformBoxRect(const AABBs* box, RectMinMax* rect) return false; } - int32 cx = Dot3_F16(*(vec3f16*)&m.e03, *(vec3f16*)&m.e00) >> DOT_SHIFT; // dot(pos, right) - int32 cy = Dot3_F16(*(vec3f16*)&m.e03, *(vec3f16*)&m.e01) >> DOT_SHIFT; // dot(pos, up) - int32 cz = Dot3_F16(*(vec3f16*)&m.e03, *(vec3f16*)&m.e02) >> DOT_SHIFT; // dot(pos, dir) - AABBi b; - b.minX = (box->minX << F16_SHIFT) + cx; - b.maxX = (box->maxX << F16_SHIFT) + cx; - b.minY = (box->minY << F16_SHIFT) + cy; - b.maxY = (box->maxY << F16_SHIFT) + cy; - b.minZ = (box->minZ << F16_SHIFT) + cz; - b.maxZ = (box->maxZ << F16_SHIFT) + cz; + b.minX = (box->minX << F16_SHIFT); + b.maxX = (box->maxX << F16_SHIFT); + b.minY = (box->minY << F16_SHIFT); + b.maxY = (box->maxY << F16_SHIFT); + b.minZ = (box->minZ << F16_SHIFT); + b.maxZ = (box->maxZ << F16_SHIFT); - vec3i 
v[8] = { - { b.minX, b.minY, b.minZ }, - { b.maxX, b.minY, b.minZ }, - { b.minX, b.maxY, b.minZ }, - { b.maxX, b.maxY, b.minZ }, - { b.minX, b.minY, b.maxZ }, - { b.maxX, b.minY, b.maxZ }, - { b.minX, b.maxY, b.maxZ }, - { b.maxX, b.maxY, b.maxZ } + Vertex v[8] = { + { b.minX, b.minY, b.minZ, 1 << 16 }, + { b.maxX, b.minY, b.minZ, 1 << 16 }, + { b.minX, b.maxY, b.minZ, 1 << 16 }, + { b.maxX, b.maxY, b.minZ, 1 << 16 }, + { b.minX, b.minY, b.maxZ, 1 << 16 }, + { b.maxX, b.minY, b.maxZ, 1 << 16 }, + { b.minX, b.maxY, b.maxZ, 1 << 16 }, + { b.maxX, b.maxY, b.maxZ, 1 << 16 } }; transform(v, 8); @@ -231,16 +232,19 @@ bool transformBoxRect(const AABBs* box, RectMinMax* rect) for (int32 i = 0; i < 8; i++) { - if (v[i].z <= 0) - return false; + int32 x = v[i].x; + int32 y = v[i].y; + int32 z = v[i].z; - if (v[i].z >= (VIEW_MAX_F >> FIXED_SHIFT)) + if (z < (VIEW_MIN_F >> FIXED_SHIFT) || z >= (VIEW_MAX_F >> FIXED_SHIFT)) continue; - if (v[i].x < rect->x0) rect->x0 = v[i].x; - if (v[i].x > rect->x1) rect->x1 = v[i].x; - if (v[i].y < rect->y0) rect->y0 = v[i].y; - if (v[i].y > rect->y1) rect->y1 = v[i].y; + PERSPECTIVE(x, y, z); + + if (x < rect->x0) rect->x0 = x; + if (x > rect->x1) rect->x1 = x; + if (y < rect->y0) rect->y0 = y; + if (y > rect->y1) rect->y1 = y; } rect->x0 += (FRAME_WIDTH / 2); @@ -256,48 +260,54 @@ bool transformBoxRect(const AABBs* box, RectMinMax* rect) #ifdef USE_ASM #define unpackRoom unpackRoom_asm #define unpackMesh unpackMesh_asm - extern void unpackRoom_asm(const RoomVertex* vertices, int32 vCount); - extern void unpackMesh_asm(const MeshVertex* vertices, int32 vCount); + //#define ccbMap4 ccbMap4_asm + extern "C" void unpackRoom_asm(const RoomVertex* vertices, int32 vCount); + extern "C" void unpackMesh_asm(const MeshVertex* vertices, int32 vCount); + //extern "C" void ccbMap4_asm(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* v2, const Vertex* v3, uint32 shift); #else #define unpackRoom unpackRoom_c #define unpackMesh unpackMesh_c void 
unpackRoom_c(const RoomVertex* vertices, int32 vCount) { - int32 cx = cameraViewOffset.x << F16_SHIFT; - int32 cy = cameraViewOffset.y << F16_SHIFT; - int32 cz = cameraViewOffset.z << F16_SHIFT; - Vertex* res = gVertices; uint32 *v32 = (uint32*)vertices; - for (int32 i = 0; i < vCount; i += 2) + for (int32 i = 0; i < vCount; i += 4) { - uint32 a = *v32++; - uint32 b = *v32++; - res->x = cx + ((a & 0xFF00) << 4); - res->y = cy + (int32(a) >> 16 << 2); - res->z = cz + ((a & 0xFF) << 12); + uint32 n0 = *v32++; + uint32 n1 = *v32++; + + res->x = (n0 << 12) & 0x1F000; + res->y = (n0 << 5) & 0xFC00; + res->z = (n0 << 1) & 0x1F000; + res->w = 1 << 16; res++; - res->x = cx + ((b & 0xFF00) << 4); - res->y = cy + (int32(b) >> 16 << 2); - res->z = cz + ((b & 0xFF) << 12); + + res->x = (n0 >> 4) & 0x1F000; + res->y = (n0 >> 11) & 0xFC00; + res->z = (n0 >> 15) & 0x1F000; + res->w = 1 << 16; + res++; + + res->x = (n1 << 12) & 0x1F000; + res->y = (n1 << 5) & 0xFC00; + res->z = (n1 << 1) & 0x1F000; + res->w = 1 << 16; + res++; + + res->x = (n1 >> 4) & 0x1F000; + res->y = (n1 >> 11) & 0xFC00; + res->z = (n1 >> 15) & 0x1F000; + res->w = 1 << 16; res++; } } void unpackMesh_c(const MeshVertex* vertices, int32 vCount) { - Matrix &m = matrixGet(); - - // TODO_3DO normalize 3x3 interpolated matrix or get cameraViewOffset for the general case somehow - // TODO_3DO MulVec3Mat33_F16 (transposed?) 
- int32 cx = Dot3_F16(*(vec3f16*)&m.e03, *(vec3f16*)&m.e00) >> DOT_SHIFT; // dot(pos, right) - int32 cy = Dot3_F16(*(vec3f16*)&m.e03, *(vec3f16*)&m.e01) >> DOT_SHIFT; // dot(pos, up) - int32 cz = Dot3_F16(*(vec3f16*)&m.e03, *(vec3f16*)&m.e02) >> DOT_SHIFT; // dot(pos, dir) - uint32 *v32 = (uint32*)vertices; Vertex* res = gVertices; @@ -308,24 +318,30 @@ void unpackMesh_c(const MeshVertex* vertices, int32 vCount) uint32 n1 = *v32++; uint32 n2 = *v32++; - res->x = cx + int16(n0 >> 16); - res->y = cy + int16(n0); - res->z = cz + int16(n1 >> 16); + res->x = int16(n0 >> 16); + res->y = int16(n0); + res->z = int16(n1 >> 16); + res->w = 1 << 16; res++; - res->x = cx + int16(n1); - res->y = cy + int16(n2 >> 16); - res->z = cz + int16(n2); + res->x = int16(n1); + res->y = int16(n2 >> 16); + res->z = int16(n2); + res->w = 1 << 16; res++; } } #endif -void transformRoom(const RoomVertex* vertices, int32 vCount, bool underwater) +void transformRoom(const Room* room) { - unpackRoom(vertices, vCount); + int32 vCount = room->info->verticesCount; + if (vCount <= 0) + return; - transform((vec3i*)gVertices, vCount); + unpackRoom(room->data.vertices, vCount); + + transform(gVertices, vCount); int32 x0 = viewportRel.x0; int32 y0 = viewportRel.y0; @@ -336,17 +352,27 @@ void transformRoom(const RoomVertex* vertices, int32 vCount, bool underwater) for (int32 i = 0; i < vCount; i++, res++) { - if (res->z < (VIEW_MIN_F >> FIXED_SHIFT)) { - res->z = ((VIEW_MIN_F >> FIXED_SHIFT) << CLIP_SHIFT) | CLIP_NEAR; - } else if (res->z >= (VIEW_MAX_F >> FIXED_SHIFT)) { - res->z = ((VIEW_MAX_F >> FIXED_SHIFT) << CLIP_SHIFT) | CLIP_FAR; - } else { - res->z = (res->z << CLIP_SHIFT) | classify(res, x0, y0, x1, y1); - } - } + int32 x = res->x; + int32 y = res->y; + int32 z = res->z; + int32 clip = 0; - if (viewport.x0 == 0 && viewport.y0 == 0) - return; + if (z < (VIEW_MIN_F >> FIXED_SHIFT)) { + z = (VIEW_MIN_F >> FIXED_SHIFT); + clip = CLIP_NEAR; + } else if (z > (VIEW_MAX_F >> FIXED_SHIFT)) { + z = 
(VIEW_MAX_F >> FIXED_SHIFT); + clip = CLIP_FAR; + } + + PERSPECTIVE(x, y, z); + + clip |= classify(x, y, x0, y0, x1, y1); + + res->x = x; + res->y = y; + res->z = (z << CLIP_SHIFT) | clip; + } gVerticesCount += vCount; } @@ -355,13 +381,44 @@ void transformMesh(const MeshVertex* vertices, int32 vCount, const uint16* vInte { unpackMesh(vertices, vCount); - transform((vec3i*)gVertices, vCount); + transform(gVertices, vCount); + + int32 x0 = viewportRel.x0; + int32 y0 = viewportRel.y0; + int32 x1 = viewportRel.x1; + int32 y1 = viewportRel.y1; + + Vertex* res = gVertices; + + for (int32 i = 0; i < vCount; i++, res++) + { + int32 x = res->x; + int32 y = res->y; + int32 z = res->z; + int32 clip = 0; + + if (z < (VIEW_MIN_F >> FIXED_SHIFT)) { + z = (VIEW_MIN_F >> FIXED_SHIFT); + clip = CLIP_NEAR; + } else if (z >= (VIEW_MAX_F >> FIXED_SHIFT)) { + z = (VIEW_MAX_F >> FIXED_SHIFT); + clip = CLIP_FAR; + } + + PERSPECTIVE(x, y, z); + + clip |= classify(x, y, x0, y0, x1, y1); + + res->x = x; + res->y = y; + res->z = (z << CLIP_SHIFT) | clip; + } gVerticesCount += vCount; } -#define DEPTH_T_AVG(z0,z1,z2) ((z0 + z1 + z2 + z2) >> (2 + OT_SHIFT)) -#define DEPTH_Q_AVG(z0,z1,z2,z3) ((z0 + z1 + z2 + z3) >> (2 + OT_SHIFT)) +#define DEPTH_T_AVG(z0,z1,z2) ((z0 + z1 + z2 + z2) >> (2 + CLIP_SHIFT + OT_SHIFT)) +#define DEPTH_Q_AVG(z0,z1,z2,z3) ((z0 + z1 + z2 + z3) >> (2 + CLIP_SHIFT + OT_SHIFT)) #define DEPTH_T_MAX(z0,z1,z2) (X_MAX(z0, X_MAX(z1, z2)) >> (CLIP_SHIFT + OT_SHIFT)) #define DEPTH_Q_MAX(z0,z1,z2,z3) (X_MAX(z0, X_MAX(z1, X_MAX(z2, z3))) >> (CLIP_SHIFT + OT_SHIFT)) @@ -430,14 +487,14 @@ X_INLINE void ccbSetColor(uint32 flags, Face* face) X_INLINE void ccbMap4(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* v2, const Vertex* v3, uint32 shift) { - int32 x0 = v0->x; - int32 y0 = v0->y; int32 x1 = v1->x; int32 y1 = v1->y; - int32 x2 = v2->x; - int32 y2 = v2->y; int32 x3 = v3->x; int32 y3 = v3->y; + int32 x2 = v2->x; + int32 y2 = v2->y; + int32 x0 = v0->x; + int32 y0 = v0->y; 
uint32 ws = shift & 0xFF; uint32 hs = shift >> 8; @@ -449,17 +506,22 @@ X_INLINE void ccbMap4(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* int32 vdx0 = (x3 - x0) << hs; int32 vdy0 = (y3 - y0) << hs; + hs = 16 - hs; + int32 hddx = (hdx1 - hdx0) >> hs; + int32 hddy = (hdy1 - hdy0) >> hs; + + f->ccb_XPos = (x0 << 16) + (((FRAME_WIDTH >> 1) << 16) + 32768); + f->ccb_YPos = (y0 << 16) + (((FRAME_HEIGHT >> 1) << 16) + 32768); f->ccb_HDX = hdx0; f->ccb_HDY = hdy0; f->ccb_VDX = vdx0; f->ccb_VDY = vdy0; + f->ccb_HDDX = hddx; + f->ccb_HDDY = hddy; - f->ccb_XPos = (x0 + (FRAME_WIDTH >> 1)) << 16; - f->ccb_YPos = (y0 + (FRAME_HEIGHT >> 1)) << 16; - - hs = 16 - hs; - f->ccb_HDDX = (hdx1 - hdx0) >> hs; - f->ccb_HDDY = (hdy1 - hdy0) >> hs; +#ifdef DEBUG_CLIPPING + f->ccb_PIXC = SHADE_SHADOW; +#endif } X_INLINE void ccbMap3(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* v2, uint32 shift) @@ -479,17 +541,20 @@ X_INLINE void ccbMap3(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* int32 vdx0 = (x2 - x0) << hs; int32 vdy0 = (y2 - y0) << hs; + f->ccb_XPos = (x0 << 16) + (((FRAME_WIDTH >> 1) << 16) + 32768); + f->ccb_YPos = (y0 << 16) + (((FRAME_HEIGHT >> 1) << 16) + 32768); f->ccb_HDX = hdx0; f->ccb_HDY = hdy0; f->ccb_VDX = vdx0; f->ccb_VDY = vdy0; - f->ccb_XPos = (x0 + (FRAME_WIDTH >> 1)) << 16; - f->ccb_YPos = (y0 + (FRAME_HEIGHT >> 1)) << 16; - hs = 16 - hs; f->ccb_HDDX = -hdx0 >> hs; f->ccb_HDDY = -hdy0 >> hs; + +#ifdef DEBUG_CLIPPING + f->ccb_PIXC = SHADE_SHADOW; +#endif } X_INLINE void faceAddRoomQuad(uint32 flags, const Index* indices) @@ -515,9 +580,6 @@ X_INLINE void faceAddRoomQuad(uint32 flags, const Index* indices) if ((c0 & c1 & c2 & c3) & CLIP_MASK) return; - if ((c0 & CLIP_NEAR) || (c1 & CLIP_NEAR) || (c2 & CLIP_NEAR) || (c3 & CLIP_NEAR)) - return; - int32 depth = DEPTH_Q_MAX(c0, c1, c2, c3); if (checkBackface(v0, v1, v3) == !(flags & FACE_CCW)) @@ -563,9 +625,6 @@ X_INLINE void faceAddRoomTriangle(uint32 flags, const Index* indices) 
if ((c0 & c1 & c2) & CLIP_MASK) return; - if ((c0 & CLIP_NEAR) || (c1 & CLIP_NEAR) || (c2 & CLIP_NEAR)) - return; - int32 depth = DEPTH_T_MAX(c0, c1, c2); if (checkBackface(v0, v1, v2) == !(flags & FACE_CCW)) @@ -603,6 +662,14 @@ X_INLINE void faceAddMeshQuad(uint32 flags, uint32 indices, uint32 shade) const Vertex* v2 = gVertices + i2; const Vertex* v3 = gVertices + i3; + uint32 c0 = v0->z; + uint32 c1 = v1->z; + uint32 c2 = v2->z; + uint32 c3 = v3->z; + + if ((c0 & c1 & c2 & c3) & CLIP_MASK) + return; + if (checkBackface(v0, v1, v3) == !(flags & FACE_CCW)) // TODO (hdx0 * vdy0 - vdx0 * hdy0) <= 0 return; @@ -627,6 +694,13 @@ X_INLINE void faceAddMeshTriangle(uint32 flags, uint32 indices, uint32 shade) const Vertex* v1 = gVertices + i1; const Vertex* v2 = gVertices + i2; + uint32 c0 = v0->z; + uint32 c1 = v1->z; + uint32 c2 = v2->z; + + if ((c0 & c1 & c2) & CLIP_MASK) + return; + if (checkBackface(v0, v1, v2)) return; @@ -653,6 +727,14 @@ X_INLINE void faceAddMeshQuadFlat(uint32 flags, uint32 indices, uint32 shade) const Vertex* v2 = gVertices + i2; const Vertex* v3 = gVertices + i3; + uint32 c0 = v0->z; + uint32 c1 = v1->z; + uint32 c2 = v2->z; + uint32 c3 = v3->z; + + if ((c0 & c1 & c2 & c3) & CLIP_MASK) + return; + if (checkBackface(v0, v1, v3)) return; @@ -676,6 +758,13 @@ X_INLINE void faceAddMeshTriangleFlat(uint32 flags, uint32 indices, uint32 shade const Vertex* v1 = gVertices + i1; const Vertex* v2 = gVertices + i2; + uint32 c0 = v0->z; + uint32 c1 = v1->z; + uint32 c2 = v2->z; + + if ((c0 & c1 & c2) & CLIP_MASK) + return; + if (checkBackface(v0, v1, v2)) return; @@ -724,38 +813,50 @@ void faceAddSprite(int32 vx, int32 vy, int32 vz, int32 vg, int32 index) const Matrix &m = matrixGet(); - vec3i v; - v.x = (vx - cameraViewPos.x) << F16_SHIFT; - v.y = (vy - cameraViewPos.y) << F16_SHIFT; - v.z = (vz - cameraViewPos.z) << F16_SHIFT; + vx -= cameraViewPos.x; + vy -= cameraViewPos.y; + vz -= cameraViewPos.z; - MulVec3Mat33DivZ_F16(*(vec3f16*)&v, *(vec3f16*)&v, 
*(mat33f16*)&m, 1 << 8); + int32 z = DP33(m.e20, m.e21, m.e22, vx, vy, vz); - if (v.z < (VIEW_MIN_F >> FIXED_SHIFT) || v.z >= (VIEW_MAX_F >> FIXED_SHIFT)) + if (z < VIEW_MIN_F || z >= VIEW_MAX_F) + { return; + } + + ASSERT(gFacesCount < MAX_FACES); + + int32 x = DP33(m.e00, m.e01, m.e02, vx, vy, vz); + int32 y = DP33(m.e10, m.e11, m.e12, vx, vy, vz); + + x >>= FIXED_SHIFT; + y >>= FIXED_SHIFT; + z >>= FIXED_SHIFT; + + PERSPECTIVE(x, y, z); const Sprite* sprite = level.sprites + index; - int32 d = (1 << 20) / v.z; + int32 d = (1 << 20) / z; int32 x0 = sprite->l * d >> 12; - int32 y0 = sprite->t * d >> 12; int32 x1 = sprite->r * d >> 12; - int32 y1 = sprite->b * d >> 12; - if (x0 == x1) return; + + int32 y0 = sprite->t * d >> 12; + int32 y1 = sprite->b * d >> 12; if (y0 == y1) return; - x0 += v.x; - x1 += v.x; - y0 += v.y; - y1 += v.y; + x0 += x; + x1 += x; + y0 += y; + y1 += y; if (x0 >= viewportRel.x1) return; if (y0 >= viewportRel.y1) return; if (x1 <= viewportRel.x0) return; if (y1 <= viewportRel.y0) return; - int32 depth = X_MAX(0, v.z - 128) >> OT_SHIFT; // depth hack + int32 depth = X_MAX(0, z - 128) >> OT_SHIFT; // depth hack Face* f = faceAdd(depth); diff --git a/src/platform/3do/unpackMesh.s b/src/platform/3do/unpackMesh.s index 08f3f38..ce8e548 100644 --- a/src/platform/3do/unpackMesh.s +++ b/src/platform/3do/unpackMesh.s @@ -1,68 +1,52 @@ AREA |C$$code|, CODE, READONLY |x$codeseg| - IMPORT matrixPtr IMPORT gVertices - EXPORT unpackMesh_asm__FPC10MeshVertexl + EXPORT unpackMesh_asm -unpackMesh_asm__FPC10MeshVertexl +unpackMesh_asm vertices RN r0 -vptr RN r5 vCount RN r1 -cx RN r8 -cy RN r9 -cz RN r10 +vx0 RN r1 +vy0 RN r2 +vz0 RN r3 +vw0 RN r4 +vx1 RN r5 +vy1 RN r6 +vz1 RN r7 +vw1 RN r8 +n0 RN vy0 +n1 RN vx1 +n2 RN vz1 +res RN r12 last RN lr -n0 RN r3 -n1 RN r5 -n2 RN r7 -res RN r1 -DOT3_F16 EQU 0x5000C - stmfd sp!, {r4-r10, lr} - add last, vertices, vCount, lsl #2 - add last, last, vCount, lsl #1 - mov vptr, vertices ; save vertices ptr - ldr r4, 
=matrixPtr - ldr r7, [r4] ; &m.e00 - add r4, r7, #36 ; &m.e03 - - mov r0, r4 - add r1, r7, #24 ; &m.e02 - swi DOT3_F16 - mov cz, r0, asr #10 - - mov r0, r4 - add r1, r7, #12 ; &m.e01 - swi DOT3_F16 - mov cy, r0, asr #10 - - mov r0, r4 - mov r1, r7 ; &m.e00 - swi DOT3_F16 - mov cx, r0, asr #10 - - mov vertices, vptr ; restore vertices ptr + stmfd sp!, {r4-r8, lr} ldr res, =gVertices + ; last = vertices + vCount * 6 + add vCount, vCount, vCount, lsl #1 + add last, vertices, vCount, lsl #1 + mov vw0, #(1 << 16) + mov vw1, #(1 << 16) loop ldmia vertices!, {n0, n1, n2} ; load two encoded vertices cmp vertices, last - add r2, cx, n0, asr #16 ; x + mov vx0, n0, asr #16 ; x mov n0, n0, lsl #16 - add r3, cy, n0, asr #16 ; y + mov vy0, n0, asr #16 ; y - add r4, cz, n1, asr #16 ; z + mov vz0, n1, asr #16 ; z mov n1, n1, lsl #16 - add r5, cx, n1, asr #16 ; x + mov vx1, n1, asr #16 ; x - add r6, cy, n2, asr #16 ; y + mov vy1, n2, asr #16 ; y mov n2, n2, lsl #16 - add r7, cz, n2, asr #16 ; z + mov vz1, n2, asr #16 ; z - stmia res!, {r2, r3, r4, r5, r6, r7} + stmia res!, {vx0, vy0, vz0, vw0, vx1, vy1, vz1, vw1} blt loop - ldmfd sp!, {r4-r10, pc} + ldmfd sp!, {r4-r8, pc} END diff --git a/src/platform/3do/unpackRoom.s b/src/platform/3do/unpackRoom.s index a574ec0..9db2827 100644 --- a/src/platform/3do/unpackRoom.s +++ b/src/platform/3do/unpackRoom.s @@ -3,48 +3,75 @@ IMPORT cameraViewOffset IMPORT gVertices - EXPORT unpackRoom_asm__FPC10RoomVertexl + EXPORT unpackRoom_asm -unpackRoom_asm__FPC10RoomVertexl +unpackRoom_asm vertices RN r0 vCount RN r1 -cx RN r2 -cy RN r3 -cz RN r4 -res RN r5 +vx0 RN r1 +vy0 RN r2 +vz0 RN r3 +vw0 RN r4 +vx1 RN r5 +vy1 RN r6 +vz1 RN r7 +vw1 RN r8 +vx2 RN vx0 +vy2 RN vy0 +vz2 RN vz0 +vw2 RN vw0 +vx3 RN vx1 +vy3 RN vy1 +vz3 RN vz1 +vw3 RN vw1 +n0 RN vz1 +n1 RN r9 +maskH RN r10 +maskV RN r11 +res RN r12 last RN lr stmfd sp!, {r4-r11, lr} - ldr r2, =cameraViewOffset ldr res, =gVertices - add last, vertices, vCount, lsl #2 - ldmfd r2, {cx, cy, cz} - mov cx, 
cx, lsl #2 - mov cy, cy, lsl #2 - mov cz, cz, lsl #2 + add last, vertices, vCount, lsl #1 ; last = vertices + vCount * 2 + mov vw0, #(1 << 16) ; vw2 + mov vw1, #(1 << 16) ; vw3 + mov maskH, #0x1F000 + mov maskV, #0xFC00 -loop ldmia vertices!, {r9, r11} ; load two encoded vertices +loop ldmia vertices!, {n0, n1} ; load four encoded vertices cmp vertices, last + ; n0 = z1:5, y1:6, x1:5, z0:5, y0:6, x0:5 + ; n1 = z3:5, y3:6, x3:5, z2:5, y2:6, x2:5 + ; 1st vertex - and r6, r9, #0xFF00 ; decode x - mov r7, r9, asr #16 ; decode y (signed) - and r8, r9, #0xFF ; decode z - add r6, cx, r6, lsl #4 - add r7, cy, r7, asl #2 - add r8, cz, r8, lsl #12 + and vx0, maskH, n0, lsl #12 ; decode x0 + and vy0, maskV, n0, lsl #5 ; decode y0 + and vz0, maskH, n0, lsl #1 ; decode z0 ; 2nd vertex - and r9, r11, #0xFF00 ; decode x - mov r10, r11, asr #16 ; decode y (signed) - and r11, r11, #0xFF ; decode z - add r9, cx, r9, lsl #4 - add r10, cy, r10, asl #2 - add r11, cz, r11, lsl #12 + and vx1, maskH, n0, lsr #4 ; decode x1 + and vy1, maskV, n0, lsr #11 ; decode y1 + and vz1, maskH, n0, lsr #15 ; decode z1 ; store - stmia res!, {r6, r7, r8, r9, r10, r11} + stmia res!, {vx0, vy0, vz0, vw0, vx1, vy1, vz1, vw1} + + ; 3rd vertex + and vx2, maskH, n1, lsl #12 ; decode x2 + and vy2, maskV, n1, lsl #5 ; decode y2 + and vz2, maskH, n1, lsl #1 ; decode z2 + + ; 4th vertex + and vx3, maskH, n1, lsr #4 ; decode x3 + and vy3, maskV, n1, lsr #11 ; decode y3 + and vz3, maskH, n1, lsr #15 ; decode z3 + + ; store + stmia res!, {vx2, vy2, vz2, vw2, vx3, vy3, vz3, vw3} + blt loop ldmfd sp!, {r4-r11, pc}