1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-05 04:37:50 +02:00

#368 GBA pre-shift z by OT_SHIFT, pre-shift mesh vertex coords, fog math optims

This commit is contained in:
XProger
2022-12-03 18:23:19 +03:00
parent b1a559002c
commit 90e03fa38b
17 changed files with 101 additions and 109 deletions

View File

@@ -205,7 +205,7 @@
// the maximum of active enemies // the maximum of active enemies
#define MAX_ENEMIES 3 #define MAX_ENEMIES 3
// visibility distance // visibility distance
#define VIEW_DIST (1024 * 10) #define VIEW_DIST (10 << 10)
// skip collideSpheres for enemies // skip collideSpheres for enemies
#define FAST_HITMASK #define FAST_HITMASK
#endif #endif
@@ -224,7 +224,7 @@
// set the maximum number of simultaneously played channels // set the maximum number of simultaneously played channels
#define SND_CHANNELS 4 #define SND_CHANNELS 4
// visibility distance // visibility distance
#define VIEW_DIST (1024 * 10) #define VIEW_DIST (10 << 10)
// skip collideSpheres for enemies // skip collideSpheres for enemies
#define FAST_HITMASK #define FAST_HITMASK
#endif #endif
@@ -239,7 +239,7 @@
// the maximum of active enemies // the maximum of active enemies
#define MAX_ENEMIES 3 #define MAX_ENEMIES 3
// visibility distance // visibility distance
#define VIEW_DIST (1024 * 10) #define VIEW_DIST (10 << 10)
// skip collideSpheres for enemies // skip collideSpheres for enemies
#define FAST_HITMASK #define FAST_HITMASK
#endif #endif
@@ -457,10 +457,12 @@ extern int32 fps;
#define FOV_SHIFT 3 #define FOV_SHIFT 3
#define FOG_SHIFT 1 #define FOG_SHIFT 1
#define FOG_MAX VIEW_DIST #define FOG_MAX VIEW_DIST
#define FOG_MIN (FOG_MAX - (8192 >> FOG_SHIFT)) #define FOG_MIN (FOG_MAX - 4096)
#define VIEW_MIN_F (64 << FIXED_SHIFT) #define VIEW_MIN_F (64 << FIXED_SHIFT)
#define VIEW_MAX_F (VIEW_DIST << FIXED_SHIFT) #define VIEW_MAX_F (VIEW_DIST << FIXED_SHIFT)
#define MESH_SHIFT 2
#define TEX_ATTR_AKILL 1 #define TEX_ATTR_AKILL 1
#define NOT_ENEMY -0x4000 // default hp for non enemies #define NOT_ENEMY -0x4000 // default hp for non enemies

View File

@@ -667,12 +667,8 @@ bool traceX(const Location &from, Location &to, bool accurate)
if (!d.x) if (!d.x)
return true; return true;
int32 dx = abs(d.x) >> 3; d.y = (d.y << TRACE_SHIFT) / d.x;
ASSERT(dx < DIV_TABLE_SIZE); d.z = (d.z << TRACE_SHIFT) / d.x;
dx = FixedInvU(dx);
d.y = (d.y * dx) >> (16 + 3 - TRACE_SHIFT);
d.z = (d.z * dx) >> (16 + 3 - TRACE_SHIFT);
vec3i p = from.pos; vec3i p = from.pos;
@@ -680,10 +676,10 @@ bool traceX(const Location &from, Location &to, bool accurate)
if (d.x < 0) if (d.x < 0)
{ {
d.x = -1024; d.x = 1024;
p.x &= ~1023; p.x &= ~1023;
p.y -= d.y * (p.x - from.pos.x) >> TRACE_SHIFT; p.y += d.y * (p.x - from.pos.x) >> TRACE_SHIFT;
p.z -= d.z * (p.x - from.pos.x) >> TRACE_SHIFT; p.z += d.z * (p.x - from.pos.x) >> TRACE_SHIFT;
while (p.x > to.pos.x) while (p.x > to.pos.x)
{ {
@@ -694,7 +690,7 @@ bool traceX(const Location &from, Location &to, bool accurate)
TRACE_CHECK(nextRoom, p.x - 1, p.y, p.z); TRACE_CHECK(nextRoom, p.x - 1, p.y, p.z);
room = nextRoom; room = nextRoom;
p += d; p -= d;
} }
} }
else else
@@ -729,12 +725,8 @@ bool traceZ(const Location &from, Location &to, bool accurate)
if (!d.z) if (!d.z)
return true; return true;
int32 dz = abs(d.z) >> 3; d.x = (d.x << TRACE_SHIFT) / d.z;
ASSERT(dz < DIV_TABLE_SIZE); d.y = (d.y << TRACE_SHIFT) / d.z;
dz = FixedInvU(dz);
d.x = (d.x * dz) >> (16 + 3 - TRACE_SHIFT);
d.y = (d.y * dz) >> (16 + 3 - TRACE_SHIFT);
vec3i p = from.pos; vec3i p = from.pos;
@@ -742,10 +734,10 @@ bool traceZ(const Location &from, Location &to, bool accurate)
if (d.z < 0) if (d.z < 0)
{ {
d.z = -1024; d.z = 1024;
p.z &= ~1023; p.z &= ~1023;
p.x -= d.x * (p.z - from.pos.z) >> TRACE_SHIFT; p.x += d.x * (p.z - from.pos.z) >> TRACE_SHIFT;
p.y -= d.y * (p.z - from.pos.z) >> TRACE_SHIFT; p.y += d.y * (p.z - from.pos.z) >> TRACE_SHIFT;
while (p.z > to.pos.z) while (p.z > to.pos.z)
{ {
@@ -756,7 +748,7 @@ bool traceZ(const Location &from, Location &to, bool accurate)
TRACE_CHECK(nextRoom, p.x, p.y, p.z - 1); TRACE_CHECK(nextRoom, p.x, p.y, p.z - 1);
room = nextRoom; room = nextRoom;
p += d; p -= d;
} }
} }
else else

View File

@@ -71,21 +71,16 @@
.equ FIXED_SHIFT, 14 .equ FIXED_SHIFT, 14
.equ PROJ_SHIFT, 4 .equ PROJ_SHIFT, 4
.equ OT_SHIFT, 4 .equ MESH_SHIFT, 2
.equ VIEW_DIST, (1024 * 10) .equ VIEW_MIN, 64
.equ FOG_SHIFT, 1 .equ VIEW_MAX, (10 << 10)
.equ FOG_MAX, VIEW_DIST
.equ FOG_MIN, (FOG_MAX - (8192 >> FOG_SHIFT))
.equ VIEW_MIN, (64)
.equ VIEW_MAX, (VIEW_DIST)
.equ VIEW_OFF, 4096 .equ VIEW_OFF, 4096
.equ FOG_SHIFT, 4
.equ FOG_MIN, (VIEW_MAX - 4096)
.equ OT_SIZE, 641 .equ OT_SHIFT, 4
.equ OT_SIZE, ((VIEW_MAX >> OT_SHIFT) + 1)
.equ VIEW_MIN_F, (VIEW_MIN << FIXED_SHIFT)
.equ VIEW_MAX_F, (VIEW_MAX << FIXED_SHIFT)
.equ VIEW_OFF_F, (VIEW_OFF << FIXED_SHIFT)
.equ MAX_CAUSTICS, 32 .equ MAX_CAUSTICS, 32
.equ MAX_RAND_TABLE, 32 .equ MAX_RAND_TABLE, 32

View File

@@ -89,7 +89,7 @@ faceAddMeshQuads_asm:
add depth, vg0, vg1, lsl #16 add depth, vg0, vg1, lsl #16
add depth, vg2, lsl #16 add depth, vg2, lsl #16
add depth, vg3, lsl #16 add depth, vg3, lsl #16
lsr depth, #(16 + 2 + OT_SHIFT) lsr depth, #(16 + 2)
// faceAdd // faceAdd
rsb vp0, vertices, vp0, lsr #3 rsb vp0, vertices, vp0, lsr #3

View File

@@ -80,7 +80,7 @@ faceAddMeshTriangles_asm:
lsl vg0, #16 lsl vg0, #16
add depth, vg0, vg1, lsl #16 add depth, vg0, vg1, lsl #16
add depth, vg2, lsl #17 add depth, vg2, lsl #17
lsr depth, #(16 + 2 + OT_SHIFT) lsr depth, #(16 + 2)
// faceAdd // faceAdd
rsb vp0, vertices, vp0, lsr #3 rsb vp0, vertices, vp0, lsr #3

View File

@@ -92,7 +92,7 @@ faceAddRoomQuads_asm:
CCW .skip CCW .skip
// depth = MAX_Z4 // depth (vz0) = MAX_Z4
ldrh vz0, [vp0, #VERTEX_Z] ldrh vz0, [vp0, #VERTEX_Z]
ldrh vz1, [vp1, #VERTEX_Z] ldrh vz1, [vp1, #VERTEX_Z]
ldrh vz2, [vp2, #VERTEX_Z] ldrh vz2, [vp2, #VERTEX_Z]
@@ -103,7 +103,6 @@ faceAddRoomQuads_asm:
movlt vz0, vz2 movlt vz0, vz2
cmp vz0, vz3 cmp vz0, vz3
movlt vz0, vz3 movlt vz0, vz3
mov depth, vz0, lsr #OT_SHIFT
// faceAdd // faceAdd
rsb vp0, vertices, vp0, lsr #3 rsb vp0, vertices, vp0, lsr #3

View File

@@ -83,7 +83,7 @@ faceAddRoomTriangles_asm:
CCW .skip CCW .skip
// depth = MAX_Z3 // depth (vz0) = MAX_Z3
ldrh vz0, [vp0, #VERTEX_Z] ldrh vz0, [vp0, #VERTEX_Z]
ldrh vz1, [vp1, #VERTEX_Z] ldrh vz1, [vp1, #VERTEX_Z]
ldrh vz2, [vp2, #VERTEX_Z] ldrh vz2, [vp2, #VERTEX_Z]
@@ -91,7 +91,6 @@ faceAddRoomTriangles_asm:
movlt vz0, vz1 movlt vz0, vz1
cmp vz0, vz2 cmp vz0, vz2
movlt vz0, vz2 movlt vz0, vz2
mov depth, vz0, lsr #OT_SHIFT
// faceAdd // faceAdd
rsb vp0, vertices, vp0, lsr #3 rsb vp0, vertices, vp0, lsr #3

View File

@@ -14,13 +14,14 @@ vz .req r13
m .req r14 m .req r14
tmp .req m tmp .req m
vp .req m vp .req m
vMinXY .req z dz .req vz
vMaxXY .req r minXY .req z
maxXY .req r
rMinX .req vx minX .req vx
rMaxX .req x maxX .req x
rMinY .req vy minY .req vy
rMaxY .req y maxY .req y
.global sphereIsVisible_asm .global sphereIsVisible_asm
sphereIsVisible_asm: sphereIsVisible_asm:
@@ -42,38 +43,42 @@ sphereIsVisible_asm:
mla vz, my, y, vz mla vz, my, y, vz
mla vz, mz, z, vz mla vz, mz, z, vz
cmp vz, #VIEW_MAX_F cmp vz, #(VIEW_MAX << FIXED_SHIFT)
bhi .fail bhi .fail
mov x, vx, asr #FIXED_SHIFT mov x, vx, asr #FIXED_SHIFT
mov y, vy, asr #FIXED_SHIFT mov y, vy, asr #FIXED_SHIFT
mov z, vz, asr #(FIXED_SHIFT + OT_SHIFT)
mov z, vz, lsr #(FIXED_SHIFT + 4) add dz, z, z, lsr #2
add z, vz, lsr #(FIXED_SHIFT + 6) divLUT tmp, dz
divLUT tmp, z
mul x, tmp, x mul x, tmp, x
mul y, tmp, y mul y, tmp, y
mul r, tmp, r mul r, tmp, r
mov x, x, asr #(16 - PROJ_SHIFT) mov x, x, asr #(16 - PROJ_SHIFT)
mov y, y, lsl #(PROJ_SHIFT) mov y, y, asr #(16 - PROJ_SHIFT)
sub rMinX, x, r, lsr #(16 - PROJ_SHIFT) sub minX, x, r, lsr #(16 - PROJ_SHIFT)
add rMaxX, x, r, lsr #(16 - PROJ_SHIFT) add maxX, x, r, lsr #(16 - PROJ_SHIFT)
sub rMinY, y, r, lsl #PROJ_SHIFT sub minY, y, r, lsr #(16 - PROJ_SHIFT)
add rMaxY, y, r, lsl #PROJ_SHIFT add maxY, y, r, lsr #(16 - PROJ_SHIFT)
ldr vp, =viewportRel ldr vp, =viewportRel
ldmia vp, {vMinXY, vMaxXY} ldmia vp, {minXY, maxXY}
cmp rMaxX, vMinXY, asr #16 cmp maxX, minXY, asr #16
blt .fail ble .fail
cmp rMaxY, vMinXY, lsl #16 cmp minX, maxXY, asr #16
blt .fail bge .fail
cmp rMinX, vMaxXY, asr #16
bgt .fail lsl minXY, #16
cmp rMinY, vMaxXY, lsl #16 lsl maxXY, #16
bgt .fail
cmp maxY, minXY, asr #16
ble .fail
cmp minY, maxXY, asr #16
bge .fail
mov r0, #1 mov r0, #1
fiq_off fiq_off

View File

@@ -58,7 +58,7 @@ transformMesh_asm:
asr mw1, #FIXED_SHIFT asr mw1, #FIXED_SHIFT
fiq_off fiq_off
ldmia m, {mx2, my2, mz2, mw2} ldmia m, {mx2, my2, mz2, mw2}
asr mw2, #FIXED_SHIFT asr mw2, #(FIXED_SHIFT + OT_SHIFT)
fiq_on fiq_on
.loop: .loop:
@@ -71,40 +71,39 @@ transformMesh_asm:
mul x, mx0, vx mul x, mx0, vx
mla x, my0, vy, x mla x, my0, vy, x
mla x, mz0, vz, x mla x, mz0, vz, x
add x, mw0, x, asr #FIXED_SHIFT add x, mw0, x, asr #(FIXED_SHIFT - MESH_SHIFT)
// transform y // transform y
mul y, mx1, vx mul y, mx1, vx
mla y, my1, vy, y mla y, my1, vy, y
mla y, mz1, vz, y mla y, mz1, vz, y
add y, mw1, y, asr #FIXED_SHIFT add y, mw1, y, asr #(FIXED_SHIFT - MESH_SHIFT)
fiq_off fiq_off
// transform z // transform z
mul z, mx2, vx mul z, mx2, vx
mla z, my2, vy, z mla z, my2, vy, z
mla z, mz2, vz, z mla z, mz2, vz, z
add z, mw2, z, asr #FIXED_SHIFT add z, mw2, z, asr #(FIXED_SHIFT - MESH_SHIFT + OT_SHIFT)
bic vg, #CLIP_MASK // clear clipping flags bic vg, #CLIP_MASK // clear clipping flags
// z clipping // z clipping
cmp z, #VIEW_MIN cmp z, #(VIEW_MIN >> OT_SHIFT)
movle z, #VIEW_MIN movle z, #(VIEW_MIN >> OT_SHIFT)
orrle vg, #CLIP_NEAR orrle vg, #CLIP_NEAR
cmp z, #VIEW_MAX cmp z, #(VIEW_MAX >> OT_SHIFT)
movge z, #VIEW_MAX movge z, #(VIEW_MAX >> OT_SHIFT)
orrge vg, #CLIP_FAR orrge vg, #CLIP_FAR
// project // project
mov dz, z, lsr #4 add dz, z, z, lsr #2
add dz, z, lsr #6
divLUT tmp, dz divLUT tmp, dz
mul dx, x, tmp mul dx, x, tmp
mul dy, y, tmp mul dy, y, tmp
asr x, dx, #(16 - PROJ_SHIFT) asr x, dx, #(16 - PROJ_SHIFT)
asr y, dy, #(16 - PROJ_SHIFT) asr y, dy, #(16 - PROJ_SHIFT)
add x, #(FRAME_WIDTH >> 1) add x, #(FRAME_WIDTH >> 1)
add y, #(FRAME_HEIGHT >> 1) add y, #(FRAME_HEIGHT >> 1)

View File

@@ -35,7 +35,7 @@ maxXY .req vz
tmp .req vy tmp .req vy
dx .req vz dx .req vz
dy .req tmp dy .req vy
dz .req vz dz .req vz
fog .req vz fog .req vz
@@ -65,7 +65,7 @@ transformRoom_asm:
asr mw1, #FIXED_SHIFT asr mw1, #FIXED_SHIFT
fiq_off fiq_off
ldmia m, {mx2, my2, mz2, mw2} ldmia m, {mx2, my2, mz2, mw2}
asr mw2, #FIXED_SHIFT asr mw2, #(FIXED_SHIFT + OT_SHIFT)
.loop: .loop:
// unpack vertex // unpack vertex
@@ -79,16 +79,16 @@ transformRoom_asm:
mul z, mx2, vx mul z, mx2, vx
mla z, my2, vy, z mla z, my2, vy, z
mla z, mz2, vz, z mla z, mz2, vz, z
add z, mw2, z, asr #(FIXED_SHIFT - 8) add z, mw2, z, asr #(FIXED_SHIFT - 8 + OT_SHIFT)
// skip if vertex is out of z-range // skip if vertex is out of z-range
add z, #VIEW_OFF add z, #(VIEW_OFF >> OT_SHIFT)
cmp z, #(VIEW_OFF + VIEW_OFF + VIEW_MAX) cmp z, #((VIEW_OFF + VIEW_OFF + VIEW_MAX) >> OT_SHIFT)
movhi vg, #(CLIP_NEAR + CLIP_FAR) movhi vg, #(CLIP_NEAR + CLIP_FAR)
bhi .skip bhi .skip
mov vg, v, lsr #24 mov vg, v, lsr #(24 + 3)
sub z, #VIEW_OFF sub z, #(VIEW_OFF >> OT_SHIFT)
fiq_on fiq_on
// transform y // transform y
@@ -105,26 +105,21 @@ transformRoom_asm:
fiq_off fiq_off
// fog // fog
cmp z, #FOG_MIN subs fog, z, #(FOG_MIN >> OT_SHIFT)
subgt fog, z, #FOG_MIN addgt vg, fog, lsr #(3 + FOG_SHIFT - OT_SHIFT)
addgt vg, fog, lsr #4 cmpgt vg, #31
// vg 0..255 -> 0..31
lsr vg, #3
cmp vg, #31
movgt vg, #31 movgt vg, #31
// z clipping // z clipping
cmp z, #VIEW_MIN cmp z, #(VIEW_MIN >> OT_SHIFT)
movle z, #VIEW_MIN movle z, #(VIEW_MIN >> OT_SHIFT)
orrle vg, #CLIP_NEAR orrle vg, #CLIP_NEAR
cmp z, #VIEW_MAX cmp z, #(VIEW_MAX >> OT_SHIFT)
movge z, #VIEW_MAX movge z, #(VIEW_MAX >> OT_SHIFT)
orrge vg, #CLIP_FAR orrge vg, #CLIP_FAR
// project // project
mov dz, z, lsr #4 add dz, z, z, lsr #2
add dz, z, lsr #6
divLUT tmp, dz divLUT tmp, dz
mul dx, x, tmp mul dx, x, tmp
mul dy, y, tmp mul dy, y, tmp

View File

@@ -136,8 +136,7 @@ transformRoomUW_asm:
add vg, caust, asr #5 add vg, caust, asr #5
// fog // fog
cmp z, #FOG_MIN subs fog, z, #FOG_MIN
subgt fog, z, #FOG_MIN
addgt vg, fog, lsr #4 addgt vg, fog, lsr #4
// vg 0..255 -> 0..31 // vg 0..255 -> 0..31
@@ -154,8 +153,8 @@ transformRoomUW_asm:
orrge vg, #CLIP_FAR orrge vg, #CLIP_FAR
// project // project
mov dz, z, lsr #4 add dz, z, z, lsr #2
add dz, z, lsr #6 lsr dz, #4
divLUT tmp, dz divLUT tmp, dz
mul dx, x, tmp mul dx, x, tmp
mul dy, y, tmp mul dy, y, tmp

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -692,9 +692,9 @@ struct out_GBA
int16 x, y, z; int16 x, y, z;
} v; } v;
v.x = vertices[j].x; v.x = vertices[j].x >> 2;
v.y = vertices[j].y; v.y = vertices[j].y >> 2;
v.z = vertices[j].z; v.z = vertices[j].z >> 2;
f.write(v.x); f.write(v.x);
f.write(v.y); f.write(v.y);

View File

@@ -166,7 +166,7 @@ void transformRoom_c(const RoomVertex* vertices, int32 count)
int32 vx = (value & (0xFF)) << 8; int32 vx = (value & (0xFF)) << 8;
int32 vy = (value & (0xFF << 8)); int32 vy = (value & (0xFF << 8));
int32 vz = (value & (0xFF << 16)) >> 8; int32 vz = (value & (0xFF << 16)) >> 8;
int32 vg = (value & (0xFF << 24)) >> (24 - 5); int32 vg = (value & (0xFF << 24)) >> (24 + 3);
const Matrix &m = matrixGet(); const Matrix &m = matrixGet();
int32 x = DP43(m.e00, m.e01, m.e02, m.e03, vx, vy, vz); int32 x = DP43(m.e00, m.e01, m.e02, m.e03, vx, vy, vz);
@@ -189,11 +189,13 @@ void transformRoom_c(const RoomVertex* vertices, int32 count)
y >>= FIXED_SHIFT; y >>= FIXED_SHIFT;
z >>= FIXED_SHIFT; z >>= FIXED_SHIFT;
if (z > FOG_MIN) int32 fog = z - FOG_MIN;
if (fog > 0)
{ {
vg += (z - FOG_MIN) << FOG_SHIFT; vg += fog >> (FOG_SHIFT + 3);
if (vg > 8191) { if (vg > 31)
vg = 8191; {
vg = 31;
} }
} }
@@ -889,6 +891,11 @@ void renderShadow(int32 x, int32 z, int32 sx, int32 sz)
return; return;
} }
x >>= MESH_SHIFT;
z >>= MESH_SHIFT;
sx >>= MESH_SHIFT;
sz >>= MESH_SHIFT;
int16 xns1 = x - sx; int16 xns1 = x - sx;
int16 xps1 = x + sx; int16 xps1 = x + sx;
int16 xns2 = xns1 - sx; int16 xns2 = xns1 - sx;