1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-01-17 04:48:57 +01:00

#368 GBA pre-shift z by OT_SHIFT, pre-shift mesh vertex coords, fog math optims

This commit is contained in:
XProger 2022-12-03 18:23:19 +03:00
parent b1a559002c
commit 90e03fa38b
17 changed files with 101 additions and 109 deletions

View File

@ -205,7 +205,7 @@
// the maximum number of active enemies
#define MAX_ENEMIES 3
// visibility distance
#define VIEW_DIST (1024 * 10)
#define VIEW_DIST (10 << 10)
// skip collideSpheres for enemies
#define FAST_HITMASK
#endif
@ -224,7 +224,7 @@
// set the maximum number of simultaneously played channels
#define SND_CHANNELS 4
// visibility distance
#define VIEW_DIST (1024 * 10)
#define VIEW_DIST (10 << 10)
// skip collideSpheres for enemies
#define FAST_HITMASK
#endif
@ -239,7 +239,7 @@
// the maximum number of active enemies
#define MAX_ENEMIES 3
// visibility distance
#define VIEW_DIST (1024 * 10)
#define VIEW_DIST (10 << 10)
// skip collideSpheres for enemies
#define FAST_HITMASK
#endif
@ -457,10 +457,12 @@ extern int32 fps;
#define FOV_SHIFT 3
#define FOG_SHIFT 1
#define FOG_MAX VIEW_DIST
#define FOG_MIN (FOG_MAX - (8192 >> FOG_SHIFT))
#define FOG_MIN (FOG_MAX - 4096)
#define VIEW_MIN_F (64 << FIXED_SHIFT)
#define VIEW_MAX_F (VIEW_DIST << FIXED_SHIFT)
#define MESH_SHIFT 2
#define TEX_ATTR_AKILL 1
#define NOT_ENEMY -0x4000 // default hp for non enemies

View File

@ -667,12 +667,8 @@ bool traceX(const Location &from, Location &to, bool accurate)
if (!d.x)
return true;
int32 dx = abs(d.x) >> 3;
ASSERT(dx < DIV_TABLE_SIZE);
dx = FixedInvU(dx);
d.y = (d.y * dx) >> (16 + 3 - TRACE_SHIFT);
d.z = (d.z * dx) >> (16 + 3 - TRACE_SHIFT);
d.y = (d.y << TRACE_SHIFT) / d.x;
d.z = (d.z << TRACE_SHIFT) / d.x;
vec3i p = from.pos;
@ -680,10 +676,10 @@ bool traceX(const Location &from, Location &to, bool accurate)
if (d.x < 0)
{
d.x = -1024;
d.x = 1024;
p.x &= ~1023;
p.y -= d.y * (p.x - from.pos.x) >> TRACE_SHIFT;
p.z -= d.z * (p.x - from.pos.x) >> TRACE_SHIFT;
p.y += d.y * (p.x - from.pos.x) >> TRACE_SHIFT;
p.z += d.z * (p.x - from.pos.x) >> TRACE_SHIFT;
while (p.x > to.pos.x)
{
@ -694,7 +690,7 @@ bool traceX(const Location &from, Location &to, bool accurate)
TRACE_CHECK(nextRoom, p.x - 1, p.y, p.z);
room = nextRoom;
p += d;
p -= d;
}
}
else
@ -729,12 +725,8 @@ bool traceZ(const Location &from, Location &to, bool accurate)
if (!d.z)
return true;
int32 dz = abs(d.z) >> 3;
ASSERT(dz < DIV_TABLE_SIZE);
dz = FixedInvU(dz);
d.x = (d.x * dz) >> (16 + 3 - TRACE_SHIFT);
d.y = (d.y * dz) >> (16 + 3 - TRACE_SHIFT);
d.x = (d.x << TRACE_SHIFT) / d.z;
d.y = (d.y << TRACE_SHIFT) / d.z;
vec3i p = from.pos;
@ -742,10 +734,10 @@ bool traceZ(const Location &from, Location &to, bool accurate)
if (d.z < 0)
{
d.z = -1024;
d.z = 1024;
p.z &= ~1023;
p.x -= d.x * (p.z - from.pos.z) >> TRACE_SHIFT;
p.y -= d.y * (p.z - from.pos.z) >> TRACE_SHIFT;
p.x += d.x * (p.z - from.pos.z) >> TRACE_SHIFT;
p.y += d.y * (p.z - from.pos.z) >> TRACE_SHIFT;
while (p.z > to.pos.z)
{
@ -756,7 +748,7 @@ bool traceZ(const Location &from, Location &to, bool accurate)
TRACE_CHECK(nextRoom, p.x, p.y, p.z - 1);
room = nextRoom;
p += d;
p -= d;
}
}
else

View File

@ -71,21 +71,16 @@
.equ FIXED_SHIFT, 14
.equ PROJ_SHIFT, 4
.equ OT_SHIFT, 4
.equ MESH_SHIFT, 2
.equ VIEW_DIST, (1024 * 10)
.equ FOG_SHIFT, 1
.equ FOG_MAX, VIEW_DIST
.equ FOG_MIN, (FOG_MAX - (8192 >> FOG_SHIFT))
.equ VIEW_MIN, (64)
.equ VIEW_MAX, (VIEW_DIST)
.equ VIEW_MIN, 64
.equ VIEW_MAX, (10 << 10)
.equ VIEW_OFF, 4096
.equ FOG_SHIFT, 4
.equ FOG_MIN, (VIEW_MAX - 4096)
.equ OT_SIZE, 641
.equ VIEW_MIN_F, (VIEW_MIN << FIXED_SHIFT)
.equ VIEW_MAX_F, (VIEW_MAX << FIXED_SHIFT)
.equ VIEW_OFF_F, (VIEW_OFF << FIXED_SHIFT)
.equ OT_SHIFT, 4
.equ OT_SIZE, ((VIEW_MAX >> OT_SHIFT) + 1)
.equ MAX_CAUSTICS, 32
.equ MAX_RAND_TABLE, 32

View File

@ -89,7 +89,7 @@ faceAddMeshQuads_asm:
add depth, vg0, vg1, lsl #16
add depth, vg2, lsl #16
add depth, vg3, lsl #16
lsr depth, #(16 + 2 + OT_SHIFT)
lsr depth, #(16 + 2)
// faceAdd
rsb vp0, vertices, vp0, lsr #3

View File

@ -80,7 +80,7 @@ faceAddMeshTriangles_asm:
lsl vg0, #16
add depth, vg0, vg1, lsl #16
add depth, vg2, lsl #17
lsr depth, #(16 + 2 + OT_SHIFT)
lsr depth, #(16 + 2)
// faceAdd
rsb vp0, vertices, vp0, lsr #3

View File

@ -92,7 +92,7 @@ faceAddRoomQuads_asm:
CCW .skip
// depth = MAX_Z4
// depth (vz0) = MAX_Z4
ldrh vz0, [vp0, #VERTEX_Z]
ldrh vz1, [vp1, #VERTEX_Z]
ldrh vz2, [vp2, #VERTEX_Z]
@ -103,7 +103,6 @@ faceAddRoomQuads_asm:
movlt vz0, vz2
cmp vz0, vz3
movlt vz0, vz3
mov depth, vz0, lsr #OT_SHIFT
// faceAdd
rsb vp0, vertices, vp0, lsr #3

View File

@ -83,7 +83,7 @@ faceAddRoomTriangles_asm:
CCW .skip
// depth = MAX_Z3
// depth (vz0) = MAX_Z3
ldrh vz0, [vp0, #VERTEX_Z]
ldrh vz1, [vp1, #VERTEX_Z]
ldrh vz2, [vp2, #VERTEX_Z]
@ -91,7 +91,6 @@ faceAddRoomTriangles_asm:
movlt vz0, vz1
cmp vz0, vz2
movlt vz0, vz2
mov depth, vz0, lsr #OT_SHIFT
// faceAdd
rsb vp0, vertices, vp0, lsr #3

View File

@ -14,13 +14,14 @@ vz .req r13
m .req r14
tmp .req m
vp .req m
vMinXY .req z
vMaxXY .req r
dz .req vz
minXY .req z
maxXY .req r
rMinX .req vx
rMaxX .req x
rMinY .req vy
rMaxY .req y
minX .req vx
maxX .req x
minY .req vy
maxY .req y
.global sphereIsVisible_asm
sphereIsVisible_asm:
@ -42,38 +43,42 @@ sphereIsVisible_asm:
mla vz, my, y, vz
mla vz, mz, z, vz
cmp vz, #VIEW_MAX_F
cmp vz, #(VIEW_MAX << FIXED_SHIFT)
bhi .fail
mov x, vx, asr #FIXED_SHIFT
mov y, vy, asr #FIXED_SHIFT
mov z, vz, asr #(FIXED_SHIFT + OT_SHIFT)
mov z, vz, lsr #(FIXED_SHIFT + 4)
add z, vz, lsr #(FIXED_SHIFT + 6)
divLUT tmp, z
add dz, z, z, lsr #2
divLUT tmp, dz
mul x, tmp, x
mul y, tmp, y
mul r, tmp, r
mov x, x, asr #(16 - PROJ_SHIFT)
mov y, y, lsl #(PROJ_SHIFT)
mov y, y, asr #(16 - PROJ_SHIFT)
sub rMinX, x, r, lsr #(16 - PROJ_SHIFT)
add rMaxX, x, r, lsr #(16 - PROJ_SHIFT)
sub rMinY, y, r, lsl #PROJ_SHIFT
add rMaxY, y, r, lsl #PROJ_SHIFT
sub minX, x, r, lsr #(16 - PROJ_SHIFT)
add maxX, x, r, lsr #(16 - PROJ_SHIFT)
sub minY, y, r, lsr #(16 - PROJ_SHIFT)
add maxY, y, r, lsr #(16 - PROJ_SHIFT)
ldr vp, =viewportRel
ldmia vp, {vMinXY, vMaxXY}
ldmia vp, {minXY, maxXY}
cmp rMaxX, vMinXY, asr #16
blt .fail
cmp rMaxY, vMinXY, lsl #16
blt .fail
cmp rMinX, vMaxXY, asr #16
bgt .fail
cmp rMinY, vMaxXY, lsl #16
bgt .fail
cmp maxX, minXY, asr #16
ble .fail
cmp minX, maxXY, asr #16
bge .fail
lsl minXY, #16
lsl maxXY, #16
cmp maxY, minXY, asr #16
ble .fail
cmp minY, maxXY, asr #16
bge .fail
mov r0, #1
fiq_off

View File

@ -58,7 +58,7 @@ transformMesh_asm:
asr mw1, #FIXED_SHIFT
fiq_off
ldmia m, {mx2, my2, mz2, mw2}
asr mw2, #FIXED_SHIFT
asr mw2, #(FIXED_SHIFT + OT_SHIFT)
fiq_on
.loop:
@ -71,40 +71,39 @@ transformMesh_asm:
mul x, mx0, vx
mla x, my0, vy, x
mla x, mz0, vz, x
add x, mw0, x, asr #FIXED_SHIFT
add x, mw0, x, asr #(FIXED_SHIFT - MESH_SHIFT)
// transform y
mul y, mx1, vx
mla y, my1, vy, y
mla y, mz1, vz, y
add y, mw1, y, asr #FIXED_SHIFT
add y, mw1, y, asr #(FIXED_SHIFT - MESH_SHIFT)
fiq_off
// transform z
mul z, mx2, vx
mla z, my2, vy, z
mla z, mz2, vz, z
add z, mw2, z, asr #FIXED_SHIFT
add z, mw2, z, asr #(FIXED_SHIFT - MESH_SHIFT + OT_SHIFT)
bic vg, #CLIP_MASK // clear clipping flags
// z clipping
cmp z, #VIEW_MIN
movle z, #VIEW_MIN
cmp z, #(VIEW_MIN >> OT_SHIFT)
movle z, #(VIEW_MIN >> OT_SHIFT)
orrle vg, #CLIP_NEAR
cmp z, #VIEW_MAX
movge z, #VIEW_MAX
cmp z, #(VIEW_MAX >> OT_SHIFT)
movge z, #(VIEW_MAX >> OT_SHIFT)
orrge vg, #CLIP_FAR
// project
mov dz, z, lsr #4
add dz, z, lsr #6
add dz, z, z, lsr #2
divLUT tmp, dz
mul dx, x, tmp
mul dy, y, tmp
asr x, dx, #(16 - PROJ_SHIFT)
asr y, dy, #(16 - PROJ_SHIFT)
add x, #(FRAME_WIDTH >> 1)
add y, #(FRAME_HEIGHT >> 1)

View File

@ -35,7 +35,7 @@ maxXY .req vz
tmp .req vy
dx .req vz
dy .req tmp
dy .req vy
dz .req vz
fog .req vz
@ -65,7 +65,7 @@ transformRoom_asm:
asr mw1, #FIXED_SHIFT
fiq_off
ldmia m, {mx2, my2, mz2, mw2}
asr mw2, #FIXED_SHIFT
asr mw2, #(FIXED_SHIFT + OT_SHIFT)
.loop:
// unpack vertex
@ -79,16 +79,16 @@ transformRoom_asm:
mul z, mx2, vx
mla z, my2, vy, z
mla z, mz2, vz, z
add z, mw2, z, asr #(FIXED_SHIFT - 8)
add z, mw2, z, asr #(FIXED_SHIFT - 8 + OT_SHIFT)
// skip if vertex is out of z-range
add z, #VIEW_OFF
cmp z, #(VIEW_OFF + VIEW_OFF + VIEW_MAX)
add z, #(VIEW_OFF >> OT_SHIFT)
cmp z, #((VIEW_OFF + VIEW_OFF + VIEW_MAX) >> OT_SHIFT)
movhi vg, #(CLIP_NEAR + CLIP_FAR)
bhi .skip
mov vg, v, lsr #24
sub z, #VIEW_OFF
mov vg, v, lsr #(24 + 3)
sub z, #(VIEW_OFF >> OT_SHIFT)
fiq_on
// transform y
@ -105,26 +105,21 @@ transformRoom_asm:
fiq_off
// fog
cmp z, #FOG_MIN
subgt fog, z, #FOG_MIN
addgt vg, fog, lsr #4
// vg 0..255 -> 0..31
lsr vg, #3
cmp vg, #31
subs fog, z, #(FOG_MIN >> OT_SHIFT)
addgt vg, fog, lsr #(3 + FOG_SHIFT - OT_SHIFT)
cmpgt vg, #31
movgt vg, #31
// z clipping
cmp z, #VIEW_MIN
movle z, #VIEW_MIN
cmp z, #(VIEW_MIN >> OT_SHIFT)
movle z, #(VIEW_MIN >> OT_SHIFT)
orrle vg, #CLIP_NEAR
cmp z, #VIEW_MAX
movge z, #VIEW_MAX
cmp z, #(VIEW_MAX >> OT_SHIFT)
movge z, #(VIEW_MAX >> OT_SHIFT)
orrge vg, #CLIP_FAR
// project
mov dz, z, lsr #4
add dz, z, lsr #6
add dz, z, z, lsr #2
divLUT tmp, dz
mul dx, x, tmp
mul dy, y, tmp

View File

@ -136,8 +136,7 @@ transformRoomUW_asm:
add vg, caust, asr #5
// fog
cmp z, #FOG_MIN
subgt fog, z, #FOG_MIN
subs fog, z, #FOG_MIN
addgt vg, fog, lsr #4
// vg 0..255 -> 0..31
@ -154,8 +153,8 @@ transformRoomUW_asm:
orrge vg, #CLIP_FAR
// project
mov dz, z, lsr #4
add dz, z, lsr #6
add dz, z, z, lsr #2
lsr dz, #4
divLUT tmp, dz
mul dx, x, tmp
mul dy, y, tmp

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -692,9 +692,9 @@ struct out_GBA
int16 x, y, z;
} v;
v.x = vertices[j].x;
v.y = vertices[j].y;
v.z = vertices[j].z;
v.x = vertices[j].x >> 2;
v.y = vertices[j].y >> 2;
v.z = vertices[j].z >> 2;
f.write(v.x);
f.write(v.y);

View File

@ -166,7 +166,7 @@ void transformRoom_c(const RoomVertex* vertices, int32 count)
int32 vx = (value & (0xFF)) << 8;
int32 vy = (value & (0xFF << 8));
int32 vz = (value & (0xFF << 16)) >> 8;
int32 vg = (value & (0xFF << 24)) >> (24 - 5);
int32 vg = (value & (0xFF << 24)) >> (24 + 3);
const Matrix &m = matrixGet();
int32 x = DP43(m.e00, m.e01, m.e02, m.e03, vx, vy, vz);
@ -189,11 +189,13 @@ void transformRoom_c(const RoomVertex* vertices, int32 count)
y >>= FIXED_SHIFT;
z >>= FIXED_SHIFT;
if (z > FOG_MIN)
int32 fog = z - FOG_MIN;
if (fog > 0)
{
vg += (z - FOG_MIN) << FOG_SHIFT;
if (vg > 8191) {
vg = 8191;
vg += fog >> (FOG_SHIFT + 3);
if (vg > 31)
{
vg = 31;
}
}
@ -889,6 +891,11 @@ void renderShadow(int32 x, int32 z, int32 sx, int32 sz)
return;
}
x >>= MESH_SHIFT;
z >>= MESH_SHIFT;
sx >>= MESH_SHIFT;
sz >>= MESH_SHIFT;
int16 xns1 = x - sx;
int16 xps1 = x + sx;
int16 xns2 = xns1 - sx;