1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-03-13 23:59:41 +01:00

#368 micro optimizations, some code cleanup

This commit is contained in:
XProger 2022-02-13 14:10:02 +03:00
parent ae63f1c090
commit 6656837473
7 changed files with 125 additions and 129 deletions

View File

@ -59,26 +59,25 @@ faceAddMeshQuads_asm:
CCW .skip
// fetch ((clip << 8) | g)
ldrh vg0, [vp0, #VERTEX_G]
ldrh vg1, [vp1, #VERTEX_G]
ldrh vg2, [vp2, #VERTEX_G]
ldrh vg3, [vp3, #VERTEX_G]
// fetch clip flags
ldrb vg0, [vp0, #VERTEX_CLIP]
ldrb vg1, [vp1, #VERTEX_CLIP]
ldrb vg2, [vp2, #VERTEX_CLIP]
ldrb vg3, [vp3, #VERTEX_CLIP]
// check clipping
and tmp, vg0, vg1
and tmp, tmp, vg2
and tmp, tmp, vg3
tst tmp, #CLIP_MASK
and tmp, vg2
ands tmp, vg3
bne .skip
// mark if should be clipped by viewport
orr tmp, vg0, vg1
orr tmp, tmp, vg2
orr tmp, tmp, vg3
tst tmp, #CLIP_MASK_VP
orr tmp, vg2
orr tmp, vg3
tst tmp, #(CLIP_MASK_VP >> 8)
ldrh flags, [polys, #-8]
orrne flags, flags, #FACE_CLIPPED
orrne flags, #FACE_CLIPPED
// vz0 = AVG_Z4 (depth)
ldrh vz0, [vp0, #VERTEX_Z]
@ -86,28 +85,27 @@ faceAddMeshQuads_asm:
ldrh vz2, [vp2, #VERTEX_Z]
ldrh vz3, [vp3, #VERTEX_Z]
add depth, vz0, vz1
add depth, depth, vz2
add depth, depth, vz3
mov depth, depth, lsr #(2 + OT_SHIFT)
add depth, vz2
add depth, vz3
lsr depth, #(2 + OT_SHIFT)
// faceAdd
ldr vertices, =gVertices
sub vp0, vertices
sub vp1, vertices
sub vp2, vertices
sub vp3, vertices
sub vp0, vp0, vertices
sub vp1, vp1, vertices
sub vp2, vp2, vertices
sub vp3, vp3, vertices
mov vp0, vp0, lsr #3
lsr vp0, #3
orr vp1, vp0, vp1, lsl #(16 - 3)
mov vp2, vp2, lsr #3
lsr vp2, #3
orr vp3, vp2, vp3, lsl #(16 - 3)
ldr next, [ot, depth, lsl #2]
str face, [ot, depth, lsl #2]
stmia face!, {flags, next, vp1, vp3}
.skip:
subs count, count, #1
subs count, #1
bne .loop
ldr tmp, =gFacesBase

View File

@ -56,40 +56,39 @@ faceAddMeshTriangles_asm:
CCW .skip
// fetch ((clip << 8) | g)
ldrh vg0, [vp0, #VERTEX_G]
ldrh vg1, [vp1, #VERTEX_G]
ldrh vg2, [vp2, #VERTEX_G]
// fetch clip flags
ldrb vg0, [vp0, #VERTEX_CLIP]
ldrb vg1, [vp1, #VERTEX_CLIP]
ldrb vg2, [vp2, #VERTEX_CLIP]
// check clipping
and tmp, vg0, vg1
and tmp, tmp, vg2
tst tmp, #CLIP_MASK
ands tmp, vg2
bne .skip
// mark if should be clipped by viewport
orr tmp, vg0, vg1
orr tmp, tmp, vg2
tst tmp, #CLIP_MASK_VP
orr tmp, vg2
tst tmp, #(CLIP_MASK_VP >> 8)
ldrh flags, [polys, #-8]
orrne flags, flags, #FACE_CLIPPED
orrne flags, #FACE_CLIPPED
// vz0 = AVG_Z3 (depth)
ldrh vz0, [vp0, #VERTEX_Z]
ldrh vz1, [vp1, #VERTEX_Z]
ldrh vz2, [vp2, #VERTEX_Z]
add depth, vz0, vz1
add depth, depth, vz2, lsl #1
mov depth, depth, lsr #(2 + OT_SHIFT)
add depth, vz2, lsl #1
lsr depth, #(2 + OT_SHIFT)
// faceAdd
sub vp0, vp0, vertices
sub vp1, vp1, vertices
sub vp2, vp2, vertices
sub vp0, vertices
sub vp1, vertices
sub vp2, vertices
mov vp0, vp0, lsr #3
lsr vp0, #3
orr vp1, vp0, vp1, lsl #(16 - 3)
mov vp2, vp2, lsr #3
lsr vp2, #3
orr flags, #FACE_TRIANGLE
@ -97,7 +96,7 @@ faceAddMeshTriangles_asm:
str face, [ot, depth, lsl #2]
stmia face!, {flags, next, vp1, vp2}
.skip:
subs count, count, #1
subs count, #1
bne .loop
ldr tmp, =gFacesBase

View File

@ -65,25 +65,25 @@ faceAddRoomQuads_asm:
// check clipping
and tmp, vg0, vg1
and tmp, tmp, vg2
and tmp, tmp, vg3
and tmp, vg2
and tmp, vg3
tst tmp, #CLIP_MASK
bne .skip
// mark if should be clipped by viewport
orr tmp, vg0, vg1
orr tmp, tmp, vg2
orr tmp, tmp, vg3
orr tmp, vg2
orr tmp, vg3
tst tmp, #CLIP_MASK_VP
ldrh flags, [polys, #-12]
orrne flags, flags, #FACE_CLIPPED
orrne flags, #FACE_CLIPPED
// shift and compare VERTEX_G for flat rasterization
mov vg0, vg0, lsl #24
// shift and compare VERTEX_G for gouraud rasterization
lsl vg0, #24
cmp vg0, vg1, lsl #24
cmpeq vg0, vg2, lsl #24
cmpeq vg0, vg3, lsl #24
addne flags, flags, #FACE_GOURAUD
addne flags, #FACE_GOURAUD
CCW .skip
@ -102,22 +102,21 @@ faceAddRoomQuads_asm:
// faceAdd
ldr vertices, =gVertices
sub vp0, vertices
sub vp1, vertices
sub vp2, vertices
sub vp3, vertices
sub vp0, vp0, vertices
sub vp1, vp1, vertices
sub vp2, vp2, vertices
sub vp3, vp3, vertices
mov vp0, vp0, lsr #3
lsr vp0, #3
orr vp1, vp0, vp1, lsl #(16 - 3)
mov vp2, vp2, lsr #3
lsr vp2, #3
orr vp3, vp2, vp3, lsl #(16 - 3)
ldr next, [ot, depth, lsl #2]
str face, [ot, depth, lsl #2]
stmia face!, {flags, next, vp1, vp3}
.skip:
subs count, count, #1
subs count, #1
bne .loop
ldr tmp, =gFacesBase

View File

@ -61,22 +61,22 @@ faceAddRoomTriangles_asm:
// check clipping
and tmp, vg0, vg1
and tmp, tmp, vg2
and tmp, vg2
tst tmp, #CLIP_MASK
bne .skip
// mark if should be clipped by viewport
orr tmp, vg0, vg1
orr tmp, tmp, vg2
orr tmp, vg2
tst tmp, #CLIP_MASK_VP
ldrh flags, [polys, #-10]
orrne flags, flags, #FACE_CLIPPED
orrne flags, #FACE_CLIPPED
// shift and compare VERTEX_G for flat rasterization
mov vg0, vg0, lsl #24
// shift and compare VERTEX_G for gouraud rasterization
lsl vg0, #24
cmp vg0, vg1, lsl #24
cmpeq vg0, vg2, lsl #24
addne flags, flags, #FACE_GOURAUD
addne flags, #FACE_GOURAUD
CCW .skip
@ -91,13 +91,13 @@ faceAddRoomTriangles_asm:
mov depth, vz0, lsr #OT_SHIFT
// faceAdd
sub vp0, vp0, vertices
sub vp1, vp1, vertices
sub vp2, vp2, vertices
sub vp0, vertices
sub vp1, vertices
sub vp2, vertices
mov vp0, vp0, lsr #3
lsr vp0, #3
orr vp1, vp0, vp1, lsl #(16 - 3)
mov vp2, vp2, lsr #3
lsr vp2, #3
orr flags, #FACE_TRIANGLE
@ -105,7 +105,7 @@ faceAddRoomTriangles_asm:
str face, [ot, depth, lsl #2]
stmia face!, {flags, next, vp1, vp2}
.skip:
subs count, count, #1
subs count, #1
bne .loop
ldr tmp, =gFacesBase

View File

@ -37,11 +37,11 @@ transformMesh_asm:
ldr ambient, =gLightAmbient
ldr ambient, [ambient]
add vg, ambient, intensity
mov vg, vg, asr #8
asr vg, #8
// clamp spAmbient to 0..31
cmp vg, #31
movge vg, #31
bic vg, vg, vg, asr #31
bic vg, vg, asr #31
ldr vp, =viewportRel
ldmia vp, {minXY, maxXY}
@ -57,66 +57,66 @@ transformMesh_asm:
ldrsh vy, [vertices], #2
ldrsh vz, [vertices], #2
bic vg, vg, #CLIP_MASK // clear clipping flags
bic vg, #CLIP_MASK // clear clipping flags
// transform x
ldmia m!, {mx, my, mz, x}
mla x, mx, vx, x
mla x, my, vy, x
mla x, mz, vz, x
mov x, x, asr #FIXED_SHIFT
asr x, #FIXED_SHIFT
// transform y
ldmia m!, {mx, my, mz, y}
mla y, mx, vx, y
mla y, my, vy, y
mla y, mz, vz, y
mov y, y, asr #FIXED_SHIFT
asr y, #FIXED_SHIFT
// transform z
ldmia m!, {mx, my, mz, z}
mla z, mx, vx, z
mla z, my, vy, z
mla z, mz, vz, z
mov z, z, asr #FIXED_SHIFT
asr z, #FIXED_SHIFT
sub m, #(12 * 4) // restore matrix ptr
// z clipping
cmp z, #VIEW_MIN
movle z, #VIEW_MIN
orrle vg, vg, #CLIP_NEAR
orrle vg, #CLIP_NEAR
cmp z, #VIEW_MAX
movge z, #VIEW_MAX
orrge vg, vg, #CLIP_FAR
orrge vg, #CLIP_FAR
// project
mov dz, z, lsr #4
add dz, dz, z, lsr #6
add dz, z, lsr #6
divLUT tmp, dz
mul x, tmp, x
mul y, tmp, y
mov x, x, asr #(16 - PROJ_SHIFT)
mov y, y, asr #(16 - PROJ_SHIFT)
asr x, #(16 - PROJ_SHIFT)
asr y, #(16 - PROJ_SHIFT)
// viewport clipping
ldmia sp, {minXY, maxXY}
cmp x, minXY, asr #16
orrle vg, vg, #CLIP_LEFT
orrle vg, #CLIP_LEFT
cmp x, maxXY, asr #16
orrge vg, vg, #CLIP_RIGHT
orrge vg, #CLIP_RIGHT
mov minXY, minXY, lsl #16
mov maxXY, maxXY, lsl #16
lsl minXY, #16
lsl maxXY, #16
cmp y, minXY, asr #16
orrle vg, vg, #CLIP_TOP
orrle vg, #CLIP_TOP
cmp y, maxXY, asr #16
orrge vg, vg, #CLIP_BOTTOM
orrge vg, #CLIP_BOTTOM
add x, x, #(FRAME_WIDTH >> 1)
add y, y, #(FRAME_HEIGHT >> 1)
add x, #(FRAME_WIDTH >> 1)
add y, #(FRAME_HEIGHT >> 1)
// store the result
strh x, [res], #2
@ -127,5 +127,5 @@ transformMesh_asm:
subs count, #1
bne .loop
add sp, sp, #SP_SIZE
add sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc}

View File

@ -66,10 +66,10 @@ transformRoom_asm:
mla t, mx, vx, z
mla t, my, vy, t
mla t, mz, vz, t
mov t, t, asr #FIXED_SHIFT
asr t, #FIXED_SHIFT
// skip if vertex is out of z-range
add t, t, #VIEW_OFF
add t, #VIEW_OFF
cmp t, #(VIEW_OFF + VIEW_OFF + VIEW_MAX)
movhi vg, #(CLIP_NEAR + CLIP_FAR)
bhi .skip
@ -82,58 +82,58 @@ transformRoom_asm:
mla y, mx, vx, y
mla y, my, vy, y
mla y, mz, vz, y
mov y, y, asr #FIXED_SHIFT
asr y, #FIXED_SHIFT
// transform x
ldmdb m!, {mx, my, mz, x}
mla x, mx, vx, x
mla x, my, vy, x
mla x, mz, vz, x
mov x, x, asr #FIXED_SHIFT
asr x, #FIXED_SHIFT
// fog
cmp z, #FOG_MIN
subgt fog, z, #FOG_MIN
addgt vg, fog, lsl #6
mov vg, vg, lsr #13
lsr vg, #13
cmp vg, #31
movgt vg, #31
// z clipping
cmp z, #VIEW_MIN
movle z, #VIEW_MIN
orrle vg, vg, #CLIP_NEAR
orrle vg, #CLIP_NEAR
cmp z, #VIEW_MAX
movge z, #VIEW_MAX
orrge vg, vg, #CLIP_FAR
orrge vg, #CLIP_FAR
// project
mov dz, z, lsr #6
add dz, dz, z, lsr #4
add dz, z, lsr #4
divLUT tmp, dz
mul x, tmp, x
mul y, tmp, y
mov x, x, asr #(16 - PROJ_SHIFT)
mov y, y, asr #(16 - PROJ_SHIFT)
asr x, #(16 - PROJ_SHIFT)
asr y, #(16 - PROJ_SHIFT)
// viewport clipping
ldmia sp, {minXY, maxXY}
cmp x, minXY, asr #16
orrle vg, vg, #CLIP_LEFT
orrle vg, #CLIP_LEFT
cmp x, maxXY, asr #16
orrge vg, vg, #CLIP_RIGHT
orrge vg, #CLIP_RIGHT
mov minXY, minXY, lsl #16
mov maxXY, maxXY, lsl #16
lsl minXY, #16
lsl maxXY, #16
cmp y, minXY, asr #16
orrle vg, vg, #CLIP_TOP
orrle vg, #CLIP_TOP
cmp y, maxXY, asr #16
orrge vg, vg, #CLIP_BOTTOM
orrge vg, #CLIP_BOTTOM
add x, x, #(FRAME_WIDTH >> 1)
add y, y, #(FRAME_HEIGHT >> 1)
add x, #(FRAME_WIDTH >> 1)
add y, #(FRAME_HEIGHT >> 1)
// store the result
strh x, [res, #-6]
@ -151,5 +151,5 @@ transformRoom_asm:
subs count, #1
bne .loop
add sp, sp, #SP_SIZE
add sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc}

View File

@ -82,10 +82,10 @@ transformRoomUW_asm:
mla t, mx, vx, z
mla t, my, vy, t
mla t, mz, vz, t
mov t, t, asr #FIXED_SHIFT
asr t, #FIXED_SHIFT
// skip if vertex is out of z-range
add t, t, #VIEW_OFF
add t, #VIEW_OFF
cmp t, #(VIEW_OFF + VIEW_OFF + VIEW_MAX)
movhi vg, #(CLIP_NEAR + CLIP_FAR)
bhi .skip
@ -98,68 +98,68 @@ transformRoomUW_asm:
mla y, mx, vx, y
mla y, my, vy, y
mla y, mz, vz, y
mov y, y, asr #FIXED_SHIFT
asr y, #FIXED_SHIFT
// transform x
ldmdb m!, {mx, my, mz, x}
mla x, mx, vx, x
mla x, my, vy, x
mla x, mz, vz, x
mov x, x, asr #FIXED_SHIFT
asr x, #FIXED_SHIFT
// caustics
add tmp, sp, #SP_FRAME
ldmia tmp, {frame, caust, rand}
and tmp, count, #(MAX_RAND_TABLE - 1)
ldr rand, [rand, tmp, lsl #2]
add rand, rand, frame
and rand, rand, #(MAX_CAUSTICS - 1)
add rand, frame
and rand, #(MAX_CAUSTICS - 1)
ldr caust, [caust, rand, lsl #2]
add vg, vg, caust, lsl #5
add vg, caust, lsl #5
// fog
cmp z, #FOG_MIN
subgt fog, z, #FOG_MIN
addgt vg, fog, lsl #6
mov vg, vg, lsr #13
lsr vg, #13
cmp vg, #31
movgt vg, #31
// z clipping
cmp z, #VIEW_MIN
movle z, #VIEW_MIN
orrle vg, vg, #CLIP_NEAR
orrle vg, #CLIP_NEAR
cmp z, #VIEW_MAX
movge z, #VIEW_MAX
orrge vg, vg, #CLIP_FAR
orrge vg, #CLIP_FAR
// project
mov dz, z, lsr #6
add dz, dz, z, lsr #4
add dz, z, lsr #4
divLUT tmp, dz
mul x, tmp, x
mul y, tmp, y
mov x, x, asr #(16 - PROJ_SHIFT)
mov y, y, asr #(16 - PROJ_SHIFT)
asr x, #(16 - PROJ_SHIFT)
asr y, #(16 - PROJ_SHIFT)
// viewport clipping
ldmia sp, {minXY, maxXY}
cmp x, minXY, asr #16
orrle vg, vg, #CLIP_LEFT
orrle vg, #CLIP_LEFT
cmp x, maxXY, asr #16
orrge vg, vg, #CLIP_RIGHT
orrge vg, #CLIP_RIGHT
mov minXY, minXY, lsl #16
mov maxXY, maxXY, lsl #16
lsl minXY, #16
lsl maxXY, #16
cmp y, minXY, asr #16
orrle vg, vg, #CLIP_TOP
orrle vg, #CLIP_TOP
cmp y, maxXY, asr #16
orrge vg, vg, #CLIP_BOTTOM
orrge vg, #CLIP_BOTTOM
add x, x, #(FRAME_WIDTH >> 1)
add y, y, #(FRAME_HEIGHT >> 1)
add x, #(FRAME_WIDTH >> 1)
add y, #(FRAME_HEIGHT >> 1)
// store the result
strh x, [res, #-6]
@ -177,5 +177,5 @@ transformRoomUW_asm:
subs count, #1
bne .loop
add sp, sp, #SP_SIZE
add sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc}