From 665683747371d21c54fab6dbfeb3ab9c4c870430 Mon Sep 17 00:00:00 2001 From: XProger Date: Sun, 13 Feb 2022 14:10:02 +0300 Subject: [PATCH] #368 micro optimizations, some code cleanup --- src/platform/gba/asm/faceAddMeshQuads.s | 44 ++++++++++----------- src/platform/gba/asm/faceAddMeshTriangles.s | 33 ++++++++-------- src/platform/gba/asm/faceAddRoomQuads.s | 31 +++++++-------- src/platform/gba/asm/faceAddRoomTriangles.s | 24 +++++------ src/platform/gba/asm/transformMesh.s | 40 +++++++++---------- src/platform/gba/asm/transformRoom.s | 38 +++++++++--------- src/platform/gba/asm/transformRoomUW.s | 44 ++++++++++----------- 7 files changed, 125 insertions(+), 129 deletions(-) diff --git a/src/platform/gba/asm/faceAddMeshQuads.s b/src/platform/gba/asm/faceAddMeshQuads.s index 4efa708..009b366 100644 --- a/src/platform/gba/asm/faceAddMeshQuads.s +++ b/src/platform/gba/asm/faceAddMeshQuads.s @@ -59,26 +59,25 @@ faceAddMeshQuads_asm: CCW .skip - // fetch ((clip << 8) | g) - ldrh vg0, [vp0, #VERTEX_G] - ldrh vg1, [vp1, #VERTEX_G] - ldrh vg2, [vp2, #VERTEX_G] - ldrh vg3, [vp3, #VERTEX_G] + // fetch clip flags + ldrb vg0, [vp0, #VERTEX_CLIP] + ldrb vg1, [vp1, #VERTEX_CLIP] + ldrb vg2, [vp2, #VERTEX_CLIP] + ldrb vg3, [vp3, #VERTEX_CLIP] // check clipping and tmp, vg0, vg1 - and tmp, tmp, vg2 - and tmp, tmp, vg3 - tst tmp, #CLIP_MASK + and tmp, vg2 + ands tmp, vg3 bne .skip // mark if should be clipped by viewport orr tmp, vg0, vg1 - orr tmp, tmp, vg2 - orr tmp, tmp, vg3 - tst tmp, #CLIP_MASK_VP + orr tmp, vg2 + orr tmp, vg3 + tst tmp, #(CLIP_MASK_VP >> 8) ldrh flags, [polys, #-8] - orrne flags, flags, #FACE_CLIPPED + orrne flags, #FACE_CLIPPED // vz0 = AVG_Z4 (depth) ldrh vz0, [vp0, #VERTEX_Z] @@ -86,28 +85,27 @@ faceAddMeshQuads_asm: ldrh vz2, [vp2, #VERTEX_Z] ldrh vz3, [vp3, #VERTEX_Z] add depth, vz0, vz1 - add depth, depth, vz2 - add depth, depth, vz3 - mov depth, depth, lsr #(2 + OT_SHIFT) + add depth, vz2 + add depth, vz3 + lsr depth, #(2 + OT_SHIFT) // faceAdd ldr vertices, =gVertices + sub vp0, vertices + sub vp1, vertices + sub vp2, vertices + sub vp3, vertices - sub vp0, vp0, vertices - sub vp1, vp1, vertices - sub vp2, vp2, vertices - sub vp3, vp3, vertices - - mov vp0, vp0, lsr #3 + lsr vp0, #3 orr vp1, vp0, vp1, lsl #(16 - 3) - mov vp2, vp2, lsr #3 + lsr vp2, #3 orr vp3, vp2, vp3, lsl #(16 - 3) ldr next, [ot, depth, lsl #2] str face, [ot, depth, lsl #2] stmia face!, {flags, next, vp1, vp3} .skip: - subs count, count, #1 + subs count, #1 bne .loop ldr tmp, =gFacesBase diff --git a/src/platform/gba/asm/faceAddMeshTriangles.s b/src/platform/gba/asm/faceAddMeshTriangles.s index 5997c08..f71635b 100644 --- a/src/platform/gba/asm/faceAddMeshTriangles.s +++ b/src/platform/gba/asm/faceAddMeshTriangles.s @@ -56,40 +56,39 @@ faceAddMeshTriangles_asm: CCW .skip - // fetch ((clip << 8) | g) - ldrh vg0, [vp0, #VERTEX_G] - ldrh vg1, [vp1, #VERTEX_G] - ldrh vg2, [vp2, #VERTEX_G] + // fetch clip flags + ldrb vg0, [vp0, #VERTEX_CLIP] + ldrb vg1, [vp1, #VERTEX_CLIP] + ldrb vg2, [vp2, #VERTEX_CLIP] // check clipping and tmp, vg0, vg1 - and tmp, tmp, vg2 - tst tmp, #CLIP_MASK + ands tmp, vg2 bne .skip // mark if should be clipped by viewport orr tmp, vg0, vg1 - orr tmp, tmp, vg2 - tst tmp, #CLIP_MASK_VP + orr tmp, vg2 + tst tmp, #(CLIP_MASK_VP >> 8) ldrh flags, [polys, #-8] - orrne flags, flags, #FACE_CLIPPED + orrne flags, #FACE_CLIPPED // vz0 = AVG_Z3 (depth) ldrh vz0, [vp0, #VERTEX_Z] ldrh vz1, [vp1, #VERTEX_Z] ldrh vz2, [vp2, #VERTEX_Z] add depth, vz0, vz1 - add depth, depth, vz2, lsl #1 - mov depth, depth, lsr #(2 + OT_SHIFT) + add depth, vz2, lsl #1 + lsr depth, #(2 + OT_SHIFT) // faceAdd - sub vp0, vp0, vertices - sub vp1, vp1, vertices - sub vp2, vp2, vertices + sub vp0, vertices + sub vp1, vertices + sub vp2, vertices - mov vp0, vp0, lsr #3 + lsr vp0, #3 orr vp1, vp0, vp1, lsl #(16 - 3) - mov vp2, vp2, lsr #3 + lsr vp2, #3 orr flags, #FACE_TRIANGLE @@ -97,7 +96,7 @@ faceAddMeshTriangles_asm: str face, [ot, depth, lsl #2] stmia face!, {flags, next, vp1, vp2} .skip: - subs count, count, #1 + subs count, #1 bne .loop ldr tmp, =gFacesBase diff --git a/src/platform/gba/asm/faceAddRoomQuads.s b/src/platform/gba/asm/faceAddRoomQuads.s index f80bb1a..6cd7906 100644 --- a/src/platform/gba/asm/faceAddRoomQuads.s +++ b/src/platform/gba/asm/faceAddRoomQuads.s @@ -65,25 +65,25 @@ faceAddRoomQuads_asm: // check clipping and tmp, vg0, vg1 - and tmp, tmp, vg2 - and tmp, tmp, vg3 + and tmp, vg2 + and tmp, vg3 tst tmp, #CLIP_MASK bne .skip // mark if should be clipped by viewport orr tmp, vg0, vg1 - orr tmp, tmp, vg2 - orr tmp, tmp, vg3 + orr tmp, vg2 + orr tmp, vg3 tst tmp, #CLIP_MASK_VP ldrh flags, [polys, #-12] - orrne flags, flags, #FACE_CLIPPED + orrne flags, #FACE_CLIPPED - // shift and compare VERTEX_G for flat rasterization - mov vg0, vg0, lsl #24 + // shift and compare VERTEX_G for gouraud rasterization + lsl vg0, #24 cmp vg0, vg1, lsl #24 cmpeq vg0, vg2, lsl #24 cmpeq vg0, vg3, lsl #24 - addne flags, flags, #FACE_GOURAUD + addne flags, #FACE_GOURAUD CCW .skip @@ -102,22 +102,21 @@ faceAddRoomQuads_asm: // faceAdd ldr vertices, =gVertices + sub vp0, vertices + sub vp1, vertices + sub vp2, vertices + sub vp3, vertices - sub vp0, vp0, vertices - sub vp1, vp1, vertices - sub vp2, vp2, vertices - sub vp3, vp3, vertices - - mov vp0, vp0, lsr #3 + lsr vp0, #3 orr vp1, vp0, vp1, lsl #(16 - 3) - mov vp2, vp2, lsr #3 + lsr vp2, #3 orr vp3, vp2, vp3, lsl #(16 - 3) ldr next, [ot, depth, lsl #2] str face, [ot, depth, lsl #2] stmia face!, {flags, next, vp1, vp3} .skip: - subs count, count, #1 + subs count, #1 bne .loop ldr tmp, =gFacesBase diff --git a/src/platform/gba/asm/faceAddRoomTriangles.s b/src/platform/gba/asm/faceAddRoomTriangles.s index ea0df95..39aa2a2 100644 --- a/src/platform/gba/asm/faceAddRoomTriangles.s +++ b/src/platform/gba/asm/faceAddRoomTriangles.s @@ -61,22 +61,22 @@ faceAddRoomTriangles_asm: // check clipping and tmp, vg0, vg1 - and tmp, tmp, vg2 + and tmp, vg2 tst tmp, #CLIP_MASK bne .skip // mark if should be clipped by viewport orr tmp, vg0, vg1 - orr tmp, tmp, vg2 + orr tmp, vg2 tst tmp, #CLIP_MASK_VP ldrh flags, [polys, #-10] - orrne flags, flags, #FACE_CLIPPED + orrne flags, #FACE_CLIPPED - // shift and compare VERTEX_G for flat rasterization - mov vg0, vg0, lsl #24 + // shift and compare VERTEX_G for gouraud rasterization + lsl vg0, #24 cmp vg0, vg1, lsl #24 cmpeq vg0, vg2, lsl #24 - addne flags, flags, #FACE_GOURAUD + addne flags, #FACE_GOURAUD CCW .skip @@ -91,13 +91,13 @@ faceAddRoomTriangles_asm: mov depth, vz0, lsr #OT_SHIFT // faceAdd - sub vp0, vp0, vertices - sub vp1, vp1, vertices - sub vp2, vp2, vertices + sub vp0, vertices + sub vp1, vertices + sub vp2, vertices - mov vp0, vp0, lsr #3 + lsr vp0, #3 orr vp1, vp0, vp1, lsl #(16 - 3) - mov vp2, vp2, lsr #3 + lsr vp2, #3 orr flags, #FACE_TRIANGLE @@ -105,7 +105,7 @@ faceAddRoomTriangles_asm: str face, [ot, depth, lsl #2] stmia face!, {flags, next, vp1, vp2} .skip: - subs count, count, #1 + subs count, #1 bne .loop ldr tmp, =gFacesBase diff --git a/src/platform/gba/asm/transformMesh.s b/src/platform/gba/asm/transformMesh.s index e13f596..e28fb89 100644 --- a/src/platform/gba/asm/transformMesh.s +++ b/src/platform/gba/asm/transformMesh.s @@ -37,11 +37,11 @@ transformMesh_asm: ldr ambient, =gLightAmbient ldr ambient, [ambient] add vg, ambient, intensity - mov vg, vg, asr #8 + asr vg, #8 // clamp spAmbient to 0..31 cmp vg, #31 movge vg, #31 - bic vg, vg, vg, asr #31 + bic vg, vg, asr #31 ldr vp, =viewportRel ldmia vp, {minXY, maxXY} @@ -57,66 +57,66 @@ transformMesh_asm: ldrsh vy, [vertices], #2 ldrsh vz, [vertices], #2 - bic vg, vg, #CLIP_MASK // clear clipping flags + bic vg, #CLIP_MASK // clear clipping flags // transform x ldmia m!, {mx, my, mz, x} mla x, mx, vx, x mla x, my, vy, x mla x, mz, vz, x - mov x, x, asr #FIXED_SHIFT + asr x, #FIXED_SHIFT // transform y ldmia m!, {mx, my, mz, y} mla y, mx, vx, y mla y, my, vy, y mla y, mz, vz, y - mov y, y, asr #FIXED_SHIFT + asr y, #FIXED_SHIFT // transform z ldmia m!, {mx, my, mz, z} mla z, mx, vx, z mla z, my, vy, z mla z, mz, vz, z - mov z, z, asr #FIXED_SHIFT + asr z, #FIXED_SHIFT sub m, #(12 * 4) // restore matrix ptr // z clipping cmp z, #VIEW_MIN movle z, #VIEW_MIN - orrle vg, vg, #CLIP_NEAR + orrle vg, #CLIP_NEAR cmp z, #VIEW_MAX movge z, #VIEW_MAX - orrge vg, vg, #CLIP_FAR + orrge vg, #CLIP_FAR // project mov dz, z, lsr #4 - add dz, dz, z, lsr #6 + add dz, z, lsr #6 divLUT tmp, dz mul x, tmp, x mul y, tmp, y - mov x, x, asr #(16 - PROJ_SHIFT) - mov y, y, asr #(16 - PROJ_SHIFT) + asr x, #(16 - PROJ_SHIFT) + asr y, #(16 - PROJ_SHIFT) // viewport clipping ldmia sp, {minXY, maxXY} cmp x, minXY, asr #16 - orrle vg, vg, #CLIP_LEFT + orrle vg, #CLIP_LEFT cmp x, maxXY, asr #16 - orrge vg, vg, #CLIP_RIGHT + orrge vg, #CLIP_RIGHT - mov minXY, minXY, lsl #16 - mov maxXY, maxXY, lsl #16 + lsl minXY, #16 + lsl maxXY, #16 cmp y, minXY, asr #16 - orrle vg, vg, #CLIP_TOP + orrle vg, #CLIP_TOP cmp y, maxXY, asr #16 - orrge vg, vg, #CLIP_BOTTOM + orrge vg, #CLIP_BOTTOM - add x, x, #(FRAME_WIDTH >> 1) - add y, y, #(FRAME_HEIGHT >> 1) + add x, #(FRAME_WIDTH >> 1) + add y, #(FRAME_HEIGHT >> 1) // store the result strh x, [res], #2 @@ -127,5 +127,5 @@ transformMesh_asm: subs count, #1 bne .loop - add sp, sp, #SP_SIZE + add sp, #SP_SIZE ldmfd sp!, {r4-r11, pc} diff --git a/src/platform/gba/asm/transformRoom.s b/src/platform/gba/asm/transformRoom.s index 51e6040..c408268 100644 --- a/src/platform/gba/asm/transformRoom.s +++ b/src/platform/gba/asm/transformRoom.s @@ -66,10 +66,10 @@ transformRoom_asm: mla t, mx, vx, z mla t, my, vy, t mla t, mz, vz, t - mov t, t, asr #FIXED_SHIFT + asr t, #FIXED_SHIFT // skip if vertex is out of z-range - add t, t, #VIEW_OFF + add t, #VIEW_OFF cmp t, #(VIEW_OFF + VIEW_OFF + VIEW_MAX) movhi vg, #(CLIP_NEAR + CLIP_FAR) bhi .skip @@ -82,58 +82,58 @@ transformRoom_asm: mla y, mx, vx, y mla y, my, vy, y mla y, mz, vz, y - mov y, y, asr #FIXED_SHIFT + asr y, #FIXED_SHIFT // transform x ldmdb m!, {mx, my, mz, x} mla x, mx, vx, x mla x, my, vy, x mla x, mz, vz, x - mov x, x, asr #FIXED_SHIFT + asr x, #FIXED_SHIFT // fog cmp z, #FOG_MIN subgt fog, z, #FOG_MIN addgt vg, fog, lsl #6 - mov vg, vg, lsr #13 + lsr vg, #13 cmp vg, #31 movgt vg, #31 // z clipping cmp z, #VIEW_MIN movle z, #VIEW_MIN - orrle vg, vg, #CLIP_NEAR + orrle vg, #CLIP_NEAR cmp z, #VIEW_MAX movge z, #VIEW_MAX - orrge vg, vg, #CLIP_FAR + orrge vg, #CLIP_FAR // project mov dz, z, lsr #6 - add dz, dz, z, lsr #4 + add dz, z, lsr #4 divLUT tmp, dz mul x, tmp, x mul y, tmp, y - mov x, x, asr #(16 - PROJ_SHIFT) - mov y, y, asr #(16 - PROJ_SHIFT) + asr x, #(16 - PROJ_SHIFT) + asr y, #(16 - PROJ_SHIFT) // viewport clipping ldmia sp, {minXY, maxXY} cmp x, minXY, asr #16 - orrle vg, vg, #CLIP_LEFT + orrle vg, #CLIP_LEFT cmp x, maxXY, asr #16 - orrge vg, vg, #CLIP_RIGHT + orrge vg, #CLIP_RIGHT - mov minXY, minXY, lsl #16 - mov maxXY, maxXY, lsl #16 + lsl minXY, #16 + lsl maxXY, #16 cmp y, minXY, asr #16 - orrle vg, vg, #CLIP_TOP + orrle vg, #CLIP_TOP cmp y, maxXY, asr #16 - orrge vg, vg, #CLIP_BOTTOM + orrge vg, #CLIP_BOTTOM - add x, x, #(FRAME_WIDTH >> 1) - add y, y, #(FRAME_HEIGHT >> 1) + add x, #(FRAME_WIDTH >> 1) + add y, #(FRAME_HEIGHT >> 1) // store the result strh x, [res, #-6] @@ -151,5 +151,5 @@ transformRoom_asm: subs count, #1 bne .loop - add sp, sp, #SP_SIZE + add sp, #SP_SIZE ldmfd sp!, {r4-r11, pc} diff --git a/src/platform/gba/asm/transformRoomUW.s b/src/platform/gba/asm/transformRoomUW.s index 57d1e03..6291de4 100644 --- a/src/platform/gba/asm/transformRoomUW.s +++ b/src/platform/gba/asm/transformRoomUW.s @@ -82,10 +82,10 @@ transformRoomUW_asm: mla t, mx, vx, z mla t, my, vy, t mla t, mz, vz, t - mov t, t, asr #FIXED_SHIFT + asr t, #FIXED_SHIFT // skip if vertex is out of z-range - add t, t, #VIEW_OFF + add t, #VIEW_OFF cmp t, #(VIEW_OFF + VIEW_OFF + VIEW_MAX) movhi vg, #(CLIP_NEAR + CLIP_FAR) bhi .skip @@ -98,68 +98,68 @@ transformRoomUW_asm: mla y, mx, vx, y mla y, my, vy, y mla y, mz, vz, y - mov y, y, asr #FIXED_SHIFT + asr y, #FIXED_SHIFT // transform x ldmdb m!, {mx, my, mz, x} mla x, mx, vx, x mla x, my, vy, x mla x, mz, vz, x - mov x, x, asr #FIXED_SHIFT + asr x, #FIXED_SHIFT // caustics add tmp, sp, #SP_FRAME ldmia tmp, {frame, caust, rand} and tmp, count, #(MAX_RAND_TABLE - 1) ldr rand, [rand, tmp, lsl #2] - add rand, rand, frame - and rand, rand, #(MAX_CAUSTICS - 1) + add rand, frame + and rand, #(MAX_CAUSTICS - 1) ldr caust, [caust, rand, lsl #2] - add vg, vg, caust, lsl #5 + add vg, caust, lsl #5 // fog cmp z, #FOG_MIN subgt fog, z, #FOG_MIN addgt vg, fog, lsl #6 - mov vg, vg, lsr #13 + lsr vg, #13 cmp vg, #31 movgt vg, #31 // z clipping cmp z, #VIEW_MIN movle z, #VIEW_MIN - orrle vg, vg, #CLIP_NEAR + orrle vg, #CLIP_NEAR cmp z, #VIEW_MAX movge z, #VIEW_MAX - orrge vg, vg, #CLIP_FAR + orrge vg, #CLIP_FAR // project mov dz, z, lsr #6 - add dz, dz, z, lsr #4 + add dz, z, lsr #4 divLUT tmp, dz mul x, tmp, x mul y, tmp, y - mov x, x, asr #(16 - PROJ_SHIFT) - mov y, y, asr #(16 - PROJ_SHIFT) + asr x, #(16 - PROJ_SHIFT) + asr y, #(16 - PROJ_SHIFT) // viewport clipping ldmia sp, {minXY, maxXY} cmp x, minXY, asr #16 - orrle vg, vg, #CLIP_LEFT + orrle vg, #CLIP_LEFT cmp x, maxXY, asr #16 - orrge vg, vg, #CLIP_RIGHT + orrge vg, #CLIP_RIGHT - mov minXY, minXY, lsl #16 - mov maxXY, maxXY, lsl #16 + lsl minXY, #16 + lsl maxXY, #16 cmp y, minXY, asr #16 - orrle vg, vg, #CLIP_TOP + orrle vg, #CLIP_TOP cmp y, maxXY, asr #16 - orrge vg, vg, #CLIP_BOTTOM + orrge vg, #CLIP_BOTTOM - add x, x, #(FRAME_WIDTH >> 1) - add y, y, #(FRAME_HEIGHT >> 1) + add x, #(FRAME_WIDTH >> 1) + add y, #(FRAME_HEIGHT >> 1) // store the result strh x, [res, #-6] @@ -177,5 +177,5 @@ transformRoomUW_asm: subs count, #1 bne .loop - add sp, sp, #SP_SIZE + add sp, #SP_SIZE ldmfd sp!, {r4-r11, pc}