#368 GBA matrixRotateYXZ fetch sincos address lut once

2025-04-21 19:41:53 +02:00 · 2022-12-04 03:15:53 +03:00 · 2022-12-04 03:15:53 +03:00 · 3ba5ec3818
commit 3ba5ec3818
parent 4bb76c204f
3 changed files with 30 additions and 18 deletions
--- a/src/platform/gba/asm/faceAddMeshQuads.s
+++ b/src/platform/gba/asm/faceAddMeshQuads.s
@ -49,9 +49,10 @@ faceAddMeshQuads_asm:

 .loop:
    ldrh vp0, [polys], #2
+    ldrh vp2, [polys], #4   // + flags
+
    lsr vp1, vp0, #8
    and vp0, #0xFF
-    ldrh vp2, [polys], #4   // + flags
    lsr vp3, vp2, #8
    and vp2, #0xFF

@ -85,11 +86,11 @@ faceAddMeshQuads_asm:
    orrne flags, #FACE_CLIPPED

    // depth = AVG_Z4
-    lsl vg0, #16
-    add depth, vg0, vg1, lsl #16
-    add depth, vg2, lsl #16
-    add depth, vg3, lsl #16
-    lsr depth, #(16 + 2)
+    lsl vg0, #16                    // clip g part (high half)
+    add depth, vg0, vg1, lsl #16    // depth = vz0 + vz1
+    add depth, vg2, lsl #16         // depth += vz2
+    add depth, vg3, lsl #16         // depth += vz3
+    lsr depth, #(16 + 2)            // depth /= 4

    // faceAdd
    rsb vp0, vertices, vp0, lsr #3
--- a/src/platform/gba/asm/faceAddMeshTriangles.s
+++ b/src/platform/gba/asm/faceAddMeshTriangles.s
@ -47,9 +47,10 @@ faceAddMeshTriangles_asm:

 .loop:
    ldrh vp0, [polys], #2
+    ldrh vp2, [polys], #4   // + flags
+
    lsr vp1, vp0, #8
    and vp0, #0xFF
-    ldrh vp2, [polys], #4   // + flags
    and vp2, #0xFF

    add vp0, vp, vp0, lsl #3
@ -77,10 +78,10 @@ faceAddMeshTriangles_asm:
    orrne flags, #FACE_CLIPPED

    // depth = AVG_Z3
-    lsl vg0, #16
-    add depth, vg0, vg1, lsl #16
-    add depth, vg2, lsl #17
-    lsr depth, #(16 + 2)
+    lsl vg0, #16                    // clip g part (high half)
+    add depth, vg0, vg1, lsl #16    // depth = vz0 + vz1
+    add depth, vg2, lsl #17         // depth += vz2 * 2
+    lsr depth, #(16 + 2)            // depth /= 4

    // faceAdd
    rsb vp0, vertices, vp0, lsr #3
--- a/src/platform/gba/asm/matrixRotate.s
+++ b/src/platform/gba/asm/matrixRotate.s
@ -8,6 +8,13 @@
    mov \sin, \sin, asr #16
 .endm

+.macro sincosLUT lut, angle, sin, cos
+    ldr \sin, [\lut, \angle, lsl #2]    // load the 32-bit table word at lut + angle * 4
+    mov \cos, \sin, lsl #16             // shift the low halfword up into bits 31..16
+    mov \cos, \cos, asr #16             // cos = sign-extended low halfword of the word
+    mov \sin, \sin, asr #16             // sin = sign-extended high halfword of the word
+.endm
+
 .macro rotxy x, y, sin, cos, t
    mul \t, \y, \cos
    mla \t, \x, \sin, \t
@ -126,6 +133,7 @@ e00     .req r3
 e01     .req r4
 e02     .req r5
 e10     .req r6
+scLUT   .req r7
 // FIQ regs
 e11     .req r8
 e12     .req r9
@ -151,14 +159,16 @@ matrixRotateYXZ_asm:
    and angleY, mask, angleY, lsr #4
    and angleZ, mask, angleZ, lsr #4

-matrixRotateYXZ_fast_asm:
+matrixRotateYXZ_fast_asm:   // routine for pre-shifted angles
    orr mask, angleX, angleY
    orrs mask, mask, angleZ
    bxeq lr

-    stmfd sp!, {r4-r6}
+    stmfd sp!, {r4-r7}
    fiq_on

+    ldr scLUT, =gSinCosTable
+
    ldr mm, =gMatrixPtr
    ldr mm, [mm]
    ldmia mm, {e00, e01, e02}
@ -171,7 +181,7 @@ matrixRotateYXZ_fast_asm:
    cmp angleY, #0
    beq .rotX

-    sincos angleY, sinY, cosY
+    sincosLUT scLUT, angleY, sinY, cosY

    rotxy e00, e02, sinY, cosY, tmp
    rotxy e10, e12, sinY, cosY, tmp
@ -181,7 +191,7 @@ matrixRotateYXZ_fast_asm:
    cmp angleX, #0
    beq .rotZ

-    sincos angleX, sinX, cosX
+    sincosLUT scLUT, angleX, sinX, cosX

    rotxy e02, e01, sinX, cosX, tmp
    rotxy e12, e11, sinX, cosX, tmp
@ -191,13 +201,13 @@ matrixRotateYXZ_fast_asm:
    cmp angleZ, #0
    beq .done

-    sincos angleZ, sinZ, cosZ
+    sincosLUT scLUT, angleZ, sinZ, cosZ

    rotxy e01, e00, sinZ, cosZ, tmp
    rotxy e11, e10, sinZ, cosZ, tmp
    rotxy e21, e20, sinZ, cosZ, tmp

-.done:  
+.done:
    ldr mm, =gMatrixPtr
    ldr mm, [mm]

@ -208,7 +218,7 @@ matrixRotateYXZ_fast_asm:
    stmia mm, {e20, e21, e22}

    fiq_off
-    ldmfd sp!, {r4-r6}
+    ldmfd sp!, {r4-r7}
    bx lr

 q   .req r0     // arg