1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-07 13:46:45 +02:00

#368 GBA matrixRotateYXZ fetch sincos address lut once

This commit is contained in:
XProger
2022-12-04 03:15:53 +03:00
parent 4bb76c204f
commit 3ba5ec3818
3 changed files with 30 additions and 18 deletions

View File

@@ -49,9 +49,10 @@ faceAddMeshQuads_asm:
.loop: .loop:
ldrh vp0, [polys], #2 ldrh vp0, [polys], #2
ldrh vp2, [polys], #4 // + flags
lsr vp1, vp0, #8 lsr vp1, vp0, #8
and vp0, #0xFF and vp0, #0xFF
ldrh vp2, [polys], #4 // + flags
lsr vp3, vp2, #8 lsr vp3, vp2, #8
and vp2, #0xFF and vp2, #0xFF
@@ -85,11 +86,11 @@ faceAddMeshQuads_asm:
orrne flags, #FACE_CLIPPED orrne flags, #FACE_CLIPPED
// depth = AVG_Z4 // depth = AVG_Z4
lsl vg0, #16 lsl vg0, #16 // clip g part (high half)
add depth, vg0, vg1, lsl #16 add depth, vg0, vg1, lsl #16 // depth = vz0 + vz1
add depth, vg2, lsl #16 add depth, vg2, lsl #16 // depth += vz2
add depth, vg3, lsl #16 add depth, vg3, lsl #16 // depth += vz3
lsr depth, #(16 + 2) lsr depth, #(16 + 2) // depth /= 4
// faceAdd // faceAdd
rsb vp0, vertices, vp0, lsr #3 rsb vp0, vertices, vp0, lsr #3

View File

@@ -47,9 +47,10 @@ faceAddMeshTriangles_asm:
.loop: .loop:
ldrh vp0, [polys], #2 ldrh vp0, [polys], #2
ldrh vp2, [polys], #4 // + flags
lsr vp1, vp0, #8 lsr vp1, vp0, #8
and vp0, #0xFF and vp0, #0xFF
ldrh vp2, [polys], #4 // + flags
and vp2, #0xFF and vp2, #0xFF
add vp0, vp, vp0, lsl #3 add vp0, vp, vp0, lsl #3
@@ -77,10 +78,10 @@ faceAddMeshTriangles_asm:
orrne flags, #FACE_CLIPPED orrne flags, #FACE_CLIPPED
// depth = AVG_Z3 // depth = AVG_Z3
lsl vg0, #16 lsl vg0, #16 // clip g part (high half)
add depth, vg0, vg1, lsl #16 add depth, vg0, vg1, lsl #16 // depth = vz0 + vz1
add depth, vg2, lsl #17 add depth, vg2, lsl #17 // depth += vz2 * 2
lsr depth, #(16 + 2) lsr depth, #(16 + 2) // depth /= 4
// faceAdd // faceAdd
rsb vp0, vertices, vp0, lsr #3 rsb vp0, vertices, vp0, lsr #3

View File

@@ -8,6 +8,13 @@
mov \sin, \sin, asr #16 mov \sin, \sin, asr #16
.endm .endm
// sincosLUT lut, angle, sin, cos
//   Loads one 32-bit word from the table at \lut, indexed by \angle
//   (index scaled by 4), and splits it into two sign-extended 16-bit
//   values:
//     \sin = upper halfword of the entry
//     \cos = lower halfword of the entry
//   Writes only \sin and \cos; \lut and \angle are left untouched, so
//   the table base can be reused for subsequent lookups.
.macro sincosLUT lut, angle, sin, cos
    ldr \sin, [\lut, \angle, lsl #2]    // sin = lut[angle] (packed word)
    lsl \cos, \sin, #16                 // move the low halfword to the top...
    asr \sin, \sin, #16                 // sin = signed upper halfword
    asr \cos, \cos, #16                 // ...and shift back: cos = signed lower halfword
.endm
.macro rotxy x, y, sin, cos, t .macro rotxy x, y, sin, cos, t
mul \t, \y, \cos mul \t, \y, \cos
mla \t, \x, \sin, \t mla \t, \x, \sin, \t
@@ -126,6 +133,7 @@ e00 .req r3
e01 .req r4 e01 .req r4
e02 .req r5 e02 .req r5
e10 .req r6 e10 .req r6
scLUT .req r7
// FIQ regs // FIQ regs
e11 .req r8 e11 .req r8
e12 .req r9 e12 .req r9
@@ -151,14 +159,16 @@ matrixRotateYXZ_asm:
and angleY, mask, angleY, lsr #4 and angleY, mask, angleY, lsr #4
and angleZ, mask, angleZ, lsr #4 and angleZ, mask, angleZ, lsr #4
matrixRotateYXZ_fast_asm: matrixRotateYXZ_fast_asm: // routine for pre-shifted angles
orr mask, angleX, angleY orr mask, angleX, angleY
orrs mask, mask, angleZ orrs mask, mask, angleZ
bxeq lr bxeq lr
stmfd sp!, {r4-r6} stmfd sp!, {r4-r7}
fiq_on fiq_on
ldr scLUT, =gSinCosTable
ldr mm, =gMatrixPtr ldr mm, =gMatrixPtr
ldr mm, [mm] ldr mm, [mm]
ldmia mm, {e00, e01, e02} ldmia mm, {e00, e01, e02}
@@ -171,7 +181,7 @@ matrixRotateYXZ_fast_asm:
cmp angleY, #0 cmp angleY, #0
beq .rotX beq .rotX
sincos angleY, sinY, cosY sincosLUT scLUT, angleY, sinY, cosY
rotxy e00, e02, sinY, cosY, tmp rotxy e00, e02, sinY, cosY, tmp
rotxy e10, e12, sinY, cosY, tmp rotxy e10, e12, sinY, cosY, tmp
@@ -181,7 +191,7 @@ matrixRotateYXZ_fast_asm:
cmp angleX, #0 cmp angleX, #0
beq .rotZ beq .rotZ
sincos angleX, sinX, cosX sincosLUT scLUT, angleX, sinX, cosX
rotxy e02, e01, sinX, cosX, tmp rotxy e02, e01, sinX, cosX, tmp
rotxy e12, e11, sinX, cosX, tmp rotxy e12, e11, sinX, cosX, tmp
@@ -191,7 +201,7 @@ matrixRotateYXZ_fast_asm:
cmp angleZ, #0 cmp angleZ, #0
beq .done beq .done
sincos angleZ, sinZ, cosZ sincosLUT scLUT, angleZ, sinZ, cosZ
rotxy e01, e00, sinZ, cosZ, tmp rotxy e01, e00, sinZ, cosZ, tmp
rotxy e11, e10, sinZ, cosZ, tmp rotxy e11, e10, sinZ, cosZ, tmp
@@ -208,7 +218,7 @@ matrixRotateYXZ_fast_asm:
stmia mm, {e20, e21, e22} stmia mm, {e20, e21, e22}
fiq_off fiq_off
ldmfd sp!, {r4-r6} ldmfd sp!, {r4-r7}
bx lr bx lr
q .req r0 // arg q .req r0 // arg