1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-12 08:04:09 +02:00

#368 micro-optimization: reduce register pressure and LDRs in faceAdd* routines

This commit is contained in:
XProger
2022-02-13 08:46:25 +03:00
parent 1744204cd8
commit ae63f1c090
5 changed files with 45 additions and 46 deletions

View File

@@ -97,19 +97,25 @@
ldrh \res, [\res, \x] ldrh \res, [\res, \x]
.endm .endm
// CCW skip
// Loads the screen X/Y of the vertices pointed to by vp0, vp1, vp2 and
// evaluates
//   (vx0 - vx2) * (vy1 - vy0) + (vx1 - vx0) * (vy2 - vy0)
// then takes `ble \skip` on the flags produced by the final mlas.
// Presumably a counter-clockwise winding test (per the macro name) - confirm.
// NOTE(review): ble also reads the V flag, which mlas is not expected to
// update - confirm that V is in a known state at every use site.
//
// Register aliases used by the macro:
// vx0 - vg0
// vy0 - vg1
// vx1 - vg2
// vy1 - vg3
// vx2 - vg2
// vy2 - vg2
// As listed, vx1, vx2 and vy2 all map to vg2, so each value has to be
// consumed before the next one is loaded.
.macro CCW skip .macro CCW skip
ldrsh vx0, [vp0, #VERTEX_X] ldrsh vx0, [vp0, #VERTEX_X]
ldrsh vy0, [vp0, #VERTEX_Y] ldrsh vy0, [vp0, #VERTEX_Y]
ldrsh vx2, [vp2, #VERTEX_X] ldrsh vx2, [vp2, #VERTEX_X]
ldrsh vy1, [vp1, #VERTEX_Y] ldrsh vy1, [vp1, #VERTEX_Y]
rsb vx2, vx2, vx0 // reverse order for mla rsb vx2, vx2, vx0 // reverse order for mla
sub vy1, vy1, vy0 sub vy1, vy1, vy0
// first product: (vx0 - vx2) * (vy1 - vy0)
mul tmp, vx2, vy1 mul vy1, vx2, vy1
ldrsh vx1, [vp1, #VERTEX_X] ldrsh vx1, [vp1, #VERTEX_X]
sub vx0, vx1, vx0
ldrsh vy2, [vp2, #VERTEX_Y] ldrsh vy2, [vp2, #VERTEX_Y]
sub vx1, vx1, vx0 sub vy0, vy2, vy0
// second product (vx1 - vx0) * (vy2 - vy0) is accumulated onto the first; flags are set
sub vy2, vy2, vy0 mlas vy1, vx0, vy0, vy1
mlas tmp, vx1, vy2, tmp
ble \skip ble \skip
.endm .endm

View File

@@ -12,7 +12,7 @@ vp0 .req r8
vp1 .req r9 vp1 .req r9
vp2 .req r10 vp2 .req r10
vp3 .req r11 vp3 .req r11
tmp .req r12 ot .req r12
face .req lr face .req lr
vx0 .req vg0 vx0 .req vg0
@@ -20,7 +20,7 @@ vy0 .req vg1
vx1 .req vg2 vx1 .req vg2
vy1 .req vg3 vy1 .req vg3
vx2 .req vg2 vx2 .req vg2
vy2 .req vg3 vy2 .req vg2
vz0 .req vg0 vz0 .req vg0
vz1 .req vg1 vz1 .req vg1
@@ -28,14 +28,10 @@ vz2 .req vg2
vz3 .req vg3 vz3 .req vg3
depth .req vg0 depth .req vg0
ot .req vg1 tmp .req flags
vertices .req vg2 vertices .req vg2
next .req vp0 next .req vp0
SP_OT = 0
SP_VERTICES = 4
SP_SIZE = 8
.global faceAddMeshQuads_asm .global faceAddMeshQuads_asm
faceAddMeshQuads_asm: faceAddMeshQuads_asm:
stmfd sp!, {r4-r11, lr} stmfd sp!, {r4-r11, lr}
@@ -47,15 +43,14 @@ faceAddMeshQuads_asm:
ldr face, [face] ldr face, [face]
ldr ot, =gOT ldr ot, =gOT
ldr vertices, =gVertices
stmfd sp!, {ot, vertices} add polys, #2 // skip flags
.loop: .loop:
ldrh flags, [polys], #2
ldrb vp0, [polys], #1 ldrb vp0, [polys], #1
ldrb vp1, [polys], #1 ldrb vp1, [polys], #1
ldrb vp2, [polys], #1 ldrb vp2, [polys], #1
ldrb vp3, [polys], #1 ldrb vp3, [polys], #3 // + flags
add vp0, vp, vp0, lsl #3 add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3 add vp1, vp, vp1, lsl #3
@@ -82,6 +77,7 @@ faceAddMeshQuads_asm:
orr tmp, tmp, vg2 orr tmp, tmp, vg2
orr tmp, tmp, vg3 orr tmp, tmp, vg3
tst tmp, #CLIP_MASK_VP tst tmp, #CLIP_MASK_VP
ldrh flags, [polys, #-8]
orrne flags, flags, #FACE_CLIPPED orrne flags, flags, #FACE_CLIPPED
// vz0 = AVG_Z4 (depth) // vz0 = AVG_Z4 (depth)
@@ -95,7 +91,7 @@ faceAddMeshQuads_asm:
mov depth, depth, lsr #(2 + OT_SHIFT) mov depth, depth, lsr #(2 + OT_SHIFT)
// faceAdd // faceAdd
ldmia sp, {ot, vertices} ldr vertices, =gVertices
sub vp0, vp0, vertices sub vp0, vp0, vertices
sub vp1, vp1, vertices sub vp1, vp1, vertices
@@ -117,5 +113,4 @@ faceAddMeshQuads_asm:
ldr tmp, =gFacesBase ldr tmp, =gFacesBase
str face, [tmp] str face, [tmp]
add sp, sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc} ldmfd sp!, {r4-r11, pc}

View File

@@ -12,7 +12,7 @@ vp0 .req r8
vp1 .req r9 vp1 .req r9
vp2 .req r10 vp2 .req r10
vertices .req r11 vertices .req r11
tmp .req r12 ot .req r12
face .req lr face .req lr
vx0 .req vg0 vx0 .req vg0
@@ -20,14 +20,14 @@ vy0 .req vg1
vx1 .req vg2 vx1 .req vg2
vy1 .req vg3 vy1 .req vg3
vx2 .req vg2 vx2 .req vg2
vy2 .req vg3 vy2 .req vg2
vz0 .req vg0 vz0 .req vg0
vz1 .req vg1 vz1 .req vg1
vz2 .req vg2 vz2 .req vg2
depth .req vg0 depth .req vg0
ot .req vg1 tmp .req flags
next .req vp0 next .req vp0
.global faceAddMeshTriangles_asm .global faceAddMeshTriangles_asm
@@ -40,14 +40,15 @@ faceAddMeshTriangles_asm:
ldr face, =gFacesBase ldr face, =gFacesBase
ldr face, [face] ldr face, [face]
ldr ot, =gOT
ldr vertices, =gVertices ldr vertices, =gVertices
add polys, #2 // skip flags
.loop: .loop:
ldrh flags, [polys, #0] ldrb vp0, [polys], #1
ldrb vp0, [polys, #2] ldrb vp1, [polys], #1
ldrb vp1, [polys, #3] ldrb vp2, [polys], #4 // + padding + flags
ldrb vp2, [polys, #4]
add polys, polys, #6
add vp0, vp, vp0, lsl #3 add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3 add vp1, vp, vp1, lsl #3
@@ -70,6 +71,7 @@ faceAddMeshTriangles_asm:
orr tmp, vg0, vg1 orr tmp, vg0, vg1
orr tmp, tmp, vg2 orr tmp, tmp, vg2
tst tmp, #CLIP_MASK_VP tst tmp, #CLIP_MASK_VP
ldrh flags, [polys, #-8]
orrne flags, flags, #FACE_CLIPPED orrne flags, flags, #FACE_CLIPPED
// vz0 = AVG_Z3 (depth) // vz0 = AVG_Z3 (depth)
@@ -91,7 +93,6 @@ faceAddMeshTriangles_asm:
orr flags, #FACE_TRIANGLE orr flags, #FACE_TRIANGLE
ldr ot, =gOT
ldr next, [ot, depth, lsl #2] ldr next, [ot, depth, lsl #2]
str face, [ot, depth, lsl #2] str face, [ot, depth, lsl #2]
stmia face!, {flags, next, vp1, vp2} stmia face!, {flags, next, vp1, vp2}

View File

@@ -12,7 +12,7 @@ vp0 .req r8
vp1 .req r9 vp1 .req r9
vp2 .req r10 vp2 .req r10
vp3 .req r11 vp3 .req r11
tmp .req r12 ot .req r12
face .req lr face .req lr
vx0 .req vg0 vx0 .req vg0
@@ -20,7 +20,7 @@ vy0 .req vg1
vx1 .req vg2 vx1 .req vg2
vy1 .req vg3 vy1 .req vg3
vx2 .req vg2 vx2 .req vg2
vy2 .req vg3 vy2 .req vg2
vz0 .req vg0 vz0 .req vg0
vz1 .req vg1 vz1 .req vg1
@@ -28,14 +28,10 @@ vz2 .req vg2
vz3 .req vg3 vz3 .req vg3
depth .req vg0 depth .req vg0
ot .req vg1 tmp .req flags
vertices .req vg2 vertices .req vg2
next .req vp0 next .req vp0
SP_OT = 0
SP_VERTICES = 4
SP_SIZE = 8
.global faceAddRoomQuads_asm .global faceAddRoomQuads_asm
faceAddRoomQuads_asm: faceAddRoomQuads_asm:
stmfd sp!, {r4-r11, lr} stmfd sp!, {r4-r11, lr}
@@ -47,15 +43,14 @@ faceAddRoomQuads_asm:
ldr face, [face] ldr face, [face]
ldr ot, =gOT ldr ot, =gOT
ldr vertices, =gVertices
stmfd sp!, {ot, vertices} add polys, #2 // skip flags
.loop: .loop:
ldrh flags, [polys], #2
ldrh vp0, [polys], #2 ldrh vp0, [polys], #2
ldrh vp1, [polys], #2 ldrh vp1, [polys], #2
ldrh vp2, [polys], #2 ldrh vp2, [polys], #2
ldrh vp3, [polys], #2 ldrh vp3, [polys], #4 // + flags
add vp0, vp, vp0, lsl #3 add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3 add vp1, vp, vp1, lsl #3
@@ -80,6 +75,7 @@ faceAddRoomQuads_asm:
orr tmp, tmp, vg2 orr tmp, tmp, vg2
orr tmp, tmp, vg3 orr tmp, tmp, vg3
tst tmp, #CLIP_MASK_VP tst tmp, #CLIP_MASK_VP
ldrh flags, [polys, #-12]
orrne flags, flags, #FACE_CLIPPED orrne flags, flags, #FACE_CLIPPED
// shift and compare VERTEX_G for flat rasterization // shift and compare VERTEX_G for flat rasterization
@@ -105,7 +101,7 @@ faceAddRoomQuads_asm:
mov depth, vz0, lsr #OT_SHIFT mov depth, vz0, lsr #OT_SHIFT
// faceAdd // faceAdd
ldmia sp, {ot, vertices} ldr vertices, =gVertices
sub vp0, vp0, vertices sub vp0, vp0, vertices
sub vp1, vp1, vertices sub vp1, vp1, vertices
@@ -127,5 +123,4 @@ faceAddRoomQuads_asm:
ldr tmp, =gFacesBase ldr tmp, =gFacesBase
str face, [tmp] str face, [tmp]
add sp, sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc} ldmfd sp!, {r4-r11, pc}

View File

@@ -12,7 +12,7 @@ vp0 .req r8
vp1 .req r9 vp1 .req r9
vp2 .req r10 vp2 .req r10
vertices .req r11 vertices .req r11
tmp .req r12 ot .req r12
face .req lr face .req lr
vx0 .req vg0 vx0 .req vg0
@@ -20,14 +20,14 @@ vy0 .req vg1
vx1 .req vg2 vx1 .req vg2
vy1 .req vg3 vy1 .req vg3
vx2 .req vg2 vx2 .req vg2
vy2 .req vg3 vy2 .req vg2
vz0 .req vg0 vz0 .req vg0
vz1 .req vg1 vz1 .req vg1
vz2 .req vg2 vz2 .req vg2
depth .req vg0 depth .req vg0
ot .req vg1 tmp .req flags
next .req vp0 next .req vp0
.global faceAddRoomTriangles_asm .global faceAddRoomTriangles_asm
@@ -40,13 +40,15 @@ faceAddRoomTriangles_asm:
ldr face, =gFacesBase ldr face, =gFacesBase
ldr face, [face] ldr face, [face]
ldr ot, =gOT
ldr vertices, =gVertices ldr vertices, =gVertices
add polys, #2 // skip flags
.loop: .loop:
ldrh flags, [polys], #2
ldrh vp0, [polys], #2 ldrh vp0, [polys], #2
ldrh vp1, [polys], #2 ldrh vp1, [polys], #2
ldrh vp2, [polys], #2 ldrh vp2, [polys], #4 // + flags
add vp0, vp, vp0, lsl #3 add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3 add vp1, vp, vp1, lsl #3
@@ -67,6 +69,7 @@ faceAddRoomTriangles_asm:
orr tmp, vg0, vg1 orr tmp, vg0, vg1
orr tmp, tmp, vg2 orr tmp, tmp, vg2
tst tmp, #CLIP_MASK_VP tst tmp, #CLIP_MASK_VP
ldrh flags, [polys, #-10]
orrne flags, flags, #FACE_CLIPPED orrne flags, flags, #FACE_CLIPPED
// shift and compare VERTEX_G for flat rasterization // shift and compare VERTEX_G for flat rasterization
@@ -98,7 +101,6 @@ faceAddRoomTriangles_asm:
orr flags, #FACE_TRIANGLE orr flags, #FACE_TRIANGLE
ldr ot, =gOT
ldr next, [ot, depth, lsl #2] ldr next, [ot, depth, lsl #2]
str face, [ot, depth, lsl #2] str face, [ot, depth, lsl #2]
stmia face!, {flags, next, vp1, vp2} stmia face!, {flags, next, vp1, vp2}