1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-12 08:04:09 +02:00

#368 Micro-optimization: reduce register pressure and LDRs in the faceAdd* routines

This commit is contained in:
XProger
2022-02-13 08:46:25 +03:00
parent 1744204cd8
commit ae63f1c090
5 changed files with 45 additions and 46 deletions

View File

@@ -97,19 +97,25 @@
ldrh \res, [\res, \x]
.endm
// CCW skip
//   Winding test on the X/Y fields of the vertices addressed by vp0, vp1
//   and vp2. Evaluates
//       (vx0 - vx2) * (vy1 - vy0) + (vx1 - vx0) * (vy2 - vy0)
//   and branches to \skip when the LE condition holds on the flags left by
//   the final mlas.
//
// Register aliases (several names share one physical register, so the
// order of the loads and subtractions below must not be changed):
//   vx0 - vg0
//   vy0 - vg1
//   vx1 - vg2
//   vy1 - vg3
//   vx2 - vg2
//   vy2 - vg2
//
// In:       vp0, vp1, vp2 - vertex pointers (read at VERTEX_X / VERTEX_Y)
// Clobbers: vg0-vg3 and the condition flags; no extra scratch register.
.macro CCW skip
    ldrsh   vx0, [vp0, #VERTEX_X]
    ldrsh   vy0, [vp0, #VERTEX_Y]
    ldrsh   vx2, [vp2, #VERTEX_X]
    ldrsh   vy1, [vp1, #VERTEX_Y]
    rsb     vx2, vx2, vx0           // vx2 = vx0 - vx2 (reverse order for mla)
    sub     vy1, vy1, vy0           // vy1 = vy1 - vy0
    mul     vy1, vx2, vy1           // first product is kept in vy1
    ldrsh   vx1, [vp1, #VERTEX_X]   // reuses vg2: vx2 is dead after the mul
    sub     vx0, vx1, vx0           // vx0 = vx1 - vx0
    ldrsh   vy2, [vp2, #VERTEX_Y]   // reuses vg2: vx1 is dead after the sub
    sub     vy0, vy2, vy0           // vy0 = vy2 - vy0
    mlas    vy1, vx0, vy0, vy1      // vy1 = vx0 * vy0 + vy1, updates flags
    ble     \skip
.endm

View File

@@ -12,7 +12,7 @@ vp0 .req r8
vp1 .req r9
vp2 .req r10
vp3 .req r11
tmp .req r12
ot .req r12
face .req lr
vx0 .req vg0
@@ -20,7 +20,7 @@ vy0 .req vg1
vx1 .req vg2
vy1 .req vg3
vx2 .req vg2
vy2 .req vg3
vy2 .req vg2
vz0 .req vg0
vz1 .req vg1
@@ -28,14 +28,10 @@ vz2 .req vg2
vz3 .req vg3
depth .req vg0
ot .req vg1
tmp .req flags
vertices .req vg2
next .req vp0
SP_OT = 0
SP_VERTICES = 4
SP_SIZE = 8
.global faceAddMeshQuads_asm
faceAddMeshQuads_asm:
stmfd sp!, {r4-r11, lr}
@@ -47,15 +43,14 @@ faceAddMeshQuads_asm:
ldr face, [face]
ldr ot, =gOT
ldr vertices, =gVertices
stmfd sp!, {ot, vertices}
add polys, #2 // skip flags
.loop:
ldrh flags, [polys], #2
ldrb vp0, [polys], #1
ldrb vp1, [polys], #1
ldrb vp2, [polys], #1
ldrb vp3, [polys], #1
ldrb vp3, [polys], #3 // + flags
add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3
@@ -82,6 +77,7 @@ faceAddMeshQuads_asm:
orr tmp, tmp, vg2
orr tmp, tmp, vg3
tst tmp, #CLIP_MASK_VP
ldrh flags, [polys, #-8]
orrne flags, flags, #FACE_CLIPPED
// vz0 = AVG_Z4 (depth)
@@ -95,7 +91,7 @@ faceAddMeshQuads_asm:
mov depth, depth, lsr #(2 + OT_SHIFT)
// faceAdd
ldmia sp, {ot, vertices}
ldr vertices, =gVertices
sub vp0, vp0, vertices
sub vp1, vp1, vertices
@@ -117,5 +113,4 @@ faceAddMeshQuads_asm:
ldr tmp, =gFacesBase
str face, [tmp]
add sp, sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc}

View File

@@ -12,7 +12,7 @@ vp0 .req r8
vp1 .req r9
vp2 .req r10
vertices .req r11
tmp .req r12
ot .req r12
face .req lr
vx0 .req vg0
@@ -20,14 +20,14 @@ vy0 .req vg1
vx1 .req vg2
vy1 .req vg3
vx2 .req vg2
vy2 .req vg3
vy2 .req vg2
vz0 .req vg0
vz1 .req vg1
vz2 .req vg2
depth .req vg0
ot .req vg1
tmp .req flags
next .req vp0
.global faceAddMeshTriangles_asm
@@ -40,14 +40,15 @@ faceAddMeshTriangles_asm:
ldr face, =gFacesBase
ldr face, [face]
ldr ot, =gOT
ldr vertices, =gVertices
add polys, #2 // skip flags
.loop:
ldrh flags, [polys, #0]
ldrb vp0, [polys, #2]
ldrb vp1, [polys, #3]
ldrb vp2, [polys, #4]
add polys, polys, #6
ldrb vp0, [polys], #1
ldrb vp1, [polys], #1
ldrb vp2, [polys], #4 // + padding + flags
add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3
@@ -70,6 +71,7 @@ faceAddMeshTriangles_asm:
orr tmp, vg0, vg1
orr tmp, tmp, vg2
tst tmp, #CLIP_MASK_VP
ldrh flags, [polys, #-8]
orrne flags, flags, #FACE_CLIPPED
// vz0 = AVG_Z3 (depth)
@@ -91,7 +93,6 @@ faceAddMeshTriangles_asm:
orr flags, #FACE_TRIANGLE
ldr ot, =gOT
ldr next, [ot, depth, lsl #2]
str face, [ot, depth, lsl #2]
stmia face!, {flags, next, vp1, vp2}

View File

@@ -12,7 +12,7 @@ vp0 .req r8
vp1 .req r9
vp2 .req r10
vp3 .req r11
tmp .req r12
ot .req r12
face .req lr
vx0 .req vg0
@@ -20,7 +20,7 @@ vy0 .req vg1
vx1 .req vg2
vy1 .req vg3
vx2 .req vg2
vy2 .req vg3
vy2 .req vg2
vz0 .req vg0
vz1 .req vg1
@@ -28,14 +28,10 @@ vz2 .req vg2
vz3 .req vg3
depth .req vg0
ot .req vg1
tmp .req flags
vertices .req vg2
next .req vp0
SP_OT = 0
SP_VERTICES = 4
SP_SIZE = 8
.global faceAddRoomQuads_asm
faceAddRoomQuads_asm:
stmfd sp!, {r4-r11, lr}
@@ -47,15 +43,14 @@ faceAddRoomQuads_asm:
ldr face, [face]
ldr ot, =gOT
ldr vertices, =gVertices
stmfd sp!, {ot, vertices}
add polys, #2 // skip flags
.loop:
ldrh flags, [polys], #2
ldrh vp0, [polys], #2
ldrh vp1, [polys], #2
ldrh vp2, [polys], #2
ldrh vp3, [polys], #2
ldrh vp3, [polys], #4 // + flags
add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3
@@ -80,6 +75,7 @@ faceAddRoomQuads_asm:
orr tmp, tmp, vg2
orr tmp, tmp, vg3
tst tmp, #CLIP_MASK_VP
ldrh flags, [polys, #-12]
orrne flags, flags, #FACE_CLIPPED
// shift and compare VERTEX_G for flat rasterization
@@ -105,7 +101,7 @@ faceAddRoomQuads_asm:
mov depth, vz0, lsr #OT_SHIFT
// faceAdd
ldmia sp, {ot, vertices}
ldr vertices, =gVertices
sub vp0, vp0, vertices
sub vp1, vp1, vertices
@@ -127,5 +123,4 @@ faceAddRoomQuads_asm:
ldr tmp, =gFacesBase
str face, [tmp]
add sp, sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc}

View File

@@ -12,7 +12,7 @@ vp0 .req r8
vp1 .req r9
vp2 .req r10
vertices .req r11
tmp .req r12
ot .req r12
face .req lr
vx0 .req vg0
@@ -20,14 +20,14 @@ vy0 .req vg1
vx1 .req vg2
vy1 .req vg3
vx2 .req vg2
vy2 .req vg3
vy2 .req vg2
vz0 .req vg0
vz1 .req vg1
vz2 .req vg2
depth .req vg0
ot .req vg1
tmp .req flags
next .req vp0
.global faceAddRoomTriangles_asm
@@ -40,13 +40,15 @@ faceAddRoomTriangles_asm:
ldr face, =gFacesBase
ldr face, [face]
ldr ot, =gOT
ldr vertices, =gVertices
add polys, #2 // skip flags
.loop:
ldrh flags, [polys], #2
ldrh vp0, [polys], #2
ldrh vp1, [polys], #2
ldrh vp2, [polys], #2
ldrh vp2, [polys], #4 // + flags
add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3
@@ -67,6 +69,7 @@ faceAddRoomTriangles_asm:
orr tmp, vg0, vg1
orr tmp, tmp, vg2
tst tmp, #CLIP_MASK_VP
ldrh flags, [polys, #-10]
orrne flags, flags, #FACE_CLIPPED
// shift and compare VERTEX_G for flat rasterization
@@ -98,7 +101,6 @@ faceAddRoomTriangles_asm:
orr flags, #FACE_TRIANGLE
ldr ot, =gOT
ldr next, [ot, depth, lsl #2]
str face, [ot, depth, lsl #2]
stmia face!, {flags, next, vp1, vp2}